1 | #!/usr/pkg/bin/python2.5 |
---|
2 | |
---|
3 | # drupal2fullblog uses Html2Wiki here. |
---|
4 | # find my code below |
---|
5 | |
---|
6 | # Copyright (C) 2006 Samuel Abels, http://debain.org |
---|
7 | # |
---|
8 | # This program is free software; you can redistribute it and/or modify |
---|
9 | # it under the terms of the GNU General Public License version 2, as |
---|
10 | # published by the Free Software Foundation. |
---|
11 | # |
---|
12 | # This program is distributed in the hope that it will be useful, |
---|
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | # GNU General Public License for more details. |
---|
16 | # |
---|
17 | # You should have received a copy of the GNU General Public License |
---|
18 | # along with this program; if not, write to the Free Software |
---|
19 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
20 | |
---|
21 | import HTMLParser, re, sys |
---|
22 | |
---|
class Cell(object):
    """
    One table cell collected while a table row is being parsed.

    data    -- the converted text content of the cell
    colspan -- number of columns the cell spans
    rowspan -- number of rows the cell spans
    """
    # Class-level defaults, kept so that code reading Cell.data,
    # Cell.colspan or Cell.rowspan directly continues to work.
    data = ''
    colspan = 1
    rowspan = 1

    def __init__(self, data = '', colspan = 1, rowspan = 1):
        """
        Creates a cell. Calling Cell() without arguments yields an
        empty cell that spans a single column and a single row.
        """
        self.data = data
        self.colspan = colspan
        self.rowspan = rowspan

    def __repr__(self):
        return 'Cell(data=%r, colspan=%r, rowspan=%r)' % (self.data,
                                                          self.colspan,
                                                          self.rowspan)
---|
27 | |
---|
class Html2Wiki(HTMLParser.HTMLParser):
    """
    Converts a small subset of HTML into plain-text wiki markup.

    Pass HTML to feed(). Converted text accumulates in self.wiki, while
    markup that was emitted after the most recent flush is held in
    self.buffer. Call close() once all input has been fed to move the
    pending buffer into self.wiki.

    Handled tags: table, tr, th, td, h1, h2, h3, ul, ol, li, i, b, u, a,
    pre, strike, span and br. Any other tag is ignored.

    This class defines no handle_charref()/handle_entityref(), so what
    happens to character and entity references depends on the
    HTMLParser base class in use.
    """

    # Tags whose start handler receives the attribute list.
    _attr_tags = ('th', 'td', 'a', 'span')

    # Tags whose start handler takes no argument.
    _plain_tags = ('table', 'tr', 'h1', 'h2', 'h3', 'ul', 'ol', 'li',
                   'i', 'b', 'u', 'pre', 'strike')

    # Marker emitted for <span class="..."> depending on the class.
    _span_chars = {'underline': '_', 'bold': '*', 'italic': '/'}

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.wiki       = ''     # Finished output.
        self.buffer     = ''     # Pending output, not yet in self.wiki.
        self.indent     = 0      # Indent level applied by __output().
        self.linebreak  = '\n'   # Line terminator used by __output().
        self.rows       = []     # Not used by this class.
        self.cells      = []     # Cells of the table row being read.
        self.in_table   = False
        self.in_td      = False
        self.in_heading = False  # Current row contains a <th>.
        self.in_ul      = False
        self.in_ol      = False
        self.in_li      = False
        self.in_a       = False
        self.in_pre     = False
        self.last_href  = ''     # href of the most recent <a>.
        self.span_path  = []     # Closing markers of the open <span>s.

    def __output(self, text, linebreak = True):
        """
        Appends the given text to the buffer, indented by two spaces per
        indent level. The line terminator is appended unless linebreak
        is False.
        """
        self.buffer += ' ' * (self.indent * 2)
        self.buffer += text
        if linebreak:
            self.buffer += self.linebreak

    def __flush(self):
        """
        Moves the pending buffer to the end of self.wiki.
        """
        self.wiki  += self.buffer
        self.buffer = ''

    def __span(self, value):
        """
        Returns the given colspan/rowspan attribute value as an integer.
        Missing, non-numeric or non-positive values yield 1.
        """
        try:
            count = int(value)
        except (TypeError, ValueError):
            return 1
        if count < 1:
            return 1
        return count

    def close(self):
        """
        Finishes parsing and flushes the pending buffer.

        Closing markers such as '*', ']' or '=' are only buffered by the
        end tag handlers; without this flush anything emitted after the
        last piece of text would never reach self.wiki.
        """
        HTMLParser.HTMLParser.close(self)
        self.__flush()

    def handle_starttag(self, tag, attrs):
        """
        Dispatches an opening tag to the matching start_<tag>() method.
        Unknown tags are ignored.
        """
        if tag == 'br':
            self.newline()
        elif tag in self._attr_tags:
            getattr(self, 'start_' + tag)(attrs)
        elif tag in self._plain_tags:
            getattr(self, 'start_' + tag)()

    def handle_endtag(self, tag):
        """
        Dispatches a closing tag to the matching end_<tag>() method.
        Unknown tags are ignored.
        """
        if tag in self._attr_tags or tag in self._plain_tags:
            getattr(self, 'end_' + tag)()

    def start_h1(self):
        self.buffer += '='

    def end_h1(self):
        self.buffer += '=\n\n'

    def start_h2(self):
        self.buffer += '=='

    def end_h2(self):
        self.buffer += '==\n\n'

    def start_h3(self):
        self.buffer += '==='

    def end_h3(self):
        self.buffer += '===\n\n'

    def start_ul(self):
        self.in_ul = True

    def end_ul(self):
        self.in_ul = False

    def start_ol(self):
        self.in_ol = True

    def end_ol(self):
        self.in_ol = False

    def start_li(self):
        """
        Emits the list item marker: '# ' in an ordered list, '* ' in an
        unordered one, nothing outside of a list.
        """
        self.in_li = True
        if self.in_ol:
            self.buffer += '# '
        elif self.in_ul:
            self.buffer += '* '

    def end_li(self):
        self.in_li = False

    def start_i(self):
        self.buffer += '/'

    def end_i(self):
        self.buffer += '/'

    def start_b(self):
        self.buffer += '*'

    def end_b(self):
        self.buffer += '*'

    def start_u(self):
        self.buffer += '_'

    def end_u(self):
        self.buffer += '_'

    def start_a(self, attrs):
        """
        Opens a link as '[' followed by the href. If href is given more
        than once the last one wins; a missing href yields just '['.
        """
        self.in_a      = True
        self.last_href = ''
        for key, value in attrs:
            if key == 'href':
                # The value is None for an attribute without a value.
                self.last_href = value or ''
        self.buffer += '[' + self.last_href

    def end_a(self):
        self.in_a    = False
        self.buffer += ']'

    def start_pre(self):
        self.in_pre  = True
        self.buffer += '#Text\n'

    def end_pre(self):
        self.in_pre  = False
        self.buffer += '#End\n'

    def start_strike(self):
        self.buffer += '-'

    def end_strike(self):
        self.buffer += '-'

    def start_span(self, attrs):
        """
        Emits the marker that belongs to the class of the span (if any)
        and remembers it, so that end_span() can emit the same marker.
        """
        cls  = dict(attrs).get('class')
        char = self._span_chars.get(cls, '')
        self.buffer += char
        self.span_path.append(char)

    def end_span(self):
        # A stray </span> has no marker to close.
        if self.span_path:
            self.buffer += self.span_path.pop()

    def start_table(self):
        self.in_table = True

    def start_tr(self):
        pass

    def start_th(self, attrs):
        """
        Like start_td(), but also marks the current row as a heading.
        """
        self.in_heading = True
        self.start_td(attrs)

    def start_td(self, attrs):
        """
        Flushes the pending output and begins collecting a new cell.
        """
        self.__flush()
        self.in_td = True
        cell = Cell()
        for key, value in attrs:
            if key == 'rowspan':
                cell.rowspan = self.__span(value)
            elif key == 'colspan':
                cell.colspan = self.__span(value)
        self.cells.append(cell)

    def handle_data(self, data):
        """
        Adds text content to the output.

        Newlines are removed unless inside of <pre>. Inside of a link
        the text is dropped if it equals the href, else it is separated
        from the href by a space. List item text is stripped and ends
        the line. Text in a table but outside of any cell is dropped.
        """
        if not self.in_pre:
            data = data.replace('\n', '')
        if self.in_a:
            if data == self.last_href:
                return
            self.buffer += ' '
        if self.in_li:
            self.buffer += data.strip() + '\n'
            if self.in_ul or self.in_ol:
                self.__flush()
        elif self.in_td:
            # Collected by end_td().
            self.buffer += data
        elif not self.in_table:
            self.buffer += data
            self.__flush()

    def end_td(self):
        """
        Stores the buffered text in the current cell. A stray </td>
        without an open cell leaves the buffer untouched.
        """
        self.in_td = False
        if not self.cells:
            return
        self.cells[-1].data += self.buffer.strip()
        self.buffer = ''

    def end_th(self):
        self.end_td()

    def end_tr(self):
        """
        Emits the collected cells as a '#Heading' or '#Row' entry.

        Each cell is written as one '|' per spanned column, a space and
        the cell text. All cells share one line if that line has at
        most 80 characters, else each cell gets a line of its own.
        """
        if not self.cells:
            return
        if self.in_heading:
            self.__output('#Heading')
            self.in_heading = False
        else:
            self.__output('#Row')
        fields = [('|' * cell.colspan) + ' ' + cell.data.strip()
                  for cell in self.cells]
        line = ' '.join(fields)
        self.indent += 1
        if len(line) <= 80:
            self.__output(line)
        else:
            for field in fields:
                self.__output(field)
        self.indent -= 1
        self.cells = []
        self.__flush()

    def end_table(self):
        self.in_table = False
        self.__flush()

    def newline(self):
        self.buffer += '\n'
---|
272 | |
---|
273 | |
---|
274 | # drupal2fullblog |
---|
275 | # Copyright 2009 Roy Marples <roy@marples.name> |
---|
276 | # BSD-2 licensed |
---|
277 | # I'm crap at python, but this works for me :) |
---|
278 | |
---|
279 | import time, datetime |
---|
280 | from pyPgSQL import PgSQL |
---|
281 | |
---|
# Tunables
drupal = PgSQL.connect(database='drupal')
trac   = PgSQL.connect(database='trac.blog')
home   = "http://roy.marples.name"
node   = home + "/node/"
blog   = home + "/projects/blog"

dc = drupal.cursor()
tc = trac.cursor()

# Opening of an anchor that points to a numeric Drupal node. The closing
# quote is part of the match, so that /node/1 is never mistaken for the
# beginning of /node/12.
node_link = re.compile(r'<a href="/node/(\d+)"')


def blog_name(created, title):
    """
    Returns the name of a blog post: the creation date as YYYY/MM/DD/
    followed by the first word of the lower-cased title. The whole title
    is used if it contains no space or starts with one.
    """
    name = datetime.datetime.fromtimestamp(created).strftime("%Y/%m/%d/")
    title = title.lower()
    sp = title.find(" ")
    if sp < 1:
        return name + title
    return name + title[0:sp]


def node_to_blog(match):
    """
    Substitution callback for node_link: returns the anchor opening with
    the /node/N target replaced by the /blog/ name of that node. The
    match is returned unchanged if the node does not exist.
    """
    dc.execute("SELECT title, created FROM node WHERE nid=%s",
               (match.group(1),))
    nr = dc.fetchone()
    if nr is None:
        return match.group(0)
    # Uses the same naming as the posts themselves, so the link resolves.
    return "<a href=\"/blog/" + blog_name(nr["created"], nr["title"]) + "\""


def to_wiki(html):
    """
    Rewrites the links in the given Drupal HTML and returns the result
    converted to wiki markup.
    """
    # Make relative for the below code
    html = html.replace("<a href=\"" + node, "<a href=\"/node/")

    # Convert /node/n links to blog links
    html = node_link.sub(node_to_blog, html)

    # trac does not like relative links. The slash consumed by the
    # second pattern has to be put back after the host name.
    html = html.replace("<a href=\"/blog/", "<a href=\"" + blog + "/blog/")
    html = html.replace("<a href=\"/", "<a href=\"" + home + "/")

    # Code tags
    html = html.replace("<code>", "{{{\r\n")
    html = html.replace("</code>", "\r\n}}}")

    # Special case stuff
    html = html.replace("NetworkManager", "!NetworkManager")

    # Markup
    parser = Html2Wiki()
    parser.feed(html)
    # Closing markers emitted after the last piece of text may still be
    # waiting in the buffer, so it is part of the result.
    return parser.wiki + parser.buffer


# Posts
tc.execute("DELETE FROM fullblog_posts")
# NOTE(review): a node with several terms or several revisions presumably
# yields several rows here, and thus several posts with the same name;
# verify against the data.
dc.execute("SELECT users.name AS author, node.title, body, node.created, node.changed, term_data.name AS category"
           " FROM node"
           " JOIN node_revisions ON node_revisions.nid=node.nid"
           " JOIN term_node ON term_node.nid=node.nid"
           " JOIN term_data ON term_data.tid=term_node.tid"
           " JOIN users ON users.uid=node.uid"
           " WHERE type='blog'"
           " ORDER BY created")
r = dc.fetchall()
for author, title, body, created, changed, category in r:
    # Create a blog link
    npath = blog_name(created, title)

    # NOTE(review): the title is stored lower-cased, as it always was;
    # confirm that this is intended and not a leftover of the link code.
    title = title.lower()

    tc.execute("INSERT INTO fullblog_posts (name, version, title, body, publish_time, version_time, version_comment, version_author, author, categories)"
               " VALUES(%s, 1, %s, %s, %s, %s, '', %s, %s, %s)",
               (npath, title, to_wiki(body), created, changed, author, author, category))

# Comments
tc.execute("DELETE FROM fullblog_comments")
dc.execute("SELECT name, mail, node.title, node.created, comments.comment, timestamp"
           " FROM comments"
           " JOIN node ON node.nid=comments.nid"
           " WHERE type='blog'"
           " ORDER BY timestamp")
r = dc.fetchall()
for name, mail, title, created, comment, timestamp in r:
    # Create a blog link
    npath = blog_name(created, title)

    wiki = to_wiki(comment)

    # Show the author as "name <mail>", or whichever of the two exists.
    if mail:
        if name:
            name += " <" + mail + ">"
        else:
            name = mail

    # Comments are numbered per post, starting with 1.
    tc.execute("SELECT COUNT(*) FROM fullblog_comments WHERE name=%s", (npath,))
    nr = tc.fetchone()
    tc.execute("INSERT INTO fullblog_comments (name, number, comment, author, time)"
               " VALUES(%s, %s, %s, %s, %s)",
               (npath, nr[0] + 1, wiki, name, timestamp))

trac.commit()
---|