FullBlogPlugin: drupal2fullblog

File drupal2fullblog, 10.6 KB (added by Roy Marples, 12 years ago)

Drupal 6.x to FullBlog migration script (Python)

Line 
1#!/usr/pkg/bin/python2.5
2
3# drupal2fullblog uses Html2Wiki here.
4# find my code below
5
6# Copyright (C) 2006 Samuel Abels, http://debain.org
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License version 2, as
10# published by the Free Software Foundation.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20
21import HTMLParser, re, sys
22
class Cell(object):
    """One table cell collected while a <tr> is being parsed.

    The values below are class-level defaults; assigning to (or using
    ``+=`` on) an attribute of an instance creates a per-instance value
    and leaves the defaults untouched.
    """

    # Text collected for the cell so far.
    data    = ''
    # Number of columns the cell spans.
    colspan = 1
    # Number of rows the cell spans.
    rowspan = 1
27
class Html2Wiki(HTMLParser.HTMLParser):
    """Translate an HTML fragment into wiki markup.

    Usage: create an instance, pass the HTML to feed(), call close() and
    read the converted text from the ``wiki`` attribute.  Text that has
    been produced but not yet committed lives in ``buffer``; close()
    moves whatever is left there into ``wiki``.
    """

    # Tags whose start handler needs the attribute list.
    _ATTR_TAGS = ('th', 'td', 'a', 'span')
    # Tags whose start handler takes no arguments.
    _PLAIN_TAGS = ('table', 'tr', 'h1', 'h2', 'h3', 'ul', 'ol', 'li',
                   'i', 'b', 'u', 'pre', 'strike')
    # Every tag that has an end handler.
    _END_TAGS = _ATTR_TAGS + _PLAIN_TAGS
    # Markup character emitted for <span class="...">.
    _SPAN_MARKUP = {'underline': '_', 'bold': '*', 'italic': '/'}

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.wiki       = ''      # committed output
        self.buffer     = ''      # pending output, see __flush()
        self.indent     = 0       # indent level used by __output()
        self.linebreak  = '\n'
        self.rows       = []
        self.cells      = []      # Cell objects of the row being parsed
        self.in_table   = False
        self.in_td      = False
        self.in_heading = False
        self.in_ul      = False
        self.in_ol      = False
        self.in_li      = False
        self.in_a       = False
        self.in_pre     = False
        self.last_href  = ''
        self.span_path  = []      # markup chars of the currently open spans

    def __output(self, text, linebreak = True):
        """Append an indented line to the buffer.

        The line break is only added when ``linebreak`` is true.
        """
        self.buffer += ' ' * (self.indent * 2) + text
        if linebreak:
            self.buffer += self.linebreak

    def __flush(self):
        """Move the pending buffer into the committed output."""
        self.wiki  += self.buffer
        self.buffer = ''

    def __span_value(self, value):
        """Return a colspan/rowspan attribute as an int of at least 1.

        Missing or non-numeric values fall back to 1 instead of raising.
        """
        try:
            return max(int(value), 1)
        except (TypeError, ValueError):
            return 1

    def close(self):
        """Finish parsing and commit any output still left in the buffer.

        Without this, markup emitted by the last end tag (the closing
        '=' of a heading, the ']' of a link, ...) never reaches ``wiki``.
        """
        HTMLParser.HTMLParser.close(self)
        self.__flush()

    def handle_starttag(self, tag, attrs):
        """Dispatch an opening tag to its start_* handler."""
        if tag == 'br':
            self.newline()
        elif tag in self._ATTR_TAGS:
            getattr(self, 'start_' + tag)(attrs)
        elif tag in self._PLAIN_TAGS:
            getattr(self, 'start_' + tag)()

    def handle_endtag(self, tag):
        """Dispatch a closing tag to its end_* handler."""
        if tag in self._END_TAGS:
            getattr(self, 'end_' + tag)()

    def start_h1(self):
        self.buffer += '='

    def end_h1(self):
        self.buffer += '=\n\n'

    def start_h2(self):
        self.buffer += '=='

    def end_h2(self):
        self.buffer += '==\n\n'

    def start_h3(self):
        self.buffer += '==='

    def end_h3(self):
        self.buffer += '===\n\n'

    def start_ul(self):
        self.in_ul = True

    def end_ul(self):
        self.in_ul = False

    def start_ol(self):
        self.in_ol = True

    def end_ol(self):
        self.in_ol = False

    def start_li(self):
        """Emit the bullet for a list item; '#' for ordered, '*' otherwise."""
        self.in_li = True
        if self.in_ol:
            self.buffer += '# '
        elif self.in_ul:
            self.buffer += '* '

    def end_li(self):
        self.in_li = False

    def start_i(self):
        self.buffer += '/'

    def end_i(self):
        self.buffer += '/'

    def start_b(self):
        self.buffer += '*'

    def end_b(self):
        self.buffer += '*'

    def start_u(self):
        self.buffer += '_'

    def end_u(self):
        self.buffer += '_'

    def start_a(self, attrs):
        """Open a link: emit '[' followed by the href (if any)."""
        self.in_a = True
        self.last_href = ''
        for key, value in attrs:
            if key == 'href':
                # A value-less attribute is reported as None.
                self.last_href = value or ''
        self.buffer += '[' + self.last_href

    def end_a(self):
        self.in_a = False
        self.buffer += ']'

    def start_pre(self):
        self.in_pre = True
        self.buffer += '#Text\n'

    def end_pre(self):
        self.in_pre = False
        self.buffer += '#End\n'

    def start_strike(self):
        self.buffer += '-'

    def end_strike(self):
        self.buffer += '-'

    def start_span(self, attrs):
        """Open a span, translating a known class into its markup char."""
        cls = None
        for key, value in attrs:
            if key == 'class':
                cls = value
        char = self._SPAN_MARKUP.get(cls, '')
        self.buffer += char
        # Remember the char so the matching </span> can close it.
        self.span_path.append(char)

    def end_span(self):
        # Ignore a </span> that has no matching opening tag.
        if self.span_path:
            self.buffer += self.span_path.pop()

    def start_table(self):
        self.in_table = True

    def start_tr(self):
        pass

    def start_th(self, attrs):
        self.in_heading = True
        self.start_td(attrs)

    def start_td(self, attrs):
        """Begin a new cell; anything buffered so far is committed first."""
        self.__flush()
        self.in_td = True
        cell       = Cell()
        for key, value in attrs:
            if key == 'rowspan':
                cell.rowspan = self.__span_value(value)
            elif key == 'colspan':
                cell.colspan = self.__span_value(value)
        self.cells.append(cell)

    def handle_data(self, data):
        """Route character data according to the current context."""
        if not self.in_pre:
            data = data.replace('\n', '')
        if self.in_a:
            # Link text identical to the target is not repeated.
            if data == self.last_href:
                return
            self.buffer += ' '
        if self.in_li:
            self.buffer += data.strip() + '\n'
        if self.in_ul or self.in_ol:
            self.__flush()
        elif self.in_td:
            # Cell text stays buffered until end_td() collects it.
            self.buffer += data
        elif not self.in_table:
            self.buffer += data
            self.__flush()

    def end_td(self):
        """Close a cell, moving the buffered text into it."""
        # A stray </td> with no open cell is ignored.
        if self.cells:
            self.cells[-1].data += self.buffer.strip()
            self.buffer = ''
        self.in_td = False

    def end_th(self):
        self.end_td()

    def end_tr(self):
        """Emit the collected cells of a row as '#Heading' or '#Row'."""
        if not self.cells:
            return
        if self.in_heading:
            self.__output('#Heading')
            self.in_heading = False
        else:
            self.__output('#Row')
        self.indent += 1
        line = ('|' * self.cells[0].colspan) + ' ' + self.cells[0].data.strip()
        for cell in self.cells[1:]:
            line += ' ' + ('|' * cell.colspan) + ' ' + cell.data.strip()
        if len(line) <= 80:
            self.__output(line)
        else:
            # Row too long for one line: one cell per line instead.
            for cell in self.cells:
                self.__output(('|' * cell.colspan) + ' ' + cell.data.strip())
        self.cells   = []
        self.indent -= 1
        self.__flush()

    def end_table(self):
        self.in_table = False
        self.__flush()

    def newline(self):
        self.buffer += '\n'
272
273
274# drupal2fullblog
275# Copyright 2009 Roy Marples <roy@marples.name>
276# BSD-2 licensed
277# I'm crap at python, but this works for me :)
278
279import time, datetime
280from pyPgSQL import PgSQL
281
# Tunables
drupal_db = 'drupal'
trac_db = 'trac.blog'
home = "http://roy.marples.name"
node = home + "/node/"
blog = home + "/projects/blog"

# Opening of a site-relative Drupal node link, e.g. <a href="/node/42">
NODE_LINK = "<a href=\"/node/"


def post_name(title, created):
    """Return the FullBlog post name for a Drupal node.

    The name is the creation date as YYYY/MM/DD/ followed by the
    lower-cased title up to its first space (the whole title when it has
    no space, or starts with one).  ``created`` is a Unix timestamp.
    """
    path = datetime.datetime.fromtimestamp(created).strftime("%Y/%m/%d/")
    title = title.lower()
    sp = title.find(" ")
    if sp < 1:
        return path + title
    return path + title[0:sp]


def rewrite_node_links(html, lookup):
    """Point every <a href="/node/N"> link at the matching blog post.

    ``lookup(nid)`` returns the post name for node id ``nid`` (a string),
    or None when there is no such node; such links are left untouched.
    The text is scanned once, so /node/1 can never clobber /node/12.
    """
    out = []
    pos = 0
    while True:
        start = html.find(NODE_LINK, pos)
        if start == -1:
            break
        id_start = start + len(NODE_LINK)
        end = html.find("\"", id_start)
        if end == -1:
            # Unterminated attribute: leave the remainder alone.
            break
        out.append(html[pos:start])
        target = lookup(html[id_start:end])
        if target is None:
            out.append(html[start:end])
        else:
            out.append("<a href=\"/blog/" + target)
        pos = end
    out.append(html[pos:])
    return "".join(out)


def absolutize_links(html):
    """Make site-relative links absolute; trac does not like relative links."""
    html = html.replace("<a href=\"/blog/", "<a href=\"" + blog + "/blog/")
    # Keep the slash that separates the host from the path.
    return html.replace("<a href=\"/", "<a href=\"" + home + "/")


def html_to_wiki(html, lookup):
    """Convert a Drupal HTML body (post or comment) into wiki markup."""
    # Make absolute node links relative so they are rewritten as well
    html = html.replace("<a href=\"" + node, NODE_LINK)
    html = rewrite_node_links(html, lookup)
    html = absolutize_links(html)

    # Code tags
    html = html.replace("<code>", "{{{\r\n")
    html = html.replace("</code>", "\r\n}}}")

    # Special case stuff
    html = html.replace("NetworkManager", "!NetworkManager")

    # Markup
    parser = Html2Wiki()
    parser.feed(html)
    parser.close()
    return parser.wiki


def comment_author(name, mail):
    """Combine a commenter's name and mail address into one author string."""
    if mail:
        if not name:
            return mail
        return name + " <" + mail + ">"
    return name


def node_post_name(cursor, nid):
    """Look up node ``nid`` in Drupal and return its post name, or None."""
    if not nid.isdigit():
        # e.g. "12#comment-3" or "12/edit": not a plain node id.
        return None
    cursor.execute("SELECT title, created FROM node WHERE nid=%s", (nid,))
    row = cursor.fetchone()
    if row is None:
        return None
    return post_name(row["title"], row["created"])


def main():
    """Copy all Drupal blog posts and their comments into FullBlog."""
    drupal = PgSQL.connect(database=drupal_db)
    trac = PgSQL.connect(database=trac_db)
    try:
        dc = drupal.cursor()
        tc = trac.cursor()

        def lookup(nid):
            return node_post_name(dc, nid)

        tc.execute("DELETE FROM fullblog_posts")
        # NOTE(review): a node with several revisions or several terms
        # presumably yields one row per combination here -- confirm
        # against the data before relying on this for such nodes.
        dc.execute("SELECT users.name AS author, node.title, body, node.created, node.changed, term_data.name AS category"
            " FROM node"
            " JOIN node_revisions ON node_revisions.nid=node.nid"
            " JOIN term_node ON term_node.nid=node.nid"
            " JOIN term_data ON term_data.tid=term_node.tid"
            " JOIN users ON users.uid=node.uid"
            " WHERE type='blog'"
            " ORDER BY created")
        rows = dc.fetchall()
        for author, title, body, created, changed, category in rows:
            npath = post_name(title, created)
            # NOTE(review): the stored title is lower-cased, as before;
            # confirm whether the original case was meant to be kept.
            title = title.lower()
            tc.execute("INSERT INTO fullblog_posts (name, version, title, body, publish_time, version_time, version_comment, version_author, author, categories)"
                " VALUES(%s, 1, %s, %s, %s, %s, '', %s, %s, %s)",
                (npath, title, html_to_wiki(body, lookup), created, changed, author, author, category))

        tc.execute("DELETE FROM fullblog_comments")
        dc.execute("SELECT name, mail, node.title, node.created, comments.comment, timestamp"
            " FROM comments"
            " JOIN node ON node.nid=comments.nid"
            " WHERE type='blog'"
            " ORDER BY timestamp")
        rows = dc.fetchall()
        # The table was just emptied, so the per-post comment number can
        # be counted here instead of asking the database every time.
        counts = {}
        for name, mail, title, created, comment, timestamp in rows:
            npath = post_name(title, created)
            number = counts.get(npath, 0) + 1
            counts[npath] = number
            tc.execute("INSERT INTO fullblog_comments (name, number, comment, author, time)"
                " VALUES(%s, %s, %s, %s, %s)",
                (npath, number, html_to_wiki(comment, lookup), comment_author(name, mail), timestamp))

        trac.commit()
    finally:
        trac.close()
        drupal.close()


if __name__ == "__main__":
    main()
405    tc.execute("SELECT COUNT(*) FROM fullblog_comments WHERE name=%s", (npath))
406    nr = tc.fetchone()
407    tc.execute("INSERT INTO fullblog_comments (name, number, comment, author, time)"
408        " VALUES(%s, %s, %s, %s, %s)",
409        (npath, nr[0] + 1, parser.wiki, name, timestamp))
410
411trac.commit()