Context Navigation

Back to FullBlogPlugin

FullBlogPlugin: drupal2fullblog

File drupal2fullblog, 10.6 KB (added by Roy Marples, 12 years ago)
Drupal-6.x to FullBlog python script

Line
1	#!/usr/pkg/bin/python2.5
2
3	# drupal2fullblog uses Html2Wiki here.
4	# find my code below
5
6	# Copyright (C) 2006 Samuel Abels, http://debain.org
7	#
8	# This program is free software; you can redistribute it and/or modify
9	# it under the terms of the GNU General Public License version 2, as
10	# published by the Free Software Foundation.
11	#
12	# This program is distributed in the hope that it will be useful,
13	# but WITHOUT ANY WARRANTY; without even the implied warranty of
14	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	# GNU General Public License for more details.
16	#
17	# You should have received a copy of the GNU General Public License
18	# along with this program; if not, write to the Free Software
19	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21	import HTMLParser, re, sys
22
class Cell:
    """One table cell collected while a row is being parsed.

    The values below are class-level defaults; assigning to an attribute on
    an instance overrides the default for that instance only.
    """

    # Text accumulated for the cell.
    data = ''
    # A cell covers a single row and a single column unless told otherwise.
    rowspan = colspan = 1
27
class Html2Wiki(HTMLParser.HTMLParser):
    """Translate an HTML fragment into wiki markup.

    Call feed() with HTML text; the converted markup accumulates in the
    ``wiki`` attribute.  Output is staged in ``buffer`` and appended to
    ``wiki`` on flush.  Table cell text is collected per cell and written
    when the row ends.  Tags without a handler are ignored; their text is
    still passed to handle_data().
    """

    # Tags that have a start_<tag>/end_<tag> handler pair.
    _TAGS = frozenset(['table', 'tr', 'th', 'td', 'h1', 'h2', 'h3', 'ul',
                       'ol', 'li', 'i', 'b', 'u', 'a', 'pre', 'strike',
                       'span'])
    # Tags whose start handler takes the attribute list.
    _TAGS_WITH_ATTRS = frozenset(['th', 'td', 'a', 'span'])

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.wiki = ''          # finished output
        self.buffer = ''        # output staged since the last flush
        self.indent = 0         # indent level used by __output (2 spaces each)
        self.linebreak = '\n'
        self.rows = []
        self.cells = []         # Cell objects of the row being parsed
        self.in_table = False
        self.in_td = False
        self.in_heading = False
        self.in_ul = False
        self.in_ol = False
        self.in_li = False
        self.in_a = False
        self.in_pre = False
        self.last_href = ''     # href of the most recent <a>
        self.span_path = []     # closing markers of the open <span>s

    def __output(self, text, linebreak = True):
        """Stage *text*, indented, followed by a line break unless disabled."""
        self.buffer += ' ' * (self.indent * 2) + text
        if linebreak:
            self.buffer += self.linebreak

    def __flush(self):
        """Append the staged output to ``wiki`` and empty the buffer."""
        self.wiki += self.buffer
        self.buffer = ''

    def __flush_pending(self):
        """Flush staged output unless it is the text of an open table cell."""
        # Inside <td>/<th> the buffer belongs to the cell and is consumed by
        # end_td(); flushing it here would move cell text out of the table.
        if not self.in_td:
            self.__flush()

    def __span(self, value):
        """Return *value* as an int, or 1 when it is missing or not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return 1

    def feed(self, data):
        """Parse *data* and make sure trailing markup reaches ``wiki``.

        End-tag handlers only stage their markup (']', '*', '=' ...), so
        without this flush a fragment ending in a closing tag lost it.
        """
        HTMLParser.HTMLParser.feed(self, data)
        self.__flush_pending()

    def close(self):
        """Finish parsing and flush whatever is still staged."""
        HTMLParser.HTMLParser.close(self)
        self.__flush_pending()

    def handle_starttag(self, tag, attrs):
        """Dispatch an opening tag to its start_<tag> handler; <br> is a newline."""
        if tag == 'br':
            self.newline()
        elif tag in self._TAGS:
            handler = getattr(self, 'start_' + tag)
            if tag in self._TAGS_WITH_ATTRS:
                handler(attrs)
            else:
                handler()

    def handle_endtag(self, tag):
        """Dispatch a closing tag to its end_<tag> handler, if it has one."""
        if tag in self._TAGS:
            getattr(self, 'end_' + tag)()

    def start_h1(self):
        self.buffer += '='

    def end_h1(self):
        self.buffer += '=\n\n'

    def start_h2(self):
        self.buffer += '=='

    def end_h2(self):
        self.buffer += '==\n\n'

    def start_h3(self):
        self.buffer += '==='

    def end_h3(self):
        self.buffer += '===\n\n'

    def start_ul(self):
        self.in_ul = True

    def end_ul(self):
        self.in_ul = False

    def start_ol(self):
        self.in_ol = True

    def end_ol(self):
        self.in_ol = False

    def start_li(self):
        """Open a list item: '# ' inside <ol>, '* ' inside <ul>."""
        self.in_li = True
        if self.in_ol:
            self.buffer += '# '
        elif self.in_ul:
            self.buffer += '* '

    def end_li(self):
        self.in_li = False

    def start_i(self):
        self.buffer += '/'

    def end_i(self):
        self.buffer += '/'

    def start_b(self):
        self.buffer += '*'

    def end_b(self):
        self.buffer += '*'

    def start_u(self):
        self.buffer += '_'

    def end_u(self):
        self.buffer += '_'

    def start_a(self, attrs):
        """Open a link as '[<href>'; the link text is added by handle_data()."""
        self.in_a = True
        self.last_href = ''
        for key, value in attrs:
            if key == 'href':
                # A valueless href attribute is reported as None.
                self.last_href = value or ''
        self.buffer += '[' + self.last_href

    def end_a(self):
        self.in_a = False
        self.buffer += ']'

    def start_pre(self):
        self.in_pre = True
        self.buffer += '#Text\n'

    def end_pre(self):
        self.in_pre = False
        self.buffer += '#End\n'

    def start_strike(self):
        self.buffer += '-'

    def end_strike(self):
        self.buffer += '-'

    def start_span(self, attrs):
        """Open a <span>, mapping class underline/bold/italic to '_', '*', '/'."""
        cls = None
        for key, value in attrs:
            if key == 'class':
                cls = value
        if cls == 'underline':
            char = '_'
        elif cls == 'bold':
            char = '*'
        elif cls == 'italic':
            char = '/'
        else:
            char = ''
        self.buffer += char
        # Remember the marker so the matching </span> can close it.
        self.span_path.append(char)

    def end_span(self):
        """Close the innermost open <span>; a stray </span> is ignored."""
        if self.span_path:
            self.buffer += self.span_path.pop()

    def start_table(self):
        self.in_table = True

    def start_tr(self):
        pass

    def start_th(self, attrs):
        """Open a heading cell; the row is then emitted as '#Heading'."""
        self.in_heading = True
        self.start_td(attrs)

    def start_td(self, attrs):
        """Open a table cell and record its rowspan/colspan."""
        # Flush first so the buffer holds only this cell's text from here on.
        self.__flush()
        self.in_td = True
        cell = Cell()
        for key, value in attrs:
            if key == 'rowspan':
                cell.rowspan = self.__span(value)
            elif key == 'colspan':
                cell.colspan = self.__span(value)
        self.cells.append(cell)

    def handle_data(self, data):
        """Stage a run of text according to the element it appears in."""
        if not self.in_pre:
            # Outside <pre>, line breaks in the source carry no meaning.
            data = data.replace('\n', '')
        if self.in_a:
            # Link text identical to the href would only repeat the target.
            if data == self.last_href:
                return
            self.buffer += ' '
        if self.in_li:
            self.buffer += data.strip() + '\n'
            if self.in_ul or self.in_ol:
                self.__flush()
        elif self.in_td:
            self.buffer += data
        elif not self.in_table:
            self.buffer += data
            self.__flush()

    def end_td(self):
        """Move the buffered text into the current cell and close it."""
        self.in_td = False
        if not self.cells:
            # Stray </td> without an open cell: nothing to attach the text to.
            return
        self.cells[-1].data += self.buffer.strip()
        self.buffer = ''

    def end_th(self):
        self.end_td()

    def end_tr(self):
        """Emit the collected cells as a '#Heading' or '#Row' block."""
        if not self.cells:
            return
        if self.in_heading:
            self.__output('#Heading')
            self.in_heading = False
        else:
            self.__output('#Row')
        self.indent += 1
        # Each cell is introduced by one backslash-pipe marker per column it spans.
        parts = [('\\|' * cell.colspan) + ' ' + cell.data.strip()
                 for cell in self.cells]
        line = ' '.join(parts)
        if len(line) <= 80:
            self.__output(line)
        else:
            # Too long for one line: write one cell per line instead.
            for part in parts:
                self.__output(part)
        self.cells = []
        self.indent -= 1
        self.__flush()

    def end_table(self):
        self.in_table = False
        self.__flush()

    def newline(self):
        self.buffer += '\n'
272
273
274	# drupal2fullblog
275	# Copyright 2009 Roy Marples <roy@marples.name>
276	# BSD-2 licensed
277	# I'm crap at python, but this works for me :)
278
279	import time, datetime
280	from pyPgSQL import PgSQL
281
# Tunables
drupal = PgSQL.connect(database='drupal')
trac = PgSQL.connect(database='trac.blog')
home = "http://roy.marples.name"
node = home + "/node/"
blog = home + "/projects/blog"

dc = drupal.cursor()
tc = trac.cursor()

# Start of a site-relative Drupal node link; group 1 is the numeric node id.
# Matching the whole number keeps /node/1 from also rewriting /node/12.
_NODE_LINK = re.compile(r'<a href="/node/(\d+)')


def _blog_path(title, created):
    """Return the FullBlog post name for a node: YYYY/MM/DD/<slug>.

    The slug is the lowercased title up to its first space, or the whole
    lowercased title when there is no space (or it starts with one).
    *created* is a Unix timestamp, interpreted in local time.
    """
    path = datetime.datetime.fromtimestamp(created).strftime("%Y/%m/%d/")
    slug = title.lower()
    sp = slug.find(" ")
    if sp < 1:
        return path + slug
    return path + slug[0:sp]


def _node_link_to_blog(match):
    """re.sub callback: turn one /node/<nid> link into its /blog/ link."""
    dc.execute("SELECT title, created FROM node WHERE nid=%s",
               (match.group(1),))
    nr = dc.fetchone()
    if nr is None:
        # Unknown node: leave the link alone; it is made absolute later and
        # so keeps pointing at the old site.
        return match.group(0)
    return "<a href=\"/blog/" + _blog_path(nr["title"], nr["created"])


def _html_to_wiki(html):
    """Rewrite the links of a Drupal HTML fragment and return wiki markup."""
    # Make relative for the below code
    html = html.replace("<a href=\"" + node, "<a href=\"/node/")

    # Convert /node/n links to blog links
    html = _NODE_LINK.sub(_node_link_to_blog, html)

    # trac does not like relative links
    html = html.replace("<a href=\"/blog/", "<a href=\"" + blog + "/blog/")
    # Keep the slash that separates the host from the path.
    html = html.replace("<a href=\"/", "<a href=\"" + home + "/")

    # Code tags
    html = html.replace("<code>", "{{{\r\n")
    html = html.replace("</code>", "\r\n}}}")

    # Special case stuff
    html = html.replace("NetworkManager", "!NetworkManager")

    # Markup
    parser = Html2Wiki()
    parser.feed(html)
    # Markup staged by the final closing tag may still sit in the buffer.
    return parser.wiki + parser.buffer


# Blog posts
# NOTE(review): the joins look like they yield one row per revision and per
# term, so a node may be inserted more than once - confirm against the data.
tc.execute("DELETE FROM fullblog_posts")
dc.execute("SELECT users.name AS author, node.title, body, node.created, node.changed, term_data.name AS category"
           " FROM node"
           " JOIN node_revisions ON node_revisions.nid=node.nid"
           " JOIN term_node ON term_node.nid=node.nid"
           " JOIN term_data ON term_data.tid=term_node.tid"
           " JOIN users ON users.uid=node.uid"
           " WHERE type='blog'"
           " ORDER BY created")
# Fetch everything first: the link conversion reuses this cursor.
r = dc.fetchall()
for author, title, body, created, changed, category in r:
    npath = _blog_path(title, created)
    wiki = _html_to_wiki(body)

    # The title is stored as written; only the post name is lowercased.
    tc.execute("INSERT INTO fullblog_posts (name, version, title, body, publish_time, version_time, version_comment, version_author, author, categories)"
               " VALUES(%s, 1, %s, %s, %s, %s, '', %s, %s, %s)",
               (npath, title, wiki, created, changed, author, author, category))

# Comments
tc.execute("DELETE FROM fullblog_comments")
dc.execute("SELECT name, mail, node.title, node.created, comments.comment, timestamp"
           " FROM comments"
           " JOIN node ON node.nid=comments.nid"
           " WHERE type='blog'"
           " ORDER BY timestamp")
r = dc.fetchall()
for name, mail, title, created, comment, timestamp in r:
    # Name of the post the comment belongs to
    npath = _blog_path(title, created)
    wiki = _html_to_wiki(comment)

    # Author is "name <mail>", or whichever of the two is present.
    if mail:
        if name:
            name += " <" + mail + ">"
        else:
            name = mail

    # Comments are numbered per post, in the order they are inserted.
    tc.execute("SELECT COUNT(*) FROM fullblog_comments WHERE name=%s", (npath,))
    nr = tc.fetchone()
    tc.execute("INSERT INTO fullblog_comments (name, number, comment, author, time)"
               " VALUES(%s, %s, %s, %s, %s)",
               (npath, nr[0] + 1, wiki, name, timestamp))

trac.commit()

Download in other formats:

Original Format