1
0
Fork 0
tekuti/wordpress-to-dir.py
2008-02-29 13:38:07 +01:00

106 lines
3.4 KiB
Python

#!/usr/bin/env python
import sys
import tempfile
import MySQLdb as db
import os
import urllib
import time
# Module-wide MySQL connection shared by the query helpers in this file.
# It stays None until main() assigns the result of db.connect() to it.
cxn = None
def all_posts():
    """Yield every row of ``wp_posts`` as a dict, one post at a time.

    The dict keys are short names for the selected columns, in order:
    id, author, date (from post_date_gmt), content, title, status,
    comment_status, name, modified (from post_modified_gmt) and
    content_filtered.  Uses the module-level ``cxn`` connection.
    """
    fields = ('id', 'author', 'date', 'content', 'title',
              'status', 'comment_status', 'name', 'modified',
              'content_filtered')
    query = ('select ID, post_author, post_date_gmt, post_content,'
             ' post_title, post_status, comment_status, post_name,'
             ' post_modified_gmt, post_content_filtered'
             ' from wp_posts')
    cursor = cxn.cursor()
    cursor.execute(query)
    # Pull rows one by one; a falsy result from fetchone() ends the stream.
    record = cursor.fetchone()
    while record:
        yield dict(zip(fields, record))
        record = cursor.fetchone()
def post_categories(post):
    """Return the list of category names attached to ``post``.

    Looks the post up by ``post['id']`` through the ``wp_post2cat`` join
    table, using the module-level ``cxn`` connection.
    """
    cursor = cxn.cursor()
    cursor.execute(
        'select cat_name from wp_categories c, wp_post2cat p2c'
        ' where p2c.post_id=%s and p2c.category_id=c.cat_ID',
        (post['id'],))
    names = []
    for record in cursor.fetchall():
        # Each row has a single column: the category name.
        names.append(record[0])
    return names
def post_comments(post):
    """Return the non-spam comments of ``post`` as a list of dicts.

    Each dict has the keys id, author, author_email, author_url,
    author_ip, date, date-gmt, content and approved, filled from the
    matching ``wp_comments`` columns.  Uses the module-level ``cxn``.
    """
    fields = ('id', 'author', 'author_email', 'author_url', 'author_ip',
              'date', 'date-gmt', 'content', 'approved')
    query = ('select comment_ID, comment_author, comment_author_email,'
             ' comment_author_url, comment_author_IP,'
             ' comment_date, comment_date_gmt, comment_content, comment_approved'
             " from wp_comments where comment_post_ID=%s and comment_approved!='spam'")
    cursor = cxn.cursor()
    cursor.execute(query, (post['id'],))
    comments = []
    for record in cursor.fetchall():
        comments.append(dict(zip(fields, record)))
    return comments
def write_file(path, content):
    """Create (or truncate) the file at ``path`` and write ``content`` to it.

    The file is opened in text write mode.  The handle is closed even when
    the write itself raises, so a failing write cannot leak an open file.
    """
    with open(path, 'w') as f:
        f.write(content)
def make_dir(path):
    """Create the directory ``path`` and return it with a trailing slash.

    The returned string is meant to be used as a prefix for file names
    inside the new directory.  Raises whatever ``os.mkdir`` raises, e.g.
    when the directory already exists.
    """
    os.mkdir(path)
    prefix = ''.join((path, '/'))
    return prefix
def write_comment(comment, dir):
    """Write one comment to the file ``dir + str(comment['id'])``.

    The file holds a ``key: value`` metadata header (every field except
    ``content``, plus a ``timestamp`` line in seconds since the epoch),
    then a blank line, then the comment content.

    ``dir`` is a directory prefix that already ends with a slash.
    """
    import calendar

    def make_metadata():
        lines = ['%s: %s\n' % (k, v) for k, v in comment.items()
                 if k != 'content']
        gmt_date = comment['date-gmt']
        if gmt_date:
            # The GMT column must be converted as UTC; time.mktime() would
            # interpret it as local time and skew the timestamp by the
            # host's UTC offset.
            stamp = calendar.timegm(gmt_date.timetuple())
        else:
            # No GMT value available: fall back to the local-time column,
            # for which mktime() is the matching conversion.
            stamp = int(time.mktime(comment['date'].timetuple()))
        lines.append('timestamp: %s\n' % stamp)
        return ''.join(lines)

    write_file(dir + str(comment['id']),
               make_metadata() + '\n' + comment['content'])
def make_post_key(post):
    """Return the key (directory name) under which ``post`` is stored.

    The key is ``YYYY/MM/DD/name`` built from ``post['date']`` and
    ``post['name']``, percent-encoded with no safe characters (so the
    slashes are escaped too) and then lower-cased.
    """
    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3;
    # resolve it here so the helper works on either interpreter.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    d = post['date']
    pre = '%d/%02d/%02d/%s' % (d.year, d.month, d.day, post['name'])
    return quote(pre, '').lower()
def write_post(post, categories, comments):
    """Write ``post`` and its comments into a new directory.

    The directory is named by ``make_post_key(post)`` and receives three
    files: ``content``, ``content-filtered`` and ``metadata``.  The
    metadata file lists every post field except the two content fields as
    ``key: value`` lines, followed by a ``tags`` line built from
    ``categories`` and a ``timestamp`` line in seconds since the epoch.

    When ``comments`` is non-empty, a ``comments`` subdirectory is created
    and each comment is written into it with ``write_comment``.
    """
    import calendar

    def make_metadata():
        lines = ['%s: %s\n' % (k, v) for k, v in post.items()
                 if k not in ('content', 'content_filtered')]
        lines.append('tags: %s\n' % ', '.join(categories))
        # all_posts() fills post['date'] from the post_date_gmt column, so
        # it must be converted as UTC.  time.mktime() would treat it as
        # local time and shift the result by the host's UTC offset.
        lines.append('timestamp: %s\n'
                     % calendar.timegm(post['date'].timetuple()))
        return ''.join(lines)

    key = make_post_key(post)
    d = make_dir(key)
    write_file(d + 'content', post['content'])
    write_file(d + 'content-filtered', post['content_filtered'])
    write_file(d + 'metadata', make_metadata())
    if comments:
        c = make_dir(d + 'comments')
        for comment in comments:
            write_comment(comment, c)
def main(args):
    """Export all posts of a WordPress database into a new temp directory.

    ``args`` is an argv-style list: the program name followed by the MySQL
    host, user, password and database name.  A fresh directory with the
    prefix ``wp2dir`` is created, made the current directory, and every
    post is written into it.

    Exits with a usage message (SystemExit) when the number of arguments
    is wrong.
    """
    global cxn
    # Check the command line before touching the filesystem, so a bad
    # invocation neither leaves an empty temp directory behind nor changes
    # the working directory.
    if len(args) != 5:
        prog = args[0] if args else 'wordpress-to-dir.py'
        sys.exit('usage: %s HOST USER PASSWD DATABASE' % prog)
    _, host, user, passwd, database = args

    d = tempfile.mkdtemp(prefix='wp2dir')
    # Single-argument form prints identically under Python 2 and 3.
    print('writing dir %s' % d)
    os.chdir(d)

    cxn = db.connect(host=host, user=user, passwd=passwd, db=database)
    try:
        cxn.cursor().execute("set names 'utf8'")
        for post in all_posts():
            write_post(post, post_categories(post), post_comments(post))
    finally:
        # Release the connection even if the export fails part-way.
        cxn.close()
# Script entry point: pass the full command line (including the program
# name) to main() when the file is executed directly.
if __name__ == '__main__':
    main(sys.argv)