1
0
Fork 0
tekuti/wordpress-to-dir.py
Ramakrishnan Muthukrishnan b8b1f47d65 Change wordpress import to use the newer wordpress database layout.
* wordpress_to_dir.py (all_posts): select only published posts.
  (post_categories): get the post category based on the new set
  of tables wp_terms, wp_term_relationships and wp_term_taxonomy.
2010-11-15 22:05:15 +01:00

110 lines
3.6 KiB
Python

#!/usr/bin/env python
import sys
import tempfile
import MySQLdb as db
import os
import urllib
import time
cxn = None
def all_posts():
cur = cxn.cursor()
sql = ('select ID, post_author, post_date_gmt, post_content,'
' post_title, post_status, comment_status, post_name,'
' post_modified_gmt, post_content_filtered'
' from wp_posts WHERE post_status="publish" AND'
' post_type ="post" ORDER BY post_date DESC')
cur.execute(sql)
while True:
row = cur.fetchone()
if row:
keys = ('id', 'author', 'date', 'content', 'title',
'status', 'comment_status', 'name', 'modified',
'content_filtered')
yield dict(zip(keys, row))
else:
break
def post_categories(post):
cur = cxn.cursor()
sql = ('select name from wp_terms INNER JOIN wp_term_taxonomy ON'
' wp_terms.term_id = wp_term_taxonomy.term_id INNER JOIN'
' wp_term_relationships ON wp_term_relationships.term_taxonomy_id = '
' wp_term_taxonomy.term_taxonomy_id WHERE'
' wp_term_relationships.object_id=%s')
cur.execute(sql, (post['id'],))
return [row[0] for row in cur.fetchall()]
def post_comments(post):
cur = cxn.cursor()
sql = ('select comment_ID, comment_author, comment_author_email,'
' comment_author_url, comment_author_IP,'
' comment_date, comment_date_gmt, comment_content, comment_approved'
' from wp_comments where comment_post_ID=%s and comment_approved!=\'spam\'')
cur.execute(sql, (post['id'],))
keys = ('id', 'author', 'author_email', 'author_url', 'author_ip',
'date', 'date-gmt', 'content', 'approved')
return [dict(zip(keys, row)) for row in cur.fetchall()]
def write_file(path, content):
f = open(path, 'w')
f.write(content)
f.close()
def make_dir(path):
os.mkdir(path)
return path + '/'
def write_comment(comment, dir):
def make_metadata():
out = ''
for k, v in comment.items():
if k not in ('content',):
out += '%s: %s\n' % (k, v)
date = comment['date-gmt'] or comment['date']
out += 'timestamp: %s\n' % int(time.mktime(date.timetuple()))
return out
write_file(dir + str(comment['id']),
make_metadata() + '\n' + comment['content'])
def make_post_key(post):
d = post['date']
pre = '%d/%02d/%02d/%s' % (d.year, d.month, d.day, post['name'])
return urllib.quote(pre, '').lower()
def write_post(post, categories, comments):
def make_metadata():
out = ''
for k, v in post.items():
if k not in ('content', 'content_filtered'):
out += '%s: %s\n' % (k, v)
out += 'tags: %s\n' % ', '.join(categories)
out += 'timestamp: %s\n' % int(time.mktime(post['date'].timetuple()))
return out
key = make_post_key(post)
d = make_dir(key)
write_file(d + 'content', post['content'])
write_file(d + 'content-filtered', post['content_filtered'])
write_file(d + 'metadata', make_metadata())
if comments:
c = make_dir(d + 'comments')
for comment in comments:
write_comment(comment, c)
def main(args):
global cxn
d = tempfile.mkdtemp(prefix='wp2dir')
print 'writing dir', d
os.chdir(d)
_, host, user, passwd, database = args
cxn = db.connect(host=host, user=user, passwd=passwd, db=database)
cxn.cursor().execute("set names 'utf8'")
for post in all_posts():
write_post (post, post_categories (post), post_comments (post))
if __name__ == '__main__':
main(sys.argv)