summaryrefslogtreecommitdiffstatshomepage
path: root/wordpress-xmlrpc-to-dir.py
diff options
context:
space:
mode:
authorGravatar Aleix Conchillo Flaque2010-12-22 16:12:07 +0100
committerGravatar Andy Wingo2011-02-18 23:22:11 +0100
commitbf90b687e087bb13fd8a0eb0be3bf82bdaabdcb8 (patch)
treefcecce8168a63807c997acb6e34abfa4188ddb70 /wordpress-xmlrpc-to-dir.py
parentf350e252fcb7cd29292805619a10d450d2a9ebb1 (diff)
downloadtekuti-bf90b687e087bb13fd8a0eb0be3bf82bdaabdcb8.tar.gz
tekuti-bf90b687e087bb13fd8a0eb0be3bf82bdaabdcb8.zip
added wordpress (xmlprc) to tekuti script
Diffstat (limited to 'wordpress-xmlrpc-to-dir.py')
-rw-r--r--wordpress-xmlrpc-to-dir.py162
1 files changed, 162 insertions, 0 deletions
diff --git a/wordpress-xmlrpc-to-dir.py b/wordpress-xmlrpc-to-dir.py
new file mode 100644
index 0000000..6c442ba
--- /dev/null
+++ b/wordpress-xmlrpc-to-dir.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+
+import os
+import pycurl
+import string
+import sys
+import tempfile
+import time
+import urllib
+import urlparse
+import xmlrpclib
+
+from HTMLParser import HTMLParser
+
+class WordPress:
+
+ def __init__(self, url, username, password):
+ self.__username = username
+ self.__password = password
+ self.__proxy = xmlrpclib.ServerProxy(url)
+
+ def all_posts(self):
+ MAX_POST_NO = 10000
+ return self.__proxy.metaWeblog.getRecentPosts("",
+ self.__username,
+ self.__password,
+ MAX_POST_NO)
+
+ def post_comments(self, post):
+ return self.__proxy.wp.getComments("",
+ self.__username,
+ self.__password,
+ {"post_id" : post["postid"]})
+
+class MyHTMLParser(HTMLParser):
+
+ def __init__(self, url, imgdir):
+ HTMLParser.__init__(self)
+ self.url = url
+ self.imgdir = imgdir
+
+ def handle_starttag(self, tag, attrs):
+ if tag == "img":
+ for attr in attrs:
+ if attr[0] == "src":
+ self.download_image(attr[1])
+
+ def download_image(self, url):
+ parsed = urlparse.urlparse(url)
+ if parsed.netloc == self.url:
+ print " downloading %s" % url
+ filename = os.path.join(self.imgdir, parsed.path[1:])
+ try:
+ os.makedirs(os.path.dirname(filename))
+ except os.error:
+ pass
+ fp = open(filename, "wb")
+ curl = pycurl.Curl()
+ curl.setopt(pycurl.URL, url)
+ curl.setopt(pycurl.FOLLOWLOCATION, 1)
+ curl.setopt(pycurl.MAXREDIRS, 5)
+ curl.setopt(pycurl.CONNECTTIMEOUT, 30)
+ curl.setopt(pycurl.TIMEOUT, 300)
+ curl.setopt(pycurl.NOSIGNAL, 1)
+ curl.setopt(pycurl.WRITEDATA, fp)
+ curl.perform()
+ curl.close()
+ fp.close()
+
+def make_dir(path):
+ os.mkdir(path)
+ return path + "/"
+
+def write_file(path, content):
+ f = open(path, "w")
+ f.write(content.encode("utf-8"))
+ f.close()
+
+def write_comment(comment, dir):
+ def make_metadata():
+ out = ""
+ keys = {"comment_id" : "id",
+ "author" : "author",
+ "author_email" : "author_email",
+ "author_url" : "author_url",
+ "author_ip" : "author_ip",
+ "status" : "approved"}
+ for k in keys:
+ if (k != "status") or ((k == "status") and (comment[k] != "spam")):
+ out += "%s: %s\n" % (k, comment[k])
+ date = comment["date_created_gmt"]
+ out += "timestamp: %s\n" % int(time.mktime(date.timetuple()))
+ return out
+
+ write_file(dir + str(comment["comment_id"]),
+ make_metadata() + "\n" + comment["content"])
+
+def unescape(s):
+ s = s.replace("&lt;", "<")
+ s = s.replace("&gt;", ">")
+ s = s.replace("&amp;", "&")
+ return s
+
+def make_post_key(post):
+ d = post["dateCreated"].timetuple()
+ pre = "%d/%02d/%02d/%s" % (d.tm_year, d.tm_mon, d.tm_mday, post["wp_slug"])
+ return urllib.quote(pre, "").lower()
+
+def get_post_images(post, url, imgdir):
+ parser = MyHTMLParser(url, imgdir)
+ parser.feed(post["description"])
+
+def write_post(post, categories, comments, images_url, new_images_url):
+ print "writing post %s" % unescape(post["title"])
+ def make_metadata():
+ out = ""
+ keys = {"postid" : "id",
+ "wp_author_display_name" : "author",
+ "title" : "title",
+ "post_status" : "status",
+ "mt_allow_comments" : "comment_status"}
+ for k in keys:
+ value = post[k]
+ if k == "mt_allow_comments":
+ if value == 1:
+ value = "open"
+ else:
+ value = "closed"
+ out += "%s: %s\n" % (keys[k], value)
+ out += "tags: %s\n" % ", ".join(categories)
+ out += "timestamp: %s\n" % int(time.mktime(post["dateCreated"].timetuple()))
+ return unicode(out)
+
+ key = make_post_key(post)
+ d = make_dir(key)
+ content = string.replace(post["description"], images_url, new_images_url)
+ write_file(d + "content", content)
+ write_file(d + "metadata", make_metadata())
+ if comments:
+ c = make_dir(d + "comments")
+ for comment in comments:
+ write_comment(comment, c)
+
+def main(args):
+ d_posts = tempfile.mkdtemp(prefix="wp2dir")
+ d_images = tempfile.mkdtemp(prefix="wp2img")
+ print "creating directories %s and %s" % (d_posts, d_images)
+
+ _, url, images_url, new_images_url, user, passwd = args
+
+ wp = WordPress(url, user, passwd)
+ posts = wp.all_posts()
+ for post in posts:
+ comments = wp.post_comments(post)
+ os.chdir(d_posts)
+ write_post(post, post["categories"], comments, images_url, new_images_url)
+ os.chdir(d_images)
+ get_post_images(post, images_url, d_images)
+
+if __name__ == "__main__":
+ main(sys.argv)
+