aboutsummaryrefslogtreecommitdiffstats
path: root/ryuslash/aggregator/management/commands/loadfeeds.py
blob: 198e22797341197397ea5820c65d6fcb9bec0882 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import datetime
import feedparser
import markdown
import os
import re
import urllib2

from django.conf import settings
from django.core.management.base import BaseCommand

from aggregator.models import Feed, Post


class Command(BaseCommand):
    """Fetch every stored Feed, download its logo if missing, and create
    Post rows for any entries not already in the database."""

    help = "Load data from saved feeds."

    def prep_feedname(self, value):
        """Slugify a feed name: strip non-word characters, lowercase, and
        collapse runs of whitespace/hyphens into single hyphens."""
        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
        return re.sub(r'[-\s]+', '-', value)

    def get_logopath(self, feed):
        """Return the local filesystem path where this feed's logo lives,
        under the first STATICFILES_DIRS entry."""
        ext = feed.favicon_ext
        filename = self.prep_feedname(feed.name) + '.' + ext
        basedir = settings.STATICFILES_DIRS[0]
        return os.path.join(basedir, 'images/logos', filename)

    def have_logo(self, feed):
        """True if the feed's logo file has already been downloaded."""
        return os.path.exists(self.get_logopath(feed))

    def save_logo(self, feed):
        """Download <base_url>/favicon.<ext> and store it at the logo path.

        Best-effort: a missing or unreachable favicon is silently skipped
        so one broken site cannot abort the whole import run.
        """
        ext = feed.favicon_ext
        url = feed.base_url + '/favicon.' + ext

        try:
            logo = urllib2.urlopen(url)
        except (urllib2.URLError, IOError):
            # Narrowed from a bare except: only network/IO failures are
            # expected here; anything else should surface.
            return

        try:
            # 'wb' because favicons are binary; 'w' would corrupt them on
            # platforms that translate line endings.  The with-block
            # guarantees the file handle is closed even if read/write fails.
            with open(self.get_logopath(feed), 'wb') as save:
                save.write(logo.read())
        finally:
            logo.close()

    def construct_feed_url(self, feed):
        """Return the absolute URL of the feed document."""
        return feed.base_url + feed.feed_url

    def handle(self, *args, **kwargs):
        """Entry point: parse each feed and persist its unseen entries."""
        for feed in Feed.objects.all():
            parsed = feedparser.parse(self.construct_feed_url(feed))
            icon = self.prep_feedname(feed.name) + '.' + feed.favicon_ext
            newcount = 0

            if not self.have_logo(feed):
                self.save_logo(feed)

            for entry in parsed.entries:
                # Skip entries we have already imported.
                if Post.objects.filter(post_id=entry.id).exists():
                    continue

                # feedparser only sets the keys present in the feed, so
                # attribute access would raise AttributeError when a
                # timestamp is missing; .get() returns None instead.
                dt = (entry.get('updated_parsed')
                      or entry.get('published_parsed'))

                if dt:
                    # dt is a time.struct_time; its first six fields are
                    # (year, month, day, hour, minute, second).
                    updated = datetime.datetime(*dt[:6])
                else:
                    updated = datetime.datetime.now()

                if 'content' in entry.keys():
                    content = entry.content[0]['value']
                else:
                    content = entry.summary

                if feed.uses_markdown:
                    content = markdown.markdown(content)

                if feed.convert_newlines:
                    # NOTE(review): '</br>' is not valid HTML ('<br/>' is),
                    # but changing it would alter stored output — confirm
                    # with the templates before fixing.
                    content = content.replace('\n', '</br>\n')

                post = Post(
                    post_id=entry.id,
                    title=entry.title if feed.uses_titles else '',
                    link=entry.link,
                    updated=updated,
                    icon=icon,
                    content=content,
                    feed=feed,
                )

                post.save()
                newcount += 1

            # print(...) with a single argument works identically under
            # Python 2's print statement and Python 3's print function.
            print('Grabbed %d new posts from %s' % (newcount, feed.name))