diff options
Diffstat (limited to 'download.py')
-rw-r--r-- | download.py | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/download.py b/download.py index 2405805..3b12cc8 100644 --- a/download.py +++ b/download.py @@ -22,20 +22,35 @@ import urllib import os import htmlparser -#import progressbar import config -savedir = config.Configuration().get_download_location() -if not os.path.exists(savedir): - os.makedirs(savedir) +def get_savedir(): + conf = config.Configuration() + savedir = conf.get_download_location() + if not os.path.exists(savedir): + os.makedirs(savedir) + return savedir +def check_archive(fullpath): + conf = config.Configuration() + archive = conf.get_archive_location() + filename = os.path.basename(fullpath) + archfile = os.path.join(archive, filename) + #print "Path", archfile, "exists:", os.path.exists(archfile) + return os.path.exists(archfile) class Downloader(object): def __init__(self, progress_reporter): self.progress_reporter = progress_reporter + self.on_downloaded = None + + def set_on_downloaded(self, on_downloaded): + self.on_downloaded = on_downloaded def get_thread_links(self, baseurl): myparser = htmlparser.MyParser() - t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + t = ["0", "1", "2", "3", "4", + "5", "6", "7", "8", "9", + "10", "11", "12", "13", "14", "15"] i = 1 total = len(t) progress = self.progress_reporter(total) @@ -105,8 +120,8 @@ class Downloader(object): i = 1 for link in t: progress.show_progress(i) - filename = os.path.join(savedir, os.path.split(link)[1]) - if not os.path.exists(filename): + filename = os.path.join(get_savedir(), os.path.split(link)[1]) + if not check_archive(filename): tries = 10 while tries > 0: try: @@ -118,6 +133,8 @@ class Downloader(object): failed += 1 else: downloaded += 1 + if self.on_downloaded is not None: + self.on_downloaded(filename) else: skipped += 1 i += 1 @@ -126,10 +143,4 @@ class Downloader(object): return (skipped, failed, downloaded, total) if __name__ == "__main__": - # Get a file-like object for the 4chan.org w/imgboard - base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/" - - # Get the hyperlinks. - t = get_thread_links(base_url) - t = get_image_links(base_url, t) - get_images(t) + print "Don't run me, run 4grab.py" |