diff options
author | ryuslash | 2010-03-19 08:26:26 +0100 |
---|---|---|
committer | ryuslash | 2010-03-19 08:26:26 +0100 |
commit | 96247d41d50c53e7d3e1c5aa4c6a8dcba6d647f2 (patch) | |
tree | 12564b5ac501f2a35b660d7f5bd7461581f3f6ae /download.py | |
parent | caba2811b94577eb89e13d9d1a7f7de64c979acc (diff) | |
parent | 7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df (diff) | |
download | 4grab-96247d41d50c53e7d3e1c5aa4c6a8dcba6d647f2.tar.gz 4grab-96247d41d50c53e7d3e1c5aa4c6a8dcba6d647f2.zip |
Merge branch 'sorting' into develop
Diffstat (limited to 'download.py')
-rw-r--r-- | download.py | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/download.py b/download.py index 2405805..3b12cc8 100644 --- a/download.py +++ b/download.py @@ -22,20 +22,35 @@ import urllib import os import htmlparser -#import progressbar import config -savedir = config.Configuration().get_download_location() -if not os.path.exists(savedir): - os.makedirs(savedir) +def get_savedir(): + conf = config.Configuration() + savedir = conf.get_download_location() + if not os.path.exists(savedir): + os.makedirs(savedir) + return savedir +def check_archive(fullpath): + conf = config.Configuration() + archive = conf.get_archive_location() + filename = os.path.basename(fullpath) + archfile = os.path.join(archive, filename) + #print "Path", archfile, "exists:", os.path.exists(archfile) + return os.path.exists(archfile) class Downloader(object): def __init__(self, progress_reporter): self.progress_reporter = progress_reporter + self.on_downloaded = None + + def set_on_downloaded(self, on_downloaded): + self.on_downloaded = on_downloaded def get_thread_links(self, baseurl): myparser = htmlparser.MyParser() - t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + t = ["0", "1", "2", "3", "4", + "5", "6", "7", "8", "9", + "10", "11", "12", "13", "14", "15"] i = 1 total = len(t) progress = self.progress_reporter(total) @@ -105,8 +120,8 @@ class Downloader(object): i = 1 for link in t: progress.show_progress(i) - filename = os.path.join(savedir, os.path.split(link)[1]) - if not os.path.exists(filename): + filename = os.path.join(get_savedir(), os.path.split(link)[1]) + if not check_archive(filename): tries = 10 while tries > 0: try: @@ -118,6 +133,8 @@ class Downloader(object): failed += 1 else: downloaded += 1 + if self.on_downloaded is not None: + self.on_downloaded(filename) else: skipped += 1 i += 1 @@ -126,10 +143,4 @@ class Downloader(object): return (skipped, failed, downloaded, total) if __name__ == "__main__": - # Get a file-like object for the 4chan.org w/imgboard - base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/" - - # Get the hyperlinks. - t = get_thread_links(base_url) - t = get_image_links(base_url, t) - get_images(t) + print "Don't run me, run 4grab.py" |