From ba6b659fb80e843c1e7062c116a856873e14ad6a Mon Sep 17 00:00:00 2001
From: ryuslash
Date: Sun, 7 Mar 2010 00:20:37 +0100
Subject: Removed download.py's dependency on progressbar.py

download.py no longer requires progressbar.py: it now contains a
Downloader class that accepts a progress-reporter class as a
constructor parameter. This may be useful later when adding different
ways of interaction.
---
 download.py | 162 +++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 83 insertions(+), 79 deletions(-)

diff --git a/download.py b/download.py
index b9f9f72..2405805 100644
--- a/download.py
+++ b/download.py
@@ -22,104 +22,108 @@
 import urllib
 import os
 import htmlparser
-import progressbar
+#import progressbar
 import config
 
 savedir = config.Configuration().get_download_location()
 if not os.path.exists(savedir):
     os.makedirs(savedir)
 
-def get_thread_links(baseurl):
-    myparser = htmlparser.MyParser()
-    t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
-    i = 1
-    total = len(t)
-    progress = progressbar.Progress(total)
+class Downloader(object):
+    def __init__(self, progress_reporter):
+        self.progress_reporter = progress_reporter
 
-    for pagenum in t:
-        progress.show_progress(i)
-
-        url = baseurl + pagenum
-        tries = 10
-        while tries > 0:
-            try:
-                f = urllib.urlopen(url)
-                break
-            except IOError:
-                tries -= 1
-                print "\rTry of", url, "failed,", tries, "tries left"
-        if not f is None:
-            # Read the response
-            s = f.read()
-            f.close()
-
-            # Process the page.
-            myparser.parse(s)
-        else:
-            "\rOpening of", url, "did not succeed, trying next one..."
-        i += 1
+    def get_thread_links(self, baseurl):
+        myparser = htmlparser.MyParser()
+        t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
+        i = 1
+        total = len(t)
+        progress = self.progress_reporter(total)
+
+        for pagenum in t:
+            progress.show_progress(i)
+
+            url = baseurl + pagenum
+            tries = 10
+            while tries > 0:
+                try:
+                    f = urllib.urlopen(url)
+                    break
+                except IOError:
+                    tries -= 1
+                    print "\rTry of", url, "failed,", tries, "tries left"
+            if f is not None:
+                # Read the response
+                s = f.read()
+                f.close()
+
+                # Process the page.
+                myparser.parse(s)
+            else:
+                print "\rOpening of", url, "did not succeed, trying next one..."
+            i += 1
 
-    progress.complete()
-    return myparser.get_hyperlinks()
+        progress.complete()
+        return myparser.get_hyperlinks()
 
-def get_image_links(baseurl, t = []):
-    mysubparser = htmlparser.MySubParser()
-    total = len(t)
-    progress = progressbar.Progress(total)
-    i = 1
+    def get_image_links(self, baseurl, t = []):
+        mysubparser = htmlparser.MySubParser()
+        total = len(t)
+        progress = self.progress_reporter(total)
+        i = 1
 
-    for link in t:
-        progress.show_progress(i)
-
-        img_url = baseurl + link
-        tries = 10
-        while tries > 0:
-            try:
-                f = urllib.urlopen(img_url)
-                break
-            except IOError:
-                tries -= 1
-                print "\rTry of", img_url, "failed,", tries, "tries left"
-        if not f is None:
-            s = f.read()
-            f.close()
-
-            mysubparser.parse(s)
-        else:
-            print "\rOpening of", img_url, "did not succeed, trying next one..."
-        i += 1
-
-    progress.complete()
-    return mysubparser.get_hyperlinks()
+        for link in t:
+            progress.show_progress(i)
 
-def get_images(t = []):
-    skipped = 0
-    failed = 0
-    downloaded = 0
-    total = len(t)
-    progress = progressbar.Progress(total)
-    i = 1
-    for link in t:
-        progress.show_progress(i)
-        filename = os.path.join(savedir, os.path.split(link)[1])
-        if not os.path.exists(filename):
+            img_url = baseurl + link
             tries = 10
             while tries > 0:
                 try:
-                    urllib.urlretrieve(link, filename)
+                    f = urllib.urlopen(img_url)
                     break
                 except IOError:
                     tries -= 1
-                    if tries == 0:
-                        failed += 1
+                    print "\rTry of", img_url, "failed,", tries, "tries left"
+            if f is not None:
+                s = f.read()
+                f.close()
+
+                mysubparser.parse(s)
+            else:
+                print "\rOpening of", img_url, "did not succeed, trying next one..."
+            i += 1
+
+        progress.complete()
+        return mysubparser.get_hyperlinks()
+
+    def get_images(self, t = []):
+        skipped = 0
+        failed = 0
+        downloaded = 0
+        total = len(t)
+        progress = self.progress_reporter(total)
+        i = 1
+        for link in t:
+            progress.show_progress(i)
+            filename = os.path.join(savedir, os.path.split(link)[1])
+            if not os.path.exists(filename):
+                tries = 10
+                while tries > 0:
+                    try:
+                        urllib.urlretrieve(link, filename)
+                        break
+                    except IOError:
+                        tries -= 1
+                        if tries == 0:
+                            failed += 1
                         else:
-                        downloaded += 1
-        else:
-            skipped += 1
-        i += 1
+                            downloaded += 1
+            else:
+                skipped += 1
+            i += 1
 
-    progress.complete()
-    return (skipped, failed, downloaded, total)
+        progress.complete()
+        return (skipped, failed, downloaded, total)
 
 if __name__ == "__main__":
     # Get a file-like object for the 4chan.org w/imgboard
-- 
cgit v1.2.3-54-g00ecf
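
The patch leaves the progress-reporter interface implicit: whatever class is
passed to Downloader must be constructible with the total item count and must
provide show_progress(i) and complete(), since those are the only members the
methods above call. Below is a minimal sketch of a compatible reporter; the
class name PlainReporter is hypothetical, and the project's real
implementation is the (now commented-out) progressbar.Progress:

    import sys

    class PlainReporter(object):
        # Minimal progress reporter satisfying the interface Downloader
        # expects: __init__(total), show_progress(i), complete().
        def __init__(self, total):
            self.total = total

        def show_progress(self, i):
            # Overwrite the current terminal line with an "i/total" counter,
            # matching the "\r"-style output used elsewhere in download.py.
            sys.stdout.write("\r%d/%d" % (i, self.total))
            sys.stdout.flush()

        def complete(self):
            # End the progress line so subsequent output starts cleanly.
            sys.stdout.write("\n")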
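
Because Downloader stores the class itself rather than an instance, each
method constructs a fresh reporter sized to its own workload
(progress = self.progress_reporter(total)). Wiring it up might look like the
following; the URLs are placeholders, since the real board URL is only hinted
at by the __main__ comment:

    downloader = Downloader(PlainReporter)
    threads = downloader.get_thread_links("http://example.com/board/")   # placeholder URL
    images = downloader.get_image_links("http://example.com/", threads)  # placeholder URL
    skipped, failed, downloaded, total = downloader.get_images(images)

Swapping in a different reporter class (a GUI progress bar, a silent logger)
would then require no change to Downloader, which is presumably the
"different ways of interaction" the commit message anticipates.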
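
One caveat carried over from the old code: f is only bound inside the try
block, so if every attempt at the very first URL raises IOError, the
subsequent "if f is not None" check raises NameError, and on later iterations
a stale handle from the previous page is silently reused. A sketch of a
shared fetch helper that get_thread_links and get_image_links could both call
in place of their duplicated retry loops; this is a suggested follow-up, not
part of the commit:

    def fetch_page(url, tries=10):
        # Return the body of url, or None if every attempt fails.
        f = None
        while tries > 0:
            try:
                f = urllib.urlopen(url)
                break
            except IOError:
                tries -= 1
                print "\rTry of", url, "failed,", tries, "tries left"
        if f is None:
            return None
        s = f.read()
        f.close()
        return s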