Removed download.py's progressbar dependency

download.py no longer requires progressbar.py; it now contains a class that accepts another class as a parameter. This may be useful later when adding different ways of interaction.
This commit is contained in:
ryuslash 2010-03-07 00:20:37 +01:00
parent 5516dbbcae
commit ba6b659fb8
2 changed files with 120 additions and 98 deletions

View file

@ -24,9 +24,11 @@ import sys
import config import config
import download import download
import progressbar
base_url = "http://boards.4chan.org/" base_url = "http://boards.4chan.org/"
parser = optparse.OptionParser() parser = optparse.OptionParser()
downloader = download.Downloader(progressbar.Progress)
def walk_with_wizard(baseurl): def walk_with_wizard(baseurl):
print "Alright, let me put on my robe and wizard hat." print "Alright, let me put on my robe and wizard hat."
@ -42,19 +44,19 @@ def walk_with_wizard(baseurl):
if inp == "single": if inp == "single":
inp = raw_input("Which thread would you like to download? ") inp = raw_input("Which thread would you like to download? ")
if inp[:7] == "http://": if inp[:7] == "http://":
t = download.get_image_links("", [inp]) t = downloader.get_image_links("", [inp])
else: else:
thread = inp thread = inp
inp = raw_input("Which category is this thread in? ") inp = raw_input("Which category is this thread in? ")
t = download.get_image_links("%s%s/res/" % (baseurl, inp), [thread]) t = downloader.get_image_links("%s%s/res/" % (baseurl, inp), [thread])
else: else:
inp = raw_input("Which category would you like to download? ") inp = raw_input("Which category would you like to download? ")
config.Configuration().set_category(inp) config.Configuration().set_category(inp)
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category()) baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
t = download.get_thread_links(baseurl) t = downloader.get_thread_links(baseurl)
t = download.get_image_links(baseurl, t) t = downloader.get_image_links(baseurl, t)
(skipped, failed, downloaded, total) = download.get_images(t) (skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded print "Downloaded: ", downloaded
print "Skipped: ", skipped print "Skipped: ", skipped
print "Failed: ", failed print "Failed: ", failed
@ -67,10 +69,26 @@ parser.set_usage(
This program comes with ABSOLUTELY NO WARRANTY. This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it This is free software, and you are welcome to redistribute it
under certain conditions.""") under certain conditions.""")
parser.add_option("-e", nargs=2, dest="confval", metavar="CONF VALUE", help="Set configuration option CONF to be VALUE") parser.add_option("-e",
parser.add_option("-c", "--category", dest="tempcat", metavar="CATEGORY", help="Set the category to CATEGORY only for this run") nargs=2,
parser.add_option("-t", "--thread", dest="thread", metavar="THREAD", help="Download only THREAD. If THREAD is only an ID, CATEGORY must also be set. Otherwise, no problem :-)") dest="confval",
parser.add_option("-w", "--wizard", action="store_true", dest="wizard", help="I'll put on my robe and wizard hat and help you get some of those pictures you like") metavar="CONF VALUE",
help="Set configuration option CONF to be VALUE")
parser.add_option("-c",
"--category",
dest="tempcat",
metavar="CATEGORY",
help="Set the category to CATEGORY only for this run")
parser.add_option("-t",
"--thread",
dest="thread",
metavar="THREAD",
help="Download only THREAD. If THREAD is only an ID, CATEGORY must also be set. Otherwise, no problem :-)")
parser.add_option("-w",
"--wizard",
action="store_true",
dest="wizard",
help="I'll put on my robe and wizard hat and help you get some of those pictures you like")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
@ -94,14 +112,14 @@ elif options.wizard:
exit(0) exit(0)
elif options.thread: elif options.thread:
if options.thread[:7] == "http://": if options.thread[:7] == "http://":
t = download.get_image_links("", [options.thread]) t = downloader.get_image_links("", [options.thread])
elif options.tempcat: elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat) url = "%s%s/res/" % (base_url, options.tempcat)
t = download.get_image_links(url, [options.thread]) t = downloader.get_image_links(url, [options.thread])
else: else:
print "if THREAD is not an absolute URL, CATEGORY must also be specified" print "if THREAD is not an absolute URL, CATEGORY must also be specified"
exit(1) exit(1)
(skipped, failed, downloaded, total) = download.get_images(t) (skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded print "Downloaded: ", downloaded
print "Skipped: ", skipped print "Skipped: ", skipped
print "Failed: ", failed print "Failed: ", failed
@ -112,9 +130,9 @@ elif options.tempcat:
base_url = "%s%s/" % (base_url, config.Configuration().get_category()) base_url = "%s%s/" % (base_url, config.Configuration().get_category())
t = download.get_thread_links(base_url) t = downloader.get_thread_links(base_url)
t = download.get_image_links(base_url, t) t = downloader.get_image_links(base_url, t)
(skipped, failed, downloaded, total) = download.get_images(t) (skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded print "Downloaded: ", downloaded
print "Skipped: ", skipped print "Skipped: ", skipped
print "Failed: ", failed print "Failed: ", failed

View file

@ -22,104 +22,108 @@
import urllib import urllib
import os import os
import htmlparser import htmlparser
import progressbar #import progressbar
import config import config
savedir = config.Configuration().get_download_location() savedir = config.Configuration().get_download_location()
if not os.path.exists(savedir): if not os.path.exists(savedir):
os.makedirs(savedir) os.makedirs(savedir)
def get_thread_links(baseurl): class Downloader(object):
myparser = htmlparser.MyParser() def __init__(self, progress_reporter):
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] self.progress_reporter = progress_reporter
i = 1
total = len(t)
progress = progressbar.Progress(total)
for pagenum in t: def get_thread_links(self, baseurl):
progress.show_progress(i) myparser = htmlparser.MyParser()
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
i = 1
total = len(t)
progress = self.progress_reporter(total)
url = baseurl + pagenum for pagenum in t:
tries = 10 progress.show_progress(i)
while tries > 0:
try:
f = urllib.urlopen(url)
break
except IOError:
tries -= 1
print "\rTry of", url, "failed,", tries, "tries left"
if not f is None:
# Read the response
s = f.read()
f.close()
# Process the page. url = baseurl + pagenum
myparser.parse(s)
else:
"\rOpening of", url, "did not succeed, trying next one..."
i += 1
progress.complete()
return myparser.get_hyperlinks()
def get_image_links(baseurl, t = []):
mysubparser = htmlparser.MySubParser()
total = len(t)
progress = progressbar.Progress(total)
i = 1
for link in t:
progress.show_progress(i)
img_url = baseurl + link
tries = 10
while tries > 0:
try:
f = urllib.urlopen(img_url)
break
except IOError:
tries -= 1
print "\rTry of", img_url, "failed,", tries, "tries left"
if not f is None:
s = f.read()
f.close()
mysubparser.parse(s)
else:
print "\rOpening of", img_url, "did not succeed, trying next one..."
i += 1
progress.complete()
return mysubparser.get_hyperlinks()
def get_images(t = []):
skipped = 0
failed = 0
downloaded = 0
total = len(t)
progress = progressbar.Progress(total)
i = 1
for link in t:
progress.show_progress(i)
filename = os.path.join(savedir, os.path.split(link)[1])
if not os.path.exists(filename):
tries = 10 tries = 10
while tries > 0: while tries > 0:
try: try:
urllib.urlretrieve(link, filename) f = urllib.urlopen(url)
break break
except IOError: except IOError:
tries -= 1 tries -= 1
if tries == 0: print "\rTry of", url, "failed,", tries, "tries left"
failed += 1 if not f is None:
else: # Read the response
downloaded += 1 s = f.read()
else: f.close()
skipped += 1
i += 1
progress.complete() # Process the page.
return (skipped, failed, downloaded, total) myparser.parse(s)
else:
"\rOpening of", url, "did not succeed, trying next one..."
i += 1
progress.complete()
return myparser.get_hyperlinks()
def get_image_links(self, baseurl, t = []):
mysubparser = htmlparser.MySubParser()
total = len(t)
progress = self.progress_reporter(total)
i = 1
for link in t:
progress.show_progress(i)
img_url = baseurl + link
tries = 10
while tries > 0:
try:
f = urllib.urlopen(img_url)
break
except IOError:
tries -= 1
print "\rTry of", img_url, "failed,", tries, "tries left"
if not f is None:
s = f.read()
f.close()
mysubparser.parse(s)
else:
print "\rOpening of", img_url, "did not succeed, trying next one..."
i += 1
progress.complete()
return mysubparser.get_hyperlinks()
def get_images(self, t = []):
skipped = 0
failed = 0
downloaded = 0
total = len(t)
progress = self.progress_reporter(total)
i = 1
for link in t:
progress.show_progress(i)
filename = os.path.join(savedir, os.path.split(link)[1])
if not os.path.exists(filename):
tries = 10
while tries > 0:
try:
urllib.urlretrieve(link, filename)
break
except IOError:
tries -= 1
if tries == 0:
failed += 1
else:
downloaded += 1
else:
skipped += 1
i += 1
progress.complete()
return (skipped, failed, downloaded, total)
if __name__ == "__main__": if __name__ == "__main__":
# Get a file-like object for the 4chan.org w/imgboard # Get a file-like object for the 4chan.org w/imgboard