author    ryuslash  2010-03-07 00:20:37 +0100
committer ryuslash  2010-03-07 00:20:37 +0100
commit    ba6b659fb80e843c1e7062c116a856873e14ad6a (patch)
tree      f305587466cdfd54b9242558e3e1e006229ba7e3
parent    5516dbbcae07b9623565c9dc8f06267b3357366e (diff)
download  4grab-ba6b659fb80e843c1e7062c116a856873e14ad6a.tar.gz
          4grab-ba6b659fb80e843c1e7062c116a856873e14ad6a.zip
Removed download.py's progressbar dependency
download.py no longer requires progressbar.py: it now contains a Downloader class that takes a progress-reporter class as a constructor argument. This may be useful later when adding different ways of interaction.
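In outline, the change is plain constructor injection: download.Downloader is handed a reporter class and instantiates it itself once it knows how many items it will process. Below is a minimal sketch of the protocol the injected class must satisfy, as exercised by the calls in this diff; the QuietProgress reporter is hypothetical, while the real default is progressbar.Progress, which 4grab.py now passes in:

    import download

    # Hypothetical reporter. Any class with this constructor and these two
    # methods can stand in for progressbar.Progress.
    class QuietProgress(object):
        def __init__(self, total):
            self.total = total   # number of items about to be processed
        def show_progress(self, i):
            pass                 # called once per item; stay silent here
        def complete(self):
            pass                 # called once after the loop finishes

    downloader = download.Downloader(QuietProgress)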
-rwxr-xr-x  4grab.py      48
-rw-r--r--  download.py  162
2 files changed, 116 insertions(+), 94 deletions(-)
diff --git a/4grab.py b/4grab.py
index ae550b4..7240f16 100755
--- a/4grab.py
+++ b/4grab.py
@@ -24,9 +24,11 @@ import sys
import config
import download
+import progressbar
base_url = "http://boards.4chan.org/"
parser = optparse.OptionParser()
+downloader = download.Downloader(progressbar.Progress)
def walk_with_wizard(baseurl):
print "Alright, let me put on my robe and wizard hat."
@@ -42,19 +44,19 @@ def walk_with_wizard(baseurl):
if inp == "single":
inp = raw_input("Which thread would you like to download? ")
if inp[:7] == "http://":
- t = download.get_image_links("", [inp])
+ t = downloader.get_image_links("", [inp])
else:
thread = inp
inp = raw_input("Which category is this thread in? ")
- t = download.get_image_links("%s%s/res/" % (baseurl, inp), [thread])
+ t = downloader.get_image_links("%s%s/res/" % (baseurl, inp), [thread])
else:
inp = raw_input("Which category would you like to download? ")
config.Configuration().set_category(inp)
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
- t = download.get_thread_links(baseurl)
- t = download.get_image_links(baseurl, t)
- (skipped, failed, downloaded, total) = download.get_images(t)
+ t = downloader.get_thread_links(baseurl)
+ t = downloader.get_image_links(baseurl, t)
+ (skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
@@ -67,10 +69,26 @@ parser.set_usage(
This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions.""")
-parser.add_option("-e", nargs=2, dest="confval", metavar="CONF VALUE", help="Set configuration option CONF to be VALUE")
-parser.add_option("-c", "--category", dest="tempcat", metavar="CATEGORY", help="Set the category to CATEGORY only for this run")
-parser.add_option("-t", "--thread", dest="thread", metavar="THREAD", help="Download only THREAD. If THREAD is only an ID, CATEGORY must also be set. Otherwise, no problem :-)")
-parser.add_option("-w", "--wizard", action="store_true", dest="wizard", help="I'll put on my robe and wizard hat and help you get some of those pictures you like")
+parser.add_option("-e",
+ nargs=2,
+ dest="confval",
+ metavar="CONF VALUE",
+ help="Set configuration option CONF to be VALUE")
+parser.add_option("-c",
+ "--category",
+ dest="tempcat",
+ metavar="CATEGORY",
+ help="Set the category to CATEGORY only for this run")
+parser.add_option("-t",
+ "--thread",
+ dest="thread",
+ metavar="THREAD",
+ help="Download only THREAD. If THREAD is only an ID, CATEGORY must also be set. Otherwise, no problem :-)")
+parser.add_option("-w",
+ "--wizard",
+ action="store_true",
+ dest="wizard",
+ help="I'll put on my robe and wizard hat and help you get some of those pictures you like")
(options, args) = parser.parse_args()
@@ -94,14 +112,14 @@ elif options.wizard:
exit(0)
elif options.thread:
if options.thread[:7] == "http://":
- t = download.get_image_links("", [options.thread])
+ t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
- t = download.get_image_links(url, [options.thread])
+ t = downloader.get_image_links(url, [options.thread])
else:
print "if THREAD is not an absolute URL, CATEGORY must also be specified"
exit(1)
- (skipped, failed, downloaded, total) = download.get_images(t)
+ (skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
@@ -112,9 +130,9 @@ elif options.tempcat:
base_url = "%s%s/" % (base_url, config.Configuration().get_category())
-t = download.get_thread_links(base_url)
-t = download.get_image_links(base_url, t)
-(skipped, failed, downloaded, total) = download.get_images(t)
+t = downloader.get_thread_links(base_url)
+t = downloader.get_image_links(base_url, t)
+(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
diff --git a/download.py b/download.py
index b9f9f72..2405805 100644
--- a/download.py
+++ b/download.py
@@ -22,104 +22,108 @@
import urllib
import os
import htmlparser
-import progressbar
+#import progressbar
import config
savedir = config.Configuration().get_download_location()
if not os.path.exists(savedir):
os.makedirs(savedir)
-def get_thread_links(baseurl):
- myparser = htmlparser.MyParser()
- t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
- i = 1
- total = len(t)
- progress = progressbar.Progress(total)
+class Downloader(object):
+ def __init__(self, progress_reporter):
+ self.progress_reporter = progress_reporter
- for pagenum in t:
- progress.show_progress(i)
-
- url = baseurl + pagenum
- tries = 10
- while tries > 0:
- try:
- f = urllib.urlopen(url)
- break
- except IOError:
- tries -= 1
- print "\rTry of", url, "failed,", tries, "tries left"
- if not f is None:
- # Read the response
- s = f.read()
- f.close()
-
- # Process the page.
- myparser.parse(s)
- else:
- "\rOpening of", url, "did not succeed, trying next one..."
- i += 1
+ def get_thread_links(self, baseurl):
+ myparser = htmlparser.MyParser()
+ t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
+ i = 1
+ total = len(t)
+ progress = self.progress_reporter(total)
+
+ for pagenum in t:
+ progress.show_progress(i)
+
+ url = baseurl + pagenum
+ tries = 10
+ while tries > 0:
+ try:
+ f = urllib.urlopen(url)
+ break
+ except IOError:
+ tries -= 1
+ print "\rTry of", url, "failed,", tries, "tries left"
+ if not f is None:
+ # Read the response
+ s = f.read()
+ f.close()
+
+ # Process the page.
+ myparser.parse(s)
+ else:
+            print "\rOpening of", url, "did not succeed, trying next one..."
+ i += 1
- progress.complete()
- return myparser.get_hyperlinks()
+ progress.complete()
+ return myparser.get_hyperlinks()
-def get_image_links(baseurl, t = []):
- mysubparser = htmlparser.MySubParser()
- total = len(t)
- progress = progressbar.Progress(total)
- i = 1
+ def get_image_links(self, baseurl, t = []):
+ mysubparser = htmlparser.MySubParser()
+ total = len(t)
+ progress = self.progress_reporter(total)
+ i = 1
- for link in t:
- progress.show_progress(i)
-
- img_url = baseurl + link
- tries = 10
- while tries > 0:
- try:
- f = urllib.urlopen(img_url)
- break
- except IOError:
- tries -= 1
- print "\rTry of", img_url, "failed,", tries, "tries left"
- if not f is None:
- s = f.read()
- f.close()
-
- mysubparser.parse(s)
- else:
- print "\rOpening of", img_url, "did not succeed, trying next one..."
- i += 1
-
- progress.complete()
- return mysubparser.get_hyperlinks()
+ for link in t:
+ progress.show_progress(i)
-def get_images(t = []):
- skipped = 0
- failed = 0
- downloaded = 0
- total = len(t)
- progress = progressbar.Progress(total)
- i = 1
- for link in t:
- progress.show_progress(i)
- filename = os.path.join(savedir, os.path.split(link)[1])
- if not os.path.exists(filename):
+ img_url = baseurl + link
tries = 10
while tries > 0:
try:
- urllib.urlretrieve(link, filename)
+ f = urllib.urlopen(img_url)
break
except IOError:
tries -= 1
- if tries == 0:
- failed += 1
+ print "\rTry of", img_url, "failed,", tries, "tries left"
+ if not f is None:
+ s = f.read()
+ f.close()
+
+ mysubparser.parse(s)
+ else:
+ print "\rOpening of", img_url, "did not succeed, trying next one..."
+ i += 1
+
+ progress.complete()
+ return mysubparser.get_hyperlinks()
+
+ def get_images(self, t = []):
+ skipped = 0
+ failed = 0
+ downloaded = 0
+ total = len(t)
+ progress = self.progress_reporter(total)
+ i = 1
+ for link in t:
+ progress.show_progress(i)
+ filename = os.path.join(savedir, os.path.split(link)[1])
+ if not os.path.exists(filename):
+ tries = 10
+ while tries > 0:
+ try:
+ urllib.urlretrieve(link, filename)
+ break
+ except IOError:
+ tries -= 1
+ if tries == 0:
+ failed += 1
+ else:
+ downloaded += 1
else:
- downloaded += 1
- else:
- skipped += 1
- i += 1
+ skipped += 1
+ i += 1
- progress.complete()
- return (skipped, failed, downloaded, total)
+ progress.complete()
+ return (skipped, failed, downloaded, total)
if __name__ == "__main__":
# Get a file-like object for the 4chan.org w/imgboard
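With the class in place, any front end can drive the same three-step pipeline 4grab.py uses, without download.py ever importing progressbar itself. A sketch of that flow, mirroring the calls in the diff above (the /wg/ category URL is illustrative):

    import download
    import progressbar

    # Inject the console reporter class, exactly as 4grab.py now does.
    downloader = download.Downloader(progressbar.Progress)
    baseurl = "http://boards.4chan.org/wg/"   # illustrative category URL

    threads = downloader.get_thread_links(baseurl)         # scan pages 0-10 for threads
    images = downloader.get_image_links(baseurl, threads)  # parse each thread for image links
    (skipped, failed, downloaded, total) = downloader.get_images(images)

    print "Downloaded: ", downloaded
    print "Skipped: ", skipped
    print "Failed: ", failed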