Removed download.py dependency
download.py no longer requires progressbar.py. It now contains a class that accepts the progress-reporter class as a constructor parameter, which may be useful later when adding different ways of interaction.
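In other words, Downloader takes the reporter class itself rather than an instance; each method instantiates it with the number of items to process and drives it through show_progress() and complete(). Any class exposing that interface can be swapped in. A minimal sketch of an alternative reporter, assuming only the interface visible in this diff (QuietProgress is a hypothetical name, not part of the commit):

    import download

    class QuietProgress(object):
        # Hypothetical reporter with the same interface as
        # progressbar.Progress: constructed with the total item count,
        # poked once per item, told when the job is done.
        def __init__(self, total):
            self.total = total

        def show_progress(self, i):
            # Deliberately silent; a GUI or logging reporter would hook in here.
            pass

        def complete(self):
            print "processed %d items" % self.total

    downloader = download.Downloader(QuietProgress)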
parent 5516dbbcae
commit ba6b659fb8
2 changed files with 120 additions and 98 deletions
4grab.py (48 lines changed)
@@ -24,9 +24,11 @@ import sys
 
 import config
 import download
+import progressbar
 
 base_url = "http://boards.4chan.org/"
 parser = optparse.OptionParser()
+downloader = download.Downloader(progressbar.Progress)
 
 def walk_with_wizard(baseurl):
     print "Alright, let me put on my robe and wizard hat."
@@ -42,19 +44,19 @@ def walk_with_wizard(baseurl):
     if inp == "single":
         inp = raw_input("Which thread would you like to download? ")
         if inp[:7] == "http://":
-            t = download.get_image_links("", [inp])
+            t = downloader.get_image_links("", [inp])
         else:
             thread = inp
             inp = raw_input("Which category is this thread in? ")
-            t = download.get_image_links("%s%s/res/" % (baseurl, inp), [thread])
+            t = downloader.get_image_links("%s%s/res/" % (baseurl, inp), [thread])
     else:
         inp = raw_input("Which category would you like to download? ")
         config.Configuration().set_category(inp)
         baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
 
-        t = download.get_thread_links(baseurl)
-        t = download.get_image_links(baseurl, t)
-    (skipped, failed, downloaded, total) = download.get_images(t)
+        t = downloader.get_thread_links(baseurl)
+        t = downloader.get_image_links(baseurl, t)
+    (skipped, failed, downloaded, total) = downloader.get_images(t)
     print "Downloaded: ", downloaded
     print "Skipped: ", skipped
     print "Failed: ", failed
@@ -67,10 +69,26 @@ parser.set_usage(
 This program comes with ABSOLUTELY NO WARRANTY.
 This is free software, and you are welcome to redistribute it
 under certain conditions.""")
-parser.add_option("-e", nargs=2, dest="confval", metavar="CONF VALUE", help="Set configuration option CONF to be VALUE")
-parser.add_option("-c", "--category", dest="tempcat", metavar="CATEGORY", help="Set the category to CATEGORY only for this run")
-parser.add_option("-t", "--thread", dest="thread", metavar="THREAD", help="Download only THREAD. If THREAD is only an ID, CATEGORY must also be set. Otherwise, no problem :-)")
-parser.add_option("-w", "--wizard", action="store_true", dest="wizard", help="I'll put on my robe and wizard hat and help you get some of those pictures you like")
+parser.add_option("-e",
+                  nargs=2,
+                  dest="confval",
+                  metavar="CONF VALUE",
+                  help="Set configuration option CONF to be VALUE")
+parser.add_option("-c",
+                  "--category",
+                  dest="tempcat",
+                  metavar="CATEGORY",
+                  help="Set the category to CATEGORY only for this run")
+parser.add_option("-t",
+                  "--thread",
+                  dest="thread",
+                  metavar="THREAD",
+                  help="Download only THREAD. If THREAD is only an ID, CATEGORY must also be set. Otherwise, no problem :-)")
+parser.add_option("-w",
+                  "--wizard",
+                  action="store_true",
+                  dest="wizard",
+                  help="I'll put on my robe and wizard hat and help you get some of those pictures you like")
 
 (options, args) = parser.parse_args()
 
@@ -94,14 +112,14 @@ elif options.wizard:
     exit(0)
 elif options.thread:
     if options.thread[:7] == "http://":
-        t = download.get_image_links("", [options.thread])
+        t = downloader.get_image_links("", [options.thread])
     elif options.tempcat:
         url = "%s%s/res/" % (base_url, options.tempcat)
-        t = download.get_image_links(url, [options.thread])
+        t = downloader.get_image_links(url, [options.thread])
     else:
        print "if THREAD is not an absolute URL, CATEGORY must also be specified"
        exit(1)
-    (skipped, failed, downloaded, total) = download.get_images(t)
+    (skipped, failed, downloaded, total) = downloader.get_images(t)
    print "Downloaded: ", downloaded
    print "Skipped: ", skipped
    print "Failed: ", failed
@@ -112,9 +130,9 @@ elif options.tempcat:
 
     base_url = "%s%s/" % (base_url, config.Configuration().get_category())
 
-    t = download.get_thread_links(base_url)
-    t = download.get_image_links(base_url, t)
-    (skipped, failed, downloaded, total) = download.get_images(t)
+    t = downloader.get_thread_links(base_url)
+    t = downloader.get_image_links(base_url, t)
+    (skipped, failed, downloaded, total) = downloader.get_images(t)
     print "Downloaded: ", downloaded
     print "Skipped: ", skipped
     print "Failed: ", failed
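For reference, the option block above defines the following command line. The invocations are illustrative, and the CONF name "category" is an assumption inferred from config.Configuration().set_category rather than spelled out in this diff:

    python 4grab.py -w                  (wizard mode)
    python 4grab.py -c w                (download category "w" for this run only)
    python 4grab.py -c w -t 123456      (download one thread by ID)
    python 4grab.py -t http://boards.4chan.org/w/res/123456   (or by absolute URL)
    python 4grab.py -e category w       (persist a configuration value)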
download.py (168 lines changed)
@@ -22,104 +22,108 @@
 import urllib
 import os
 import htmlparser
-import progressbar
+#import progressbar
 import config
 
 savedir = config.Configuration().get_download_location()
 if not os.path.exists(savedir):
     os.makedirs(savedir)
 
-def get_thread_links(baseurl):
-    myparser = htmlparser.MyParser()
-    t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
-    i = 1
-    total = len(t)
-    progress = progressbar.Progress(total)
-
-    for pagenum in t:
-        progress.show_progress(i)
-
-        url = baseurl + pagenum
-        tries = 10
-        while tries > 0:
-            try:
-                f = urllib.urlopen(url)
-                break
-            except IOError:
-                tries -= 1
-                print "\rTry of", url, "failed,", tries, "tries left"
-        if not f is None:
-            # Read the response
-            s = f.read()
-            f.close()
-
-            # Process the page.
-            myparser.parse(s)
-        else:
-            "\rOpening of", url, "did not succeed, trying next one..."
-        i += 1
-
-    progress.complete()
-    return myparser.get_hyperlinks()
-
-def get_image_links(baseurl, t = []):
-    mysubparser = htmlparser.MySubParser()
-    total = len(t)
-    progress = progressbar.Progress(total)
-    i = 1
-
-    for link in t:
-        progress.show_progress(i)
-
-        img_url = baseurl + link
-        tries = 10
-        while tries > 0:
-            try:
-                f = urllib.urlopen(img_url)
-                break
-            except IOError:
-                tries -= 1
-                print "\rTry of", img_url, "failed,", tries, "tries left"
-        if not f is None:
-            s = f.read()
-            f.close()
-
-            mysubparser.parse(s)
-        else:
-            print "\rOpening of", img_url, "did not succeed, trying next one..."
-        i += 1
-
-    progress.complete()
-    return mysubparser.get_hyperlinks()
-
-def get_images(t = []):
-    skipped = 0
-    failed = 0
-    downloaded = 0
-    total = len(t)
-    progress = progressbar.Progress(total)
-    i = 1
-    for link in t:
-        progress.show_progress(i)
-        filename = os.path.join(savedir, os.path.split(link)[1])
-        if not os.path.exists(filename):
-            tries = 10
-            while tries > 0:
-                try:
-                    urllib.urlretrieve(link, filename)
-                    break
-                except IOError:
-                    tries -= 1
-            if tries == 0:
-                failed += 1
-            else:
-                downloaded += 1
-        else:
-            skipped += 1
-        i += 1
-
-    progress.complete()
-    return (skipped, failed, downloaded, total)
+class Downloader(object):
+    def __init__(self, progress_reporter):
+        self.progress_reporter = progress_reporter
+
+    def get_thread_links(self, baseurl):
+        myparser = htmlparser.MyParser()
+        t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
+        i = 1
+        total = len(t)
+        progress = self.progress_reporter(total)
+
+        for pagenum in t:
+            progress.show_progress(i)
+
+            url = baseurl + pagenum
+            tries = 10
+            while tries > 0:
+                try:
+                    f = urllib.urlopen(url)
+                    break
+                except IOError:
+                    tries -= 1
+                    print "\rTry of", url, "failed,", tries, "tries left"
+            if not f is None:
+                # Read the response
+                s = f.read()
+                f.close()
+
+                # Process the page.
+                myparser.parse(s)
+            else:
+                "\rOpening of", url, "did not succeed, trying next one..."
+            i += 1
+
+        progress.complete()
+        return myparser.get_hyperlinks()
+
+    def get_image_links(self, baseurl, t = []):
+        mysubparser = htmlparser.MySubParser()
+        total = len(t)
+        progress = self.progress_reporter(total)
+        i = 1
+
+        for link in t:
+            progress.show_progress(i)
+
+            img_url = baseurl + link
+            tries = 10
+            while tries > 0:
+                try:
+                    f = urllib.urlopen(img_url)
+                    break
+                except IOError:
+                    tries -= 1
+                    print "\rTry of", img_url, "failed,", tries, "tries left"
+            if not f is None:
+                s = f.read()
+                f.close()
+
+                mysubparser.parse(s)
+            else:
+                print "\rOpening of", img_url, "did not succeed, trying next one..."
+            i += 1
+
+        progress.complete()
+        return mysubparser.get_hyperlinks()
+
+    def get_images(self, t = []):
+        skipped = 0
+        failed = 0
+        downloaded = 0
+        total = len(t)
+        progress = self.progress_reporter(total)
+        i = 1
+        for link in t:
+            progress.show_progress(i)
+            filename = os.path.join(savedir, os.path.split(link)[1])
+            if not os.path.exists(filename):
+                tries = 10
+                while tries > 0:
+                    try:
+                        urllib.urlretrieve(link, filename)
+                        break
+                    except IOError:
+                        tries -= 1
+                if tries == 0:
+                    failed += 1
+                else:
+                    downloaded += 1
+            else:
+                skipped += 1
+            i += 1
+
+        progress.complete()
+        return (skipped, failed, downloaded, total)
 
 if __name__ == "__main__":
     # Get a file-like object for the 4chan.org w/imgboard
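Taken together, the new call sequence in client code looks like this (a sketch assembled from the hunks above; the board URL is only an example):

    import download
    import progressbar

    downloader = download.Downloader(progressbar.Progress)

    base_url = "http://boards.4chan.org/w/"
    t = downloader.get_thread_links(base_url)
    t = downloader.get_image_links(base_url, t)
    (skipped, failed, downloaded, total) = downloader.get_images(t)
    print "Downloaded: ", downloaded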