cc571e6d80
If savedir did not exist, downloading would never work. Now, if it does not exist, it is created; if creation isn't allowed, the script crashes and burns.
99 lines
2.7 KiB
Python
#!/usr/bin/env python

import urllib
import os

import htmlparser
import progressbar
import config

savedir = config.Configuration().get_download_location()
if not os.path.exists(savedir):
    os.makedirs(savedir)

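# Note (a sketch, not part of this commit): os.makedirs raises OSError when
# the directory cannot be created, e.g. for permission reasons; that is the
# "crashes and burns" case from the commit message. A variant that fails
# with a clearer message could look like:
#
#     try:
#         os.makedirs(savedir)
#     except OSError, e:
#         raise SystemExit("cannot create %s: %s" % (savedir, e))
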
def get_thread_links(baseurl):
    myparser = htmlparser.MyParser()
    t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
    i = 1
    total = len(t)
    progress = progressbar.Progress(total)

    for pagenum in t:
        progress.show_progress(i)

        url = baseurl + pagenum
        # Start from None so a total failure of all tries is detectable below.
        f = None
        tries = 10
        while tries > 0:
            try:
                f = urllib.urlopen(url)
                break
            except IOError:
                tries -= 1
                print "\rTry of", url, "failed,", tries, "tries left"
        if f is not None:
            # Read the response.
            s = f.read()
            f.close()

            # Process the page.
            myparser.parse(s)
        else:
            print "\rOpening of", url, "did not succeed, trying next one..."
        i += 1
    return myparser.get_hyperlinks()

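# The fetch-with-retries loop above is repeated almost verbatim in
# get_image_links() below. A possible refactor (a sketch only; the helper
# name fetch_with_retries is hypothetical, not part of this script):
#
#     def fetch_with_retries(url, tries=10):
#         while tries > 0:
#             try:
#                 return urllib.urlopen(url)
#             except IOError:
#                 tries -= 1
#                 print "\rTry of", url, "failed,", tries, "tries left"
#         return None
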
def get_image_links(baseurl, t=[]):
    mysubparser = htmlparser.MySubParser()
    total = len(t)
    progress = progressbar.Progress(total)
    i = 1

    for link in t:
        progress.show_progress(i)

        img_url = baseurl + link
        # Start from None so a total failure of all tries is detectable below.
        f = None
        tries = 10
        while tries > 0:
            try:
                f = urllib.urlopen(img_url)
                break
            except IOError:
                tries -= 1
                print "\rTry of", img_url, "failed,", tries, "tries left"
        if f is not None:
            s = f.read()
            f.close()

            mysubparser.parse(s)
        else:
            print "\rOpening of", img_url, "did not succeed, trying next one..."
        i += 1

    return mysubparser.get_hyperlinks()

def get_images(t=[]):
    total = len(t)
    progress = progressbar.Progress(total)
    i = 1
    for link in t:
        progress.show_progress(i)
        filename = os.path.join(savedir, os.path.split(link)[1])
        if not os.path.exists(filename):
            tries = 10
            while tries > 0:
                try:
                    urllib.urlretrieve(link, filename)
                    break
                except IOError:
                    tries -= 1
                    print "\rDownloading of", link, "failed,", tries, "left"
        else:
            print "\rNot downloading", link, "already downloaded"
        i += 1

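# Caveat worth noting (an observation, not a fix in this commit): if
# urlretrieve fails partway through, it can leave a truncated file behind,
# and the os.path.exists() check above will then skip that file forever.
# A sketch of a guard inside the except IOError branch:
#
#     if os.path.exists(filename):
#         os.remove(filename)  # drop the partial download before retrying
#     tries -= 1
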
if __name__ == "__main__":
    # Get a file-like object for the 4chan.org w/imgboard
    base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"

    # Get the hyperlinks.
    t = get_thread_links(base_url)
    t = get_image_links(base_url, t)
    get_images(t)