Merge branch 'non-fixed-pages' into develop
This commit is contained in: commit 7ab6d2911f

1 changed file with 23 additions and 16 deletions
download.py: 39 lines changed
@@ -23,6 +23,7 @@ import urllib
 import os
 import htmlparser
 import config
+import sys
 
 def get_savedir():
     conf = config.Configuration()
@@ -36,6 +37,9 @@ def check_archive(fullpath):
     filename = os.path.basename(fullpath)
     archfile = os.path.join(archive, filename)
     return os.path.exists(archfile)
+def write(message):
+    sys.stdout.write(message)
+    sys.stdout.flush()
 
 class Downloader(object):
     def __init__(self, progress_reporter):
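A note on the new write() helper: the progress messages in this commit begin with "\r" and carry no trailing newline, and a line-buffered stdout will hold such output back until a newline arrives, so flushing after every write is what makes the carriage-return redraw visible immediately. A minimal sketch of the effect (the loop bounds and sleep interval are illustrative, not part of this commit):

    import sys
    import time

    def write(message):
        sys.stdout.write(message)
        sys.stdout.flush()

    # Each iteration redraws the same terminal line in place;
    # without flush() the updates could arrive in one burst at the end.
    for n in range(1, 6):
        write("\rCollected %d pages" % n)
        time.sleep(0.5)
    write("\n")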
@@ -47,17 +51,12 @@ class Downloader(object):
 
     def get_thread_links(self, baseurl):
         myparser = htmlparser.MyParser()
-        t = ["0", "1", "2", "3", "4",
-             "5", "6", "7", "8", "9",
-             "10", "11", "12", "13", "14", "15"]
-        i = 1
-        total = len(t)
-        progress = self.progress_reporter(total)
+        i = 0
+        code = 0
+        url = None
+        while code != 404:
+            url = baseurl + str(i)
 
-        for pagenum in t:
-            progress.show_progress(i)
-
-            url = baseurl + pagenum
             tries = 10
             while tries > 0:
                 try:
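This hunk is the heart of the branch: the hard-coded page list "0" through "15" (and the progress reporter sized to it) gives way to an open-ended loop that requests baseurl + str(i) for i = 0, 1, 2, ... until the server answers 404, so threads with any number of pages are collected. A stripped-down sketch of that control flow, assuming urllib.urlopen() as used elsewhere in download.py (in Python 2 it does not raise on 404; the status comes back via getcode(), which the diff itself relies on); collect_pages and the parse callback are hypothetical names:

    import urllib

    def collect_pages(baseurl, parse):
        # Probe consecutive page numbers until the server reports 404.
        i = 0
        while True:
            f = urllib.urlopen(baseurl + str(i))
            if f.getcode() == 404:  # past the last page
                f.close()
                break
            parse(f.read())         # hand each page to the caller
            f.close()
            i += 1
        return i                    # number of pages collected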
@@ -65,8 +64,13 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" % (url, tries))
             if not f is None:
+                code = f.getcode()
+                if code == 404:
+                    write("\rCollected %d pages\n" % i)
+                    f.close()
+                    continue
                 # Read the response
                 s = f.read()
                 f.close()
@@ -74,10 +78,11 @@ class Downloader(object):
                 # Process the page.
                 myparser.parse(s)
             else:
-                "\rOpening of", url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, trying next one..." \
+                      % url)
             i += 1
+        write("\rCollected %d pages" % i)
 
-        progress.complete()
         return myparser.get_hyperlinks()
 
     def get_image_links(self, baseurl, t = []):
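Worth flagging in the else branch above: the removed line had lost its print statement, so it merely built a tuple and discarded it, and the message was never shown. The replacement write() call actually emits it. In Python 2 terms (the url value here is a hypothetical placeholder):

    url = "http://example.com/thread/3"          # hypothetical value
    "\rOpening of", url, "did not succeed"       # builds a tuple; prints nothing
    print "\rOpening of", url, "did not succeed" # the statement that was missing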
@@ -97,14 +102,16 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", img_url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" \
+                          % (img_url, tries))
             if not f is None:
                 s = f.read()
                 f.close()
 
                 mysubparser.parse(s)
             else:
-                print "\rOpening of", img_url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, " \
+                      "trying next one..." % img_url)
             i += 1
 
         progress.complete()
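Both get_thread_links() and get_image_links() keep the same retry shape: up to 10 attempts per URL, decrementing tries on each IOError, with f left as None when every attempt fails. A self-contained sketch of that pattern (the function name and the None-on-failure convention are illustrative, not from this commit):

    import urllib

    def fetch_with_retries(url, tries=10):
        # Retry transient IOErrors; give up after `tries` attempts.
        f = None
        while tries > 0:
            try:
                f = urllib.urlopen(url)
                break            # success: stop retrying
            except IOError:
                tries -= 1       # network hiccup: try again
        if f is None:
            return None          # every attempt raised IOError
        body = f.read()
        f.close()
        return body

A call such as fetch_with_retries(baseurl + str(i)) would return the page body or None, mirroring the f-is-None check in the hunks above.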