non-fixed page count

4grab no longer assumes either 11 or 16 pages; it keeps requesting new pages until it receives a 404 error.
ryuslash 2010-03-25 22:28:08 +01:00
parent 018abb7da1
commit 14e2b0cc54
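For reference, a minimal standalone sketch of the page-collection loop this commit introduces: request page 0, 1, 2, ... and stop at the first 404. It assumes Python 2 and the urllib.urlopen/getcode() calls used in the patched file; the collect_pages name and its return value are illustrative only, and the per-page retry logic of the real downloader is left out.

    import sys
    import urllib

    def collect_pages(baseurl):
        """Fetch baseurl + "0", baseurl + "1", ... until the server answers 404."""
        pages = []
        i = 0
        while True:
            url = baseurl + str(i)
            try:
                f = urllib.urlopen(url)
            except IOError:
                # Network error: skip this page number and try the next one.
                i += 1
                continue
            if f.getcode() == 404:
                # Past the last page; stop collecting.
                f.close()
                break
            pages.append(f.read())
            f.close()
            i += 1
        sys.stdout.write("\rCollected %d pages\n" % i)
        sys.stdout.flush()
        return pages

The commit also swaps the print statements for a small write() helper that flushes stdout, so the "\r"-prefixed progress messages overwrite each other on a single line instead of being buffered.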


@@ -23,6 +23,7 @@ import urllib
 import os
 import htmlparser
 import config
+import sys
 
 def get_savedir():
     conf = config.Configuration()
@@ -36,6 +37,9 @@ def check_archive(fullpath):
     filename = os.path.basename(fullpath)
     archfile = os.path.join(archive, filename)
     return os.path.exists(archfile)
+def write(message):
+    sys.stdout.write(message)
+    sys.stdout.flush()
 
 class Downloader(object):
     def __init__(self, progress_reporter):
@@ -47,17 +51,12 @@ class Downloader(object):
     def get_thread_links(self, baseurl):
         myparser = htmlparser.MyParser()
 
-        t = ["0", "1", "2", "3", "4",
-             "5", "6", "7", "8", "9",
-             "10", "11", "12", "13", "14", "15"]
-        i = 1
-        total = len(t)
-        progress = self.progress_reporter(total)
-        for pagenum in t:
-            progress.show_progress(i)
-            url = baseurl + pagenum
+        i = 0
+        code = 0
+        url = None
+
+        while code != 404:
+            url = baseurl + str(i)
             tries = 10
             while tries > 0:
                 try:
@@ -65,8 +64,13 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" % (url, tries))
             if not f is None:
+                code = f.getcode()
+                if code == 404:
+                    write("\rCollected %d pages\n" % i)
+                    f.close()
+                    continue
                 # Read the response
                 s = f.read()
                 f.close()
@@ -74,10 +78,11 @@ class Downloader(object):
                 # Process the page.
                 myparser.parse(s)
             else:
-                "\rOpening of", url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, trying next one..." \
+                      % url)
             i += 1
-        progress.complete()
+        write("\rCollected %d pages" % i)
 
         return myparser.get_hyperlinks()
 
     def get_image_links(self, baseurl, t = []):
@@ -97,14 +102,16 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", img_url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" \
+                          % (img_url, tries))
             if not f is None:
                 s = f.read()
                 f.close()
                 mysubparser.parse(s)
             else:
-                print "\rOpening of", img_url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, " \
+                      "trying next one..." % img_url)
             i += 1
         progress.complete()