From 14e2b0cc546975da9c65576a34e80c6e21cc7a79 Mon Sep 17 00:00:00 2001
From: ryuslash
Date: Thu, 25 Mar 2010 22:28:08 +0100
Subject: non-fixed page count

4grab no longer assumes either 11 or 16 pages; it will keep trying to
collect new pages until it receives a 404 error
---
 download.py | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/download.py b/download.py
index 62a611b..60ec62a 100644
--- a/download.py
+++ b/download.py
@@ -23,6 +23,7 @@ import urllib
 import os
 import htmlparser
 import config
+import sys
 
 def get_savedir():
     conf = config.Configuration()
@@ -36,6 +37,9 @@ def check_archive(fullpath):
     filename = os.path.basename(fullpath)
     archfile = os.path.join(archive, filename)
     return os.path.exists(archfile)
+def write(message):
+    sys.stdout.write(message)
+    sys.stdout.flush()
 
 class Downloader(object):
     def __init__(self, progress_reporter):
@@ -47,17 +51,12 @@ class Downloader(object):
 
     def get_thread_links(self, baseurl):
         myparser = htmlparser.MyParser()
-        t = ["0", "1", "2", "3", "4",
-             "5", "6", "7", "8", "9",
-             "10", "11", "12", "13", "14", "15"]
-        i = 1
-        total = len(t)
-        progress = self.progress_reporter(total)
-
-        for pagenum in t:
-            progress.show_progress(i)
-
-            url = baseurl + pagenum
+        i = 0
+        code = 0
+        url = None
+
+        while code != 404:
+            url = baseurl + str(i)
             tries = 10
             while tries > 0:
                 try:
@@ -65,8 +64,13 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" % (url, tries))
             if not f is None:
+                code = f.getcode()
+                if code == 404:
+                    write("\rCollected %d pages\n" % i)
+                    f.close()
+                    continue
                 # Read the response
                 s = f.read()
                 f.close()
@@ -74,10 +78,11 @@ class Downloader(object):
                 # Process the page.
                 myparser.parse(s)
             else:
-                "\rOpening of", url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, trying next one..." \
+                      % url)
             i += 1
+        write("\rCollected %d pages" % i)
 
-        progress.complete()
         return myparser.get_hyperlinks()
 
     def get_image_links(self, baseurl, t = []):
@@ -97,14 +102,16 @@ class Downloader(object):
                 break
             except IOError:
                 tries -= 1
-                print "\rTry of", img_url, "failed,", tries, "tries left"
+                write("\rTry of %s failed, %d tries left" \
+                      % (img_url, tries))
             if not f is None:
                 s = f.read()
                 f.close()
 
                 mysubparser.parse(s)
             else:
-                print "\rOpening of", img_url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, " \
+                      "trying next one..." % img_url)
             i += 1
 
         progress.complete()
--
cgit v1.2.3-54-g00ecf
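
The heart of this change is the fetch-until-404 loop. As a standalone
sketch (Python 2, matching the codebase's use of urllib; collect_pages
is a hypothetical helper, not a function in 4grab), the new control
flow looks roughly like this:

    import urllib

    def collect_pages(baseurl, max_tries=10):
        # Hypothetical helper -- 4grab does this inline in
        # Downloader.get_thread_links. Yields the HTML of
        # baseurl + "0", baseurl + "1", ... until the server
        # answers 404.
        i = 0
        while True:
            url = baseurl + str(i)
            f = None
            tries = max_tries
            while tries > 0:
                try:
                    f = urllib.urlopen(url)  # does not raise on 404
                    break
                except IOError:
                    tries -= 1
            if f is not None:
                try:
                    if f.getcode() == 404:
                        return  # past the last page: stop iterating
                    yield f.read()
                finally:
                    f.close()
            # a page that would not open is skipped, as in the patch
            i += 1

The sketch relies on the same behaviour the patch does: plain
urllib.urlopen(), unlike urllib2, does not raise on an HTTP 404 but
returns a response whose getcode() reports the status, so the status
code itself can serve as the loop's stopping condition.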