non-fixed page count

4grab no longer assumes either 11 or 16 pages; it keeps requesting new pages until it receives a 404 error.
ryuslash 2010-03-25 22:28:08 +01:00
parent 018abb7da1
commit 14e2b0cc54
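For reference, a minimal standalone sketch of the page-collection loop this commit introduces: request page 0, 1, 2, ... and stop at the first 404. It assumes Python 2 and the urllib.urlopen/getcode() calls used in the patched file; the collect_pages name and its return value are illustrative only, and the per-page retry logic of the real downloader is left out.

    import sys
    import urllib

    def collect_pages(baseurl):
        """Fetch baseurl + "0", baseurl + "1", ... until the server answers 404."""
        pages = []
        i = 0
        while True:
            url = baseurl + str(i)
            try:
                f = urllib.urlopen(url)
            except IOError:
                # Network error: skip this page number and try the next one.
                i += 1
                continue
            if f.getcode() == 404:
                # Past the last page; stop collecting.
                f.close()
                break
            pages.append(f.read())
            f.close()
            i += 1
        sys.stdout.write("\rCollected %d pages\n" % i)
        sys.stdout.flush()
        return pages

The commit also swaps the print statements for a small write() helper that flushes stdout, so the "\r"-prefixed progress messages overwrite each other on a single line instead of being buffered.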


@@ -23,6 +23,7 @@ import urllib
 import os
 import htmlparser
 import config
+import sys
 
 def get_savedir():
     conf = config.Configuration()
@@ -36,6 +37,9 @@ def check_archive(fullpath):
     filename = os.path.basename(fullpath)
     archfile = os.path.join(archive, filename)
     return os.path.exists(archfile)
+def write(message):
+    sys.stdout.write(message)
+    sys.stdout.flush()
 
 class Downloader(object):
     def __init__(self, progress_reporter):
@@ -47,17 +51,12 @@ class Downloader(object):
     def get_thread_links(self, baseurl):
         myparser = htmlparser.MyParser()
 
-        t = ["0", "1", "2", "3", "4",
-             "5", "6", "7", "8", "9",
-             "10", "11", "12", "13", "14", "15"]
-        i = 1
-        total = len(t)
-        progress = self.progress_reporter(total)
-        for pagenum in t:
-            progress.show_progress(i)
-            url = baseurl + pagenum
+        i = 0
+        code = 0
+        url = None
+
+        while code != 404:
+            url = baseurl + str(i)
             tries = 10
             while tries > 0:
                 try:
@@ -65,8 +64,13 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" % (url, tries))
             if not f is None:
+                code = f.getcode()
+                if code == 404:
+                    write("\rCollected %d pages\n" % i)
+                    f.close()
+                    continue
                 # Read the response
                 s = f.read()
                 f.close()
@@ -74,10 +78,11 @@ class Downloader(object):
                 # Process the page.
                 myparser.parse(s)
             else:
-                "\rOpening of", url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, trying next one..." \
+                      % url)
             i += 1
-        progress.complete()
+        write("\rCollected %d pages" % i)
 
         return myparser.get_hyperlinks()
 
     def get_image_links(self, baseurl, t = []):
@@ -97,14 +102,16 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", img_url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" \
+                          % (img_url, tries))
             if not f is None:
                 s = f.read()
                 f.close()
                 mysubparser.parse(s)
             else:
-                print "\rOpening of", img_url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, " \
+                      "trying next one..." % img_url)
             i += 1
         progress.complete()