From 14e2b0cc546975da9c65576a34e80c6e21cc7a79 Mon Sep 17 00:00:00 2001
From: ryuslash
Date: Thu, 25 Mar 2010 22:28:08 +0100
Subject: non-fixed page count

4grab no longer assumes either 11 or 16 pages; it will keep trying to
collect new pages until it receives a 404 error
---
 download.py | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/download.py b/download.py
index 62a611b..60ec62a 100644
--- a/download.py
+++ b/download.py
@@ -23,6 +23,7 @@ import urllib
 import os
 import htmlparser
 import config
+import sys
 
 def get_savedir():
     conf = config.Configuration()
@@ -36,6 +37,9 @@ def check_archive(fullpath):
     filename = os.path.basename(fullpath)
     archfile = os.path.join(archive, filename)
     return os.path.exists(archfile)
+def write(message):
+    sys.stdout.write(message)
+    sys.stdout.flush()
 
 class Downloader(object):
     def __init__(self, progress_reporter):
@@ -47,17 +51,12 @@ class Downloader(object):
 
     def get_thread_links(self, baseurl):
         myparser = htmlparser.MyParser()
-        t = ["0", "1", "2", "3", "4",
-             "5", "6", "7", "8", "9",
-             "10", "11", "12", "13", "14", "15"]
-        i = 1
-        total = len(t)
-        progress = self.progress_reporter(total)
-
-        for pagenum in t:
-            progress.show_progress(i)
-
-            url = baseurl + pagenum
+        i = 0
+        code = 0
+        url = None
+
+        while code != 404:
+            url = baseurl + str(i)
             tries = 10
             while tries > 0:
                 try:
@@ -65,8 +64,13 @@ class Downloader(object):
                     break
                 except IOError:
                     tries -= 1
-                    print "\rTry of", url, "failed,", tries, "tries left"
+                    write("\rTry of %s failed, %d tries left" % (url, tries))
             if not f is None:
+                code = f.getcode()
+                if code == 404:
+                    write("\rCollected %d pages\n" % i)
+                    f.close()
+                    continue
                 # Read the response
                 s = f.read()
                 f.close()
@@ -74,10 +78,11 @@ class Downloader(object):
                 # Process the page.
                 myparser.parse(s)
             else:
-                "\rOpening of", url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, trying next one..." \
+                      % url)
             i += 1
+        write("\rCollected %d pages" % i)
 
-        progress.complete()
         return myparser.get_hyperlinks()
 
     def get_image_links(self, baseurl, t = []):
@@ -97,14 +102,16 @@ class Downloader(object):
                 break
             except IOError:
                 tries -= 1
-                print "\rTry of", img_url, "failed,", tries, "tries left"
+                write("\rTry of %s failed, %d tries left" \
+                      % (img_url, tries))
             if not f is None:
                 s = f.read()
                 f.close()
 
                 mysubparser.parse(s)
             else:
-                print "\rOpening of", img_url, "did not succeed, trying next one..."
+                write("\rOpening of %s did not succeed, " \
+                      "trying next one..." % img_url)
             i += 1
 
         progress.complete()
--
cgit v1.2.3-54-g00ecf
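
The heart of this change is the fetch-until-404 loop. As a standalone
sketch (Python 2, matching the codebase's use of urllib; collect_pages
is a hypothetical helper, not a function in 4grab), the new control
flow looks roughly like this:

    import urllib

    def collect_pages(baseurl, max_tries=10):
        # Hypothetical helper -- 4grab does this inline in
        # Downloader.get_thread_links. Yields the HTML of
        # baseurl + "0", baseurl + "1", ... until the server
        # answers 404.
        i = 0
        while True:
            url = baseurl + str(i)
            f = None
            tries = max_tries
            while tries > 0:
                try:
                    f = urllib.urlopen(url)  # does not raise on 404
                    break
                except IOError:
                    tries -= 1
            if f is not None:
                try:
                    if f.getcode() == 404:
                        return  # past the last page: stop iterating
                    yield f.read()
                finally:
                    f.close()
            # a page that would not open is skipped, as in the patch
            i += 1

The sketch relies on the same behaviour the patch does: plain
urllib.urlopen(), unlike urllib2, does not raise on an HTTP 404 but
returns a response whose getcode() reports the status, so the status
code itself can serve as the loop's stopping condition.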