summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ryuslash 2010-03-25 22:28:32 +0100
committer ryuslash 2010-03-25 22:28:32 +0100
commit7ab6d2911f102e498e0b7cbcecb357e3020a42a8 (patch)
tree5a8656993a76128f36ece9a55fa75cd6cbc5556f
parent018abb7da1da1c5ec4b5ae06e1ccdbc07596885f (diff)
parent14e2b0cc546975da9c65576a34e80c6e21cc7a79 (diff)
download4grab-7ab6d2911f102e498e0b7cbcecb357e3020a42a8.tar.gz
4grab-7ab6d2911f102e498e0b7cbcecb357e3020a42a8.zip
Merge branch 'non-fixed-pages' into develop
-rw-r--r--download.py39
1 file changed, 23 insertions(+), 16 deletions(-)
diff --git a/download.py b/download.py
index 62a611b..60ec62a 100644
--- a/download.py
+++ b/download.py
@@ -23,6 +23,7 @@ import urllib
import os
import htmlparser
import config
+import sys
def get_savedir():
conf = config.Configuration()
@@ -36,6 +37,9 @@ def check_archive(fullpath):
filename = os.path.basename(fullpath)
archfile = os.path.join(archive, filename)
return os.path.exists(archfile)
+def write(message):
+ sys.stdout.write(message)
+ sys.stdout.flush()
class Downloader(object):
def __init__(self, progress_reporter):
@@ -47,17 +51,12 @@ class Downloader(object):
def get_thread_links(self, baseurl):
myparser = htmlparser.MyParser()
- t = ["0", "1", "2", "3", "4",
- "5", "6", "7", "8", "9",
- "10", "11", "12", "13", "14", "15"]
- i = 1
- total = len(t)
- progress = self.progress_reporter(total)
-
- for pagenum in t:
- progress.show_progress(i)
-
- url = baseurl + pagenum
+ i = 0
+ code = 0
+ url = None
+
+ while code != 404:
+ url = baseurl + str(i)
tries = 10
while tries > 0:
try:
@@ -65,8 +64,13 @@ class Downloader(object):
break
except IOError:
tries -= 1
- print "\rTry of", url, "failed,", tries, "tries left"
+ write("\rTry of %s failed, %d tries left" % (url, tries))
if not f is None:
+ code = f.getcode()
+ if code == 404:
+ write("\rCollected %d pages\n" % i)
+ f.close()
+ continue
# Read the response
s = f.read()
f.close()
@@ -74,10 +78,11 @@ class Downloader(object):
# Process the page.
myparser.parse(s)
else:
- "\rOpening of", url, "did not succeed, trying next one..."
+ write("\rOpening of %s did not succeed, trying next one..." \
+ % url)
i += 1
+ write("\rCollected %d pages" % i)
- progress.complete()
return myparser.get_hyperlinks()
def get_image_links(self, baseurl, t = []):
@@ -97,14 +102,16 @@ class Downloader(object):
break
except IOError:
tries -= 1
- print "\rTry of", img_url, "failed,", tries, "tries left"
+ write("\rTry of %s failed, %d tries left" \
+ % (img_url, tries))
if not f is None:
s = f.read()
f.close()
mysubparser.parse(s)
else:
- print "\rOpening of", img_url, "did not succeed, trying next one..."
+ write("\rOpening of %s did not succeed, " \
+ "trying next one..." % img_url)
i += 1
progress.complete()