Sorting, multi category, multi resolution

After a file has been downloaded, a callback function can now be called. The callback function I call checks whether the resolution of the image appears in the collection of resolutions that has been entered in the configuration file, and deletes or moves the file accordingly. If a file cannot be read (which I have noticed happens sometimes), it is removed, not copied and not archived, so that it can be retried later. 4grab got a new command-line option, -s --sorter, to sort out old images; running python sorter.py has the same effect, but this seemed prettier. Theoretically, multiple categories could now be entered into the configuration file, separated by ',', but this hasn't been tested yet. Multiple resolutions could be entered into the configuration file, separated by ',', like so: 1680x1050,1920x1200. Configuration now checks whether all the necessary properties are available in the configuration file; if one is missing, it tries to create it.
author: ryuslash 2010-03-19 00:18:04 +0100
committer: ryuslash 2010-03-19 00:18:04 +0100
commit: 7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df (patch)
tree: 12564b5ac501f2a35b660d7f5bd7461581f3f6ae /download.py
parent: 4a9cc7e2b608332f3d41aa47f5f1a893a5eab529 (diff)
download: 4grab-7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df.tar.gz
4grab-7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df.zip
1 files changed, 17 insertions, 3 deletions
diff --git a/download.py b/download.py
index 378d281..3b12cc8 100644
--- a/download.py
+++ b/download.py
@@ -22,7 +22,6 @@
 import urllib
 import os
 import htmlparser
-#import progressbar
 import config
 
 def get_savedir():
@@ -31,14 +30,27 @@ def get_savedir():
     if not os.path.exists(savedir):
         os.makedirs(savedir)
     return savedir
+def check_archive(fullpath):
+    conf = config.Configuration()
+    archive = conf.get_archive_location()
+    filename = os.path.basename(fullpath)
+    archfile = os.path.join(archive, filename)
+    #print "Path", archfile, "exists:", os.path.exists(archfile)
+    return os.path.exists(archfile)
 
 class Downloader(object):
     def __init__(self, progress_reporter):
         self.progress_reporter = progress_reporter
+        self.on_downloaded = None
+
+    def set_on_downloaded(self, on_downloaded):
+        self.on_downloaded = on_downloaded
 
     def get_thread_links(self, baseurl):
         myparser = htmlparser.MyParser()
-        t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
+        t = ["0", "1", "2", "3", "4",
+             "5", "6", "7", "8", "9",
+             "10", "11", "12", "13", "14", "15"]
         i = 1
         total = len(t)
         progress = self.progress_reporter(total)
@@ -109,7 +121,7 @@ class Downloader(object):
         for link in t:
             progress.show_progress(i)
             filename = os.path.join(get_savedir(), os.path.split(link)[1])
-            if not os.path.exists(filename):
+            if not check_archive(filename):
                 tries = 10
                 while tries > 0:
                     try:
@@ -121,6 +133,8 @@ class Downloader(object):
                     failed += 1
                 else:
                     downloaded += 1
+                    if self.on_downloaded is not None:
+                        self.on_downloaded(filename)
             else:
                 skipped += 1
             i += 1
author	ryuslash	2010-03-19 00:18:04 +0100
committer	ryuslash	2010-03-19 00:18:04 +0100
commit	7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df (patch)
tree	12564b5ac501f2a35b660d7f5bd7461581f3f6ae /download.py
parent	4a9cc7e2b608332f3d41aa47f5f1a893a5eab529 (diff)
download	4grab-7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df.tar.gz 4grab-7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df.zip