summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author ryuslash 2010-03-19 08:26:26 +0100
committer ryuslash 2010-03-19 08:26:26 +0100
commit 96247d41d50c53e7d3e1c5aa4c6a8dcba6d647f2 (patch)
tree 12564b5ac501f2a35b660d7f5bd7461581f3f6ae
parent caba2811b94577eb89e13d9d1a7f7de64c979acc (diff)
parent 7f8dfa1d30583dd1d8c40c6cd1c079d2a722c9df (diff)
download 4grab-96247d41d50c53e7d3e1c5aa4c6a8dcba6d647f2.tar.gz
4grab-96247d41d50c53e7d3e1c5aa4c6a8dcba6d647f2.zip
Merge branch 'sorting' into develop
-rwxr-xr-x 4grab.py 223
-rw-r--r-- config.py 118
-rw-r--r-- download.py 39
-rw-r--r-- sorter.py 110
4 files changed, 356 insertions, 134 deletions
diff --git a/4grab.py b/4grab.py
index fd12c32..4dfca95 100755
--- a/4grab.py
+++ b/4grab.py
@@ -21,16 +21,27 @@
import optparse
import sys
+import os
import config
+
import download
import progressbar
+import sorter
+
+def raw_input_with_default(default, prompt):
+ inp = raw_input("%s (default=%s): " % (prompt, default))
+ if inp == "":
+ return default
+ return inp
+config._optioncreator = raw_input_with_default
base_url = "http://boards.4chan.org/"
parser = optparse.OptionParser()
downloader = download.Downloader(progressbar.Progress)
def walk_with_wizard(baseurl):
+ conf = config.Configuration()
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
print "Alright, let me put on my robe and wizard hat."
@@ -54,8 +65,8 @@ def walk_with_wizard(baseurl):
[thread])
else:
inp = raw_input("Which category would you like to download? ")
- config.Configuration().set_category(inp)
- baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
+ conf.set_categories([inp])
+ baseurl = "%s%s/" % (baseurl, conf.get_categories()[0])
print wzrd_msg
t = downloader.get_thread_links(baseurl)
@@ -66,102 +77,126 @@ def walk_with_wizard(baseurl):
print "Failed: ", failed
print "Total: ", total
-parser.set_usage(
-"""%prog [options]
+def parse_commands():
+ conf = config.Configuration()
+ parser.set_usage(
+ """%prog [options]
4grab Copyright (C) 2009-2010 ryuslash
This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions.""")
-parser.add_option("-e",
- nargs=2,
- dest="confval",
- metavar="CONF VALUE",
- help="Set configuration option CONF to be VALUE")
-parser.add_option("-c",
- "--category",
- dest="tempcat",
- metavar="CATEGORY",
- help="Set the category to CATEGORY only for this run")
-parser.add_option("-t",
- "--thread",
- dest="thread",
- metavar="THREAD",
- help="Download only THREAD. If THREAD is only an ID, "
- "CATEGORY must also be set. Otherwise, no problem :-)")
-parser.add_option("-w",
- "--wizard",
- action="store_true",
- dest="wizard",
- help="I'll put on my robe and wizard hat and help you "
- "get some of those pictures you like")
-(options, args) = parser.parse_args()
-
-if options.confval and (options.tempcat
- or options.thread
- or options.wizard):
- print "Can't configure something and do something else too."
- exit(1)
-if options.wizard and (options.tempcat
- or options.thread
- or options.confval):
- print "Can't take a walk with the wizard and do something else too."
- exit(1)
-
-if options.confval:
- if not config.Configuration().option_exists(options.confval[0]):
- print ("%s: error: %s is not a "
- "valid configuration option") % (sys.argv[0],
- options.confval[0])
+ parser.add_option("-e",
+ nargs=2,
+ dest="confval",
+ metavar="CONF VALUE",
+ help="Set configuration option CONF to be VALUE")
+ parser.add_option("-c",
+ "--category",
+ dest="tempcat",
+ metavar="CATEGORY",
+ help="Set the category to CATEGORY only for this run")
+ parser.add_option("-t",
+ "--thread",
+ dest="thread",
+ metavar="THREAD",
+ help="Download only THREAD. If THREAD is only an ID, "
+ "CATEGORY must also be set. Otherwise, no problem :-)")
+ parser.add_option("-w",
+ "--wizard",
+ action="store_true",
+ dest="wizard",
+ help="I'll put on my robe and wizard hat and help you "
+ "get some of those pictures you like")
+ parser.add_option("-s",
+ "--sort",
+ action="store_true",
+ dest="sort",
+ help="Sort downloaded images, most handy if you've used "
+ "older versions which didn't sort yet")
+ (options, args) = parser.parse_args()
+
+ if options.confval and (options.tempcat
+ or options.thread
+ or options.wizard
+ or options.sort):
+ print "Can't configure something and do something else too."
exit(1)
- print "Setting", options.confval[0], "to", options.confval[1]
- config.Configuration().set_option(options.confval[0],
- options.confval[1])
- config.Configuration().save()
- exit(0)
-
-elif options.wizard:
- try:
- walk_with_wizard(base_url)
- except KeyboardInterrupt:
- print
- print "Alright, no more wizard hat and robe then. Goodbye"
- exit(0)
-
-elif options.thread:
- try:
- if options.thread[:7] == "http://":
- t = downloader.get_image_links("", [options.thread])
- elif options.tempcat:
- url = "%s%s/res/" % (base_url, options.tempcat)
- t = downloader.get_image_links(url, [options.thread])
- else:
- print ("if THREAD is not an absolute URL, "
- "CATEGORY must also be specified")
+
+ if options.wizard and (options.tempcat
+ or options.thread
+ or options.confval
+ or options.sort):
+ print "Can't take a walk with the wizard and do something else too."
+ exit(1)
+
+ if options.sort:
+ sort = sorter.Sorter()
+ for item in os.listdir(conf.get_download_location()):
+ sort.act(item)
+ exit(0)
+
+ if options.confval:
+ if not conf.option_exists(options.confval[0]):
+ print ("%s: error: %s is not a valid configuration option"
+ % (sys.argv[0], options.confval[0]))
exit(1)
- (skipped, failed, downloaded, total) = downloader.get_images(t)
- print "Downloaded: ", downloaded
- print "Skipped: ", skipped
- print "Failed: ", failed
- print "Total: ", total
- except KeyboardInterrupt:
- print
- print "Goodbye"
- exit(0)
-
-elif options.tempcat:
- config.Configuration().set_category(options.tempcat)
-
-base_url = "%s%s/" % (base_url, config.Configuration().get_category())
-
-try:
- t = downloader.get_thread_links(base_url)
- t = downloader.get_image_links(base_url, t)
- (skipped, failed, downloaded, total) = downloader.get_images(t)
- print "Downloaded: ", downloaded
- print "Skipped: ", skipped
- print "Failed: ", failed
- print "Total: ", total
-except KeyboardInterrupt:
- print
- print "So you don't want these images? Fine! I'll stop then."
+ print "Setting", options.confval[0], "to", options.confval[1]
+ conf.set_option(options.confval[0],
+ options.confval[1])
+ conf.save()
+ exit(0)
+
+ elif options.wizard:
+ try:
+ walk_with_wizard(base_url)
+ except KeyboardInterrupt:
+ print
+ print "Alright, no more wizard hat and robe then. Goodbye"
+ exit(0)
+
+ elif options.thread:
+ try:
+ if options.thread[:7] == "http://":
+ t = downloader.get_image_links("", [options.thread])
+ elif options.tempcat:
+ url = "%s%s/res/" % (base_url, options.tempcat)
+ t = downloader.get_image_links(url, [options.thread])
+ else:
+ print ("if THREAD is not an absolute URL, "
+ "CATEGORY must also be specified")
+ exit(1)
+ (skipped, failed, downloaded, total) = downloader.get_images(t)
+ print "Downloaded: ", downloaded
+ print "Skipped: ", skipped
+ print "Failed: ", failed
+ print "Total: ", total
+ except KeyboardInterrupt:
+ print
+ print "Goodbye"
+ exit(0)
+
+ elif options.tempcat:
+ conf.set_categories([options.tempcat])
+
+#base_url = "%s%s/" % (base_url, conf.get_categories())
+
+if __name__ == "__main__":
+ conf = config.Configuration()
+ sort = sorter.Sorter()
+ parse_commands()
+
+ downloader.set_on_downloaded(sort.act)
+ for category in conf.get_categories():
+ base_url = "%s%s/" % (base_url, category)
+ try:
+ t = downloader.get_thread_links(base_url)
+ t = downloader.get_image_links(base_url, t)
+ (skipped, failed, downloaded, total) = downloader.get_images(t)
+ print "Downloaded: ", downloaded
+ print "Skipped: ", skipped
+ print "Failed: ", failed
+ print "Total: ", total
+ except KeyboardInterrupt:
+ print
+ print "So you don't want these images? Fine! I'll stop then."
diff --git a/config.py b/config.py
index 1bc0b5e..7230285 100644
--- a/config.py
+++ b/config.py
@@ -21,42 +21,97 @@ import os
import ConfigParser
import sys
+# Get our reference point. preferably $HOME.
homedir = os.getenv("HOME")
if homedir is None:
homedir = os.path.dirname(sys.argv[0])
-
+
class _Configuration(object):
- def __init__(self):
+ def __init__(self, optioncreator):
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
self.configparser = ConfigParser.RawConfigParser()
- if not os.path.exists(self.filename):
- self.create_new()
- else:
- self.configparser.read(self.filename)
-
- def create_new(self):
- self.configparser.add_section("settings")
- self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
+ self.optioncreator = optioncreator
- self.configparser.add_section("locations")
- self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
+ self.configparser.read(self.filename)
- self.save()
+ def check(self):
+ changed = False
+ # read if it exists
+ if os.path.exists(self.filename):
+ self.configparser.read(self.filename)
+ # locations
+ if not self.configparser.has_section("locations"):
+ self.configparser.add_section("locations")
+ # locations/download_base
+ if not self.configparser.has_option("locations", "download_base"):
+ self.create_option("locations",
+ "download_base",
+ os.path.join(homedir,
+ "Pictures"),
+ "Please enter where "
+ "you would like the "
+ "downloads to go: ")
+ changed = True
+ # locations/archive
+ if not self.configparser.has_option("locations", "archive"):
+ self.create_option("locations",
+ "archive",
+ os.path.join(self.configparser.get("locations",
+ "download_base"),
+ ".arch"),
+ "Please enter where in {download_base} you "
+ "would like to store archived images (used for "
+ "checking what to download): ")
+ changed = True
+ # settings
+ if not self.configparser.has_section("settings"):
+ self.configparser.add_section("settings")
+ # settings/categories
+ if not self.configparser.has_option("settings", "categories"):
+ self.create_option("settings",
+ "categories",
+ "w",
+ "Please enter which "
+ "category you would like "
+ "to download from: ")
+ changed = True
+ # settings/resolutions
+ if not self.configparser.has_option("settings", "resolutions"):
+ self.create_option("settings",
+ "resolutions",
+ "1600x1050,1900x1200,1900x1080",
+ "Please enter your preferred "
+ "resolutions (* for all)")
+ changed = True
+ # save
+ if changed:
+ self.save()
- def raw_input_with_default(self, default, prompt):
- inp = raw_input("%s (default=%s): " % (prompt, default))
- if inp == "":
- return default
- return inp
+ def create_option(self, section, name, default, message):
+ self.configparser.set(section,
+ name,
+ self.optioncreator(default,
+ message))
def get_download_location(self):
- return self.configparser.get("locations", "download")
+ return self.configparser.get("locations", "download_base")
+ def set_download_location(self, value):
+ self.configparser.set("locations", "download_base", value)
- def get_category(self):
- return self.configparser.get("settings", "category")
+ def get_archive_location(self):
+ return self.configparser.get("locations", "archive")
+ def set_archive_location(self, value):
+ self.configparser.set("locations", "archive", value)
- def set_category(self, value):
- self.configparser.set("settings", "category", value)
+ def get_categories(self):
+ return self.configparser.get("settings", "categories").split(',')
+ def set_categories(self, value = []):
+ self.configparser.set("settings", "category", ','.join(value))
+
+ def get_resolutions(self):
+ return self.configparser.get("settings", "resolutions").split(',')
+ def set_resolutions(self, value = []):
+ self.configparser.set("settings", "resolutions", ','.join(value))
def option_exists(self, option):
sections = self.configparser.sections()
@@ -65,6 +120,7 @@ class _Configuration(object):
return True
return False
+ # Should only be used by the command-line
def set_option(self, option, value):
sec = None
sections = self.configparser.sections()
@@ -81,9 +137,19 @@ class _Configuration(object):
def save(self):
dirname = os.path.dirname(self.filename)
if not os.path.exists(dirname):
- os.mkdir(dirname)
+ os.makedirs(dirname)
configfile = open(self.filename, "w")
self.configparser.write(configfile)
-_configuration = _Configuration()
-def Configuration(): return _configuration
+_configuration = None
+_optioncreator = None
+def Configuration():
+ global _optioncreator
+ global _configuration
+
+ if _optioncreator is None:
+ raise ValueError("optioncreator must be set")
+ if _configuration is None:
+ _configuration = _Configuration(_optioncreator)
+ _configuration.check()
+ return _configuration
diff --git a/download.py b/download.py
index 2405805..3b12cc8 100644
--- a/download.py
+++ b/download.py
@@ -22,20 +22,35 @@
import urllib
import os
import htmlparser
-#import progressbar
import config
-savedir = config.Configuration().get_download_location()
-if not os.path.exists(savedir):
- os.makedirs(savedir)
+def get_savedir():
+ conf = config.Configuration()
+ savedir = conf.get_download_location()
+ if not os.path.exists(savedir):
+ os.makedirs(savedir)
+ return savedir
+def check_archive(fullpath):
+ conf = config.Configuration()
+ archive = conf.get_archive_location()
+ filename = os.path.basename(fullpath)
+ archfile = os.path.join(archive, filename)
+ #print "Path", archfile, "exists:", os.path.exists(archfile)
+ return os.path.exists(archfile)
class Downloader(object):
def __init__(self, progress_reporter):
self.progress_reporter = progress_reporter
+ self.on_downloaded = None
+
+ def set_on_downloaded(self, on_downloaded):
+ self.on_downloaded = on_downloaded
def get_thread_links(self, baseurl):
myparser = htmlparser.MyParser()
- t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
+ t = ["0", "1", "2", "3", "4",
+ "5", "6", "7", "8", "9",
+ "10", "11", "12", "13", "14", "15"]
i = 1
total = len(t)
progress = self.progress_reporter(total)
@@ -105,8 +120,8 @@ class Downloader(object):
i = 1
for link in t:
progress.show_progress(i)
- filename = os.path.join(savedir, os.path.split(link)[1])
- if not os.path.exists(filename):
+ filename = os.path.join(get_savedir(), os.path.split(link)[1])
+ if not check_archive(filename):
tries = 10
while tries > 0:
try:
@@ -118,6 +133,8 @@ class Downloader(object):
failed += 1
else:
downloaded += 1
+ if self.on_downloaded is not None:
+ self.on_downloaded(filename)
else:
skipped += 1
i += 1
@@ -126,10 +143,4 @@ class Downloader(object):
return (skipped, failed, downloaded, total)
if __name__ == "__main__":
- # Get a file-like object for the 4chan.org w/imgboard
- base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"
-
- # Get the hyperlinks.
- t = get_thread_links(base_url)
- t = get_image_links(base_url, t)
- get_images(t)
+ print "Don't run me, run 4grab.py"
diff --git a/sorter.py b/sorter.py
new file mode 100644
index 0000000..482e343
--- /dev/null
+++ b/sorter.py
@@ -0,0 +1,110 @@
+######################################################################
+# Copyright 2009, 2010 ryuslash
+#
+# This file is part of 4grab.
+#
+# 4grab is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# 4grab is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
+######################################################################
+
+import config
+import Image
+import shutil
+import os
+
+def dummy_option_creator(value1, value2): pass
+config._optioncreator = dummy_option_creator
+
+class Sorter:
+
+ def __init__(self):
+ self.conf = config.Configuration()
+ self.resolutions = self.conf.get_resolutions()
+
+ def act(self, filename):
+ download_base = self.conf.get_download_location()
+
+ if self.check_filename(filename):
+ image = None
+ try:
+ image = Image.open(os.path.join(download_base,
+ filename))
+ except IOError:
+ print "Cannot read image file %s, might be broken" \
+ % filename
+
+ if not image == None and self.archive_check(filename):
+ for resolution in self.resolutions:
+ resolution = resolution.split('x')
+ foldername = "%s-%s" % (resolution[0],
+ resolution[1])
+ folderpath = os.path.join(download_base,
+ foldername)
+
+ if str(image.size[0]) == resolution[0] and \
+ str(image.size[1]) == resolution[1]:
+ if not os.path.exists(folderpath):
+ os.makedirs(folderpath)
+ #print "creating", folderpath
+
+ self.copy(filename, folderpath)
+ break
+
+ self.archive(filename)
+ self.remove(filename)
+
+ def copy(self, filename, destpath):
+ download_base = self.conf.get_download_location()
+ source = os.path.join(download_base,
+ os.path.basename(filename))
+ dest = os.path.join(destpath,
+ os.path.basename(filename))
+ if source != dest:
+ shutil.copy(source, dest)
+ else:
+ print "\nHow can this even happen?! Copying", source, "to", dest
+ #print "\nParameters are", filename, "and", destpath
+
+ def archive(self, filename):
+ download_base = self.conf.get_download_location()
+ location = self.conf.get_archive_location()
+ if not os.path.exists(location):
+ os.makedirs(location)
+
+ dest = os.path.join(location, filename)
+ f = open(dest, "w")
+ file.close(f)
+
+
+ def archive_check(self, filename):
+ archive_path = self.conf.get_archive_location()
+ fullname = os.path.join(archive_path, filename)
+ return os.path.exists(fullname)
+
+ def check_filename(self, filename):
+ ext = os.path.splitext(filename)[1]
+ return ext == ".jpg" or \
+ ext == ".png" or \
+ ext == ".gif"
+
+ def remove(self, filename):
+ download_base = self.conf.get_download_location()
+ source = os.path.join(download_base, filename)
+ os.remove(source)
+
+if __name__ == "__main__":
+ conf = config.Configuration()
+ download_base = conf.get_download_location()
+ sorter = Sorter()
+ for item in os.listdir(download_base):
+ sorter.act(item)