diff --git a/4grab.py b/4grab.py index fd12c32..4dfca95 100755 --- a/4grab.py +++ b/4grab.py @@ -21,16 +21,27 @@ import optparse import sys +import os import config + import download import progressbar +import sorter + +def raw_input_with_default(default, prompt): + inp = raw_input("%s (default=%s): " % (prompt, default)) + if inp == "": + return default + return inp +config._optioncreator = raw_input_with_default base_url = "http://boards.4chan.org/" parser = optparse.OptionParser() downloader = download.Downloader(progressbar.Progress) def walk_with_wizard(baseurl): + conf = config.Configuration() wzrd_msg = "Pilates! *SHAZAM* Here they come!" print "Alright, let me put on my robe and wizard hat." @@ -54,8 +65,8 @@ def walk_with_wizard(baseurl): [thread]) else: inp = raw_input("Which category would you like to download? ") - config.Configuration().set_category(inp) - baseurl = "%s%s/" % (baseurl, config.Configuration().get_category()) + conf.set_categories([inp]) + baseurl = "%s%s/" % (baseurl, conf.get_categories()[0]) print wzrd_msg t = downloader.get_thread_links(baseurl) @@ -66,102 +77,126 @@ def walk_with_wizard(baseurl): print "Failed: ", failed print "Total: ", total -parser.set_usage( -"""%prog [options] +def parse_commands(): + conf = config.Configuration() + parser.set_usage( + """%prog [options] 4grab Copyright (C) 2009-2010 ryuslash This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it under certain conditions.""") -parser.add_option("-e", - nargs=2, - dest="confval", - metavar="CONF VALUE", - help="Set configuration option CONF to be VALUE") -parser.add_option("-c", - "--category", - dest="tempcat", - metavar="CATEGORY", - help="Set the category to CATEGORY only for this run") -parser.add_option("-t", - "--thread", - dest="thread", - metavar="THREAD", - help="Download only THREAD. If THREAD is only an ID, " - "CATEGORY must also be set. Otherwise, no problem :-)") -parser.add_option("-w", - "--wizard", - action="store_true", - dest="wizard", - help="I'll put on my robe and wizard hat and help you " - "get some of those pictures you like") -(options, args) = parser.parse_args() + parser.add_option("-e", + nargs=2, + dest="confval", + metavar="CONF VALUE", + help="Set configuration option CONF to be VALUE") + parser.add_option("-c", + "--category", + dest="tempcat", + metavar="CATEGORY", + help="Set the category to CATEGORY only for this run") + parser.add_option("-t", + "--thread", + dest="thread", + metavar="THREAD", + help="Download only THREAD. If THREAD is only an ID, " + "CATEGORY must also be set. Otherwise, no problem :-)") + parser.add_option("-w", + "--wizard", + action="store_true", + dest="wizard", + help="I'll put on my robe and wizard hat and help you " + "get some of those pictures you like") + parser.add_option("-s", + "--sort", + action="store_true", + dest="sort", + help="Sort downloaded images, most handy if you've used " + "older versions which didn't sort yet") + (options, args) = parser.parse_args() -if options.confval and (options.tempcat - or options.thread - or options.wizard): - print "Can't configure something and do something else too." - exit(1) -if options.wizard and (options.tempcat - or options.thread - or options.confval): - print "Can't take a walk with the wizard and do something else too." - exit(1) - -if options.confval: - if not config.Configuration().option_exists(options.confval[0]): - print ("%s: error: %s is not a " - "valid configuration option") % (sys.argv[0], - options.confval[0]) + if options.confval and (options.tempcat + or options.thread + or options.wizard + or options.sort): + print "Can't configure something and do something else too." exit(1) - print "Setting", options.confval[0], "to", options.confval[1] - config.Configuration().set_option(options.confval[0], - options.confval[1]) - config.Configuration().save() - exit(0) -elif options.wizard: - try: - walk_with_wizard(base_url) - except KeyboardInterrupt: - print - print "Alright, no more wizard hat and robe then. Goodbye" - exit(0) + if options.wizard and (options.tempcat + or options.thread + or options.confval + or options.sort): + print "Can't take a walk with the wizard and do something else too." + exit(1) -elif options.thread: - try: - if options.thread[:7] == "http://": - t = downloader.get_image_links("", [options.thread]) - elif options.tempcat: - url = "%s%s/res/" % (base_url, options.tempcat) - t = downloader.get_image_links(url, [options.thread]) - else: - print ("if THREAD is not an absolute URL, " - "CATEGORY must also be specified") + if options.sort: + sort = sorter.Sorter() + for item in os.listdir(conf.get_download_location()): + sort.act(item) + exit(0) + + if options.confval: + if not conf.option_exists(options.confval[0]): + print ("%s: error: %s is not a valid configuration option" + % (sys.argv[0], options.confval[0])) exit(1) - (skipped, failed, downloaded, total) = downloader.get_images(t) - print "Downloaded: ", downloaded - print "Skipped: ", skipped - print "Failed: ", failed - print "Total: ", total - except KeyboardInterrupt: - print - print "Goodbye" - exit(0) + print "Setting", options.confval[0], "to", options.confval[1] + conf.set_option(options.confval[0], + options.confval[1]) + conf.save() + exit(0) -elif options.tempcat: - config.Configuration().set_category(options.tempcat) + elif options.wizard: + try: + walk_with_wizard(base_url) + except KeyboardInterrupt: + print + print "Alright, no more wizard hat and robe then. Goodbye" + exit(0) -base_url = "%s%s/" % (base_url, config.Configuration().get_category()) + elif options.thread: + try: + if options.thread[:7] == "http://": + t = downloader.get_image_links("", [options.thread]) + elif options.tempcat: + url = "%s%s/res/" % (base_url, options.tempcat) + t = downloader.get_image_links(url, [options.thread]) + else: + print ("if THREAD is not an absolute URL, " + "CATEGORY must also be specified") + exit(1) + (skipped, failed, downloaded, total) = downloader.get_images(t) + print "Downloaded: ", downloaded + print "Skipped: ", skipped + print "Failed: ", failed + print "Total: ", total + except KeyboardInterrupt: + print + print "Goodbye" + exit(0) -try: - t = downloader.get_thread_links(base_url) - t = downloader.get_image_links(base_url, t) - (skipped, failed, downloaded, total) = downloader.get_images(t) - print "Downloaded: ", downloaded - print "Skipped: ", skipped - print "Failed: ", failed - print "Total: ", total -except KeyboardInterrupt: - print - print "So you don't want these images? Fine! I'll stop then." + elif options.tempcat: + conf.set_categories([options.tempcat]) + +#base_url = "%s%s/" % (base_url, conf.get_categories()) + +if __name__ == "__main__": + conf = config.Configuration() + sort = sorter.Sorter() + parse_commands() + + downloader.set_on_downloaded(sort.act) + for category in conf.get_categories(): + base_url = "%s%s/" % (base_url, category) + try: + t = downloader.get_thread_links(base_url) + t = downloader.get_image_links(base_url, t) + (skipped, failed, downloaded, total) = downloader.get_images(t) + print "Downloaded: ", downloaded + print "Skipped: ", skipped + print "Failed: ", failed + print "Total: ", total + except KeyboardInterrupt: + print + print "So you don't want these images? Fine! I'll stop then." diff --git a/config.py b/config.py index 1bc0b5e..7230285 100644 --- a/config.py +++ b/config.py @@ -21,42 +21,97 @@ import os import ConfigParser import sys +# Get our reference point. preferably $HOME. homedir = os.getenv("HOME") if homedir is None: homedir = os.path.dirname(sys.argv[0]) - + class _Configuration(object): - def __init__(self): + def __init__(self, optioncreator): self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg") self.configparser = ConfigParser.RawConfigParser() - if not os.path.exists(self.filename): - self.create_new() - else: - self.configparser.read(self.filename) - - def create_new(self): - self.configparser.add_section("settings") - self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: ")) + self.optioncreator = optioncreator - self.configparser.add_section("locations") - self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: ")) + self.configparser.read(self.filename) - self.save() + def check(self): + changed = False + # read if it exists + if os.path.exists(self.filename): + self.configparser.read(self.filename) + # locations + if not self.configparser.has_section("locations"): + self.configparser.add_section("locations") + # locations/download_base + if not self.configparser.has_option("locations", "download_base"): + self.create_option("locations", + "download_base", + os.path.join(homedir, + "Pictures"), + "Please enter where " + "you would like the " + "downloads to go: ") + changed = True + # locations/archive + if not self.configparser.has_option("locations", "archive"): + self.create_option("locations", + "archive", + os.path.join(self.configparser.get("locations", + "download_base"), + ".arch"), + "Please enter where in {download_base} you " + "would like to store archived images (used for " + "checking what to download): ") + changed = True + # settings + if not self.configparser.has_section("settings"): + self.configparser.add_section("settings") + # settings/categories + if not self.configparser.has_option("settings", "categories"): + self.create_option("settings", + "categories", + "w", + "Please enter which " + "category you would like " + "to download from: ") + changed = True + # settings/resolutions + if not self.configparser.has_option("settings", "resolutions"): + self.create_option("settings", + "resolutions", + "1600x1050,1900x1200,1900x1080", + "Please enter your preferred " + "resolutions (* for all)") + changed = True + # save + if changed: + self.save() - def raw_input_with_default(self, default, prompt): - inp = raw_input("%s (default=%s): " % (prompt, default)) - if inp == "": - return default - return inp + def create_option(self, section, name, default, message): + self.configparser.set(section, + name, + self.optioncreator(default, + message)) def get_download_location(self): - return self.configparser.get("locations", "download") + return self.configparser.get("locations", "download_base") + def set_download_location(self, value): + self.configparser.set("locations", "download_base", value) - def get_category(self): - return self.configparser.get("settings", "category") + def get_archive_location(self): + return self.configparser.get("locations", "archive") + def set_archive_location(self, value): + self.configparser.set("locations", "archive", value) - def set_category(self, value): - self.configparser.set("settings", "category", value) + def get_categories(self): + return self.configparser.get("settings", "categories").split(',') + def set_categories(self, value = []): + self.configparser.set("settings", "category", ','.join(value)) + + def get_resolutions(self): + return self.configparser.get("settings", "resolutions").split(',') + def set_resolutions(self, value = []): + self.configparser.set("settings", "resolutions", ','.join(value)) def option_exists(self, option): sections = self.configparser.sections() @@ -65,6 +120,7 @@ class _Configuration(object): return True return False + # Should only be used by the command-line def set_option(self, option, value): sec = None sections = self.configparser.sections() @@ -81,9 +137,19 @@ class _Configuration(object): def save(self): dirname = os.path.dirname(self.filename) if not os.path.exists(dirname): - os.mkdir(dirname) + os.makedirs(dirname) configfile = open(self.filename, "w") self.configparser.write(configfile) -_configuration = _Configuration() -def Configuration(): return _configuration +_configuration = None +_optioncreator = None +def Configuration(): + global _optioncreator + global _configuration + + if _optioncreator is None: + raise ValueError("optioncreator must be set") + if _configuration is None: + _configuration = _Configuration(_optioncreator) + _configuration.check() + return _configuration diff --git a/download.py b/download.py index 2405805..3b12cc8 100644 --- a/download.py +++ b/download.py @@ -22,20 +22,35 @@ import urllib import os import htmlparser -#import progressbar import config -savedir = config.Configuration().get_download_location() -if not os.path.exists(savedir): - os.makedirs(savedir) +def get_savedir(): + conf = config.Configuration() + savedir = conf.get_download_location() + if not os.path.exists(savedir): + os.makedirs(savedir) + return savedir +def check_archive(fullpath): + conf = config.Configuration() + archive = conf.get_archive_location() + filename = os.path.basename(fullpath) + archfile = os.path.join(archive, filename) + #print "Path", archfile, "exists:", os.path.exists(archfile) + return os.path.exists(archfile) class Downloader(object): def __init__(self, progress_reporter): self.progress_reporter = progress_reporter + self.on_downloaded = None + + def set_on_downloaded(self, on_downloaded): + self.on_downloaded = on_downloaded def get_thread_links(self, baseurl): myparser = htmlparser.MyParser() - t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] + t = ["0", "1", "2", "3", "4", + "5", "6", "7", "8", "9", + "10", "11", "12", "13", "14", "15"] i = 1 total = len(t) progress = self.progress_reporter(total) @@ -105,8 +120,8 @@ class Downloader(object): i = 1 for link in t: progress.show_progress(i) - filename = os.path.join(savedir, os.path.split(link)[1]) - if not os.path.exists(filename): + filename = os.path.join(get_savedir(), os.path.split(link)[1]) + if not check_archive(filename): tries = 10 while tries > 0: try: @@ -118,6 +133,8 @@ class Downloader(object): failed += 1 else: downloaded += 1 + if self.on_downloaded is not None: + self.on_downloaded(filename) else: skipped += 1 i += 1 @@ -126,10 +143,4 @@ class Downloader(object): return (skipped, failed, downloaded, total) if __name__ == "__main__": - # Get a file-like object for the 4chan.org w/imgboard - base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/" - - # Get the hyperlinks. - t = get_thread_links(base_url) - t = get_image_links(base_url, t) - get_images(t) + print "Don't run me, run 4grab.py" diff --git a/sorter.py b/sorter.py new file mode 100644 index 0000000..482e343 --- /dev/null +++ b/sorter.py @@ -0,0 +1,110 @@ +###################################################################### +# Copyright 2009, 2010 ryuslash +# +# This file is part of 4grab. +# +# 4grab is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# 4grab is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with 4grab. If not, see . +###################################################################### + +import config +import Image +import shutil +import os + +def dummy_option_creator(value1, value2): pass +config._optioncreator = dummy_option_creator + +class Sorter: + + def __init__(self): + self.conf = config.Configuration() + self.resolutions = self.conf.get_resolutions() + + def act(self, filename): + download_base = self.conf.get_download_location() + + if self.check_filename(filename): + image = None + try: + image = Image.open(os.path.join(download_base, + filename)) + except IOError: + print "Cannot read image file %s, might be broken" \ + % filename + + if not image == None and self.archive_check(filename): + for resolution in self.resolutions: + resolution = resolution.split('x') + foldername = "%s-%s" % (resolution[0], + resolution[1]) + folderpath = os.path.join(download_base, + foldername) + + if str(image.size[0]) == resolution[0] and \ + str(image.size[1]) == resolution[1]: + if not os.path.exists(folderpath): + os.makedirs(folderpath) + #print "creating", folderpath + + self.copy(filename, folderpath) + break + + self.archive(filename) + self.remove(filename) + + def copy(self, filename, destpath): + download_base = self.conf.get_download_location() + source = os.path.join(download_base, + os.path.basename(filename)) + dest = os.path.join(destpath, + os.path.basename(filename)) + if source != dest: + shutil.copy(source, dest) + else: + print "\nHow can this even happen?! Copying", source, "to", dest + #print "\nParameters are", filename, "and", destpath + + def archive(self, filename): + download_base = self.conf.get_download_location() + location = self.conf.get_archive_location() + if not os.path.exists(location): + os.makedirs(location) + + dest = os.path.join(location, filename) + f = open(dest, "w") + file.close(f) + + + def archive_check(self, filename): + archive_path = self.conf.get_archive_location() + fullname = os.path.join(archive_path, filename) + return os.path.exists(fullname) + + def check_filename(self, filename): + ext = os.path.splitext(filename)[1] + return ext == ".jpg" or \ + ext == ".png" or \ + ext == ".gif" + + def remove(self, filename): + download_base = self.conf.get_download_location() + source = os.path.join(download_base, filename) + os.remove(source) + +if __name__ == "__main__": + conf = config.Configuration() + download_base = conf.get_download_location() + sorter = Sorter() + for item in os.listdir(download_base): + sorter.act(item)