Merge branch 'sorting' into develop

This commit is contained in:
ryuslash 2010-03-19 08:26:26 +01:00
commit 96247d41d5
4 changed files with 350 additions and 128 deletions

211
4grab.py
View file

@ -21,16 +21,27 @@
import optparse import optparse
import sys import sys
import os
import config import config
import download import download
import progressbar import progressbar
import sorter
def raw_input_with_default(default, prompt):
    """Ask a question on stdin, falling back to *default*.

    The text shown is "<prompt> (default=<default>): ".  Returns what
    the user typed, or *default* when the answer was empty.
    """
    answer = raw_input("%s (default=%s): " % (prompt, default))
    return default if answer == "" else answer
config._optioncreator = raw_input_with_default
base_url = "http://boards.4chan.org/" base_url = "http://boards.4chan.org/"
parser = optparse.OptionParser() parser = optparse.OptionParser()
downloader = download.Downloader(progressbar.Progress) downloader = download.Downloader(progressbar.Progress)
def walk_with_wizard(baseurl): def walk_with_wizard(baseurl):
conf = config.Configuration()
wzrd_msg = "Pilates! *SHAZAM* Here they come!" wzrd_msg = "Pilates! *SHAZAM* Here they come!"
print "Alright, let me put on my robe and wizard hat." print "Alright, let me put on my robe and wizard hat."
@ -54,8 +65,8 @@ def walk_with_wizard(baseurl):
[thread]) [thread])
else: else:
inp = raw_input("Which category would you like to download? ") inp = raw_input("Which category would you like to download? ")
config.Configuration().set_category(inp) conf.set_categories([inp])
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category()) baseurl = "%s%s/" % (baseurl, conf.get_categories()[0])
print wzrd_msg print wzrd_msg
t = downloader.get_thread_links(baseurl) t = downloader.get_thread_links(baseurl)
@ -66,102 +77,126 @@ def walk_with_wizard(baseurl):
print "Failed: ", failed print "Failed: ", failed
print "Total: ", total print "Total: ", total
parser.set_usage( def parse_commands():
"""%prog [options] conf = config.Configuration()
parser.set_usage(
"""%prog [options]
4grab Copyright (C) 2009-2010 ryuslash 4grab Copyright (C) 2009-2010 ryuslash
This program comes with ABSOLUTELY NO WARRANTY. This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it This is free software, and you are welcome to redistribute it
under certain conditions.""") under certain conditions.""")
parser.add_option("-e", parser.add_option("-e",
nargs=2, nargs=2,
dest="confval", dest="confval",
metavar="CONF VALUE", metavar="CONF VALUE",
help="Set configuration option CONF to be VALUE") help="Set configuration option CONF to be VALUE")
parser.add_option("-c", parser.add_option("-c",
"--category", "--category",
dest="tempcat", dest="tempcat",
metavar="CATEGORY", metavar="CATEGORY",
help="Set the category to CATEGORY only for this run") help="Set the category to CATEGORY only for this run")
parser.add_option("-t", parser.add_option("-t",
"--thread", "--thread",
dest="thread", dest="thread",
metavar="THREAD", metavar="THREAD",
help="Download only THREAD. If THREAD is only an ID, " help="Download only THREAD. If THREAD is only an ID, "
"CATEGORY must also be set. Otherwise, no problem :-)") "CATEGORY must also be set. Otherwise, no problem :-)")
parser.add_option("-w", parser.add_option("-w",
"--wizard", "--wizard",
action="store_true", action="store_true",
dest="wizard", dest="wizard",
help="I'll put on my robe and wizard hat and help you " help="I'll put on my robe and wizard hat and help you "
"get some of those pictures you like") "get some of those pictures you like")
(options, args) = parser.parse_args() parser.add_option("-s",
"--sort",
action="store_true",
dest="sort",
help="Sort downloaded images, most handy if you've used "
"older versions which didn't sort yet")
(options, args) = parser.parse_args()
if options.confval and (options.tempcat if options.confval and (options.tempcat
or options.thread or options.thread
or options.wizard): or options.wizard
print "Can't configure something and do something else too." or options.sort):
exit(1) print "Can't configure something and do something else too."
if options.wizard and (options.tempcat
or options.thread
or options.confval):
print "Can't take a walk with the wizard and do something else too."
exit(1)
if options.confval:
if not config.Configuration().option_exists(options.confval[0]):
print ("%s: error: %s is not a "
"valid configuration option") % (sys.argv[0],
options.confval[0])
exit(1) exit(1)
print "Setting", options.confval[0], "to", options.confval[1]
config.Configuration().set_option(options.confval[0],
options.confval[1])
config.Configuration().save()
exit(0)
elif options.wizard: if options.wizard and (options.tempcat
try: or options.thread
walk_with_wizard(base_url) or options.confval
except KeyboardInterrupt: or options.sort):
print print "Can't take a walk with the wizard and do something else too."
print "Alright, no more wizard hat and robe then. Goodbye" exit(1)
exit(0)
elif options.thread: if options.sort:
try: sort = sorter.Sorter()
if options.thread[:7] == "http://": for item in os.listdir(conf.get_download_location()):
t = downloader.get_image_links("", [options.thread]) sort.act(item)
elif options.tempcat: exit(0)
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread]) if options.confval:
else: if not conf.option_exists(options.confval[0]):
print ("if THREAD is not an absolute URL, " print ("%s: error: %s is not a valid configuration option"
"CATEGORY must also be specified") % (sys.argv[0], options.confval[0]))
exit(1) exit(1)
(skipped, failed, downloaded, total) = downloader.get_images(t) print "Setting", options.confval[0], "to", options.confval[1]
print "Downloaded: ", downloaded conf.set_option(options.confval[0],
print "Skipped: ", skipped options.confval[1])
print "Failed: ", failed conf.save()
print "Total: ", total exit(0)
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
elif options.tempcat: elif options.wizard:
config.Configuration().set_category(options.tempcat) try:
walk_with_wizard(base_url)
except KeyboardInterrupt:
print
print "Alright, no more wizard hat and robe then. Goodbye"
exit(0)
base_url = "%s%s/" % (base_url, config.Configuration().get_category()) elif options.thread:
try:
if options.thread[:7] == "http://":
t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread])
else:
print ("if THREAD is not an absolute URL, "
"CATEGORY must also be specified")
exit(1)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
try: elif options.tempcat:
t = downloader.get_thread_links(base_url) conf.set_categories([options.tempcat])
t = downloader.get_image_links(base_url, t)
(skipped, failed, downloaded, total) = downloader.get_images(t) #base_url = "%s%s/" % (base_url, conf.get_categories())
print "Downloaded: ", downloaded
print "Skipped: ", skipped if __name__ == "__main__":
print "Failed: ", failed conf = config.Configuration()
print "Total: ", total sort = sorter.Sorter()
except KeyboardInterrupt: parse_commands()
print
print "So you don't want these images? Fine! I'll stop then." downloader.set_on_downloaded(sort.act)
for category in conf.get_categories():
base_url = "%s%s/" % (base_url, category)
try:
t = downloader.get_thread_links(base_url)
t = downloader.get_image_links(base_url, t)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "So you don't want these images? Fine! I'll stop then."

118
config.py
View file

@ -21,42 +21,97 @@ import os
import ConfigParser import ConfigParser
import sys import sys
# Get our reference point. preferably $HOME.
homedir = os.getenv("HOME") homedir = os.getenv("HOME")
if homedir is None: if homedir is None:
homedir = os.path.dirname(sys.argv[0]) homedir = os.path.dirname(sys.argv[0])
class _Configuration(object): class _Configuration(object):
def __init__(self): def __init__(self, optioncreator):
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg") self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
self.configparser = ConfigParser.RawConfigParser() self.configparser = ConfigParser.RawConfigParser()
if not os.path.exists(self.filename): self.optioncreator = optioncreator
self.create_new()
else:
self.configparser.read(self.filename)
def create_new(self):
self.configparser.add_section("settings")
self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
self.configparser.add_section("locations") self.configparser.read(self.filename)
self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
self.save() def check(self):
changed = False
# read if it exists
if os.path.exists(self.filename):
self.configparser.read(self.filename)
# locations
if not self.configparser.has_section("locations"):
self.configparser.add_section("locations")
# locations/download_base
if not self.configparser.has_option("locations", "download_base"):
self.create_option("locations",
"download_base",
os.path.join(homedir,
"Pictures"),
"Please enter where "
"you would like the "
"downloads to go: ")
changed = True
# locations/archive
if not self.configparser.has_option("locations", "archive"):
self.create_option("locations",
"archive",
os.path.join(self.configparser.get("locations",
"download_base"),
".arch"),
"Please enter where in {download_base} you "
"would like to store archived images (used for "
"checking what to download): ")
changed = True
# settings
if not self.configparser.has_section("settings"):
self.configparser.add_section("settings")
# settings/categories
if not self.configparser.has_option("settings", "categories"):
self.create_option("settings",
"categories",
"w",
"Please enter which "
"category you would like "
"to download from: ")
changed = True
# settings/resolutions
if not self.configparser.has_option("settings", "resolutions"):
self.create_option("settings",
"resolutions",
"1600x1050,1900x1200,1900x1080",
"Please enter your preferred "
"resolutions (* for all)")
changed = True
# save
if changed:
self.save()
def raw_input_with_default(self, default, prompt): def create_option(self, section, name, default, message):
inp = raw_input("%s (default=%s): " % (prompt, default)) self.configparser.set(section,
if inp == "": name,
return default self.optioncreator(default,
return inp message))
def get_download_location(self): def get_download_location(self):
return self.configparser.get("locations", "download") return self.configparser.get("locations", "download_base")
def set_download_location(self, value):
    """Set the "download_base" option of the "locations" section.

    Only the in-memory parser is changed; this method does not call
    save().
    """
    section, option = "locations", "download_base"
    self.configparser.set(section, option, value)
def get_category(self): def get_archive_location(self):
return self.configparser.get("settings", "category") return self.configparser.get("locations", "archive")
def set_archive_location(self, value):
    """Set the "archive" option of the "locations" section.

    Only the in-memory parser is changed; this method does not call
    save().
    """
    section, option = "locations", "archive"
    self.configparser.set(section, option, value)
def set_category(self, value): def get_categories(self):
self.configparser.set("settings", "category", value) return self.configparser.get("settings", "categories").split(',')
def set_categories(self, value=()):
    """Store the categories to download from.

    value -- iterable of category names, e.g. ["w", "wg"].  The names
             are joined with commas and written to the "categories"
             option of the "settings" section, which is the option
             get_categories() reads and splits again.

    Only the in-memory parser is changed; this method does not call
    save().
    """
    # The option name has to match the one get_categories() reads;
    # writing to "category" left "categories" untouched, so a value set
    # here was never seen by the reader.
    self.configparser.set("settings", "categories", ','.join(value))
def get_resolutions(self):
    """Return the "resolutions" setting split on commas.

    The option is read from the "settings" section; each list element
    is one comma-separated piece of the stored string.
    """
    stored = self.configparser.get("settings", "resolutions")
    return stored.split(',')
def set_resolutions(self, value=()):
    """Store the preferred resolutions.

    value -- iterable of resolution strings, e.g. ["1600x1050"].  They
             are joined with commas and written to the "resolutions"
             option of the "settings" section.

    Only the in-memory parser is changed; this method does not call
    save().
    """
    # An immutable default avoids sharing one list object between calls.
    self.configparser.set("settings", "resolutions", ','.join(value))
def option_exists(self, option): def option_exists(self, option):
sections = self.configparser.sections() sections = self.configparser.sections()
@ -65,6 +120,7 @@ class _Configuration(object):
return True return True
return False return False
# Should only be used by the command-line
def set_option(self, option, value): def set_option(self, option, value):
sec = None sec = None
sections = self.configparser.sections() sections = self.configparser.sections()
@ -81,9 +137,19 @@ class _Configuration(object):
def save(self): def save(self):
dirname = os.path.dirname(self.filename) dirname = os.path.dirname(self.filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
os.mkdir(dirname) os.makedirs(dirname)
configfile = open(self.filename, "w") configfile = open(self.filename, "w")
self.configparser.write(configfile) self.configparser.write(configfile)
_configuration = _Configuration() _configuration = None
def Configuration(): return _configuration _optioncreator = None
def Configuration():
    """Return the shared _Configuration instance, creating it on first use.

    The module-level _optioncreator must have been set beforehand; it
    is the callable handed to _Configuration.

    Raises ValueError when _optioncreator is still None.
    """
    global _configuration
    if _optioncreator is None:
        raise ValueError("optioncreator must be set")
    if _configuration is None:
        # Publish the instance only once check() has finished, so an
        # exception raised while checking does not leave a
        # half-initialised object cached for later callers.
        fresh = _Configuration(_optioncreator)
        fresh.check()
        _configuration = fresh
    return _configuration

View file

@ -22,20 +22,35 @@
import urllib import urllib
import os import os
import htmlparser import htmlparser
#import progressbar
import config import config
savedir = config.Configuration().get_download_location() def get_savedir():
if not os.path.exists(savedir): conf = config.Configuration()
os.makedirs(savedir) savedir = conf.get_download_location()
if not os.path.exists(savedir):
os.makedirs(savedir)
return savedir
def check_archive(fullpath):
    """Tell whether the archive directory holds a file named like *fullpath*.

    Only the base name of *fullpath* is used; it is looked up inside
    the configured archive location.
    """
    archive_dir = config.Configuration().get_archive_location()
    candidate = os.path.join(archive_dir, os.path.basename(fullpath))
    return os.path.exists(candidate)
class Downloader(object): class Downloader(object):
def __init__(self, progress_reporter): def __init__(self, progress_reporter):
self.progress_reporter = progress_reporter self.progress_reporter = progress_reporter
self.on_downloaded = None
def set_on_downloaded(self, on_downloaded):
    """Register the callable kept in self.on_downloaded.

    Passing None clears it again.
    """
    self.on_downloaded = on_downloaded
def get_thread_links(self, baseurl): def get_thread_links(self, baseurl):
myparser = htmlparser.MyParser() myparser = htmlparser.MyParser()
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"] t = ["0", "1", "2", "3", "4",
"5", "6", "7", "8", "9",
"10", "11", "12", "13", "14", "15"]
i = 1 i = 1
total = len(t) total = len(t)
progress = self.progress_reporter(total) progress = self.progress_reporter(total)
@ -105,8 +120,8 @@ class Downloader(object):
i = 1 i = 1
for link in t: for link in t:
progress.show_progress(i) progress.show_progress(i)
filename = os.path.join(savedir, os.path.split(link)[1]) filename = os.path.join(get_savedir(), os.path.split(link)[1])
if not os.path.exists(filename): if not check_archive(filename):
tries = 10 tries = 10
while tries > 0: while tries > 0:
try: try:
@ -118,6 +133,8 @@ class Downloader(object):
failed += 1 failed += 1
else: else:
downloaded += 1 downloaded += 1
if self.on_downloaded is not None:
self.on_downloaded(filename)
else: else:
skipped += 1 skipped += 1
i += 1 i += 1
@ -126,10 +143,4 @@ class Downloader(object):
return (skipped, failed, downloaded, total) return (skipped, failed, downloaded, total)
if __name__ == "__main__": if __name__ == "__main__":
# Get a file-like object for the 4chan.org w/imgboard print "Don't run me, run 4grab.py"
base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"
# Get the hyperlinks.
t = get_thread_links(base_url)
t = get_image_links(base_url, t)
get_images(t)

110
sorter.py Normal file
View file

@ -0,0 +1,110 @@
######################################################################
# Copyright 2009, 2010 ryuslash
#
# This file is part of 4grab.
#
# 4grab is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# 4grab is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
######################################################################
import config
import Image
import shutil
import os
def dummy_option_creator(value1, value2):
    """Non-interactive option creator: take the default without asking.

    value1 -- the default value offered for the option
    value2 -- the prompt text; unused because nothing is asked

    Returns value1, so an option that has to be created receives its
    default rather than None.
    """
    return value1
config._optioncreator = dummy_option_creator
class Sorter:
def __init__(self):
    """Fetch the shared configuration and cache its resolution list."""
    conf = config.Configuration()
    self.conf = conf
    self.resolutions = conf.get_resolutions()
# Sort one file from the download directory by image resolution.
#
# For a name ending in .jpg, .png or .gif (see check_filename) the image
# is opened from the download directory.  When it could be opened and
# archive_check(filename) is true, its size is compared with each
# configured "WIDTHxHEIGHT" resolution; on the first match the file is
# copied into <download_base>/WIDTH-HEIGHT (created when missing).
# archive(filename) and remove(filename) follow.
#
# NOTE(review): indentation is lost in this view, so it cannot be told
# here which enclosing `if` the final archive()/remove() calls belong
# to -- confirm against the real file before restructuring this method.
# NOTE(review): archive_check() is true when the name already exists in
# the archive directory; confirm that sorting only in that case is the
# intended polarity.
def act(self, filename):
download_base = self.conf.get_download_location()
if self.check_filename(filename):
# Stays None when the file cannot be opened as an image.
image = None
try:
image = Image.open(os.path.join(download_base,
filename))
except IOError:
print "Cannot read image file %s, might be broken" \
% filename
if not image == None and self.archive_check(filename):
for resolution in self.resolutions:
# "1600x1050" -> ["1600", "1050"]
resolution = resolution.split('x')
foldername = "%s-%s" % (resolution[0],
resolution[1])
folderpath = os.path.join(download_base,
foldername)
# Sizes are compared as strings, so the match is exact.
if str(image.size[0]) == resolution[0] and \
str(image.size[1]) == resolution[1]:
if not os.path.exists(folderpath):
os.makedirs(folderpath)
#print "creating", folderpath
self.copy(filename, folderpath)
# Only the first matching resolution is used.
break
self.archive(filename)
self.remove(filename)
def copy(self, filename, destpath):
    """Copy a file from the download directory into *destpath*.

    Only the base name of *filename* is used, both to locate the source
    inside the download directory and to name the copy.  When source
    and destination turn out to be the same path, nothing is copied and
    a message is printed instead.
    """
    basename = os.path.basename(filename)
    source = os.path.join(self.conf.get_download_location(), basename)
    dest = os.path.join(destpath, basename)
    if source == dest:
        print(" ".join(["\nHow can this even happen?! Copying",
                        source, "to", dest]))
        return
    shutil.copy(source, dest)
def archive(self, filename):
    """Leave an empty marker for *filename* in the archive directory.

    filename -- a bare file name or a path; only its base name is used.

    The archive directory is created when it does not exist yet.  The
    marker is an empty file carrying the image's base name.
    """
    location = self.conf.get_archive_location()
    if not os.path.exists(location):
        os.makedirs(location)
    # Use the base name: joining an absolute path onto the archive
    # directory yields that absolute path again, so the "marker" would
    # be opened for writing on top of the image itself and nothing
    # would end up in the archive.
    marker = os.path.join(location, os.path.basename(filename))
    open(marker, "w").close()
def archive_check(self, filename):
    """Return True when *filename*, joined onto the archive directory, exists."""
    candidate = os.path.join(self.conf.get_archive_location(), filename)
    return os.path.exists(candidate)
def check_filename(self, filename):
    """Return True when *filename* ends in .jpg, .png or .gif.

    The comparison is case-sensitive and looks only at the extension
    reported by os.path.splitext.
    """
    extension = os.path.splitext(filename)[1]
    return extension in (".jpg", ".png", ".gif")
def remove(self, filename):
    """Delete *filename*, resolved against the download directory."""
    os.remove(os.path.join(self.conf.get_download_location(), filename))
if __name__ == "__main__":
    # Stand-alone use: hand every entry of the download directory to
    # a Sorter.
    conf = config.Configuration()
    download_base = conf.get_download_location()
    sorter = Sorter()
    for entry in os.listdir(download_base):
        sorter.act(entry)