Merge branch 'sorting' into develop

This commit is contained in:
ryuslash 2010-03-19 08:26:26 +01:00
commit 96247d41d5
4 changed files with 350 additions and 128 deletions

211
4grab.py
View file

@ -21,16 +21,27 @@
import optparse
import sys
import os
import config
import download
import progressbar
import sorter
def raw_input_with_default(default, prompt):
    """Ask a question on stdin, falling back to *default* on an empty answer.

    The text shown to the user is ``"<prompt> (default=<default>): "``.
    Returns whatever was typed, or *default* when nothing was entered.
    """
    answer = raw_input("%s (default=%s): " % (prompt, default))
    return answer if answer != "" else default
# Hand config the interactive prompt; config.Configuration() raises ValueError
# while _optioncreator is still None.
config._optioncreator = raw_input_with_default
# Root URL; category names are appended to it to form board URLs.
base_url = "http://boards.4chan.org/"
parser = optparse.OptionParser()
# Shared downloader; progressbar.Progress is passed in as its progress reporter.
downloader = download.Downloader(progressbar.Progress)
def walk_with_wizard(baseurl):
conf = config.Configuration()
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
print "Alright, let me put on my robe and wizard hat."
@ -54,8 +65,8 @@ def walk_with_wizard(baseurl):
[thread])
else:
inp = raw_input("Which category would you like to download? ")
config.Configuration().set_category(inp)
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
conf.set_categories([inp])
baseurl = "%s%s/" % (baseurl, conf.get_categories()[0])
print wzrd_msg
t = downloader.get_thread_links(baseurl)
@ -66,102 +77,126 @@ def walk_with_wizard(baseurl):
print "Failed: ", failed
print "Total: ", total
parser.set_usage(
"""%prog [options]
def parse_commands():
conf = config.Configuration()
parser.set_usage(
"""%prog [options]
4grab Copyright (C) 2009-2010 ryuslash
This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions.""")
parser.add_option("-e",
nargs=2,
dest="confval",
metavar="CONF VALUE",
help="Set configuration option CONF to be VALUE")
parser.add_option("-c",
"--category",
dest="tempcat",
metavar="CATEGORY",
help="Set the category to CATEGORY only for this run")
parser.add_option("-t",
"--thread",
dest="thread",
metavar="THREAD",
help="Download only THREAD. If THREAD is only an ID, "
"CATEGORY must also be set. Otherwise, no problem :-)")
parser.add_option("-w",
"--wizard",
action="store_true",
dest="wizard",
help="I'll put on my robe and wizard hat and help you "
"get some of those pictures you like")
(options, args) = parser.parse_args()
parser.add_option("-e",
nargs=2,
dest="confval",
metavar="CONF VALUE",
help="Set configuration option CONF to be VALUE")
parser.add_option("-c",
"--category",
dest="tempcat",
metavar="CATEGORY",
help="Set the category to CATEGORY only for this run")
parser.add_option("-t",
"--thread",
dest="thread",
metavar="THREAD",
help="Download only THREAD. If THREAD is only an ID, "
"CATEGORY must also be set. Otherwise, no problem :-)")
parser.add_option("-w",
"--wizard",
action="store_true",
dest="wizard",
help="I'll put on my robe and wizard hat and help you "
"get some of those pictures you like")
parser.add_option("-s",
"--sort",
action="store_true",
dest="sort",
help="Sort downloaded images, most handy if you've used "
"older versions which didn't sort yet")
(options, args) = parser.parse_args()
if options.confval and (options.tempcat
or options.thread
or options.wizard):
print "Can't configure something and do something else too."
exit(1)
if options.wizard and (options.tempcat
or options.thread
or options.confval):
print "Can't take a walk with the wizard and do something else too."
exit(1)
if options.confval:
if not config.Configuration().option_exists(options.confval[0]):
print ("%s: error: %s is not a "
"valid configuration option") % (sys.argv[0],
options.confval[0])
if options.confval and (options.tempcat
or options.thread
or options.wizard
or options.sort):
print "Can't configure something and do something else too."
exit(1)
print "Setting", options.confval[0], "to", options.confval[1]
config.Configuration().set_option(options.confval[0],
options.confval[1])
config.Configuration().save()
exit(0)
elif options.wizard:
try:
walk_with_wizard(base_url)
except KeyboardInterrupt:
print
print "Alright, no more wizard hat and robe then. Goodbye"
exit(0)
if options.wizard and (options.tempcat
or options.thread
or options.confval
or options.sort):
print "Can't take a walk with the wizard and do something else too."
exit(1)
elif options.thread:
try:
if options.thread[:7] == "http://":
t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread])
else:
print ("if THREAD is not an absolute URL, "
"CATEGORY must also be specified")
if options.sort:
sort = sorter.Sorter()
for item in os.listdir(conf.get_download_location()):
sort.act(item)
exit(0)
if options.confval:
if not conf.option_exists(options.confval[0]):
print ("%s: error: %s is not a valid configuration option"
% (sys.argv[0], options.confval[0]))
exit(1)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
print "Setting", options.confval[0], "to", options.confval[1]
conf.set_option(options.confval[0],
options.confval[1])
conf.save()
exit(0)
elif options.tempcat:
config.Configuration().set_category(options.tempcat)
elif options.wizard:
try:
walk_with_wizard(base_url)
except KeyboardInterrupt:
print
print "Alright, no more wizard hat and robe then. Goodbye"
exit(0)
base_url = "%s%s/" % (base_url, config.Configuration().get_category())
elif options.thread:
try:
if options.thread[:7] == "http://":
t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread])
else:
print ("if THREAD is not an absolute URL, "
"CATEGORY must also be specified")
exit(1)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
try:
t = downloader.get_thread_links(base_url)
t = downloader.get_image_links(base_url, t)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "So you don't want these images? Fine! I'll stop then."
elif options.tempcat:
conf.set_categories([options.tempcat])
#base_url = "%s%s/" % (base_url, conf.get_categories())
if __name__ == "__main__":
    conf = config.Configuration()
    sort = sorter.Sorter()
    parse_commands()
    # Every finished download is handed to the sorter.
    downloader.set_on_downloaded(sort.act)

    for category in conf.get_categories():
        # Build each category URL from the untouched root.  Reassigning
        # base_url here would stack categories (".../w/a/") from the second
        # iteration on.
        category_url = "%s%s/" % (base_url, category)
        try:
            t = downloader.get_thread_links(category_url)
            t = downloader.get_image_links(category_url, t)
            (skipped, failed, downloaded, total) = downloader.get_images(t)
            # Single-argument print(...) emits the same text as the print
            # statement with two comma-separated items.
            print("%s %s" % ("Downloaded: ", downloaded))
            print("%s %s" % ("Skipped: ", skipped))
            print("%s %s" % ("Failed: ", failed))
            print("%s %s" % ("Total: ", total))
        except KeyboardInterrupt:
            print("")
            print("So you don't want these images? Fine! I'll stop then.")
            # The message promises to stop, so do not move on to the
            # remaining categories.
            break

118
config.py
View file

@ -21,42 +21,97 @@ import os
import ConfigParser
import sys
# Get our reference point, preferably $HOME.
# When HOME is not set in the environment, fall back to the directory part of
# the script path found in sys.argv[0].
homedir = os.getenv("HOME")
if homedir is None:
homedir = os.path.dirname(sys.argv[0])
class _Configuration(object):
def __init__(self):
def __init__(self, optioncreator):
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
self.configparser = ConfigParser.RawConfigParser()
if not os.path.exists(self.filename):
self.create_new()
else:
self.configparser.read(self.filename)
def create_new(self):
self.configparser.add_section("settings")
self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
self.optioncreator = optioncreator
self.configparser.add_section("locations")
self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
self.configparser.read(self.filename)
self.save()
def check(self):
    """Load the config file if it exists and fill in every missing option.

    Missing sections are added, and each missing option is created through
    ``self.create_option`` with its default and prompt text.  The
    configuration is saved only when at least one option had to be created.
    """
    parser = self.configparser

    # Pick up whatever is already stored on disk.
    if os.path.exists(self.filename):
        parser.read(self.filename)

    # (section, option, default factory, prompt), in the order they are asked.
    # Defaults are factories because the archive default is derived from
    # download_base, which may only just have been created.
    wanted = (
        ("locations", "download_base",
         lambda: os.path.join(homedir, "Pictures"),
         "Please enter where you would like the downloads to go: "),
        ("locations", "archive",
         lambda: os.path.join(parser.get("locations", "download_base"),
                              ".arch"),
         "Please enter where in {download_base} you would like to store "
         "archived images (used for checking what to download): "),
        ("settings", "categories",
         lambda: "w",
         "Please enter which category you would like to download from: "),
        ("settings", "resolutions",
         lambda: "1600x1050,1900x1200,1900x1080",
         "Please enter your preferred resolutions (* for all)"),
    )

    dirty = False
    for section, option, make_default, prompt in wanted:
        if not parser.has_section(section):
            parser.add_section(section)
        if not parser.has_option(section, option):
            self.create_option(section, option, make_default(), prompt)
            dirty = True

    # Only touch the file when something was actually added.
    if dirty:
        self.save()
def raw_input_with_default(self, default, prompt):
    """Prompt on stdin and return the answer, or *default* if it is empty.

    The text shown to the user is ``"<prompt> (default=<default>): "``.
    """
    answer = raw_input("%s (default=%s): " % (prompt, default))
    return answer if answer != "" else default
def create_option(self, section, name, default, message):
    """Obtain a value from the option creator and store it as section/name.

    ``self.optioncreator`` is called with ``(default, message)``; whatever it
    returns is written into the config parser.
    """
    chosen = self.optioncreator(default, message)
    self.configparser.set(section, name, chosen)
def get_download_location(self):
    """Return the ``locations/download_base`` option (root download dir)."""
    # This is the key that set_download_location() writes; the earlier
    # "download" key made this return unreachable.
    return self.configparser.get("locations", "download_base")


def set_download_location(self, value):
    """Store *value* as the ``locations/download_base`` option."""
    self.configparser.set("locations", "download_base", value)


def get_category(self):
    """Return the single-valued ``settings/category`` option."""
    return self.configparser.get("settings", "category")


def get_archive_location(self):
    """Return the ``locations/archive`` option."""
    return self.configparser.get("locations", "archive")


def set_archive_location(self, value):
    """Store *value* as the ``locations/archive`` option."""
    self.configparser.set("locations", "archive", value)


def set_category(self, value):
    """Store *value* as the single-valued ``settings/category`` option."""
    self.configparser.set("settings", "category", value)


def get_categories(self):
    """Return ``settings/categories`` split on commas, as a list of strings."""
    return self.configparser.get("settings", "categories").split(',')


def set_categories(self, value=None):
    """Store the sequence *value* comma-joined as ``settings/categories``.

    Omitting *value* (or passing None) stores an empty string.
    """
    # Must be "categories", the key get_categories() reads; writing to
    # "category" made every update invisible to readers.
    self.configparser.set("settings", "categories", ','.join(value or []))


def get_resolutions(self):
    """Return ``settings/resolutions`` split on commas, as a list of strings."""
    return self.configparser.get("settings", "resolutions").split(',')


def set_resolutions(self, value=None):
    """Store the sequence *value* comma-joined as ``settings/resolutions``.

    Omitting *value* (or passing None) stores an empty string.
    """
    self.configparser.set("settings", "resolutions", ','.join(value or []))
def option_exists(self, option):
sections = self.configparser.sections()
@ -65,6 +120,7 @@ class _Configuration(object):
return True
return False
# Should only be used by the command-line
def set_option(self, option, value):
sec = None
sections = self.configparser.sections()
@ -81,9 +137,19 @@ class _Configuration(object):
def save(self):
    """Write the current configuration to ``self.filename``.

    The containing directory, including any missing parents, is created
    first when it does not exist yet.
    """
    dirname = os.path.dirname(self.filename)
    # makedirs alone is enough.  Calling mkdir first would make the
    # makedirs call fail on the directory that was just created.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    # Close the handle deterministically so the data reaches the disk.
    with open(self.filename, "w") as configfile:
        self.configparser.write(configfile)
# Lazily created shared instance, and the callback used to ask for missing
# option values.  Callers assign _optioncreator before the first
# Configuration() call.
_configuration = None
_optioncreator = None


def Configuration():
    """Return the shared _Configuration, building and checking it on first use.

    Raises:
        ValueError: if the module-level ``_optioncreator`` is still None.
    """
    global _configuration
    if _optioncreator is None:
        raise ValueError("optioncreator must be set")
    if _configuration is None:
        _configuration = _Configuration(_optioncreator)
        _configuration.check()
    return _configuration

View file

@ -22,20 +22,35 @@
import urllib
import os
import htmlparser
#import progressbar
import config
savedir = config.Configuration().get_download_location()
if not os.path.exists(savedir):
os.makedirs(savedir)
def get_savedir():
    """Return the configured download directory, creating it when missing."""
    target = config.Configuration().get_download_location()
    if not os.path.exists(target):
        os.makedirs(target)
    return target
def check_archive(fullpath):
    """Report whether *fullpath*'s base name exists in the archive directory.

    Only the file name part of *fullpath* is looked up under the configured
    archive location.
    """
    archive_dir = config.Configuration().get_archive_location()
    marker = os.path.join(archive_dir, os.path.basename(fullpath))
    return os.path.exists(marker)
class Downloader(object):
def __init__(self, progress_reporter):
    """Remember the progress reporter; no download callback is set yet."""
    self.on_downloaded = None
    self.progress_reporter = progress_reporter


def set_on_downloaded(self, on_downloaded):
    """Register *on_downloaded*, called with the file name after a download."""
    self.on_downloaded = on_downloaded
def get_thread_links(self, baseurl):
myparser = htmlparser.MyParser()
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
t = ["0", "1", "2", "3", "4",
"5", "6", "7", "8", "9",
"10", "11", "12", "13", "14", "15"]
i = 1
total = len(t)
progress = self.progress_reporter(total)
@ -105,8 +120,8 @@ class Downloader(object):
i = 1
for link in t:
progress.show_progress(i)
filename = os.path.join(savedir, os.path.split(link)[1])
if not os.path.exists(filename):
filename = os.path.join(get_savedir(), os.path.split(link)[1])
if not check_archive(filename):
tries = 10
while tries > 0:
try:
@ -118,6 +133,8 @@ class Downloader(object):
failed += 1
else:
downloaded += 1
if self.on_downloaded is not None:
self.on_downloaded(filename)
else:
skipped += 1
i += 1
@ -126,10 +143,4 @@ class Downloader(object):
return (skipped, failed, downloaded, total)
if __name__ == "__main__":
    # This module is only a library.  get_thread_links is a Downloader
    # method, and the other helpers are likewise called on a Downloader
    # instance, so calling them here as bare names would fail.
    print("Don't run me, run 4grab.py")

110
sorter.py Normal file
View file

@ -0,0 +1,110 @@
######################################################################
# Copyright 2009, 2010 ryuslash
#
# This file is part of 4grab.
#
# 4grab is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# 4grab is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
######################################################################
import config
import Image
import shutil
import os
def dummy_option_creator(value1, value2):
    """Option creator that ignores both arguments and returns None."""
    return None
config._optioncreator = dummy_option_creator
class Sorter:
def __init__(self):
    """Keep the shared configuration and its list of resolutions."""
    shared = config.Configuration()
    self.conf = shared
    self.resolutions = shared.get_resolutions()
# Sort one file from the download directory.
# A file whose extension passes check_filename() is opened with Image.open().
# When its size equals one of self.resolutions (strings split on 'x'), it is
# copied into a "<first>-<second>" folder under the download directory.
# archive() and remove() are then called for the file.
# NOTE(review): indentation is missing in this listing, so which of the
# conditions below guard archive()/remove() cannot be confirmed from here.
def act(self, filename):
download_base = self.conf.get_download_location()
if self.check_filename(filename):
image = None
# A file that cannot be read is reported and leaves image as None.
try:
image = Image.open(os.path.join(download_base,
filename))
except IOError:
print "Cannot read image file %s, might be broken" \
% filename
# NOTE(review): archive_check() is True when a file of this name already
# exists under the archive location; confirm that requiring it here, rather
# than its negation, is intended.
if not image == None and self.archive_check(filename):
for resolution in self.resolutions:
# "1600x1050" becomes ["1600", "1050"]; sizes are compared as strings below.
resolution = resolution.split('x')
foldername = "%s-%s" % (resolution[0],
resolution[1])
folderpath = os.path.join(download_base,
foldername)
if str(image.size[0]) == resolution[0] and \
str(image.size[1]) == resolution[1]:
if not os.path.exists(folderpath):
os.makedirs(folderpath)
#print "creating", folderpath
self.copy(filename, folderpath)
# Stop at the first matching resolution.
break
self.archive(filename)
self.remove(filename)
def copy(self, filename, destpath):
    """Copy a downloaded file into *destpath*, keeping its base name.

    Only the base name of *filename* is used; the source is looked up in the
    configured download directory.  When source and destination are the same
    path, nothing is copied and a message is printed instead.
    """
    name = os.path.basename(filename)
    source = os.path.join(self.conf.get_download_location(), name)
    dest = os.path.join(destpath, name)
    if source == dest:
        print(" ".join(["\nHow can this even happen?! Copying",
                        source, "to", dest]))
        return
    shutil.copy(source, dest)
def archive(self, filename):
    """Leave an empty marker named after *filename* in the archive directory.

    The archive directory is created when missing.  Only the base name of
    *filename* is used for the marker.
    """
    location = self.conf.get_archive_location()
    if not os.path.exists(location):
        os.makedirs(location)
    # The downloader passes a full path to its on_downloaded callback.
    # Joining an absolute path would discard the archive dir and open the
    # downloaded file itself for writing, emptying it.
    dest = os.path.join(location, os.path.basename(filename))
    # Opening for writing creates (or empties) the marker; nothing is written.
    open(dest, "w").close()
def archive_check(self, filename):
    """Return True when *filename* exists under the archive location.

    NOTE(review): *filename* is joined as given.  An absolute path makes
    os.path.join ignore the archive location, so the check is then made
    against that path itself; confirm callers pass a bare file name.
    """
    candidate = os.path.join(self.conf.get_archive_location(), filename)
    return os.path.exists(candidate)
def check_filename(self, filename):
    """Return True when *filename* ends in ``.jpg``, ``.png`` or ``.gif``.

    The comparison is case-sensitive and uses the extension as reported by
    os.path.splitext.
    """
    extension = os.path.splitext(filename)[1]
    return extension in (".jpg", ".png", ".gif")
def remove(self, filename):
    """Delete *filename*, resolved against the configured download directory."""
    target = os.path.join(self.conf.get_download_location(), filename)
    os.remove(target)
if __name__ == "__main__":
    # Run as a script: pass every entry of the download directory to act().
    conf = config.Configuration()
    download_base = conf.get_download_location()
    sorter = Sorter()
    entries = os.listdir(download_base)
    for item in entries:
        sorter.act(item)