Merge branch 'sorting' into develop
This commit is contained in:
commit
96247d41d5
4 changed files with 350 additions and 128 deletions
211
4grab.py
211
4grab.py
|
@ -21,16 +21,27 @@
|
|||
|
||||
import optparse
|
||||
import sys
|
||||
import os
|
||||
|
||||
import config
|
||||
|
||||
import download
|
||||
import progressbar
|
||||
import sorter
|
||||
|
||||
def raw_input_with_default(default, prompt):
    """Ask a question on the console, falling back to *default*.

    The text shown is "<prompt> (default=<default>): ".  Returns what was
    typed, or *default* when the answer is the empty string.
    """
    answer = raw_input("%s (default=%s): " % (prompt, default))
    if answer != "":
        return answer
    return default
|
||||
config._optioncreator = raw_input_with_default
|
||||
|
||||
base_url = "http://boards.4chan.org/"
|
||||
parser = optparse.OptionParser()
|
||||
downloader = download.Downloader(progressbar.Progress)
|
||||
|
||||
def walk_with_wizard(baseurl):
|
||||
conf = config.Configuration()
|
||||
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
|
||||
print "Alright, let me put on my robe and wizard hat."
|
||||
|
||||
|
@ -54,8 +65,8 @@ def walk_with_wizard(baseurl):
|
|||
[thread])
|
||||
else:
|
||||
inp = raw_input("Which category would you like to download? ")
|
||||
config.Configuration().set_category(inp)
|
||||
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
|
||||
conf.set_categories([inp])
|
||||
baseurl = "%s%s/" % (baseurl, conf.get_categories()[0])
|
||||
|
||||
print wzrd_msg
|
||||
t = downloader.get_thread_links(baseurl)
|
||||
|
@ -66,102 +77,126 @@ def walk_with_wizard(baseurl):
|
|||
print "Failed: ", failed
|
||||
print "Total: ", total
|
||||
|
||||
parser.set_usage(
|
||||
"""%prog [options]
|
||||
def parse_commands():
|
||||
conf = config.Configuration()
|
||||
parser.set_usage(
|
||||
"""%prog [options]
|
||||
|
||||
4grab Copyright (C) 2009-2010 ryuslash
|
||||
This program comes with ABSOLUTELY NO WARRANTY.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions.""")
|
||||
parser.add_option("-e",
|
||||
nargs=2,
|
||||
dest="confval",
|
||||
metavar="CONF VALUE",
|
||||
help="Set configuration option CONF to be VALUE")
|
||||
parser.add_option("-c",
|
||||
"--category",
|
||||
dest="tempcat",
|
||||
metavar="CATEGORY",
|
||||
help="Set the category to CATEGORY only for this run")
|
||||
parser.add_option("-t",
|
||||
"--thread",
|
||||
dest="thread",
|
||||
metavar="THREAD",
|
||||
help="Download only THREAD. If THREAD is only an ID, "
|
||||
"CATEGORY must also be set. Otherwise, no problem :-)")
|
||||
parser.add_option("-w",
|
||||
"--wizard",
|
||||
action="store_true",
|
||||
dest="wizard",
|
||||
help="I'll put on my robe and wizard hat and help you "
|
||||
"get some of those pictures you like")
|
||||
(options, args) = parser.parse_args()
|
||||
parser.add_option("-e",
|
||||
nargs=2,
|
||||
dest="confval",
|
||||
metavar="CONF VALUE",
|
||||
help="Set configuration option CONF to be VALUE")
|
||||
parser.add_option("-c",
|
||||
"--category",
|
||||
dest="tempcat",
|
||||
metavar="CATEGORY",
|
||||
help="Set the category to CATEGORY only for this run")
|
||||
parser.add_option("-t",
|
||||
"--thread",
|
||||
dest="thread",
|
||||
metavar="THREAD",
|
||||
help="Download only THREAD. If THREAD is only an ID, "
|
||||
"CATEGORY must also be set. Otherwise, no problem :-)")
|
||||
parser.add_option("-w",
|
||||
"--wizard",
|
||||
action="store_true",
|
||||
dest="wizard",
|
||||
help="I'll put on my robe and wizard hat and help you "
|
||||
"get some of those pictures you like")
|
||||
parser.add_option("-s",
|
||||
"--sort",
|
||||
action="store_true",
|
||||
dest="sort",
|
||||
help="Sort downloaded images, most handy if you've used "
|
||||
"older versions which didn't sort yet")
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if options.confval and (options.tempcat
|
||||
or options.thread
|
||||
or options.wizard):
|
||||
print "Can't configure something and do something else too."
|
||||
exit(1)
|
||||
if options.wizard and (options.tempcat
|
||||
or options.thread
|
||||
or options.confval):
|
||||
print "Can't take a walk with the wizard and do something else too."
|
||||
exit(1)
|
||||
|
||||
if options.confval:
|
||||
if not config.Configuration().option_exists(options.confval[0]):
|
||||
print ("%s: error: %s is not a "
|
||||
"valid configuration option") % (sys.argv[0],
|
||||
options.confval[0])
|
||||
if options.confval and (options.tempcat
|
||||
or options.thread
|
||||
or options.wizard
|
||||
or options.sort):
|
||||
print "Can't configure something and do something else too."
|
||||
exit(1)
|
||||
print "Setting", options.confval[0], "to", options.confval[1]
|
||||
config.Configuration().set_option(options.confval[0],
|
||||
options.confval[1])
|
||||
config.Configuration().save()
|
||||
exit(0)
|
||||
|
||||
elif options.wizard:
|
||||
try:
|
||||
walk_with_wizard(base_url)
|
||||
except KeyboardInterrupt:
|
||||
print
|
||||
print "Alright, no more wizard hat and robe then. Goodbye"
|
||||
exit(0)
|
||||
if options.wizard and (options.tempcat
|
||||
or options.thread
|
||||
or options.confval
|
||||
or options.sort):
|
||||
print "Can't take a walk with the wizard and do something else too."
|
||||
exit(1)
|
||||
|
||||
elif options.thread:
|
||||
try:
|
||||
if options.thread[:7] == "http://":
|
||||
t = downloader.get_image_links("", [options.thread])
|
||||
elif options.tempcat:
|
||||
url = "%s%s/res/" % (base_url, options.tempcat)
|
||||
t = downloader.get_image_links(url, [options.thread])
|
||||
else:
|
||||
print ("if THREAD is not an absolute URL, "
|
||||
"CATEGORY must also be specified")
|
||||
if options.sort:
|
||||
sort = sorter.Sorter()
|
||||
for item in os.listdir(conf.get_download_location()):
|
||||
sort.act(item)
|
||||
exit(0)
|
||||
|
||||
if options.confval:
|
||||
if not conf.option_exists(options.confval[0]):
|
||||
print ("%s: error: %s is not a valid configuration option"
|
||||
% (sys.argv[0], options.confval[0]))
|
||||
exit(1)
|
||||
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
||||
print "Downloaded: ", downloaded
|
||||
print "Skipped: ", skipped
|
||||
print "Failed: ", failed
|
||||
print "Total: ", total
|
||||
except KeyboardInterrupt:
|
||||
print
|
||||
print "Goodbye"
|
||||
exit(0)
|
||||
print "Setting", options.confval[0], "to", options.confval[1]
|
||||
conf.set_option(options.confval[0],
|
||||
options.confval[1])
|
||||
conf.save()
|
||||
exit(0)
|
||||
|
||||
elif options.tempcat:
|
||||
config.Configuration().set_category(options.tempcat)
|
||||
elif options.wizard:
|
||||
try:
|
||||
walk_with_wizard(base_url)
|
||||
except KeyboardInterrupt:
|
||||
print
|
||||
print "Alright, no more wizard hat and robe then. Goodbye"
|
||||
exit(0)
|
||||
|
||||
base_url = "%s%s/" % (base_url, config.Configuration().get_category())
|
||||
elif options.thread:
|
||||
try:
|
||||
if options.thread[:7] == "http://":
|
||||
t = downloader.get_image_links("", [options.thread])
|
||||
elif options.tempcat:
|
||||
url = "%s%s/res/" % (base_url, options.tempcat)
|
||||
t = downloader.get_image_links(url, [options.thread])
|
||||
else:
|
||||
print ("if THREAD is not an absolute URL, "
|
||||
"CATEGORY must also be specified")
|
||||
exit(1)
|
||||
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
||||
print "Downloaded: ", downloaded
|
||||
print "Skipped: ", skipped
|
||||
print "Failed: ", failed
|
||||
print "Total: ", total
|
||||
except KeyboardInterrupt:
|
||||
print
|
||||
print "Goodbye"
|
||||
exit(0)
|
||||
|
||||
try:
|
||||
t = downloader.get_thread_links(base_url)
|
||||
t = downloader.get_image_links(base_url, t)
|
||||
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
||||
print "Downloaded: ", downloaded
|
||||
print "Skipped: ", skipped
|
||||
print "Failed: ", failed
|
||||
print "Total: ", total
|
||||
except KeyboardInterrupt:
|
||||
print
|
||||
print "So you don't want these images? Fine! I'll stop then."
|
||||
elif options.tempcat:
|
||||
conf.set_categories([options.tempcat])
|
||||
|
||||
#base_url = "%s%s/" % (base_url, conf.get_categories())
|
||||
|
||||
if __name__ == "__main__":
|
||||
conf = config.Configuration()
|
||||
sort = sorter.Sorter()
|
||||
parse_commands()
|
||||
|
||||
downloader.set_on_downloaded(sort.act)
|
||||
for category in conf.get_categories():
|
||||
base_url = "%s%s/" % (base_url, category)
|
||||
try:
|
||||
t = downloader.get_thread_links(base_url)
|
||||
t = downloader.get_image_links(base_url, t)
|
||||
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
||||
print "Downloaded: ", downloaded
|
||||
print "Skipped: ", skipped
|
||||
print "Failed: ", failed
|
||||
print "Total: ", total
|
||||
except KeyboardInterrupt:
|
||||
print
|
||||
print "So you don't want these images? Fine! I'll stop then."
|
||||
|
|
118
config.py
118
config.py
|
@ -21,42 +21,97 @@ import os
|
|||
import ConfigParser
|
||||
import sys
|
||||
|
||||
# Get our reference point. preferably $HOME.
|
||||
homedir = os.getenv("HOME")
|
||||
if homedir is None:
|
||||
homedir = os.path.dirname(sys.argv[0])
|
||||
|
||||
|
||||
class _Configuration(object):
|
||||
def __init__(self):
|
||||
def __init__(self, optioncreator):
|
||||
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
|
||||
self.configparser = ConfigParser.RawConfigParser()
|
||||
if not os.path.exists(self.filename):
|
||||
self.create_new()
|
||||
else:
|
||||
self.configparser.read(self.filename)
|
||||
|
||||
def create_new(self):
|
||||
self.configparser.add_section("settings")
|
||||
self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
|
||||
self.optioncreator = optioncreator
|
||||
|
||||
self.configparser.add_section("locations")
|
||||
self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
|
||||
self.configparser.read(self.filename)
|
||||
|
||||
self.save()
|
||||
def check(self):
    """Make sure the configuration holds every section and option in use.

    An existing config file is (re-)read first.  Missing sections are
    added silently; each missing option is obtained through
    create_option().  save() is called only when an option was created.
    """
    parser = self.configparser
    dirty = False

    # Pick up whatever is already on disk.
    if os.path.exists(self.filename):
        parser.read(self.filename)

    # [locations]
    if not parser.has_section("locations"):
        parser.add_section("locations")

    if not parser.has_option("locations", "download_base"):
        default_base = os.path.join(homedir, "Pictures")
        self.create_option("locations",
                           "download_base",
                           default_base,
                           "Please enter where "
                           "you would like the "
                           "downloads to go: ")
        dirty = True

    if not parser.has_option("locations", "archive"):
        # The default archive sits inside the download base set above.
        default_archive = os.path.join(
            parser.get("locations", "download_base"), ".arch")
        self.create_option("locations",
                           "archive",
                           default_archive,
                           "Please enter where in {download_base} you "
                           "would like to store archived images (used for "
                           "checking what to download): ")
        dirty = True

    # [settings]
    if not parser.has_section("settings"):
        parser.add_section("settings")

    if not parser.has_option("settings", "categories"):
        self.create_option("settings",
                           "categories",
                           "w",
                           "Please enter which "
                           "category you would like "
                           "to download from: ")
        dirty = True

    if not parser.has_option("settings", "resolutions"):
        self.create_option("settings",
                           "resolutions",
                           "1600x1050,1900x1200,1900x1080",
                           "Please enter your preferred "
                           "resolutions (* for all)")
        dirty = True

    # Only touch the file when something was added.
    if dirty:
        self.save()
|
||||
|
||||
def raw_input_with_default(self, default, prompt):
|
||||
inp = raw_input("%s (default=%s): " % (prompt, default))
|
||||
if inp == "":
|
||||
return default
|
||||
return inp
|
||||
def create_option(self, section, name, default, message):
    """Obtain a value from the option creator and store it as section/name.

    The callback stored in self.optioncreator is called as
    optioncreator(default, message) and its return value is written to
    the parser.  save() is not called here.
    """
    chosen = self.optioncreator(default, message)
    self.configparser.set(section, name, chosen)
|
||||
|
||||
def get_download_location(self):
    """Return the directory downloads are saved to.

    Reads locations/download_base, the option that check() creates and
    set_download_location() writes.
    """
    # The earlier lookup of locations/download is gone: that option is no
    # longer created anywhere, and it kept this return from being reached.
    return self.configparser.get("locations", "download_base")
|
||||
def set_download_location(self, value):
    """Store *value* as locations/download_base; save() is not called."""
    section, option = "locations", "download_base"
    self.configparser.set(section, option, value)
|
||||
|
||||
def get_category(self):
|
||||
return self.configparser.get("settings", "category")
|
||||
def get_archive_location(self):
    """Return the value stored as locations/archive."""
    section, option = "locations", "archive"
    return self.configparser.get(section, option)
|
||||
def set_archive_location(self, value):
    """Store *value* as locations/archive; save() is not called."""
    section, option = "locations", "archive"
    self.configparser.set(section, option, value)
|
||||
|
||||
def set_category(self, value):
|
||||
self.configparser.set("settings", "category", value)
|
||||
def get_categories(self):
    """Return settings/categories split on commas, as a list of strings."""
    raw = self.configparser.get("settings", "categories")
    return raw.split(',')
|
||||
def set_categories(self, value=None):
    """Store the categories to download as a comma-separated string.

    value -- sequence of category names; omitted or None stores "".

    save() is not called, so the change lasts for this run only unless the
    caller saves.
    """
    if value is None:
        value = []
    # Write the key that get_categories() and check() read.  Writing
    # "category" here left "categories" untouched, so the new list was
    # never seen by the download loop.
    self.configparser.set("settings", "categories", ','.join(value))
|
||||
|
||||
def get_resolutions(self):
    """Return settings/resolutions split on commas, as a list of strings."""
    raw = self.configparser.get("settings", "resolutions")
    return raw.split(',')
|
||||
def set_resolutions(self, value = []):
    """Store the given resolutions, comma-joined, as settings/resolutions."""
    joined = ','.join(value)
    self.configparser.set("settings", "resolutions", joined)
|
||||
|
||||
def option_exists(self, option):
|
||||
sections = self.configparser.sections()
|
||||
|
@ -65,6 +120,7 @@ class _Configuration(object):
|
|||
return True
|
||||
return False
|
||||
|
||||
# Should only be used by the command-line
|
||||
def set_option(self, option, value):
|
||||
sec = None
|
||||
sections = self.configparser.sections()
|
||||
|
@ -81,9 +137,19 @@ class _Configuration(object):
|
|||
def save(self):
    """Write the configuration to self.filename.

    The directory holding the file is created first when it is missing,
    including any missing parent directories.
    """
    dirname = os.path.dirname(self.filename)
    if dirname and not os.path.exists(dirname):
        # makedirs on its own: a preceding mkdir would create the directory
        # and make this call fail because it then already exists.
        os.makedirs(dirname)

    configfile = open(self.filename, "w")
    try:
        self.configparser.write(configfile)
    finally:
        # Close explicitly so the data is flushed to disk right away.
        configfile.close()
|
||||
|
||||
_configuration = _Configuration()
|
||||
def Configuration(): return _configuration
|
||||
_configuration = None
|
||||
_optioncreator = None
|
||||
def Configuration():
# Accessor for the module-wide _Configuration object.
#
# Raises ValueError while the module-level _optioncreator is still None.
# The _Configuration instance is built from _optioncreator the first time
# it is needed and kept in the module-level _configuration afterwards.
|
||||
global _optioncreator
|
||||
global _configuration
|
||||
|
||||
if _optioncreator is None:
|
||||
raise ValueError("optioncreator must be set")
|
||||
if _configuration is None:
|
||||
_configuration = _Configuration(_optioncreator)
|
||||
# NOTE(review): indentation is lost in this view -- it cannot be told here
# whether check() runs only when the instance was just created or on every
# call; confirm against the real file.
_configuration.check()
|
||||
return _configuration
|
||||
|
|
39
download.py
39
download.py
|
@ -22,20 +22,35 @@
|
|||
import urllib
|
||||
import os
|
||||
import htmlparser
|
||||
#import progressbar
|
||||
import config
|
||||
|
||||
savedir = config.Configuration().get_download_location()
|
||||
if not os.path.exists(savedir):
|
||||
os.makedirs(savedir)
|
||||
def get_savedir():
    """Return the configured download directory, creating it when missing."""
    target = config.Configuration().get_download_location()
    if not os.path.exists(target):
        os.makedirs(target)
    return target
|
||||
def check_archive(fullpath):
    """Tell whether the archive directory holds an entry for *fullpath*.

    Only the base name of *fullpath* is looked up inside the configured
    archive location.
    """
    archive_dir = config.Configuration().get_archive_location()
    marker = os.path.join(archive_dir, os.path.basename(fullpath))
    return os.path.exists(marker)
|
||||
|
||||
class Downloader(object):
|
||||
def __init__(self, progress_reporter):
|
||||
self.progress_reporter = progress_reporter
|
||||
self.on_downloaded = None
|
||||
|
||||
def set_on_downloaded(self, on_downloaded):
    """Register the callable that is handed each downloaded file's path."""
    callback = on_downloaded
    self.on_downloaded = callback
|
||||
|
||||
def get_thread_links(self, baseurl):
|
||||
myparser = htmlparser.MyParser()
|
||||
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
|
||||
t = ["0", "1", "2", "3", "4",
|
||||
"5", "6", "7", "8", "9",
|
||||
"10", "11", "12", "13", "14", "15"]
|
||||
i = 1
|
||||
total = len(t)
|
||||
progress = self.progress_reporter(total)
|
||||
|
@ -105,8 +120,8 @@ class Downloader(object):
|
|||
i = 1
|
||||
for link in t:
|
||||
progress.show_progress(i)
|
||||
filename = os.path.join(savedir, os.path.split(link)[1])
|
||||
if not os.path.exists(filename):
|
||||
filename = os.path.join(get_savedir(), os.path.split(link)[1])
|
||||
if not check_archive(filename):
|
||||
tries = 10
|
||||
while tries > 0:
|
||||
try:
|
||||
|
@ -118,6 +133,8 @@ class Downloader(object):
|
|||
failed += 1
|
||||
else:
|
||||
downloaded += 1
|
||||
if self.on_downloaded is not None:
|
||||
self.on_downloaded(filename)
|
||||
else:
|
||||
skipped += 1
|
||||
i += 1
|
||||
|
@ -126,10 +143,4 @@ class Downloader(object):
|
|||
return (skipped, failed, downloaded, total)
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Get a file-like object for the 4chan.org w/imgboard
|
||||
base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"
|
||||
|
||||
# Get the hyperlinks.
|
||||
t = get_thread_links(base_url)
|
||||
t = get_image_links(base_url, t)
|
||||
get_images(t)
|
||||
print "Don't run me, run 4grab.py"
|
||||
|
|
110
sorter.py
Normal file
110
sorter.py
Normal file
|
@ -0,0 +1,110 @@
|
|||
######################################################################
|
||||
# Copyright 2009, 2010 ryuslash
|
||||
#
|
||||
# This file is part of 4grab.
|
||||
#
|
||||
# 4grab is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# 4grab is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
|
||||
######################################################################
|
||||
|
||||
import config
|
||||
import Image
|
||||
import shutil
|
||||
import os
|
||||
|
||||
def dummy_option_creator(value1, value2):
    """Stand-in option creator: ignores both arguments and returns None."""
    return None
|
||||
config._optioncreator = dummy_option_creator
|
||||
|
||||
class Sorter:
|
||||
|
||||
def __init__(self):
    """Fetch the shared configuration and keep its list of resolutions."""
    conf = config.Configuration()
    self.conf = conf
    self.resolutions = conf.get_resolutions()
|
||||
|
||||
def act(self, filename):
# Sort one file from the download location.
#
# Files whose extension fails check_filename() are ignored.  Otherwise the
# image is opened with Image.open(); an IOError is reported on stdout and
# leaves image as None.  For a readable image that passes archive_check(),
# the configured resolutions ("WxH" strings) are tried in order and the
# file is copied into <download_base>/<W>-<H> for the first one that equals
# the image size, creating that folder when needed.
#
# NOTE(review): indentation is lost in this view, so it cannot be told
# here whether archive()/remove() at the end run only inside the
# archive_check() branch or for every file that passes check_filename() --
# confirm against the real file.
|
||||
download_base = self.conf.get_download_location()
|
||||
|
||||
if self.check_filename(filename):
|
||||
image = None
|
||||
try:
|
||||
image = Image.open(os.path.join(download_base,
|
||||
filename))
|
||||
except IOError:
|
||||
print "Cannot read image file %s, might be broken" \
|
||||
% filename
|
||||
|
||||
# NOTE(review): archive_check() is true when the name already exists under
# the archive location, so only files that look archived get sorted --
# presumably the test was meant to be negated; confirm.  When the caller
# passes an absolute path (Downloader builds it with os.path.join on the
# download location), os.path.join() drops the archive directory and the
# check just sees the downloaded file itself.
if not image == None and self.archive_check(filename):
|
||||
for resolution in self.resolutions:
|
||||
resolution = resolution.split('x')
|
||||
foldername = "%s-%s" % (resolution[0],
|
||||
resolution[1])
|
||||
folderpath = os.path.join(download_base,
|
||||
foldername)
|
||||
|
||||
if str(image.size[0]) == resolution[0] and \
|
||||
str(image.size[1]) == resolution[1]:
|
||||
if not os.path.exists(folderpath):
|
||||
os.makedirs(folderpath)
|
||||
#print "creating", folderpath
|
||||
|
||||
self.copy(filename, folderpath)
|
||||
break
|
||||
|
||||
# Mark the file as handled, then delete it from the download location.
self.archive(filename)
|
||||
self.remove(filename)
|
||||
|
||||
def copy(self, filename, destpath):
|
||||
download_base = self.conf.get_download_location()
|
||||
source = os.path.join(download_base,
|
||||
os.path.basename(filename))
|
||||
dest = os.path.join(destpath,
|
||||
os.path.basename(filename))
|
||||
if source != dest:
|
||||
shutil.copy(source, dest)
|
||||
else:
|
||||
print "\nHow can this even happen?! Copying", source, "to", dest
|
||||
#print "\nParameters are", filename, "and", destpath
|
||||
|
||||
def archive(self, filename):
    """Record *filename* as archived by creating an empty marker file.

    The marker is <archive location>/<base name of filename>; the archive
    directory is created first when it does not exist.  The file in the
    download location is not touched.
    """
    location = self.conf.get_archive_location()
    if not os.path.exists(location):
        os.makedirs(location)

    # Use the base name only.  Downloader hands over the full download
    # path, and os.path.join() discards `location` when the second part is
    # absolute -- the "marker" would then be the image itself, emptied by
    # the open(..., "w") below.
    dest = os.path.join(location, os.path.basename(filename))
    open(dest, "w").close()
|
||||
|
||||
|
||||
def archive_check(self, filename):
    """Return True when <archive location>/<filename> exists on disk."""
    candidate = os.path.join(self.conf.get_archive_location(), filename)
    return os.path.exists(candidate)
|
||||
|
||||
def check_filename(self, filename):
    """Return True when the extension is exactly .jpg, .png or .gif."""
    extension = os.path.splitext(filename)[1]
    return extension in (".jpg", ".png", ".gif")
|
||||
|
||||
def remove(self, filename):
    """Delete <download location>/<filename> from disk."""
    target = os.path.join(self.conf.get_download_location(), filename)
    os.remove(target)
|
||||
|
||||
if __name__ == "__main__":
    # Run as a script: pass every entry of the download location to act().
    conf = config.Configuration()
    download_base = conf.get_download_location()
    sorter = Sorter()
    entries = os.listdir(download_base)
    for entry in entries:
        sorter.act(entry)
|
Loading…
Reference in a new issue