Merge branch 'sorting' into develop
This commit is contained in:
commit
96247d41d5
4 changed files with 350 additions and 128 deletions
59
4grab.py
59
4grab.py
|
@ -21,16 +21,27 @@
|
||||||
|
|
||||||
import optparse
|
import optparse
|
||||||
import sys
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
import config
|
import config
|
||||||
|
|
||||||
import download
|
import download
|
||||||
import progressbar
|
import progressbar
|
||||||
|
import sorter
|
||||||
|
|
||||||
|
def raw_input_with_default(default, prompt):
    """Ask a question on stdin, falling back to *default* on an empty reply.

    The text shown is "<prompt> (default=<default>): ".  Anything the user
    types is returned as-is; an empty answer returns *default*.
    """
    answer = raw_input("%s (default=%s): " % (prompt, default))
    return default if answer == "" else answer
|
||||||
|
config._optioncreator = raw_input_with_default
|
||||||
|
|
||||||
base_url = "http://boards.4chan.org/"
|
base_url = "http://boards.4chan.org/"
|
||||||
parser = optparse.OptionParser()
|
parser = optparse.OptionParser()
|
||||||
downloader = download.Downloader(progressbar.Progress)
|
downloader = download.Downloader(progressbar.Progress)
|
||||||
|
|
||||||
def walk_with_wizard(baseurl):
|
def walk_with_wizard(baseurl):
|
||||||
|
conf = config.Configuration()
|
||||||
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
|
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
|
||||||
print "Alright, let me put on my robe and wizard hat."
|
print "Alright, let me put on my robe and wizard hat."
|
||||||
|
|
||||||
|
@ -54,8 +65,8 @@ def walk_with_wizard(baseurl):
|
||||||
[thread])
|
[thread])
|
||||||
else:
|
else:
|
||||||
inp = raw_input("Which category would you like to download? ")
|
inp = raw_input("Which category would you like to download? ")
|
||||||
config.Configuration().set_category(inp)
|
conf.set_categories([inp])
|
||||||
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
|
baseurl = "%s%s/" % (baseurl, conf.get_categories()[0])
|
||||||
|
|
||||||
print wzrd_msg
|
print wzrd_msg
|
||||||
t = downloader.get_thread_links(baseurl)
|
t = downloader.get_thread_links(baseurl)
|
||||||
|
@ -66,6 +77,8 @@ def walk_with_wizard(baseurl):
|
||||||
print "Failed: ", failed
|
print "Failed: ", failed
|
||||||
print "Total: ", total
|
print "Total: ", total
|
||||||
|
|
||||||
|
def parse_commands():
|
||||||
|
conf = config.Configuration()
|
||||||
parser.set_usage(
|
parser.set_usage(
|
||||||
"""%prog [options]
|
"""%prog [options]
|
||||||
|
|
||||||
|
@ -95,29 +108,43 @@ parser.add_option("-w",
|
||||||
dest="wizard",
|
dest="wizard",
|
||||||
help="I'll put on my robe and wizard hat and help you "
|
help="I'll put on my robe and wizard hat and help you "
|
||||||
"get some of those pictures you like")
|
"get some of those pictures you like")
|
||||||
|
parser.add_option("-s",
|
||||||
|
"--sort",
|
||||||
|
action="store_true",
|
||||||
|
dest="sort",
|
||||||
|
help="Sort downloaded images, most handy if you've used "
|
||||||
|
"older versions which didn't sort yet")
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
if options.confval and (options.tempcat
|
if options.confval and (options.tempcat
|
||||||
or options.thread
|
or options.thread
|
||||||
or options.wizard):
|
or options.wizard
|
||||||
|
or options.sort):
|
||||||
print "Can't configure something and do something else too."
|
print "Can't configure something and do something else too."
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
if options.wizard and (options.tempcat
|
if options.wizard and (options.tempcat
|
||||||
or options.thread
|
or options.thread
|
||||||
or options.confval):
|
or options.confval
|
||||||
|
or options.sort):
|
||||||
print "Can't take a walk with the wizard and do something else too."
|
print "Can't take a walk with the wizard and do something else too."
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
if options.sort:
|
||||||
|
sort = sorter.Sorter()
|
||||||
|
for item in os.listdir(conf.get_download_location()):
|
||||||
|
sort.act(item)
|
||||||
|
exit(0)
|
||||||
|
|
||||||
if options.confval:
|
if options.confval:
|
||||||
if not config.Configuration().option_exists(options.confval[0]):
|
if not conf.option_exists(options.confval[0]):
|
||||||
print ("%s: error: %s is not a "
|
print ("%s: error: %s is not a valid configuration option"
|
||||||
"valid configuration option") % (sys.argv[0],
|
% (sys.argv[0], options.confval[0]))
|
||||||
options.confval[0])
|
|
||||||
exit(1)
|
exit(1)
|
||||||
print "Setting", options.confval[0], "to", options.confval[1]
|
print "Setting", options.confval[0], "to", options.confval[1]
|
||||||
config.Configuration().set_option(options.confval[0],
|
conf.set_option(options.confval[0],
|
||||||
options.confval[1])
|
options.confval[1])
|
||||||
config.Configuration().save()
|
conf.save()
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
elif options.wizard:
|
elif options.wizard:
|
||||||
|
@ -150,10 +177,18 @@ elif options.thread:
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
elif options.tempcat:
|
elif options.tempcat:
|
||||||
config.Configuration().set_category(options.tempcat)
|
conf.set_categories([options.tempcat])
|
||||||
|
|
||||||
base_url = "%s%s/" % (base_url, config.Configuration().get_category())
|
#base_url = "%s%s/" % (base_url, conf.get_categories())
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
conf = config.Configuration()
|
||||||
|
sort = sorter.Sorter()
|
||||||
|
parse_commands()
|
||||||
|
|
||||||
|
downloader.set_on_downloaded(sort.act)
|
||||||
|
for category in conf.get_categories():
|
||||||
|
base_url = "%s%s/" % (base_url, category)
|
||||||
try:
|
try:
|
||||||
t = downloader.get_thread_links(base_url)
|
t = downloader.get_thread_links(base_url)
|
||||||
t = downloader.get_image_links(base_url, t)
|
t = downloader.get_image_links(base_url, t)
|
||||||
|
|
112
config.py
112
config.py
|
@ -21,42 +21,97 @@ import os
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
# Get our reference point, preferably $HOME.
|
||||||
homedir = os.getenv("HOME")
|
homedir = os.getenv("HOME")
|
||||||
if homedir is None:
|
if homedir is None:
|
||||||
homedir = os.path.dirname(sys.argv[0])
|
homedir = os.path.dirname(sys.argv[0])
|
||||||
|
|
||||||
class _Configuration(object):
|
class _Configuration(object):
|
||||||
def __init__(self):
|
def __init__(self, optioncreator):
|
||||||
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
|
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
|
||||||
self.configparser = ConfigParser.RawConfigParser()
|
self.configparser = ConfigParser.RawConfigParser()
|
||||||
if not os.path.exists(self.filename):
|
self.optioncreator = optioncreator
|
||||||
self.create_new()
|
|
||||||
else:
|
|
||||||
self.configparser.read(self.filename)
|
self.configparser.read(self.filename)
|
||||||
|
|
||||||
def create_new(self):
|
def check(self):
|
||||||
self.configparser.add_section("settings")
|
changed = False
|
||||||
self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
|
# read if it exists
|
||||||
|
if os.path.exists(self.filename):
|
||||||
|
self.configparser.read(self.filename)
|
||||||
|
# locations
|
||||||
|
if not self.configparser.has_section("locations"):
|
||||||
self.configparser.add_section("locations")
|
self.configparser.add_section("locations")
|
||||||
self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
|
# locations/download_base
|
||||||
|
if not self.configparser.has_option("locations", "download_base"):
|
||||||
|
self.create_option("locations",
|
||||||
|
"download_base",
|
||||||
|
os.path.join(homedir,
|
||||||
|
"Pictures"),
|
||||||
|
"Please enter where "
|
||||||
|
"you would like the "
|
||||||
|
"downloads to go: ")
|
||||||
|
changed = True
|
||||||
|
# locations/archive
|
||||||
|
if not self.configparser.has_option("locations", "archive"):
|
||||||
|
self.create_option("locations",
|
||||||
|
"archive",
|
||||||
|
os.path.join(self.configparser.get("locations",
|
||||||
|
"download_base"),
|
||||||
|
".arch"),
|
||||||
|
"Please enter where in {download_base} you "
|
||||||
|
"would like to store archived images (used for "
|
||||||
|
"checking what to download): ")
|
||||||
|
changed = True
|
||||||
|
# settings
|
||||||
|
if not self.configparser.has_section("settings"):
|
||||||
|
self.configparser.add_section("settings")
|
||||||
|
# settings/categories
|
||||||
|
if not self.configparser.has_option("settings", "categories"):
|
||||||
|
self.create_option("settings",
|
||||||
|
"categories",
|
||||||
|
"w",
|
||||||
|
"Please enter which "
|
||||||
|
"category you would like "
|
||||||
|
"to download from: ")
|
||||||
|
changed = True
|
||||||
|
# settings/resolutions
|
||||||
|
if not self.configparser.has_option("settings", "resolutions"):
|
||||||
|
self.create_option("settings",
|
||||||
|
"resolutions",
|
||||||
|
"1600x1050,1900x1200,1900x1080",
|
||||||
|
"Please enter your preferred "
|
||||||
|
"resolutions (* for all)")
|
||||||
|
changed = True
|
||||||
|
# save
|
||||||
|
if changed:
|
||||||
self.save()
|
self.save()
|
||||||
|
|
||||||
def raw_input_with_default(self, default, prompt):
|
def create_option(self, section, name, default, message):
|
||||||
inp = raw_input("%s (default=%s): " % (prompt, default))
|
self.configparser.set(section,
|
||||||
if inp == "":
|
name,
|
||||||
return default
|
self.optioncreator(default,
|
||||||
return inp
|
message))
|
||||||
|
|
||||||
def get_download_location(self):
|
def get_download_location(self):
|
||||||
return self.configparser.get("locations", "download")
|
return self.configparser.get("locations", "download_base")
|
||||||
|
def set_download_location(self, value):
    """Store *value* as the "download_base" option of "locations"."""
    section, option = "locations", "download_base"
    self.configparser.set(section, option, value)
|
||||||
|
|
||||||
def get_category(self):
|
def get_archive_location(self):
|
||||||
return self.configparser.get("settings", "category")
|
return self.configparser.get("locations", "archive")
|
||||||
|
def set_archive_location(self, value):
    """Store *value* as the "archive" option of "locations"."""
    section, option = "locations", "archive"
    self.configparser.set(section, option, value)
|
||||||
|
|
||||||
def set_category(self, value):
|
def get_categories(self):
|
||||||
self.configparser.set("settings", "category", value)
|
return self.configparser.get("settings", "categories").split(',')
|
||||||
|
def set_categories(self, value=()):
    """Store the categories to download from.

    value -- iterable of category names; they are written comma-separated
             to the "categories" option of the "settings" section, the
             same option get_categories() reads back.
    """
    # The option has to be "categories": writing to "category" left the
    # value that get_categories() returns untouched.
    self.configparser.set("settings", "categories", ','.join(value))
|
||||||
|
|
||||||
|
def get_resolutions(self):
    """Return the "resolutions" setting split on commas, as a list."""
    raw = self.configparser.get("settings", "resolutions")
    return raw.split(',')
|
||||||
|
def set_resolutions(self, value=()):
    """Store the preferred resolutions.

    value -- iterable of resolution strings; they are written
             comma-separated to the "resolutions" option of the
             "settings" section.
    """
    # An immutable default avoids sharing one list object between calls.
    self.configparser.set("settings", "resolutions", ','.join(value))
|
||||||
|
|
||||||
def option_exists(self, option):
|
def option_exists(self, option):
|
||||||
sections = self.configparser.sections()
|
sections = self.configparser.sections()
|
||||||
|
@ -65,6 +120,7 @@ class _Configuration(object):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Should only be used by the command-line
|
||||||
def set_option(self, option, value):
|
def set_option(self, option, value):
|
||||||
sec = None
|
sec = None
|
||||||
sections = self.configparser.sections()
|
sections = self.configparser.sections()
|
||||||
|
@ -81,9 +137,19 @@ class _Configuration(object):
|
||||||
def save(self):
|
def save(self):
|
||||||
dirname = os.path.dirname(self.filename)
|
dirname = os.path.dirname(self.filename)
|
||||||
if not os.path.exists(dirname):
|
if not os.path.exists(dirname):
|
||||||
os.mkdir(dirname)
|
os.makedirs(dirname)
|
||||||
configfile = open(self.filename, "w")
|
configfile = open(self.filename, "w")
|
||||||
self.configparser.write(configfile)
|
self.configparser.write(configfile)
|
||||||
|
|
||||||
_configuration = _Configuration()
|
_configuration = None
|
||||||
def Configuration(): return _configuration
|
_optioncreator = None
|
||||||
|
# Accessor for the module-wide configuration object.
# Raises ValueError("optioncreator must be set") while the module-level
# _optioncreator is still None.  When _configuration is None a new
# _Configuration is built from _optioncreator; the stored instance is
# returned.
# NOTE(review): indentation is lost in this listing -- confirm whether
# _configuration.check() runs only when the instance is first created or
# on every call.
def Configuration():
|
||||||
|
global _optioncreator
|
||||||
|
global _configuration
|
||||||
|
|
||||||
|
if _optioncreator is None:
|
||||||
|
raise ValueError("optioncreator must be set")
|
||||||
|
if _configuration is None:
|
||||||
|
_configuration = _Configuration(_optioncreator)
|
||||||
|
_configuration.check()
|
||||||
|
return _configuration
|
||||||
|
|
35
download.py
35
download.py
|
@ -22,20 +22,35 @@
|
||||||
import urllib
|
import urllib
|
||||||
import os
|
import os
|
||||||
import htmlparser
|
import htmlparser
|
||||||
#import progressbar
|
|
||||||
import config
|
import config
|
||||||
|
|
||||||
savedir = config.Configuration().get_download_location()
|
def get_savedir():
|
||||||
|
conf = config.Configuration()
|
||||||
|
savedir = conf.get_download_location()
|
||||||
if not os.path.exists(savedir):
|
if not os.path.exists(savedir):
|
||||||
os.makedirs(savedir)
|
os.makedirs(savedir)
|
||||||
|
return savedir
|
||||||
|
def check_archive(fullpath):
    """Tell whether the file named by *fullpath* is present in the archive.

    Only the base name of *fullpath* is used; it is looked up inside the
    configured archive location.
    """
    archive_dir = config.Configuration().get_archive_location()
    marker = os.path.join(archive_dir, os.path.basename(fullpath))
    return os.path.exists(marker)
|
||||||
|
|
||||||
class Downloader(object):
|
class Downloader(object):
|
||||||
def __init__(self, progress_reporter):
|
def __init__(self, progress_reporter):
|
||||||
self.progress_reporter = progress_reporter
|
self.progress_reporter = progress_reporter
|
||||||
|
self.on_downloaded = None
|
||||||
|
|
||||||
|
def set_on_downloaded(self, on_downloaded):
|
||||||
|
self.on_downloaded = on_downloaded
|
||||||
|
|
||||||
def get_thread_links(self, baseurl):
|
def get_thread_links(self, baseurl):
|
||||||
myparser = htmlparser.MyParser()
|
myparser = htmlparser.MyParser()
|
||||||
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
|
t = ["0", "1", "2", "3", "4",
|
||||||
|
"5", "6", "7", "8", "9",
|
||||||
|
"10", "11", "12", "13", "14", "15"]
|
||||||
i = 1
|
i = 1
|
||||||
total = len(t)
|
total = len(t)
|
||||||
progress = self.progress_reporter(total)
|
progress = self.progress_reporter(total)
|
||||||
|
@ -105,8 +120,8 @@ class Downloader(object):
|
||||||
i = 1
|
i = 1
|
||||||
for link in t:
|
for link in t:
|
||||||
progress.show_progress(i)
|
progress.show_progress(i)
|
||||||
filename = os.path.join(savedir, os.path.split(link)[1])
|
filename = os.path.join(get_savedir(), os.path.split(link)[1])
|
||||||
if not os.path.exists(filename):
|
if not check_archive(filename):
|
||||||
tries = 10
|
tries = 10
|
||||||
while tries > 0:
|
while tries > 0:
|
||||||
try:
|
try:
|
||||||
|
@ -118,6 +133,8 @@ class Downloader(object):
|
||||||
failed += 1
|
failed += 1
|
||||||
else:
|
else:
|
||||||
downloaded += 1
|
downloaded += 1
|
||||||
|
if self.on_downloaded is not None:
|
||||||
|
self.on_downloaded(filename)
|
||||||
else:
|
else:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
i += 1
|
i += 1
|
||||||
|
@ -126,10 +143,4 @@ class Downloader(object):
|
||||||
return (skipped, failed, downloaded, total)
|
return (skipped, failed, downloaded, total)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Get a file-like object for the 4chan.org w/imgboard
|
print "Don't run me, run 4grab.py"
|
||||||
base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"
|
|
||||||
|
|
||||||
# Get the hyperlinks.
|
|
||||||
t = get_thread_links(base_url)
|
|
||||||
t = get_image_links(base_url, t)
|
|
||||||
get_images(t)
|
|
||||||
|
|
110
sorter.py
Normal file
110
sorter.py
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
######################################################################
|
||||||
|
# Copyright 2009, 2010 ryuslash
|
||||||
|
#
|
||||||
|
# This file is part of 4grab.
|
||||||
|
#
|
||||||
|
# 4grab is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# 4grab is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
import config
|
||||||
|
import Image
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
|
||||||
|
def dummy_option_creator(value1, value2):
    """Non-interactive option creator: silently accept the default.

    value1 -- the default value offered for the option
    value2 -- the prompt text (ignored, nothing is asked)

    config's create_option() stores whatever the option creator returns,
    so hand back the default instead of None.
    """
    return value1
|
||||||
|
config._optioncreator = dummy_option_creator
|
||||||
|
|
||||||
|
class Sorter:
|
||||||
|
|
||||||
|
def __init__(self):
    """Fetch the shared configuration and keep its resolutions list."""
    conf = config.Configuration()
    self.conf = conf
    self.resolutions = conf.get_resolutions()
|
||||||
|
|
||||||
|
# Process one file of the download location.
#
# Steps visible in this listing:
#   * check_filename(filename) is consulted first;
#   * the file is opened with Image.open() from the download location; an
#     IOError prints a "Cannot read image file" message and leaves image
#     as None;
#   * when image is set and archive_check(filename) is true, each
#     configured "WxH" resolution is compared with image.size; on an exact
#     match the folder <download_base>/<W>-<H> is created if missing, the
#     file is copied there and the search stops;
#   * archive(filename) and remove(filename) are then called.
#
# NOTE(review): indentation is lost in this listing, so which conditions
# guard the archive()/remove() calls cannot be told from here -- confirm
# against the real file.
# NOTE(review): copying is gated on archive_check() being TRUE (already in
# the archive) -- confirm that polarity is intended.
# NOTE(review): a resolution entry without an "x" (e.g. "*") makes
# resolution[1] raise IndexError.
def act(self, filename):
|
||||||
|
download_base = self.conf.get_download_location()
|
||||||
|
|
||||||
|
if self.check_filename(filename):
|
||||||
|
image = None
|
||||||
|
try:
|
||||||
|
image = Image.open(os.path.join(download_base,
|
||||||
|
filename))
|
||||||
|
except IOError:
|
||||||
|
print "Cannot read image file %s, might be broken" \
|
||||||
|
% filename
|
||||||
|
|
||||||
|
if not image == None and self.archive_check(filename):
|
||||||
|
for resolution in self.resolutions:
|
||||||
|
resolution = resolution.split('x')
|
||||||
|
foldername = "%s-%s" % (resolution[0],
|
||||||
|
resolution[1])
|
||||||
|
folderpath = os.path.join(download_base,
|
||||||
|
foldername)
|
||||||
|
|
||||||
|
if str(image.size[0]) == resolution[0] and \
|
||||||
|
str(image.size[1]) == resolution[1]:
|
||||||
|
if not os.path.exists(folderpath):
|
||||||
|
os.makedirs(folderpath)
|
||||||
|
#print "creating", folderpath
|
||||||
|
|
||||||
|
self.copy(filename, folderpath)
|
||||||
|
break
|
||||||
|
|
||||||
|
self.archive(filename)
|
||||||
|
self.remove(filename)
|
||||||
|
|
||||||
|
def copy(self, filename, destpath):
|
||||||
|
download_base = self.conf.get_download_location()
|
||||||
|
source = os.path.join(download_base,
|
||||||
|
os.path.basename(filename))
|
||||||
|
dest = os.path.join(destpath,
|
||||||
|
os.path.basename(filename))
|
||||||
|
if source != dest:
|
||||||
|
shutil.copy(source, dest)
|
||||||
|
else:
|
||||||
|
print "\nHow can this even happen?! Copying", source, "to", dest
|
||||||
|
#print "\nParameters are", filename, "and", destpath
|
||||||
|
|
||||||
|
def archive(self, filename):
    """Record *filename* as archived by creating an empty marker file.

    The marker carries the base name of *filename* and lives in the
    configured archive location, which is created when it is missing.
    """
    location = self.conf.get_archive_location()
    if not os.path.exists(location):
        os.makedirs(location)

    # Use the base name only: given a full path, os.path.join() would
    # discard `location` and the open() below would truncate that file
    # itself instead of creating a marker.
    dest = os.path.join(location, os.path.basename(filename))
    # Opening for writing is enough to create the empty marker; close it
    # through the file object rather than the Python 2-only `file` type.
    open(dest, "w").close()
|
||||||
|
|
||||||
|
|
||||||
|
def archive_check(self, filename):
    """Return True when *filename* exists under the archive location."""
    # NOTE(review): the name is joined as given (no basename), unlike
    # copy() -- confirm callers pass bare file names.
    return os.path.exists(
        os.path.join(self.conf.get_archive_location(), filename))
|
||||||
|
|
||||||
|
def check_filename(self, filename):
    """Return True if *filename* ends in .jpg, .png or .gif (exact case)."""
    return os.path.splitext(filename)[1] in (".jpg", ".png", ".gif")
|
||||||
|
|
||||||
|
def remove(self, filename):
    """Delete *filename* from the download location."""
    os.remove(os.path.join(self.conf.get_download_location(), filename))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run every entry of the download location through a Sorter.
    download_base = config.Configuration().get_download_location()
    worker = Sorter()
    for entry in os.listdir(download_base):
        worker.act(entry)
|
Loading…
Reference in a new issue