Sorting, multi category, multi resolution
After a file has been downloaded, a callback function can now be called. The callback I use checks whether the resolution of the image appears in the collection of resolutions entered in the configuration file and deletes/moves the file accordingly. If a file cannot be read (which I have noticed happens sometimes), it is removed, not copied and not archived, so that it can be retried later. 4grab got a new command-line option, -s/--sort, to sort out old images; running python sorter.py has the same effect, but this seemed prettier. Theoretically, multiple categories can now be entered into the configuration file, separated by ',', but this hasn't been tested yet. Multiple resolutions can be entered into the configuration file, separated by ',', like so: 1680x1050,1920x1200. Configuration now checks whether all the necessary properties are available in the configuration file; if one is missing, it tries to create it.
This commit is contained in:
parent
4a9cc7e2b6
commit
7f8dfa1d30
4 changed files with 148 additions and 6 deletions
23
4grab.py
23
4grab.py
|
@ -21,11 +21,13 @@
|
||||||
|
|
||||||
import optparse
|
import optparse
|
||||||
import sys
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
import config
|
import config
|
||||||
|
|
||||||
import download
|
import download
|
||||||
import progressbar
|
import progressbar
|
||||||
|
import sorter
|
||||||
|
|
||||||
def raw_input_with_default(default, prompt):
|
def raw_input_with_default(default, prompt):
|
||||||
inp = raw_input("%s (default=%s): " % (prompt, default))
|
inp = raw_input("%s (default=%s): " % (prompt, default))
|
||||||
|
@ -106,20 +108,34 @@ under certain conditions.""")
|
||||||
dest="wizard",
|
dest="wizard",
|
||||||
help="I'll put on my robe and wizard hat and help you "
|
help="I'll put on my robe and wizard hat and help you "
|
||||||
"get some of those pictures you like")
|
"get some of those pictures you like")
|
||||||
|
parser.add_option("-s",
|
||||||
|
"--sort",
|
||||||
|
action="store_true",
|
||||||
|
dest="sort",
|
||||||
|
help="Sort downloaded images, most handy if you've used "
|
||||||
|
"older versions which didn't sort yet")
|
||||||
(options, args) = parser.parse_args()
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
if options.confval and (options.tempcat
|
if options.confval and (options.tempcat
|
||||||
or options.thread
|
or options.thread
|
||||||
or options.wizard):
|
or options.wizard
|
||||||
|
or options.sort):
|
||||||
print "Can't configure something and do something else too."
|
print "Can't configure something and do something else too."
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
if options.wizard and (options.tempcat
|
if options.wizard and (options.tempcat
|
||||||
or options.thread
|
or options.thread
|
||||||
or options.confval):
|
or options.confval
|
||||||
|
or options.sort):
|
||||||
print "Can't take a walk with the wizard and do something else too."
|
print "Can't take a walk with the wizard and do something else too."
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
if options.sort:
|
||||||
|
sort = sorter.Sorter()
|
||||||
|
for item in os.listdir(conf.get_download_location()):
|
||||||
|
sort.act(item)
|
||||||
|
exit(0)
|
||||||
|
|
||||||
if options.confval:
|
if options.confval:
|
||||||
if not conf.option_exists(options.confval[0]):
|
if not conf.option_exists(options.confval[0]):
|
||||||
print ("%s: error: %s is not a valid configuration option"
|
print ("%s: error: %s is not a valid configuration option"
|
||||||
|
@ -167,7 +183,10 @@ under certain conditions.""")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
conf = config.Configuration()
|
conf = config.Configuration()
|
||||||
|
sort = sorter.Sorter()
|
||||||
parse_commands()
|
parse_commands()
|
||||||
|
|
||||||
|
downloader.set_on_downloaded(sort.act)
|
||||||
for category in conf.get_categories():
|
for category in conf.get_categories():
|
||||||
base_url = "%s%s/" % (base_url, category)
|
base_url = "%s%s/" % (base_url, category)
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -33,7 +33,6 @@ class _Configuration(object):
|
||||||
self.optioncreator = optioncreator
|
self.optioncreator = optioncreator
|
||||||
|
|
||||||
self.configparser.read(self.filename)
|
self.configparser.read(self.filename)
|
||||||
print "__init__"
|
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
changed = False
|
changed = False
|
||||||
|
|
20
download.py
20
download.py
|
@ -22,7 +22,6 @@
|
||||||
import urllib
|
import urllib
|
||||||
import os
|
import os
|
||||||
import htmlparser
|
import htmlparser
|
||||||
#import progressbar
|
|
||||||
import config
|
import config
|
||||||
|
|
||||||
def get_savedir():
|
def get_savedir():
|
||||||
|
@ -31,14 +30,27 @@ def get_savedir():
|
||||||
if not os.path.exists(savedir):
|
if not os.path.exists(savedir):
|
||||||
os.makedirs(savedir)
|
os.makedirs(savedir)
|
||||||
return savedir
|
return savedir
|
||||||
|
def check_archive(fullpath):
    """Return True if the archive already holds an entry for *fullpath*.

    Only the base name of *fullpath* is used: it is looked up inside the
    archive location reported by the configuration.
    """
    archive_dir = config.Configuration().get_archive_location()
    marker = os.path.join(archive_dir, os.path.basename(fullpath))
    return os.path.exists(marker)
|
||||||
|
|
||||||
class Downloader(object):
|
class Downloader(object):
|
||||||
def __init__(self, progress_reporter):
|
def __init__(self, progress_reporter):
|
||||||
self.progress_reporter = progress_reporter
|
self.progress_reporter = progress_reporter
|
||||||
|
self.on_downloaded = None
|
||||||
|
|
||||||
|
def set_on_downloaded(self, on_downloaded):
|
||||||
|
self.on_downloaded = on_downloaded
|
||||||
|
|
||||||
def get_thread_links(self, baseurl):
|
def get_thread_links(self, baseurl):
|
||||||
myparser = htmlparser.MyParser()
|
myparser = htmlparser.MyParser()
|
||||||
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
|
t = ["0", "1", "2", "3", "4",
|
||||||
|
"5", "6", "7", "8", "9",
|
||||||
|
"10", "11", "12", "13", "14", "15"]
|
||||||
i = 1
|
i = 1
|
||||||
total = len(t)
|
total = len(t)
|
||||||
progress = self.progress_reporter(total)
|
progress = self.progress_reporter(total)
|
||||||
|
@ -109,7 +121,7 @@ class Downloader(object):
|
||||||
for link in t:
|
for link in t:
|
||||||
progress.show_progress(i)
|
progress.show_progress(i)
|
||||||
filename = os.path.join(get_savedir(), os.path.split(link)[1])
|
filename = os.path.join(get_savedir(), os.path.split(link)[1])
|
||||||
if not os.path.exists(filename):
|
if not check_archive(filename):
|
||||||
tries = 10
|
tries = 10
|
||||||
while tries > 0:
|
while tries > 0:
|
||||||
try:
|
try:
|
||||||
|
@ -121,6 +133,8 @@ class Downloader(object):
|
||||||
failed += 1
|
failed += 1
|
||||||
else:
|
else:
|
||||||
downloaded += 1
|
downloaded += 1
|
||||||
|
if self.on_downloaded is not None:
|
||||||
|
self.on_downloaded(filename)
|
||||||
else:
|
else:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
i += 1
|
i += 1
|
||||||
|
|
110
sorter.py
Normal file
110
sorter.py
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
######################################################################
|
||||||
|
# Copyright 2009, 2010 ryuslash
|
||||||
|
#
|
||||||
|
# This file is part of 4grab.
|
||||||
|
#
|
||||||
|
# 4grab is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# 4grab is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
import config
|
||||||
|
import Image
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
|
||||||
|
def dummy_option_creator(value1, value2):
    """Do nothing; placeholder installed as ``config._optioncreator``.

    Both arguments are accepted and ignored.
    """
    return None
|
||||||
|
config._optioncreator = dummy_option_creator
|
||||||
|
|
||||||
|
class Sorter:
    """Sort downloaded images into per-resolution folders.

    A handled image is copied into a ``<width>-<height>`` folder below the
    download location when its size matches one of the configured
    resolutions, recorded in the archive as an empty marker file, and then
    removed from the download location.
    """

    def __init__(self):
        """Load the configuration and the list of wanted resolutions."""
        self.conf = config.Configuration()
        self.resolutions = self.conf.get_resolutions()

    def act(self, filename):
        """Sort, archive and remove one file from the download location.

        Files whose extension is not accepted by ``check_filename`` are
        left alone.  An image that cannot be opened is only removed (not
        copied, not archived) so that it can be downloaded again later.
        An image that already has an archive marker is only removed.

        filename -- name of the file, relative to the download location.
        """
        download_base = self.conf.get_download_location()

        if not self.check_filename(filename):
            return

        image = None
        try:
            image = Image.open(os.path.join(download_base, filename))
        except IOError:
            print("Cannot read image file %s, might be broken" % filename)

        # The marker is only ever written by archive() below, so the
        # image must be handled when it is NOT archived yet; requiring an
        # existing marker would mean no image could ever be copied.
        if image is not None and not self.archive_check(filename):
            for resolution in self.resolutions:
                resolution = resolution.split('x')
                foldername = "%s-%s" % (resolution[0], resolution[1])
                folderpath = os.path.join(download_base, foldername)

                if str(image.size[0]) == resolution[0] and \
                        str(image.size[1]) == resolution[1]:
                    if not os.path.exists(folderpath):
                        os.makedirs(folderpath)

                    self.copy(filename, folderpath)
                    break

            self.archive(filename)

        self.remove(filename)

    def copy(self, filename, destpath):
        """Copy *filename* from the download location into *destpath*.

        Only the base name of *filename* is used for both the source and
        the destination.  If both resolve to the same path nothing is
        copied and a diagnostic is printed instead.
        """
        download_base = self.conf.get_download_location()
        basename = os.path.basename(filename)
        source = os.path.join(download_base, basename)
        dest = os.path.join(destpath, basename)

        if source != dest:
            shutil.copy(source, dest)
        else:
            print("\nHow can this even happen?! Copying %s to %s"
                  % (source, dest))

    def archive(self, filename):
        """Record *filename* in the archive as an empty marker file.

        The archive directory is created when missing.  The marker is
        named after the base name of *filename*, so a full path never
        makes the marker land on (and truncate) the file itself.
        """
        location = self.conf.get_archive_location()
        if not os.path.exists(location):
            os.makedirs(location)

        dest = os.path.join(location, os.path.basename(filename))
        # Create (or truncate) the marker and release the handle at once.
        open(dest, "w").close()

    def archive_check(self, filename):
        """Return True if the archive holds a marker for *filename*.

        The lookup uses the base name of *filename*, matching how
        ``archive`` names the markers.
        """
        archive_path = self.conf.get_archive_location()
        fullname = os.path.join(archive_path, os.path.basename(filename))
        return os.path.exists(fullname)

    def check_filename(self, filename):
        """Return True if *filename* ends in .jpg, .png or .gif.

        The comparison is case-sensitive.
        """
        return os.path.splitext(filename)[1] in (".jpg", ".png", ".gif")

    def remove(self, filename):
        """Delete *filename* from the download location."""
        download_base = self.conf.get_download_location()
        os.remove(os.path.join(download_base, filename))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run as a script: hand every entry of the configured download
    # location to a Sorter.
    location = config.Configuration().get_download_location()
    sorter = Sorter()
    for entry in os.listdir(location):
        sorter.act(entry)
|
Loading…
Reference in a new issue