Compare commits

..

No commits in common. "develop-arch" and "master" have entirely different histories.

10 changed files with 215 additions and 2555 deletions

3
.gitignore vendored
View file

@ -1,5 +1,4 @@
*~ *~
*.pyc *.pyc
\#*\# \#*\#
.* .*
modules/plane.tbl

232
4grab.py
View file

@ -1,4 +1,4 @@
#!/usr/bin/python2 #!/usr/bin/python
###################################################################### ######################################################################
# Copyright 2009, 2010 ryuslash # Copyright 2009, 2010 ryuslash
@ -21,133 +21,147 @@
import optparse import optparse
import sys import sys
import os
import config import config
from util import raw_input_with_default
import util
import download import download
import progressbar import progressbar
import sorter
import backend
config._optioncreator = raw_input_with_default
base_url = "http://boards.4chan.org/" base_url = "http://boards.4chan.org/"
parser = optparse.OptionParser() parser = optparse.OptionParser()
downloader = download.Downloader(progressbar.Progress) downloader = download.Downloader(progressbar.Progress)
def parse_commands(): def walk_with_wizard(baseurl):
conf = config.Configuration() wzrd_msg = "Pilates! *SHAZAM* Here they come!"
parser.set_usage( print "Alright, let me put on my robe and wizard hat."
"""%prog [options]
# Single or all
inp = None
prompt = "Would you like to download a single thread, or all? "
inp = raw_input(prompt)
while (inp != "single" and inp != "all"):
print "Please type single or all"
inp = raw_input(prompt)
if inp == "single":
inp = raw_input("Which thread would you like to download? ")
if inp[:7] == "http://":
t = downloader.get_image_links("", [inp])
else:
thread = inp
inp = raw_input("Which category is this thread in? ")
print wzrd_msg
t = downloader.get_image_links("%s%s/res/" % (baseurl, inp),
[thread])
else:
inp = raw_input("Which category would you like to download? ")
config.Configuration().set_category(inp)
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
print wzrd_msg
t = downloader.get_thread_links(baseurl)
t = downloader.get_image_links(baseurl, t)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
parser.set_usage(
"""%prog [options]
4grab Copyright (C) 2009-2010 ryuslash 4grab Copyright (C) 2009-2010 ryuslash
This program comes with ABSOLUTELY NO WARRANTY. This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it This is free software, and you are welcome to redistribute it
under certain conditions.""") under certain conditions.""")
parser.add_option("-e", parser.add_option("-e",
nargs=2, nargs=2,
dest="confval", dest="confval",
metavar="CONF VALUE", metavar="CONF VALUE",
help="Set configuration option CONF to be VALUE") help="Set configuration option CONF to be VALUE")
parser.add_option("-c", parser.add_option("-c",
"--category", "--category",
dest="tempcat", dest="tempcat",
metavar="CATEGORY", metavar="CATEGORY",
help="Set the category to CATEGORY only for this run") help="Set the category to CATEGORY only for this run")
parser.add_option("-t", parser.add_option("-t",
"--thread", "--thread",
dest="thread", dest="thread",
metavar="THREAD", metavar="THREAD",
help="Download only THREAD. If THREAD is only an ID, " help="Download only THREAD. If THREAD is only an ID, "
"CATEGORY must also be set. Otherwise, no problem :-)") "CATEGORY must also be set. Otherwise, no problem :-)")
parser.add_option("-s", parser.add_option("-w",
"--sort", "--wizard",
action="store_true", action="store_true",
dest="sort", dest="wizard",
help="Sort downloaded images, most handy if you've used " help="I'll put on my robe and wizard hat and help you "
"older versions which didn't sort yet") "get some of those pictures you like")
parser.add_option("-l", (options, args) = parser.parse_args()
"--loglevel",
nargs=1,
dest="loglevel",
metavar="LEVEL",
help="Changes the default log level to LEVEL")
(options, args) = parser.parse_args()
if options.confval and (options.tempcat if options.confval and (options.tempcat
or options.thread or options.thread
or options.wizard or options.wizard):
or options.sort): print "Can't configure something and do something else too."
print "Can't configure something and do something else too." exit(1)
if options.wizard and (options.tempcat
or options.thread
or options.confval):
print "Can't take a walk with the wizard and do something else too."
exit(1)
if options.confval:
if not config.Configuration().option_exists(options.confval[0]):
print ("%s: error: %s is not a "
"valid configuration option") % (sys.argv[0],
options.confval[0])
exit(1) exit(1)
print "Setting", options.confval[0], "to", options.confval[1]
config.Configuration().set_option(options.confval[0],
options.confval[1])
config.Configuration().save()
exit(0)
if options.sort: elif options.wizard:
sort = sorter.Sorter() try:
for item in os.listdir(conf.get_download_location()): walk_with_wizard(base_url)
sort.act(item) except KeyboardInterrupt:
exit(0) print
print "Alright, no more wizard hat and robe then. Goodbye"
exit(0)
if options.confval: elif options.thread:
if not conf.option_exists(options.confval[0]): try:
print ("%s: error: %s is not a valid configuration option" if options.thread[:7] == "http://":
% (sys.argv[0], options.confval[0])) t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread])
else:
print ("if THREAD is not an absolute URL, "
"CATEGORY must also be specified")
exit(1) exit(1)
print "Setting", options.confval[0], "to", options.confval[1] (skipped, failed, downloaded, total) = downloader.get_images(t)
conf.set_option(options.confval[0], print "Downloaded: ", downloaded
options.confval[1]) print "Skipped: ", skipped
conf.save() print "Failed: ", failed
exit(0) print "Total: ", total
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
elif options.thread: elif options.tempcat:
try: config.Configuration().set_category(options.tempcat)
if options.thread[:7] == "http://":
t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread])
else:
print ("if THREAD is not an absolute URL, "
"CATEGORY must also be specified")
exit(1)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
elif options.tempcat: base_url = "%s%s/" % (base_url, config.Configuration().get_category())
conf.set_categories([options.tempcat])
elif options.loglevel is not None: try:
util.loglevel = util.LogType.from_int(options.loglevel) t = downloader.get_thread_links(base_url)
t = downloader.get_image_links(base_url, t)
if __name__ == "__main__": (skipped, failed, downloaded, total) = downloader.get_images(t)
conf = config.Configuration() print "Downloaded: ", downloaded
sort = sorter.Sorter() print "Skipped: ", skipped
parse_commands() print "Failed: ", failed
print "Total: ", total
downloader.set_on_downloaded(sort.act) except KeyboardInterrupt:
for category in conf.get_categories(): print
base_url = "%s%s/" % (base_url, category) print "So you don't want these images? Fine! I'll stop then."
try:
t = downloader.get_thread_links(base_url)
t = downloader.get_image_links(base_url, t)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
be = backend.Backend()
be.save(True) # Make sure that the downloaded images are saved anyway
print
print "So you don't want these images? Fine! I'll stop then."
util.log(util.LogType.Err, "Quit on user request")

View file

@ -1,70 +0,0 @@
import os
import modules.kirbybase
from util import confdir,raw_input_with_default
import config
class _Backend(object):
    """ A class that communicates with the datastore

    Two lists of filenames are kept: the names loaded from the table
    (``__collection``) and the names seen during this run
    (``__new_collection``).
    """

    def __init__(self):
        self.table = os.path.join(confdir, "images.tbl")
        self.store = modules.kirbybase.KirbyBase()
        # Both collections are lists; the loaded one used to start out as
        # an empty string, which only worked by accident.
        self.__collection = []
        self.__new_collection = []
        self.load()

    def create_store_if_needed(self):
        """ Create the table file when missing; True if it already exists """
        if not os.path.exists(self.table):
            return self.store.create(self.table, ["filename:String"])
        return True

    def add(self, filename):
        """ Move filename from the loaded list to the list for this run """
        if filename in self.__collection:
            self.__collection.remove(filename)
        # Callers may add the same name more than once; keep a single entry
        # so that save() does not write duplicate rows.
        if filename not in self.__new_collection:
            self.__new_collection.append(filename)

    def check(self, filename):
        """ Return True if filename was already known, and register it

        A name that has not been seen during this run is added as a side
        effect, so a second check of the same name returns True.
        """
        collected = filename in self.__collection
        downloaded = filename in self.__new_collection
        if not downloaded:
            self.add(filename)
        return collected or downloaded

    def save(self, dump = False):
        """ Rewrite the table from the names seen during this run

        With dump set, the names that were loaded but not seen again are
        written as well.  The in-memory lists are left untouched, so saving
        repeatedly always writes the same rows.
        """
        entries = list(self.__new_collection)
        if dump:
            entries.extend(self.__collection)
        if os.path.exists(self.table):
            os.remove(self.table)
        self.create_store_if_needed()
        for f in entries:
            self.store.insert(self.table, [f])

    def load(self):
        """ Read the stored filenames into the loaded list, if a table exists """
        if os.path.exists(self.table):
            # NOTE(review): relies on the "report" return type being text
            # that splits into filenames on whitespace -- confirm against
            # the KirbyBase documentation.
            collection = self.store.select(self.table, ['recno'], ['*'], ['filename'], returnType="report")
            if collection != '':
                self.__collection = collection.split()
_backend = None


def Backend():
    """ Return the shared _Backend instance, creating it on first use """
    global _backend
    # Identity test: an equality test could be answered by the instance.
    if _backend is None:
        _backend = _Backend()
    return _backend
if __name__ == "__main__":
    # Register every entry of the archive directory with the backend.
    # NOTE(review): this block never calls save(), so the additions only
    # exist in memory -- confirm whether that is intended.
    store = Backend()
    config._optioncreator = raw_input_with_default
    archive_dir = config.Configuration().get_archive_location()
    for entry in os.listdir(archive_dir):
        store.add(entry)

124
config.py
View file

@ -20,94 +20,43 @@
import os import os
import ConfigParser import ConfigParser
import sys import sys
from util import homedir, confdir
homedir = os.getenv("HOME")
if homedir is None:
homedir = os.path.dirname(sys.argv[0])
class _Configuration(object): class _Configuration(object):
def __init__(self, optioncreator): def __init__(self):
self.filename = os.path.join(confdir, "config.cfg") self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
self.configparser = ConfigParser.RawConfigParser() self.configparser = ConfigParser.RawConfigParser()
self.optioncreator = optioncreator if not os.path.exists(self.filename):
self.create_new()
else:
self.configparser.read(self.filename)
def create_new(self):
self.configparser.add_section("settings")
self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
self.configparser.read(self.filename) self.configparser.add_section("locations")
self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
def check(self): self.save()
changed = False
# read if it exists
if os.path.exists(self.filename):
self.configparser.read(self.filename)
# locations
if not self.configparser.has_section("locations"):
self.configparser.add_section("locations")
# locations/download_base
if not self.configparser.has_option("locations", "download_base"):
self.create_option("locations",
"download_base",
os.path.join(homedir,
"Pictures"),
"Please enter where "
"you would like the "
"downloads to go: ")
changed = True
# locations/archive
if not self.configparser.has_option("locations", "archive"):
self.create_option("locations",
"archive",
os.path.join(self.configparser.get("locations",
"download_base"),
".arch"),
"Please enter where in {download_base} you "
"would like to store archived images (used for "
"checking what to download): ")
changed = True
# settings
if not self.configparser.has_section("settings"):
self.configparser.add_section("settings")
# settings/categories
if not self.configparser.has_option("settings", "categories"):
self.create_option("settings",
"categories",
"w",
"Please enter which "
"category you would like "
"to download from: ")
changed = True
# settings/resolutions
if not self.configparser.has_option("settings", "resolutions"):
self.create_option("settings",
"resolutions",
"1600x1050,1900x1200,1900x1080",
"Please enter your preferred "
"resolutions (* for all)")
changed = True
# save
if changed:
self.save()
def create_option(self, section, name, default, message): def raw_input_with_default(self, default, prompt):
self.configparser.set(section, inp = raw_input("%s (default=%s): " % (prompt, default))
name, if inp == "":
self.optioncreator(default, return default
message)) return inp
def get_download_location(self): def get_download_location(self):
return self.configparser.get("locations", "download_base") return self.configparser.get("locations", "download")
def set_download_location(self, value):
self.configparser.set("locations", "download_base", value)
def get_archive_location(self): def get_category(self):
return self.configparser.get("locations", "archive") return self.configparser.get("settings", "category")
def set_archive_location(self, value):
self.configparser.set("locations", "archive", value)
def get_categories(self): def set_category(self, value):
return self.configparser.get("settings", "categories").split(',') self.configparser.set("settings", "category", value)
def set_categories(self, value = []):
self.configparser.set("settings", "category", ','.join(value))
def get_resolutions(self):
return self.configparser.get("settings", "resolutions").split(',')
def set_resolutions(self, value = []):
self.configparser.set("settings", "resolutions", ','.join(value))
def option_exists(self, option): def option_exists(self, option):
sections = self.configparser.sections() sections = self.configparser.sections()
@ -116,7 +65,6 @@ class _Configuration(object):
return True return True
return False return False
# Should only be used by the command-line
def set_option(self, option, value): def set_option(self, option, value):
sec = None sec = None
sections = self.configparser.sections() sections = self.configparser.sections()
@ -133,19 +81,9 @@ class _Configuration(object):
def save(self): def save(self):
dirname = os.path.dirname(self.filename) dirname = os.path.dirname(self.filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
os.makedirs(dirname) os.mkdir(dirname)
configfile = open(self.filename, "w") configfile = open(self.filename, "w")
self.configparser.write(configfile) self.configparser.write(configfile)
_configuration = None _configuration = _Configuration()
_optioncreator = None def Configuration(): return _configuration
def Configuration():
global _optioncreator
global _configuration
if _optioncreator is None:
raise ValueError("optioncreator must be set")
if _configuration is None:
_configuration = _Configuration(_optioncreator)
_configuration.check()
return _configuration

View file

@ -1,3 +1,5 @@
#!/usr/bin/env python
###################################################################### ######################################################################
# Copyright 2009, 2010 ryuslash # Copyright 2009, 2010 ryuslash
# #
@ -20,62 +22,37 @@
import urllib import urllib
import os import os
import htmlparser import htmlparser
#import progressbar
import config import config
import sys
import backend
import util
def get_savedir(): savedir = config.Configuration().get_download_location()
conf = config.Configuration() if not os.path.exists(savedir):
savedir = conf.get_download_location() os.makedirs(savedir)
if not os.path.exists(savedir):
os.makedirs(savedir)
return savedir
def check_archive(fullpath):
filename = os.path.basename(fullpath)
be = backend.Backend()
return be.check(filename)
def write(message):
sys.stdout.write(message)
sys.stdout.flush()
class Downloader(object): class Downloader(object):
def __init__(self, progress_reporter): def __init__(self, progress_reporter):
self.progress_reporter = progress_reporter self.progress_reporter = progress_reporter
self.on_downloaded = None
def set_on_downloaded(self, on_downloaded):
self.on_downloaded = on_downloaded
def download(self, url):
f = None
tries = 10
while tries > 0:
try:
f = urllib.urlopen(url)
break
except IOError:
tries -= 1
write("\rTry of %s failed, %d tries left" % (url, tries))
return f
def get_thread_links(self, baseurl): def get_thread_links(self, baseurl):
myparser = htmlparser.MyParser() myparser = htmlparser.MyParser()
i = 0 t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
code = 0 i = 1
url = None total = len(t)
progress = self.progress_reporter(total)
while code != 404:
url = baseurl + str(i) for pagenum in t:
f = self.download(url) progress.show_progress(i)
url = baseurl + pagenum
tries = 10
while tries > 0:
try:
f = urllib.urlopen(url)
break
except IOError:
tries -= 1
print "\rTry of", url, "failed,", tries, "tries left"
if not f is None: if not f is None:
code = f.getcode()
if code == 404:
write("\rCollected %d pages\n" % i)
f.close()
continue
# Read the response # Read the response
s = f.read() s = f.read()
f.close() f.close()
@ -83,11 +60,10 @@ class Downloader(object):
# Process the page. # Process the page.
myparser.parse(s) myparser.parse(s)
else: else:
write("\rOpening of %s did not succeed, trying next one..." \ "\rOpening of", url, "did not succeed, trying next one..."
% url)
i += 1 i += 1
write("\rCollected %d pages" % i)
progress.complete()
return myparser.get_hyperlinks() return myparser.get_hyperlinks()
def get_image_links(self, baseurl, t = []): def get_image_links(self, baseurl, t = []):
@ -100,16 +76,21 @@ class Downloader(object):
progress.show_progress(i) progress.show_progress(i)
img_url = baseurl + link img_url = baseurl + link
f = self.download(img_url) tries = 10
while tries > 0:
try:
f = urllib.urlopen(img_url)
break
except IOError:
tries -= 1
print "\rTry of", img_url, "failed,", tries, "tries left"
if not f is None: if not f is None:
s = f.read() s = f.read()
f.close() f.close()
mysubparser.parse(s) mysubparser.parse(s)
else: else:
write("\rOpening of %s did not succeed, " \ print "\rOpening of", img_url, "did not succeed, trying next one..."
"trying next one..." % img_url)
i += 1 i += 1
progress.complete() progress.complete()
@ -124,10 +105,8 @@ class Downloader(object):
i = 1 i = 1
for link in t: for link in t:
progress.show_progress(i) progress.show_progress(i)
filename = os.path.join(get_savedir(), os.path.split(link)[1]) filename = os.path.join(savedir, os.path.split(link)[1])
if not check_archive(filename): if not os.path.exists(filename):
util.log(util.LogType.Msg, "%s is not in archive" % filename, None)
tries = 10 tries = 10
while tries > 0: while tries > 0:
try: try:
@ -138,25 +117,19 @@ class Downloader(object):
if tries == 0: if tries == 0:
failed += 1 failed += 1
else: else:
util.log(util.LogType.Msg, "succsesfully downloaded %s" % filename, None)
downloaded += 1 downloaded += 1
if self.on_downloaded is not None:
util.log(util.LogType.Msg, "", self.on_downloaded)
if not self.on_downloaded(filename):
failed += 1
else:
util.log(util.LogType.Warn, "on_downloaded is None", None)
else: else:
skipped += 1 skipped += 1
i += 1 i += 1
progress.complete() progress.complete()
be = backend.Backend()
be.save()
return (skipped, failed, downloaded, total) return (skipped, failed, downloaded, total)
if __name__ == "__main__": if __name__ == "__main__":
print "Don't run me, run 4grab.py" # Get a file-like object for the 4chan.org w/imgboard
base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"
# Get the hyperlinks.
t = get_thread_links(base_url)
t = get_image_links(base_url, t)
get_images(t)

View file

@ -17,35 +17,33 @@
# along with 4grab. If not, see <http://www.gnu.org/licenses/>. # along with 4grab. If not, see <http://www.gnu.org/licenses/>.
###################################################################### ######################################################################
from HTMLParser import HTMLParser, HTMLParseError import sgmllib
import re import re
class MyParser(HTMLParser): class MyParser(sgmllib.SGMLParser):
def __init__(self): def __init__(self, verbose=0):
HTMLParser.__init__(self) sgmllib.SGMLParser.__init__(self, verbose)
self.hyperlinks = [] self.hyperlinks = []
self.url_reg = re.compile('res/\d+\Z') self.url_reg = re.compile('res/\d+\Z')
self.prev = ""
def parse(self, s): def parse(self, s):
self.feed(s) self.feed(s)
self.close() self.close()
def handle_starttag(self, tag, attrs): def start_a(self, attributes):
prev = "" for name, value in attributes:
if name == "href":
if tag == 'a': if self.url_reg.search(value) != None:
for name, value in attrs: if self.prev != value:
if name == 'href': self.hyperlinks.append(value)
if self.url_reg.search(value) != None: self.prev = value
if prev != value:
self.hyperlinks.append(value)
prev = value
def get_hyperlinks(self): def get_hyperlinks(self):
return self.hyperlinks return self.hyperlinks
class MySubParser(MyParser): class MySubParser(MyParser):
def __init__(self): def __init__(self, verbose=0):
MyParser.__init__(self) MyParser.__init__(self, verbose)
self.url_reg = re.compile('/src/\d+\.\w{3,4}\Z') self.url_reg = re.compile('/src/\d+\.\w{3,4}\Z')

View file

File diff suppressed because it is too large Load diff

118
sorter.py
View file

@ -1,118 +0,0 @@
######################################################################
# Copyright 2009, 2010 ryuslash
#
# This file is part of 4grab.
#
# 4grab is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# 4grab is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
######################################################################
import config
import Image
import shutil
import os
import datetime
import backend
import util
def dummy_option_creator(value1, value2):
    """ Option creator that ignores both arguments and returns None """
    return None


# Install the no-op creator on the config module.
config._optioncreator = dummy_option_creator
# Copies downloaded images into per-resolution folders and records them with
# the backend.
# NOTE(review): the leading indentation of this class is missing in this
# view, so the nesting of statements (notably inside act) cannot be
# confirmed from here; the code lines below are reproduced unchanged.
class Sorter:
# Keep the shared configuration and the configured resolution strings.
def __init__(self):
self.conf = config.Configuration()
self.resolutions = self.conf.get_resolutions()
# Handle one file; filename is joined onto the download location.
# Returns retval, which starts as True and becomes False when Image.open
# raises IOError.
def act(self, filename):
util.log(util.LogType.Msg, "sorter is acting", filename)
download_base = self.conf.get_download_location()
retval = True
# Only names accepted by check_filename are looked at.
if self.check_filename(filename):
image = None
try:
image = Image.open(os.path.join(download_base,
filename))
except IOError:
retval = False
if not image == None and self.archive_check(filename):
util.log(util.LogType.Msg, "Checking resolution", {"filename":filename, "resolution":image.size})
# Each configured resolution is split on 'x' into two strings that are
# compared with image.size[0] and image.size[1]; the first match wins.
for resolution in self.resolutions:
resolution = resolution.split('x')
foldername = "%s-%s" % (resolution[0],
resolution[1])
folderpath = os.path.join(download_base,
foldername)
if str(image.size[0]) == resolution[0] and \
str(image.size[1]) == resolution[1]:
if not os.path.exists(folderpath):
os.makedirs(folderpath)
self.copy(filename, folderpath)
break
# NOTE(review): with the indentation lost it is unclear which enclosing
# block the next two calls belong to -- confirm before editing.
self.archive(filename)
self.remove(filename)
return retval
# Copy the file from the download location into
# destpath/<year>-<month>-<day>/, creating that folder when it is missing.
def copy(self, filename, destpath):
download_base = self.conf.get_download_location()
source = os.path.join(download_base,
os.path.basename(filename))
today = datetime.date.today()
dest = os.path.join(destpath,
"%d-%d-%d" % (today.year, today.month, today.day))
util.log(util.LogType.Msg, "going to copy %s to %s" % (source, dest), None)
if not os.path.exists(dest):
os.makedirs(dest)
dest = os.path.join(dest,
os.path.basename(filename))
# Identical source and destination paths are reported instead of copied.
if source != dest:
shutil.copy(source, dest)
else:
print "\nHow can this even happen?! Copying", source, "to", dest
# Pass the file's base name to the backend's add().
def archive(self, filename):
be = backend.Backend()
be.add(os.path.basename(filename))
# Return the result of the backend's check() for the file's base name.
def archive_check(self, filename):
be = backend.Backend()
return be.check(os.path.basename(filename))
# True only when the extension is exactly ".jpg", ".png" or ".gif"
# (plain string comparison, so the match is case-sensitive).
def check_filename(self, filename):
ext = os.path.splitext(filename)[1]
return ext == ".jpg" or \
ext == ".png" or \
ext == ".gif"
# Delete the file, joined onto the download location, from disk.
def remove(self, filename):
download_base = self.conf.get_download_location()
source = os.path.join(download_base, filename)
os.remove(source)
if __name__ == "__main__":
    # Run the sorter over every entry of the download location.
    base_dir = config.Configuration().get_download_location()
    worker = Sorter()
    for entry in os.listdir(base_dir):
        worker.act(entry)

39
util.py
View file

@ -1,39 +0,0 @@
import os
import sys
class LogType:
    """ Integer log levels, from Non (0) up to Msg (3) """
    Non = 0
    Err = 1
    Warn = 2
    Msg = 3

    @staticmethod
    def from_int(lloglevel):
        """ Convert an int-like value to the level with the same number

        Returns None when the number is not one of the four levels.
        """
        wanted = int(lloglevel)
        for level in (LogType.Non, LogType.Err, LogType.Warn, LogType.Msg):
            if wanted == level:
                return level
        return None


# Module-wide level; starts at Non.
loglevel = LogType.Non
def raw_input_with_default(default, prompt):
    """ Ask the user for a value; an empty answer yields default

    The prompt is shown as "<prompt> (default=<default>): ".
    """
    answer = raw_input("%s (default=%s): " % (prompt, default))
    return default if answer == "" else answer
def log(logtype, message, data = None):
    """ Print message when the module-wide loglevel is at least logtype

    When data is given it is printed after the message, on its own
    tab-indented line below a "data:" heading.  Nothing is printed for a
    message that does not pass the level check.
    """
    if loglevel >= logtype:
        print(message)
        if data is not None:
            # The value is wrapped in a tuple so that tuple data formats as
            # a single item; previously the literal word "data" was printed.
            print("data:\n\t%s" % (data,))
# Directory that holds the user's files: $HOME when it is set.
homedir = os.environ.get("HOME")
if homedir is None:
    # Without HOME, use the directory part of the script path instead.
    homedir = os.path.dirname(sys.argv[0])
# The configuration directory is ".4grab" inside homedir.
confdir = os.path.join(homedir, ".4grab")