Compare commits

..

No commits in common. "develop-arch" and "master" have entirely different histories.

10 changed files with 215 additions and 2555 deletions

3
.gitignore vendored
View file

@ -1,5 +1,4 @@
*~
*.pyc
\#*\#
.*
modules/plane.tbl
.*

232
4grab.py
View file

@ -1,4 +1,4 @@
#!/usr/bin/python2
#!/usr/bin/python
######################################################################
# Copyright 2009, 2010 ryuslash
@ -21,133 +21,147 @@
import optparse
import sys
import os
import config
from util import raw_input_with_default
import util
import download
import progressbar
import sorter
import backend
config._optioncreator = raw_input_with_default
base_url = "http://boards.4chan.org/"
parser = optparse.OptionParser()
downloader = download.Downloader(progressbar.Progress)
def parse_commands():
conf = config.Configuration()
parser.set_usage(
"""%prog [options]
def walk_with_wizard(baseurl):
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
print "Alright, let me put on my robe and wizard hat."
# Single or all
inp = None
prompt = "Would you like to download a single thread, or all? "
inp = raw_input(prompt)
while (inp != "single" and inp != "all"):
print "Please type single or all"
inp = raw_input(prompt)
if inp == "single":
inp = raw_input("Which thread would you like to download? ")
if inp[:7] == "http://":
t = downloader.get_image_links("", [inp])
else:
thread = inp
inp = raw_input("Which category is this thread in? ")
print wzrd_msg
t = downloader.get_image_links("%s%s/res/" % (baseurl, inp),
[thread])
else:
inp = raw_input("Which category would you like to download? ")
config.Configuration().set_category(inp)
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
print wzrd_msg
t = downloader.get_thread_links(baseurl)
t = downloader.get_image_links(baseurl, t)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
parser.set_usage(
"""%prog [options]
4grab Copyright (C) 2009-2010 ryuslash
This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions.""")
parser.add_option("-e",
nargs=2,
dest="confval",
metavar="CONF VALUE",
help="Set configuration option CONF to be VALUE")
parser.add_option("-c",
"--category",
dest="tempcat",
metavar="CATEGORY",
help="Set the category to CATEGORY only for this run")
parser.add_option("-t",
"--thread",
dest="thread",
metavar="THREAD",
help="Download only THREAD. If THREAD is only an ID, "
"CATEGORY must also be set. Otherwise, no problem :-)")
parser.add_option("-s",
"--sort",
action="store_true",
dest="sort",
help="Sort downloaded images, most handy if you've used "
"older versions which didn't sort yet")
parser.add_option("-l",
"--loglevel",
nargs=1,
dest="loglevel",
metavar="LEVEL",
help="Changes the default log level to LEVEL")
(options, args) = parser.parse_args()
parser.add_option("-e",
nargs=2,
dest="confval",
metavar="CONF VALUE",
help="Set configuration option CONF to be VALUE")
parser.add_option("-c",
"--category",
dest="tempcat",
metavar="CATEGORY",
help="Set the category to CATEGORY only for this run")
parser.add_option("-t",
"--thread",
dest="thread",
metavar="THREAD",
help="Download only THREAD. If THREAD is only an ID, "
"CATEGORY must also be set. Otherwise, no problem :-)")
parser.add_option("-w",
"--wizard",
action="store_true",
dest="wizard",
help="I'll put on my robe and wizard hat and help you "
"get some of those pictures you like")
(options, args) = parser.parse_args()
if options.confval and (options.tempcat
or options.thread
or options.wizard
or options.sort):
print "Can't configure something and do something else too."
if options.confval and (options.tempcat
or options.thread
or options.wizard):
print "Can't configure something and do something else too."
exit(1)
if options.wizard and (options.tempcat
or options.thread
or options.confval):
print "Can't take a walk with the wizard and do something else too."
exit(1)
if options.confval:
if not config.Configuration().option_exists(options.confval[0]):
print ("%s: error: %s is not a "
"valid configuration option") % (sys.argv[0],
options.confval[0])
exit(1)
print "Setting", options.confval[0], "to", options.confval[1]
config.Configuration().set_option(options.confval[0],
options.confval[1])
config.Configuration().save()
exit(0)
if options.sort:
sort = sorter.Sorter()
for item in os.listdir(conf.get_download_location()):
sort.act(item)
exit(0)
elif options.wizard:
try:
walk_with_wizard(base_url)
except KeyboardInterrupt:
print
print "Alright, no more wizard hat and robe then. Goodbye"
exit(0)
if options.confval:
if not conf.option_exists(options.confval[0]):
print ("%s: error: %s is not a valid configuration option"
% (sys.argv[0], options.confval[0]))
elif options.thread:
try:
if options.thread[:7] == "http://":
t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread])
else:
print ("if THREAD is not an absolute URL, "
"CATEGORY must also be specified")
exit(1)
print "Setting", options.confval[0], "to", options.confval[1]
conf.set_option(options.confval[0],
options.confval[1])
conf.save()
exit(0)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
elif options.thread:
try:
if options.thread[:7] == "http://":
t = downloader.get_image_links("", [options.thread])
elif options.tempcat:
url = "%s%s/res/" % (base_url, options.tempcat)
t = downloader.get_image_links(url, [options.thread])
else:
print ("if THREAD is not an absolute URL, "
"CATEGORY must also be specified")
exit(1)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "Goodbye"
exit(0)
elif options.tempcat:
config.Configuration().set_category(options.tempcat)
elif options.tempcat:
conf.set_categories([options.tempcat])
base_url = "%s%s/" % (base_url, config.Configuration().get_category())
elif options.loglevel is not None:
util.loglevel = util.LogType.from_int(options.loglevel)
if __name__ == "__main__":
conf = config.Configuration()
sort = sorter.Sorter()
parse_commands()
downloader.set_on_downloaded(sort.act)
for category in conf.get_categories():
base_url = "%s%s/" % (base_url, category)
try:
t = downloader.get_thread_links(base_url)
t = downloader.get_image_links(base_url, t)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
be = backend.Backend()
be.save(True) # Make sure that the downloaded images are saved anyway
print
print "So you don't want these images? Fine! I'll stop then."
util.log(util.LogType.Err, "Quit on user request")
try:
t = downloader.get_thread_links(base_url)
t = downloader.get_image_links(base_url, t)
(skipped, failed, downloaded, total) = downloader.get_images(t)
print "Downloaded: ", downloaded
print "Skipped: ", skipped
print "Failed: ", failed
print "Total: ", total
except KeyboardInterrupt:
print
print "So you don't want these images? Fine! I'll stop then."

View file

@ -1,70 +0,0 @@
import os
import modules.kirbybase
from util import confdir,raw_input_with_default
import config
class _Backend(object):
""" A class that communicates with the datastore """
def __init__(self):
self.table = os.path.join(confdir, "images.tbl")
self.store = modules.kirbybase.KirbyBase()
self.__collection = ""
self.__new_collection = []
self.load()
def create_store_if_needed(self):
if not os.path.exists(self.table):
return self.store.create(self.table, ["filename:String"])
return True
def add(self, filename):
if filename in self.__collection:
self.__collection.remove(filename)
self.__new_collection.append(filename)
def check(self, filename):
collected = filename in self.__collection
downloaded = filename in self.__new_collection
if not downloaded:
self.add(filename)
if collected or downloaded:
return True
return False
def save(self, dump = False):
if dump:
self.__new_collection.extend(self.__collection)
if os.path.exists(self.table):
os.remove(self.table)
self.create_store_if_needed()
for f in self.__new_collection:
self.store.insert(self.table, [f])
def load(self):
if os.path.exists(self.table):
collection = self.store.select(self.table, ['recno'], ['*'], ['filename'], returnType="report")
if collection != '':
self.__collection = collection.split()
# Lazily-created shared instance handed out by Backend().
_backend = None


def Backend():
    """Return the shared _Backend instance, creating it on first use."""
    global _backend
    # Identity test: None is a singleton, and "is" cannot be affected by a
    # custom __eq__.
    if _backend is None:
        _backend = _Backend()
    return _backend
if __name__ == "__main__":
    # Run directly: register every entry of the configured archive
    # directory with the backend.
    # NOTE(review): nothing here calls backend.save(), so these additions
    # do not appear to be written to the table -- confirm that is intended.
    backend = Backend()
    config._optioncreator = raw_input_with_default
    archive_dir = config.Configuration().get_archive_location()
    for entry in os.listdir(archive_dir):
        backend.add(entry)

124
config.py
View file

@ -20,94 +20,43 @@
import os
import ConfigParser
import sys
from util import homedir, confdir
homedir = os.getenv("HOME")
if homedir is None:
homedir = os.path.dirname(sys.argv[0])
class _Configuration(object):
def __init__(self, optioncreator):
self.filename = os.path.join(confdir, "config.cfg")
def __init__(self):
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
self.configparser = ConfigParser.RawConfigParser()
self.optioncreator = optioncreator
if not os.path.exists(self.filename):
self.create_new()
else:
self.configparser.read(self.filename)
def create_new(self):
self.configparser.add_section("settings")
self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
self.configparser.read(self.filename)
self.configparser.add_section("locations")
self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
def check(self):
changed = False
# read if it exists
if os.path.exists(self.filename):
self.configparser.read(self.filename)
# locations
if not self.configparser.has_section("locations"):
self.configparser.add_section("locations")
# locations/download_base
if not self.configparser.has_option("locations", "download_base"):
self.create_option("locations",
"download_base",
os.path.join(homedir,
"Pictures"),
"Please enter where "
"you would like the "
"downloads to go: ")
changed = True
# locations/archive
if not self.configparser.has_option("locations", "archive"):
self.create_option("locations",
"archive",
os.path.join(self.configparser.get("locations",
"download_base"),
".arch"),
"Please enter where in {download_base} you "
"would like to store archived images (used for "
"checking what to download): ")
changed = True
# settings
if not self.configparser.has_section("settings"):
self.configparser.add_section("settings")
# settings/categories
if not self.configparser.has_option("settings", "categories"):
self.create_option("settings",
"categories",
"w",
"Please enter which "
"category you would like "
"to download from: ")
changed = True
# settings/resolutions
if not self.configparser.has_option("settings", "resolutions"):
self.create_option("settings",
"resolutions",
"1600x1050,1900x1200,1900x1080",
"Please enter your preferred "
"resolutions (* for all)")
changed = True
# save
if changed:
self.save()
self.save()
def create_option(self, section, name, default, message):
self.configparser.set(section,
name,
self.optioncreator(default,
message))
def raw_input_with_default(self, default, prompt):
inp = raw_input("%s (default=%s): " % (prompt, default))
if inp == "":
return default
return inp
def get_download_location(self):
return self.configparser.get("locations", "download_base")
def set_download_location(self, value):
self.configparser.set("locations", "download_base", value)
return self.configparser.get("locations", "download")
def get_archive_location(self):
return self.configparser.get("locations", "archive")
def set_archive_location(self, value):
self.configparser.set("locations", "archive", value)
def get_category(self):
return self.configparser.get("settings", "category")
def get_categories(self):
return self.configparser.get("settings", "categories").split(',')
def set_categories(self, value = []):
self.configparser.set("settings", "category", ','.join(value))
def get_resolutions(self):
return self.configparser.get("settings", "resolutions").split(',')
def set_resolutions(self, value = []):
self.configparser.set("settings", "resolutions", ','.join(value))
def set_category(self, value):
self.configparser.set("settings", "category", value)
def option_exists(self, option):
sections = self.configparser.sections()
@ -116,7 +65,6 @@ class _Configuration(object):
return True
return False
# Should only be used by the command-line
def set_option(self, option, value):
sec = None
sections = self.configparser.sections()
@ -133,19 +81,9 @@ class _Configuration(object):
def save(self):
dirname = os.path.dirname(self.filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
os.mkdir(dirname)
configfile = open(self.filename, "w")
self.configparser.write(configfile)
_configuration = None
_optioncreator = None
def Configuration():
global _optioncreator
global _configuration
if _optioncreator is None:
raise ValueError("optioncreator must be set")
if _configuration is None:
_configuration = _Configuration(_optioncreator)
_configuration.check()
return _configuration
_configuration = _Configuration()
def Configuration(): return _configuration

View file

@ -1,3 +1,5 @@
#!/usr/bin/env python
######################################################################
# Copyright 2009, 2010 ryuslash
#
@ -20,62 +22,37 @@
import urllib
import os
import htmlparser
#import progressbar
import config
import sys
import backend
import util
def get_savedir():
conf = config.Configuration()
savedir = conf.get_download_location()
if not os.path.exists(savedir):
os.makedirs(savedir)
return savedir
def check_archive(fullpath):
filename = os.path.basename(fullpath)
be = backend.Backend()
return be.check(filename)
def write(message):
sys.stdout.write(message)
sys.stdout.flush()
savedir = config.Configuration().get_download_location()
if not os.path.exists(savedir):
os.makedirs(savedir)
class Downloader(object):
def __init__(self, progress_reporter):
self.progress_reporter = progress_reporter
self.on_downloaded = None
def set_on_downloaded(self, on_downloaded):
self.on_downloaded = on_downloaded
def download(self, url):
f = None
tries = 10
while tries > 0:
try:
f = urllib.urlopen(url)
break
except IOError:
tries -= 1
write("\rTry of %s failed, %d tries left" % (url, tries))
return f
def get_thread_links(self, baseurl):
myparser = htmlparser.MyParser()
i = 0
code = 0
url = None
while code != 404:
url = baseurl + str(i)
f = self.download(url)
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
i = 1
total = len(t)
progress = self.progress_reporter(total)
for pagenum in t:
progress.show_progress(i)
url = baseurl + pagenum
tries = 10
while tries > 0:
try:
f = urllib.urlopen(url)
break
except IOError:
tries -= 1
print "\rTry of", url, "failed,", tries, "tries left"
if not f is None:
code = f.getcode()
if code == 404:
write("\rCollected %d pages\n" % i)
f.close()
continue
# Read the response
s = f.read()
f.close()
@ -83,11 +60,10 @@ class Downloader(object):
# Process the page.
myparser.parse(s)
else:
write("\rOpening of %s did not succeed, trying next one..." \
% url)
"\rOpening of", url, "did not succeed, trying next one..."
i += 1
write("\rCollected %d pages" % i)
progress.complete()
return myparser.get_hyperlinks()
def get_image_links(self, baseurl, t = []):
@ -100,16 +76,21 @@ class Downloader(object):
progress.show_progress(i)
img_url = baseurl + link
f = self.download(img_url)
tries = 10
while tries > 0:
try:
f = urllib.urlopen(img_url)
break
except IOError:
tries -= 1
print "\rTry of", img_url, "failed,", tries, "tries left"
if not f is None:
s = f.read()
f.close()
mysubparser.parse(s)
else:
write("\rOpening of %s did not succeed, " \
"trying next one..." % img_url)
print "\rOpening of", img_url, "did not succeed, trying next one..."
i += 1
progress.complete()
@ -124,10 +105,8 @@ class Downloader(object):
i = 1
for link in t:
progress.show_progress(i)
filename = os.path.join(get_savedir(), os.path.split(link)[1])
if not check_archive(filename):
util.log(util.LogType.Msg, "%s is not in archive" % filename, None)
filename = os.path.join(savedir, os.path.split(link)[1])
if not os.path.exists(filename):
tries = 10
while tries > 0:
try:
@ -138,25 +117,19 @@ class Downloader(object):
if tries == 0:
failed += 1
else:
util.log(util.LogType.Msg, "succsesfully downloaded %s" % filename, None)
downloaded += 1
if self.on_downloaded is not None:
util.log(util.LogType.Msg, "", self.on_downloaded)
if not self.on_downloaded(filename):
failed += 1
else:
util.log(util.LogType.Warn, "on_downloaded is None", None)
else:
skipped += 1
i += 1
progress.complete()
be = backend.Backend()
be.save()
return (skipped, failed, downloaded, total)
if __name__ == "__main__":
print "Don't run me, run 4grab.py"
# Get a file-like object for the 4chan.org w/imgboard
base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"
# Get the hyperlinks.
t = get_thread_links(base_url)
t = get_image_links(base_url, t)
get_images(t)

View file

@ -17,35 +17,33 @@
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
######################################################################
from HTMLParser import HTMLParser, HTMLParseError
import sgmllib
import re
class MyParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
class MyParser(sgmllib.SGMLParser):
def __init__(self, verbose=0):
sgmllib.SGMLParser.__init__(self, verbose)
self.hyperlinks = []
self.url_reg = re.compile('res/\d+\Z')
self.prev = ""
def parse(self, s):
self.feed(s)
self.close()
def handle_starttag(self, tag, attrs):
prev = ""
if tag == 'a':
for name, value in attrs:
if name == 'href':
if self.url_reg.search(value) != None:
if prev != value:
self.hyperlinks.append(value)
prev = value
def start_a(self, attributes):
for name, value in attributes:
if name == "href":
if self.url_reg.search(value) != None:
if self.prev != value:
self.hyperlinks.append(value)
self.prev = value
def get_hyperlinks(self):
return self.hyperlinks
class MySubParser(MyParser):
def __init__(self):
MyParser.__init__(self)
def __init__(self, verbose=0):
MyParser.__init__(self, verbose)
self.url_reg = re.compile('/src/\d+\.\w{3,4}\Z')

View file

File diff suppressed because it is too large Load diff

118
sorter.py
View file

@ -1,118 +0,0 @@
######################################################################
# Copyright 2009, 2010 ryuslash
#
# This file is part of 4grab.
#
# 4grab is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# 4grab is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
######################################################################
import config
import Image
import shutil
import os
import datetime
import backend
import util
def dummy_option_creator(value1, value2):
    """Option creator that ignores both arguments and returns None."""
    return None
config._optioncreator = dummy_option_creator
# Sorter copies downloaded images into per-resolution folders below the
# download location, records them through the backend and removes the
# downloaded file.
# NOTE(review): indentation is not visible in this view, so the exact
# nesting of statements inside the methods below could not be confirmed.
class Sorter:
# Keep the shared configuration and the resolution strings it provides.
def __init__(self):
self.conf = config.Configuration()
self.resolutions = self.conf.get_resolutions()
# Process one file name (joined onto the download location).
# Returns False only when opening the image raised IOError, True otherwise.
def act(self, filename):
util.log(util.LogType.Msg, "sorter is acting", filename)
download_base = self.conf.get_download_location()
retval = True
# Only names with a recognised image extension are opened.
if self.check_filename(filename):
image = None
try:
image = Image.open(os.path.join(download_base,
filename))
# An unreadable image is reported to the caller through the return value.
except IOError:
retval = False
if not image == None and self.archive_check(filename):
util.log(util.LogType.Msg, "Checking resolution", {"filename":filename, "resolution":image.size})
# Each configured resolution is split on 'x'; the target folder is named
# "<first part>-<second part>" below the download location.
for resolution in self.resolutions:
resolution = resolution.split('x')
foldername = "%s-%s" % (resolution[0],
resolution[1])
folderpath = os.path.join(download_base,
foldername)
# The image size is compared as strings because the configured parts are
# never converted to int.
if str(image.size[0]) == resolution[0] and \
str(image.size[1]) == resolution[1]:
if not os.path.exists(folderpath):
os.makedirs(folderpath)
self.copy(filename, folderpath)
# Stop at the first matching resolution.
break
# NOTE(review): confirm which of the conditions above guard the archive()
# and remove() calls; that cannot be determined without indentation.
self.archive(filename)
self.remove(filename)
return retval
# Copy the file from the download location into
# destpath/<year>-<month>-<day>/, creating that dated folder when missing.
def copy(self, filename, destpath):
download_base = self.conf.get_download_location()
source = os.path.join(download_base,
os.path.basename(filename))
today = datetime.date.today()
# %d leaves month and day unpadded (for example 2010-3-7).
dest = os.path.join(destpath,
"%d-%d-%d" % (today.year, today.month, today.day))
util.log(util.LogType.Msg, "going to copy %s to %s" % (source, dest), None)
if not os.path.exists(dest):
os.makedirs(dest)
dest = os.path.join(dest,
os.path.basename(filename))
# Guard against copying a file onto itself.
if source != dest:
shutil.copy(source, dest)
else:
print "\nHow can this even happen?! Copying", source, "to", dest
# Register the file's base name with the backend.
def archive(self, filename):
be = backend.Backend()
be.add(os.path.basename(filename))
# Ask the backend about the file's base name and return its answer.
def archive_check(self, filename):
be = backend.Backend()
return be.check(os.path.basename(filename))
# True only for the exact (case-sensitive) extensions .jpg, .png and .gif.
def check_filename(self, filename):
ext = os.path.splitext(filename)[1]
return ext == ".jpg" or \
ext == ".png" or \
ext == ".gif"
# Delete the file from the download location.
def remove(self, filename):
download_base = self.conf.get_download_location()
source = os.path.join(download_base, filename)
os.remove(source)
if __name__ == "__main__":
    # Run directly: hand every entry of the download location to a Sorter.
    download_base = config.Configuration().get_download_location()
    sorter = Sorter()
    for entry in os.listdir(download_base):
        sorter.act(entry)

39
util.py
View file

@ -1,39 +0,0 @@
import os
import sys
class LogType:
    """Numeric log levels, from 0 (Non) up to 3 (Msg)."""

    Non = 0
    Err = 1
    Warn = 2
    Msg = 3

    @staticmethod
    def from_int(lloglevel):
        """Convert *lloglevel* with int() and return the matching level.

        Returns None when the converted value is not one of the four
        defined levels; int() errors propagate to the caller.
        """
        wanted = int(lloglevel)
        for level in (LogType.Non, LogType.Err, LogType.Warn, LogType.Msg):
            if wanted == level:
                return level
        return None
loglevel = LogType.Non
def raw_input_with_default(default, prompt):
    """Ask the user for a value, showing *default* in the prompt.

    Returns the typed text, or *default* when the reply is empty.
    """
    answer = raw_input("%s (default=%s): " % (prompt, default))
    return default if answer == "" else answer
def log(logtype, message, data=None):
    """Print *message* when the module-level ``loglevel`` is at least *logtype*.

    When *data* is given it is printed on an indented line after a
    ``data:`` header. The original printed the literal word "data" there
    instead of the value that was passed in.
    """
    if loglevel >= logtype:
        print(message)
        if data is not None:
            # Wrap in a 1-tuple so tuples and dicts format as one value.
            print("data:\n\t%s" % (data,))
# Base directory for per-user files: $HOME when it is set, otherwise the
# directory part of the running script's path.
homedir = os.environ.get("HOME")
if homedir is None:
    homedir = os.path.dirname(sys.argv[0])

# The configuration directory sits directly below homedir.
confdir = os.path.join(homedir, ".4grab")