Compare commits
22 commits
master
...
develop-ar
Author | SHA1 | Date | |
---|---|---|---|
|
02524e6780 | ||
|
4bd89ebded | ||
|
05724b37b3 | ||
|
0323ddbed8 | ||
|
ef79c9991b | ||
|
99b87aeffb | ||
|
b5aac62357 | ||
|
af529bcd4e | ||
|
2db5555609 | ||
|
9fb2b4ff58 | ||
|
a567e8630f | ||
|
dfaa24b1b6 | ||
|
d58d029202 | ||
|
7ab6d2911f | ||
|
14e2b0cc54 | ||
|
018abb7da1 | ||
|
3980ccf38e | ||
|
4b70374e9d | ||
|
96247d41d5 | ||
|
7f8dfa1d30 | ||
|
4a9cc7e2b6 | ||
|
8e101c92f9 |
10 changed files with 2555 additions and 215 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -2,3 +2,4 @@
|
||||||
*.pyc
|
*.pyc
|
||||||
\#*\#
|
\#*\#
|
||||||
.*
|
.*
|
||||||
|
modules/plane.tbl
|
138
4grab.py
138
4grab.py
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/python
|
#!/usr/bin/python2
|
||||||
|
|
||||||
######################################################################
|
######################################################################
|
||||||
# Copyright 2009, 2010 ryuslash
|
# Copyright 2009, 2010 ryuslash
|
||||||
|
@ -21,114 +21,87 @@
|
||||||
|
|
||||||
import optparse
|
import optparse
|
||||||
import sys
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
import config
|
import config
|
||||||
|
from util import raw_input_with_default
|
||||||
|
import util
|
||||||
|
|
||||||
import download
|
import download
|
||||||
import progressbar
|
import progressbar
|
||||||
|
import sorter
|
||||||
|
import backend
|
||||||
|
|
||||||
|
config._optioncreator = raw_input_with_default
|
||||||
|
|
||||||
base_url = "http://boards.4chan.org/"
|
base_url = "http://boards.4chan.org/"
|
||||||
parser = optparse.OptionParser()
|
parser = optparse.OptionParser()
|
||||||
downloader = download.Downloader(progressbar.Progress)
|
downloader = download.Downloader(progressbar.Progress)
|
||||||
|
|
||||||
def walk_with_wizard(baseurl):
|
def parse_commands():
|
||||||
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
|
conf = config.Configuration()
|
||||||
print "Alright, let me put on my robe and wizard hat."
|
parser.set_usage(
|
||||||
|
"""%prog [options]
|
||||||
# Single or all
|
|
||||||
inp = None
|
|
||||||
prompt = "Would you like to download a single thread, or all? "
|
|
||||||
inp = raw_input(prompt)
|
|
||||||
while (inp != "single" and inp != "all"):
|
|
||||||
print "Please type single or all"
|
|
||||||
inp = raw_input(prompt)
|
|
||||||
|
|
||||||
if inp == "single":
|
|
||||||
inp = raw_input("Which thread would you like to download? ")
|
|
||||||
if inp[:7] == "http://":
|
|
||||||
t = downloader.get_image_links("", [inp])
|
|
||||||
else:
|
|
||||||
thread = inp
|
|
||||||
inp = raw_input("Which category is this thread in? ")
|
|
||||||
print wzrd_msg
|
|
||||||
t = downloader.get_image_links("%s%s/res/" % (baseurl, inp),
|
|
||||||
[thread])
|
|
||||||
else:
|
|
||||||
inp = raw_input("Which category would you like to download? ")
|
|
||||||
config.Configuration().set_category(inp)
|
|
||||||
baseurl = "%s%s/" % (baseurl, config.Configuration().get_category())
|
|
||||||
|
|
||||||
print wzrd_msg
|
|
||||||
t = downloader.get_thread_links(baseurl)
|
|
||||||
t = downloader.get_image_links(baseurl, t)
|
|
||||||
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
|
||||||
print "Downloaded: ", downloaded
|
|
||||||
print "Skipped: ", skipped
|
|
||||||
print "Failed: ", failed
|
|
||||||
print "Total: ", total
|
|
||||||
|
|
||||||
parser.set_usage(
|
|
||||||
"""%prog [options]
|
|
||||||
|
|
||||||
4grab Copyright (C) 2009-2010 ryuslash
|
4grab Copyright (C) 2009-2010 ryuslash
|
||||||
This program comes with ABSOLUTELY NO WARRANTY.
|
This program comes with ABSOLUTELY NO WARRANTY.
|
||||||
This is free software, and you are welcome to redistribute it
|
This is free software, and you are welcome to redistribute it
|
||||||
under certain conditions.""")
|
under certain conditions.""")
|
||||||
parser.add_option("-e",
|
parser.add_option("-e",
|
||||||
nargs=2,
|
nargs=2,
|
||||||
dest="confval",
|
dest="confval",
|
||||||
metavar="CONF VALUE",
|
metavar="CONF VALUE",
|
||||||
help="Set configuration option CONF to be VALUE")
|
help="Set configuration option CONF to be VALUE")
|
||||||
parser.add_option("-c",
|
parser.add_option("-c",
|
||||||
"--category",
|
"--category",
|
||||||
dest="tempcat",
|
dest="tempcat",
|
||||||
metavar="CATEGORY",
|
metavar="CATEGORY",
|
||||||
help="Set the category to CATEGORY only for this run")
|
help="Set the category to CATEGORY only for this run")
|
||||||
parser.add_option("-t",
|
parser.add_option("-t",
|
||||||
"--thread",
|
"--thread",
|
||||||
dest="thread",
|
dest="thread",
|
||||||
metavar="THREAD",
|
metavar="THREAD",
|
||||||
help="Download only THREAD. If THREAD is only an ID, "
|
help="Download only THREAD. If THREAD is only an ID, "
|
||||||
"CATEGORY must also be set. Otherwise, no problem :-)")
|
"CATEGORY must also be set. Otherwise, no problem :-)")
|
||||||
parser.add_option("-w",
|
parser.add_option("-s",
|
||||||
"--wizard",
|
"--sort",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
dest="wizard",
|
dest="sort",
|
||||||
help="I'll put on my robe and wizard hat and help you "
|
help="Sort downloaded images, most handy if you've used "
|
||||||
"get some of those pictures you like")
|
"older versions which didn't sort yet")
|
||||||
(options, args) = parser.parse_args()
|
parser.add_option("-l",
|
||||||
|
"--loglevel",
|
||||||
|
nargs=1,
|
||||||
|
dest="loglevel",
|
||||||
|
metavar="LEVEL",
|
||||||
|
help="Changes the default log level to LEVEL")
|
||||||
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
if options.confval and (options.tempcat
|
if options.confval and (options.tempcat
|
||||||
or options.thread
|
or options.thread
|
||||||
or options.wizard):
|
or options.wizard
|
||||||
|
or options.sort):
|
||||||
print "Can't configure something and do something else too."
|
print "Can't configure something and do something else too."
|
||||||
exit(1)
|
exit(1)
|
||||||
if options.wizard and (options.tempcat
|
|
||||||
or options.thread
|
|
||||||
or options.confval):
|
|
||||||
print "Can't take a walk with the wizard and do something else too."
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
if options.confval:
|
if options.sort:
|
||||||
if not config.Configuration().option_exists(options.confval[0]):
|
sort = sorter.Sorter()
|
||||||
print ("%s: error: %s is not a "
|
for item in os.listdir(conf.get_download_location()):
|
||||||
"valid configuration option") % (sys.argv[0],
|
sort.act(item)
|
||||||
options.confval[0])
|
exit(0)
|
||||||
|
|
||||||
|
if options.confval:
|
||||||
|
if not conf.option_exists(options.confval[0]):
|
||||||
|
print ("%s: error: %s is not a valid configuration option"
|
||||||
|
% (sys.argv[0], options.confval[0]))
|
||||||
exit(1)
|
exit(1)
|
||||||
print "Setting", options.confval[0], "to", options.confval[1]
|
print "Setting", options.confval[0], "to", options.confval[1]
|
||||||
config.Configuration().set_option(options.confval[0],
|
conf.set_option(options.confval[0],
|
||||||
options.confval[1])
|
options.confval[1])
|
||||||
config.Configuration().save()
|
conf.save()
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
elif options.wizard:
|
elif options.thread:
|
||||||
try:
|
|
||||||
walk_with_wizard(base_url)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print
|
|
||||||
print "Alright, no more wizard hat and robe then. Goodbye"
|
|
||||||
exit(0)
|
|
||||||
|
|
||||||
elif options.thread:
|
|
||||||
try:
|
try:
|
||||||
if options.thread[:7] == "http://":
|
if options.thread[:7] == "http://":
|
||||||
t = downloader.get_image_links("", [options.thread])
|
t = downloader.get_image_links("", [options.thread])
|
||||||
|
@ -149,12 +122,21 @@ elif options.thread:
|
||||||
print "Goodbye"
|
print "Goodbye"
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
elif options.tempcat:
|
elif options.tempcat:
|
||||||
config.Configuration().set_category(options.tempcat)
|
conf.set_categories([options.tempcat])
|
||||||
|
|
||||||
base_url = "%s%s/" % (base_url, config.Configuration().get_category())
|
elif options.loglevel is not None:
|
||||||
|
util.loglevel = util.LogType.from_int(options.loglevel)
|
||||||
|
|
||||||
try:
|
if __name__ == "__main__":
|
||||||
|
conf = config.Configuration()
|
||||||
|
sort = sorter.Sorter()
|
||||||
|
parse_commands()
|
||||||
|
|
||||||
|
downloader.set_on_downloaded(sort.act)
|
||||||
|
for category in conf.get_categories():
|
||||||
|
base_url = "%s%s/" % (base_url, category)
|
||||||
|
try:
|
||||||
t = downloader.get_thread_links(base_url)
|
t = downloader.get_thread_links(base_url)
|
||||||
t = downloader.get_image_links(base_url, t)
|
t = downloader.get_image_links(base_url, t)
|
||||||
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
||||||
|
@ -162,6 +144,10 @@ try:
|
||||||
print "Skipped: ", skipped
|
print "Skipped: ", skipped
|
||||||
print "Failed: ", failed
|
print "Failed: ", failed
|
||||||
print "Total: ", total
|
print "Total: ", total
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
be = backend.Backend()
|
||||||
|
be.save(True) # Make sure that the downloaded images are saved anyway
|
||||||
|
|
||||||
print
|
print
|
||||||
print "So you don't want these images? Fine! I'll stop then."
|
print "So you don't want these images? Fine! I'll stop then."
|
||||||
|
util.log(util.LogType.Err, "Quit on user request")
|
||||||
|
|
70
backend.py
Normal file
70
backend.py
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
import os
|
||||||
|
import modules.kirbybase
|
||||||
|
from util import confdir,raw_input_with_default
|
||||||
|
import config
|
||||||
|
|
||||||
|
class _Backend(object):
|
||||||
|
""" A class that communicates with the datastore """
|
||||||
|
def __init__(self):
|
||||||
|
self.table = os.path.join(confdir, "images.tbl")
|
||||||
|
self.store = modules.kirbybase.KirbyBase()
|
||||||
|
self.__collection = ""
|
||||||
|
self.__new_collection = []
|
||||||
|
|
||||||
|
self.load()
|
||||||
|
|
||||||
|
def create_store_if_needed(self):
|
||||||
|
if not os.path.exists(self.table):
|
||||||
|
return self.store.create(self.table, ["filename:String"])
|
||||||
|
return True
|
||||||
|
|
||||||
|
def add(self, filename):
|
||||||
|
if filename in self.__collection:
|
||||||
|
self.__collection.remove(filename)
|
||||||
|
|
||||||
|
self.__new_collection.append(filename)
|
||||||
|
|
||||||
|
def check(self, filename):
|
||||||
|
collected = filename in self.__collection
|
||||||
|
downloaded = filename in self.__new_collection
|
||||||
|
|
||||||
|
if not downloaded:
|
||||||
|
self.add(filename)
|
||||||
|
|
||||||
|
if collected or downloaded:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def save(self, dump = False):
|
||||||
|
if dump:
|
||||||
|
self.__new_collection.extend(self.__collection)
|
||||||
|
|
||||||
|
if os.path.exists(self.table):
|
||||||
|
os.remove(self.table)
|
||||||
|
|
||||||
|
self.create_store_if_needed()
|
||||||
|
for f in self.__new_collection:
|
||||||
|
self.store.insert(self.table, [f])
|
||||||
|
|
||||||
|
def load(self):
|
||||||
|
if os.path.exists(self.table):
|
||||||
|
collection = self.store.select(self.table, ['recno'], ['*'], ['filename'], returnType="report")
|
||||||
|
if collection != '':
|
||||||
|
self.__collection = collection.split()
|
||||||
|
|
||||||
|
_backend = None
|
||||||
|
def Backend():
|
||||||
|
global _backend
|
||||||
|
|
||||||
|
if _backend == None:
|
||||||
|
_backend = _Backend()
|
||||||
|
return _backend
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
backend = Backend()
|
||||||
|
config._optioncreator = raw_input_with_default
|
||||||
|
cfg = config.Configuration()
|
||||||
|
|
||||||
|
for f in os.listdir(cfg.get_archive_location()):
|
||||||
|
backend.add(f)
|
118
config.py
118
config.py
|
@ -20,43 +20,94 @@
|
||||||
import os
|
import os
|
||||||
import ConfigParser
|
import ConfigParser
|
||||||
import sys
|
import sys
|
||||||
|
from util import homedir, confdir
|
||||||
homedir = os.getenv("HOME")
|
|
||||||
if homedir is None:
|
|
||||||
homedir = os.path.dirname(sys.argv[0])
|
|
||||||
|
|
||||||
class _Configuration(object):
|
class _Configuration(object):
|
||||||
def __init__(self):
|
def __init__(self, optioncreator):
|
||||||
self.filename = os.path.join(os.path.join(homedir, ".4grab"), "config.cfg")
|
self.filename = os.path.join(confdir, "config.cfg")
|
||||||
self.configparser = ConfigParser.RawConfigParser()
|
self.configparser = ConfigParser.RawConfigParser()
|
||||||
if not os.path.exists(self.filename):
|
self.optioncreator = optioncreator
|
||||||
self.create_new()
|
|
||||||
else:
|
|
||||||
self.configparser.read(self.filename)
|
self.configparser.read(self.filename)
|
||||||
|
|
||||||
def create_new(self):
|
def check(self):
|
||||||
self.configparser.add_section("settings")
|
changed = False
|
||||||
self.set_category(self.raw_input_with_default("w", "Please enter which category you would like to download from: "))
|
# read if it exists
|
||||||
|
if os.path.exists(self.filename):
|
||||||
|
self.configparser.read(self.filename)
|
||||||
|
# locations
|
||||||
|
if not self.configparser.has_section("locations"):
|
||||||
self.configparser.add_section("locations")
|
self.configparser.add_section("locations")
|
||||||
self.configparser.set("locations", "download", self.raw_input_with_default(os.path.join(homedir, "Pictures"), "Please enter where you would like the downloads to go: "))
|
# locations/download_base
|
||||||
|
if not self.configparser.has_option("locations", "download_base"):
|
||||||
|
self.create_option("locations",
|
||||||
|
"download_base",
|
||||||
|
os.path.join(homedir,
|
||||||
|
"Pictures"),
|
||||||
|
"Please enter where "
|
||||||
|
"you would like the "
|
||||||
|
"downloads to go: ")
|
||||||
|
changed = True
|
||||||
|
# locations/archive
|
||||||
|
if not self.configparser.has_option("locations", "archive"):
|
||||||
|
self.create_option("locations",
|
||||||
|
"archive",
|
||||||
|
os.path.join(self.configparser.get("locations",
|
||||||
|
"download_base"),
|
||||||
|
".arch"),
|
||||||
|
"Please enter where in {download_base} you "
|
||||||
|
"would like to store archived images (used for "
|
||||||
|
"checking what to download): ")
|
||||||
|
changed = True
|
||||||
|
# settings
|
||||||
|
if not self.configparser.has_section("settings"):
|
||||||
|
self.configparser.add_section("settings")
|
||||||
|
# settings/categories
|
||||||
|
if not self.configparser.has_option("settings", "categories"):
|
||||||
|
self.create_option("settings",
|
||||||
|
"categories",
|
||||||
|
"w",
|
||||||
|
"Please enter which "
|
||||||
|
"category you would like "
|
||||||
|
"to download from: ")
|
||||||
|
changed = True
|
||||||
|
# settings/resolutions
|
||||||
|
if not self.configparser.has_option("settings", "resolutions"):
|
||||||
|
self.create_option("settings",
|
||||||
|
"resolutions",
|
||||||
|
"1600x1050,1900x1200,1900x1080",
|
||||||
|
"Please enter your preferred "
|
||||||
|
"resolutions (* for all)")
|
||||||
|
changed = True
|
||||||
|
# save
|
||||||
|
if changed:
|
||||||
self.save()
|
self.save()
|
||||||
|
|
||||||
def raw_input_with_default(self, default, prompt):
|
def create_option(self, section, name, default, message):
|
||||||
inp = raw_input("%s (default=%s): " % (prompt, default))
|
self.configparser.set(section,
|
||||||
if inp == "":
|
name,
|
||||||
return default
|
self.optioncreator(default,
|
||||||
return inp
|
message))
|
||||||
|
|
||||||
def get_download_location(self):
|
def get_download_location(self):
|
||||||
return self.configparser.get("locations", "download")
|
return self.configparser.get("locations", "download_base")
|
||||||
|
def set_download_location(self, value):
|
||||||
|
self.configparser.set("locations", "download_base", value)
|
||||||
|
|
||||||
def get_category(self):
|
def get_archive_location(self):
|
||||||
return self.configparser.get("settings", "category")
|
return self.configparser.get("locations", "archive")
|
||||||
|
def set_archive_location(self, value):
|
||||||
|
self.configparser.set("locations", "archive", value)
|
||||||
|
|
||||||
def set_category(self, value):
|
def get_categories(self):
|
||||||
self.configparser.set("settings", "category", value)
|
return self.configparser.get("settings", "categories").split(',')
|
||||||
|
def set_categories(self, value = []):
|
||||||
|
self.configparser.set("settings", "category", ','.join(value))
|
||||||
|
|
||||||
|
def get_resolutions(self):
|
||||||
|
return self.configparser.get("settings", "resolutions").split(',')
|
||||||
|
def set_resolutions(self, value = []):
|
||||||
|
self.configparser.set("settings", "resolutions", ','.join(value))
|
||||||
|
|
||||||
def option_exists(self, option):
|
def option_exists(self, option):
|
||||||
sections = self.configparser.sections()
|
sections = self.configparser.sections()
|
||||||
|
@ -65,6 +116,7 @@ class _Configuration(object):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Should only be used by the command-line
|
||||||
def set_option(self, option, value):
|
def set_option(self, option, value):
|
||||||
sec = None
|
sec = None
|
||||||
sections = self.configparser.sections()
|
sections = self.configparser.sections()
|
||||||
|
@ -81,9 +133,19 @@ class _Configuration(object):
|
||||||
def save(self):
|
def save(self):
|
||||||
dirname = os.path.dirname(self.filename)
|
dirname = os.path.dirname(self.filename)
|
||||||
if not os.path.exists(dirname):
|
if not os.path.exists(dirname):
|
||||||
os.mkdir(dirname)
|
os.makedirs(dirname)
|
||||||
configfile = open(self.filename, "w")
|
configfile = open(self.filename, "w")
|
||||||
self.configparser.write(configfile)
|
self.configparser.write(configfile)
|
||||||
|
|
||||||
_configuration = _Configuration()
|
_configuration = None
|
||||||
def Configuration(): return _configuration
|
_optioncreator = None
|
||||||
|
def Configuration():
|
||||||
|
global _optioncreator
|
||||||
|
global _configuration
|
||||||
|
|
||||||
|
if _optioncreator is None:
|
||||||
|
raise ValueError("optioncreator must be set")
|
||||||
|
if _configuration is None:
|
||||||
|
_configuration = _Configuration(_optioncreator)
|
||||||
|
_configuration.check()
|
||||||
|
return _configuration
|
||||||
|
|
99
download.py
99
download.py
|
@ -1,5 +1,3 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
######################################################################
|
######################################################################
|
||||||
# Copyright 2009, 2010 ryuslash
|
# Copyright 2009, 2010 ryuslash
|
||||||
#
|
#
|
||||||
|
@ -22,28 +20,36 @@
|
||||||
import urllib
|
import urllib
|
||||||
import os
|
import os
|
||||||
import htmlparser
|
import htmlparser
|
||||||
#import progressbar
|
|
||||||
import config
|
import config
|
||||||
|
import sys
|
||||||
|
import backend
|
||||||
|
import util
|
||||||
|
|
||||||
savedir = config.Configuration().get_download_location()
|
def get_savedir():
|
||||||
if not os.path.exists(savedir):
|
conf = config.Configuration()
|
||||||
|
savedir = conf.get_download_location()
|
||||||
|
if not os.path.exists(savedir):
|
||||||
os.makedirs(savedir)
|
os.makedirs(savedir)
|
||||||
|
return savedir
|
||||||
|
def check_archive(fullpath):
|
||||||
|
filename = os.path.basename(fullpath)
|
||||||
|
be = backend.Backend()
|
||||||
|
return be.check(filename)
|
||||||
|
|
||||||
|
def write(message):
|
||||||
|
sys.stdout.write(message)
|
||||||
|
sys.stdout.flush()
|
||||||
|
|
||||||
class Downloader(object):
|
class Downloader(object):
|
||||||
def __init__(self, progress_reporter):
|
def __init__(self, progress_reporter):
|
||||||
self.progress_reporter = progress_reporter
|
self.progress_reporter = progress_reporter
|
||||||
|
self.on_downloaded = None
|
||||||
|
|
||||||
def get_thread_links(self, baseurl):
|
def set_on_downloaded(self, on_downloaded):
|
||||||
myparser = htmlparser.MyParser()
|
self.on_downloaded = on_downloaded
|
||||||
t = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
|
|
||||||
i = 1
|
|
||||||
total = len(t)
|
|
||||||
progress = self.progress_reporter(total)
|
|
||||||
|
|
||||||
for pagenum in t:
|
def download(self, url):
|
||||||
progress.show_progress(i)
|
f = None
|
||||||
|
|
||||||
url = baseurl + pagenum
|
|
||||||
tries = 10
|
tries = 10
|
||||||
while tries > 0:
|
while tries > 0:
|
||||||
try:
|
try:
|
||||||
|
@ -51,8 +57,25 @@ class Downloader(object):
|
||||||
break
|
break
|
||||||
except IOError:
|
except IOError:
|
||||||
tries -= 1
|
tries -= 1
|
||||||
print "\rTry of", url, "failed,", tries, "tries left"
|
write("\rTry of %s failed, %d tries left" % (url, tries))
|
||||||
|
return f
|
||||||
|
|
||||||
|
def get_thread_links(self, baseurl):
|
||||||
|
myparser = htmlparser.MyParser()
|
||||||
|
i = 0
|
||||||
|
code = 0
|
||||||
|
url = None
|
||||||
|
|
||||||
|
while code != 404:
|
||||||
|
url = baseurl + str(i)
|
||||||
|
f = self.download(url)
|
||||||
|
|
||||||
if not f is None:
|
if not f is None:
|
||||||
|
code = f.getcode()
|
||||||
|
if code == 404:
|
||||||
|
write("\rCollected %d pages\n" % i)
|
||||||
|
f.close()
|
||||||
|
continue
|
||||||
# Read the response
|
# Read the response
|
||||||
s = f.read()
|
s = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
@ -60,10 +83,11 @@ class Downloader(object):
|
||||||
# Process the page.
|
# Process the page.
|
||||||
myparser.parse(s)
|
myparser.parse(s)
|
||||||
else:
|
else:
|
||||||
"\rOpening of", url, "did not succeed, trying next one..."
|
write("\rOpening of %s did not succeed, trying next one..." \
|
||||||
|
% url)
|
||||||
i += 1
|
i += 1
|
||||||
|
write("\rCollected %d pages" % i)
|
||||||
|
|
||||||
progress.complete()
|
|
||||||
return myparser.get_hyperlinks()
|
return myparser.get_hyperlinks()
|
||||||
|
|
||||||
def get_image_links(self, baseurl, t = []):
|
def get_image_links(self, baseurl, t = []):
|
||||||
|
@ -76,21 +100,16 @@ class Downloader(object):
|
||||||
progress.show_progress(i)
|
progress.show_progress(i)
|
||||||
|
|
||||||
img_url = baseurl + link
|
img_url = baseurl + link
|
||||||
tries = 10
|
f = self.download(img_url)
|
||||||
while tries > 0:
|
|
||||||
try:
|
|
||||||
f = urllib.urlopen(img_url)
|
|
||||||
break
|
|
||||||
except IOError:
|
|
||||||
tries -= 1
|
|
||||||
print "\rTry of", img_url, "failed,", tries, "tries left"
|
|
||||||
if not f is None:
|
if not f is None:
|
||||||
s = f.read()
|
s = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
mysubparser.parse(s)
|
mysubparser.parse(s)
|
||||||
else:
|
else:
|
||||||
print "\rOpening of", img_url, "did not succeed, trying next one..."
|
write("\rOpening of %s did not succeed, " \
|
||||||
|
"trying next one..." % img_url)
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
progress.complete()
|
progress.complete()
|
||||||
|
@ -105,8 +124,10 @@ class Downloader(object):
|
||||||
i = 1
|
i = 1
|
||||||
for link in t:
|
for link in t:
|
||||||
progress.show_progress(i)
|
progress.show_progress(i)
|
||||||
filename = os.path.join(savedir, os.path.split(link)[1])
|
filename = os.path.join(get_savedir(), os.path.split(link)[1])
|
||||||
if not os.path.exists(filename):
|
if not check_archive(filename):
|
||||||
|
util.log(util.LogType.Msg, "%s is not in archive" % filename, None)
|
||||||
|
|
||||||
tries = 10
|
tries = 10
|
||||||
while tries > 0:
|
while tries > 0:
|
||||||
try:
|
try:
|
||||||
|
@ -117,19 +138,25 @@ class Downloader(object):
|
||||||
if tries == 0:
|
if tries == 0:
|
||||||
failed += 1
|
failed += 1
|
||||||
else:
|
else:
|
||||||
|
util.log(util.LogType.Msg, "succsesfully downloaded %s" % filename, None)
|
||||||
downloaded += 1
|
downloaded += 1
|
||||||
|
if self.on_downloaded is not None:
|
||||||
|
util.log(util.LogType.Msg, "", self.on_downloaded)
|
||||||
|
|
||||||
|
if not self.on_downloaded(filename):
|
||||||
|
failed += 1
|
||||||
|
else:
|
||||||
|
util.log(util.LogType.Warn, "on_downloaded is None", None)
|
||||||
else:
|
else:
|
||||||
skipped += 1
|
skipped += 1
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
progress.complete()
|
progress.complete()
|
||||||
|
|
||||||
|
be = backend.Backend()
|
||||||
|
be.save()
|
||||||
|
|
||||||
return (skipped, failed, downloaded, total)
|
return (skipped, failed, downloaded, total)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Get a file-like object for the 4chan.org w/imgboard
|
print "Don't run me, run 4grab.py"
|
||||||
base_url = "http://boards.4chan.org/" + config.Configuration().get_category() + "/"
|
|
||||||
|
|
||||||
# Get the hyperlinks.
|
|
||||||
t = get_thread_links(base_url)
|
|
||||||
t = get_image_links(base_url, t)
|
|
||||||
get_images(t)
|
|
||||||
|
|
|
@ -17,33 +17,35 @@
|
||||||
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
|
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
|
||||||
######################################################################
|
######################################################################
|
||||||
|
|
||||||
import sgmllib
|
from HTMLParser import HTMLParser, HTMLParseError
|
||||||
import re
|
import re
|
||||||
|
|
||||||
class MyParser(sgmllib.SGMLParser):
|
class MyParser(HTMLParser):
|
||||||
def __init__(self, verbose=0):
|
def __init__(self):
|
||||||
sgmllib.SGMLParser.__init__(self, verbose)
|
HTMLParser.__init__(self)
|
||||||
|
|
||||||
self.hyperlinks = []
|
self.hyperlinks = []
|
||||||
self.url_reg = re.compile('res/\d+\Z')
|
self.url_reg = re.compile('res/\d+\Z')
|
||||||
self.prev = ""
|
|
||||||
|
|
||||||
def parse(self, s):
|
def parse(self, s):
|
||||||
self.feed(s)
|
self.feed(s)
|
||||||
self.close()
|
self.close()
|
||||||
|
|
||||||
def start_a(self, attributes):
|
def handle_starttag(self, tag, attrs):
|
||||||
for name, value in attributes:
|
prev = ""
|
||||||
if name == "href":
|
|
||||||
|
if tag == 'a':
|
||||||
|
for name, value in attrs:
|
||||||
|
if name == 'href':
|
||||||
if self.url_reg.search(value) != None:
|
if self.url_reg.search(value) != None:
|
||||||
if self.prev != value:
|
if prev != value:
|
||||||
self.hyperlinks.append(value)
|
self.hyperlinks.append(value)
|
||||||
self.prev = value
|
prev = value
|
||||||
|
|
||||||
def get_hyperlinks(self):
|
def get_hyperlinks(self):
|
||||||
return self.hyperlinks
|
return self.hyperlinks
|
||||||
|
|
||||||
class MySubParser(MyParser):
|
class MySubParser(MyParser):
|
||||||
def __init__(self, verbose=0):
|
def __init__(self):
|
||||||
MyParser.__init__(self, verbose)
|
MyParser.__init__(self)
|
||||||
self.url_reg = re.compile('/src/\d+\.\w{3,4}\Z')
|
self.url_reg = re.compile('/src/\d+\.\w{3,4}\Z')
|
||||||
|
|
0
modules/__init__.py
Normal file
0
modules/__init__.py
Normal file
2035
modules/kirbybase.py
Normal file
2035
modules/kirbybase.py
Normal file
File diff suppressed because it is too large
Load diff
118
sorter.py
Normal file
118
sorter.py
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
######################################################################
|
||||||
|
# Copyright 2009, 2010 ryuslash
|
||||||
|
#
|
||||||
|
# This file is part of 4grab.
|
||||||
|
#
|
||||||
|
# 4grab is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# 4grab is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
import config
|
||||||
|
import Image
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
import datetime
|
||||||
|
import backend
|
||||||
|
|
||||||
|
import util
|
||||||
|
|
||||||
|
def dummy_option_creator(value1, value2): pass
|
||||||
|
config._optioncreator = dummy_option_creator
|
||||||
|
|
||||||
|
class Sorter:
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.conf = config.Configuration()
|
||||||
|
self.resolutions = self.conf.get_resolutions()
|
||||||
|
|
||||||
|
def act(self, filename):
|
||||||
|
util.log(util.LogType.Msg, "sorter is acting", filename)
|
||||||
|
|
||||||
|
download_base = self.conf.get_download_location()
|
||||||
|
retval = True
|
||||||
|
|
||||||
|
if self.check_filename(filename):
|
||||||
|
image = None
|
||||||
|
try:
|
||||||
|
image = Image.open(os.path.join(download_base,
|
||||||
|
filename))
|
||||||
|
except IOError:
|
||||||
|
retval = False
|
||||||
|
|
||||||
|
if not image == None and self.archive_check(filename):
|
||||||
|
util.log(util.LogType.Msg, "Checking resolution", {"filename":filename, "resolution":image.size})
|
||||||
|
|
||||||
|
for resolution in self.resolutions:
|
||||||
|
resolution = resolution.split('x')
|
||||||
|
foldername = "%s-%s" % (resolution[0],
|
||||||
|
resolution[1])
|
||||||
|
folderpath = os.path.join(download_base,
|
||||||
|
foldername)
|
||||||
|
|
||||||
|
if str(image.size[0]) == resolution[0] and \
|
||||||
|
str(image.size[1]) == resolution[1]:
|
||||||
|
if not os.path.exists(folderpath):
|
||||||
|
os.makedirs(folderpath)
|
||||||
|
|
||||||
|
self.copy(filename, folderpath)
|
||||||
|
break
|
||||||
|
self.archive(filename)
|
||||||
|
self.remove(filename)
|
||||||
|
return retval
|
||||||
|
|
||||||
|
def copy(self, filename, destpath):
|
||||||
|
download_base = self.conf.get_download_location()
|
||||||
|
source = os.path.join(download_base,
|
||||||
|
os.path.basename(filename))
|
||||||
|
|
||||||
|
today = datetime.date.today()
|
||||||
|
dest = os.path.join(destpath,
|
||||||
|
"%d-%d-%d" % (today.year, today.month, today.day))
|
||||||
|
util.log(util.LogType.Msg, "going to copy %s to %s" % (source, dest), None)
|
||||||
|
|
||||||
|
if not os.path.exists(dest):
|
||||||
|
os.makedirs(dest)
|
||||||
|
|
||||||
|
dest = os.path.join(dest,
|
||||||
|
os.path.basename(filename))
|
||||||
|
|
||||||
|
if source != dest:
|
||||||
|
shutil.copy(source, dest)
|
||||||
|
else:
|
||||||
|
print "\nHow can this even happen?! Copying", source, "to", dest
|
||||||
|
|
||||||
|
def archive(self, filename):
|
||||||
|
be = backend.Backend()
|
||||||
|
be.add(os.path.basename(filename))
|
||||||
|
|
||||||
|
def archive_check(self, filename):
|
||||||
|
be = backend.Backend()
|
||||||
|
return be.check(os.path.basename(filename))
|
||||||
|
|
||||||
|
def check_filename(self, filename):
|
||||||
|
ext = os.path.splitext(filename)[1]
|
||||||
|
return ext == ".jpg" or \
|
||||||
|
ext == ".png" or \
|
||||||
|
ext == ".gif"
|
||||||
|
|
||||||
|
def remove(self, filename):
|
||||||
|
download_base = self.conf.get_download_location()
|
||||||
|
source = os.path.join(download_base, filename)
|
||||||
|
os.remove(source)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
conf = config.Configuration()
|
||||||
|
download_base = conf.get_download_location()
|
||||||
|
sorter = Sorter()
|
||||||
|
for item in os.listdir(download_base):
|
||||||
|
sorter.act(item)
|
39
util.py
Normal file
39
util.py
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
class LogType:
|
||||||
|
Non = 0
|
||||||
|
Err = 1
|
||||||
|
Warn = 2
|
||||||
|
Msg = 3
|
||||||
|
@staticmethod
|
||||||
|
def from_int(lloglevel):
|
||||||
|
iloglevel = int(lloglevel)
|
||||||
|
if iloglevel == 0:
|
||||||
|
return LogType.Non
|
||||||
|
if iloglevel == 1:
|
||||||
|
return LogType.Err
|
||||||
|
if iloglevel == 2:
|
||||||
|
return LogType.Warn
|
||||||
|
if iloglevel == 3:
|
||||||
|
return LogType.Msg
|
||||||
|
|
||||||
|
loglevel = LogType.Non
|
||||||
|
|
||||||
|
def raw_input_with_default(default, prompt):
|
||||||
|
inp = raw_input("%s (default=%s): " % (prompt, default))
|
||||||
|
if inp == "":
|
||||||
|
return default
|
||||||
|
return inp
|
||||||
|
|
||||||
|
def log(logtype, message, data = None):
|
||||||
|
global loglevel
|
||||||
|
if loglevel >= logtype:
|
||||||
|
print message
|
||||||
|
if not data is None:
|
||||||
|
print "data:\n\tdata"
|
||||||
|
|
||||||
|
homedir = os.getenv("HOME")
|
||||||
|
if homedir is None:
|
||||||
|
homedir = os.path.dirname(sys.argv[0])
|
||||||
|
confdir = os.path.join(homedir, ".4grab")
|
Loading…
Reference in a new issue