2010-03-10 12:13:27 +01:00
|
|
|
#!/usr/bin/python
|
2010-02-09 02:45:56 +01:00
|
|
|
|
|
|
|
######################################################################
|
|
|
|
# Copyright 2009, 2010 ryuslash
|
|
|
|
#
|
|
|
|
# This file is part of 4grab.
|
|
|
|
#
|
|
|
|
# 4grab is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# 4grab is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with 4grab. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
######################################################################
|
|
|
|
|
2010-02-09 01:32:42 +01:00
|
|
|
import optparse
|
|
|
|
import sys
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file cannot be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sort, to sort out old images; running python sorter.py has the same effect, but this seemed prettier.
Theoretically, multiple categories could now be entered into the configuration file separated by ',', but this hasn't been tested yet.
Multiple resolutions could be entered into the configuration file, separated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file; if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
import os
|
2010-02-09 01:32:42 +01:00
|
|
|
|
|
|
|
import config
|
2010-04-19 09:16:04 +02:00
|
|
|
from util import raw_input_with_default
|
2010-03-17 23:11:18 +01:00
|
|
|
|
2010-02-09 02:10:17 +01:00
|
|
|
import download
|
2010-03-07 00:20:37 +01:00
|
|
|
import progressbar
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file cannot be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sort, to sort out old images; running python sorter.py has the same effect, but this seemed prettier.
Theoretically, multiple categories could now be entered into the configuration file separated by ',', but this hasn't been tested yet.
Multiple resolutions could be entered into the configuration file, separated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file; if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
import sorter
|
2010-02-09 01:32:42 +01:00
|
|
|
|
2010-03-17 23:11:18 +01:00
|
|
|
# Install the interactive prompt helper from util as config's option
# creator.  NOTE(review): presumably config calls this to ask the user
# for a value when a required option is missing -- confirm in config.py.
config._optioncreator = raw_input_with_default
|
2010-03-16 23:17:46 +01:00
|
|
|
|
2010-02-11 22:05:37 +01:00
|
|
|
# Root URL of the boards; category names (and "res/" for single
# threads) are appended to it to build the URLs that get downloaded.
base_url = "http://boards.4chan.org/"
|
2010-02-09 01:32:42 +01:00
|
|
|
# Shared command-line parser; its usage text and options are registered
# in parse_commands().
parser = optparse.OptionParser()
|
2010-03-07 00:20:37 +01:00
|
|
|
# Single Downloader instance shared by the wizard, the command-line
# handlers and the default run; it is handed the progressbar.Progress
# class at construction time.
downloader = download.Downloader(progressbar.Progress)
|
2010-02-09 01:32:42 +01:00
|
|
|
|
2010-02-11 23:46:23 +01:00
|
|
|
def walk_with_wizard(baseurl):
|
2010-03-17 23:11:18 +01:00
|
|
|
conf = config.Configuration()
|
2010-02-12 00:34:36 +01:00
|
|
|
wzrd_msg = "Pilates! *SHAZAM* Here they come!"
|
2010-02-11 23:46:23 +01:00
|
|
|
print "Alright, let me put on my robe and wizard hat."
|
|
|
|
|
|
|
|
# Single or all
|
|
|
|
inp = None
|
|
|
|
prompt = "Would you like to download a single thread, or all? "
|
|
|
|
inp = raw_input(prompt)
|
|
|
|
while (inp != "single" and inp != "all"):
|
|
|
|
print "Please type single or all"
|
|
|
|
inp = raw_input(prompt)
|
|
|
|
|
|
|
|
if inp == "single":
|
|
|
|
inp = raw_input("Which thread would you like to download? ")
|
|
|
|
if inp[:7] == "http://":
|
2010-03-07 00:20:37 +01:00
|
|
|
t = downloader.get_image_links("", [inp])
|
2010-02-11 23:46:23 +01:00
|
|
|
else:
|
|
|
|
thread = inp
|
|
|
|
inp = raw_input("Which category is this thread in? ")
|
2010-02-12 00:34:36 +01:00
|
|
|
print wzrd_msg
|
2010-03-12 21:50:10 +01:00
|
|
|
t = downloader.get_image_links("%s%s/res/" % (baseurl, inp),
|
|
|
|
[thread])
|
2010-02-11 23:46:23 +01:00
|
|
|
else:
|
|
|
|
inp = raw_input("Which category would you like to download? ")
|
2010-03-17 23:11:18 +01:00
|
|
|
conf.set_categories([inp])
|
|
|
|
baseurl = "%s%s/" % (baseurl, conf.get_categories()[0])
|
2010-02-11 23:46:23 +01:00
|
|
|
|
2010-02-12 00:34:36 +01:00
|
|
|
print wzrd_msg
|
2010-03-07 00:20:37 +01:00
|
|
|
t = downloader.get_thread_links(baseurl)
|
|
|
|
t = downloader.get_image_links(baseurl, t)
|
|
|
|
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
2010-02-12 00:04:34 +01:00
|
|
|
print "Downloaded: ", downloaded
|
|
|
|
print "Skipped: ", skipped
|
|
|
|
print "Failed: ", failed
|
|
|
|
print "Total: ", total
|
2010-02-11 23:46:23 +01:00
|
|
|
|
2010-03-17 23:11:18 +01:00
|
|
|
def parse_commands():
|
|
|
|
conf = config.Configuration()
|
|
|
|
parser.set_usage(
|
|
|
|
"""%prog [options]
|
2010-02-09 02:45:56 +01:00
|
|
|
|
|
|
|
4grab Copyright (C) 2009-2010 ryuslash
|
|
|
|
This program comes with ABSOLUTELY NO WARRANTY.
|
|
|
|
This is free software, and you are welcome to redistribute it
|
|
|
|
under certain conditions.""")
|
2010-03-17 23:11:18 +01:00
|
|
|
parser.add_option("-e",
|
|
|
|
nargs=2,
|
|
|
|
dest="confval",
|
|
|
|
metavar="CONF VALUE",
|
|
|
|
help="Set configuration option CONF to be VALUE")
|
|
|
|
parser.add_option("-c",
|
|
|
|
"--category",
|
|
|
|
dest="tempcat",
|
|
|
|
metavar="CATEGORY",
|
|
|
|
help="Set the category to CATEGORY only for this run")
|
|
|
|
parser.add_option("-t",
|
|
|
|
"--thread",
|
|
|
|
dest="thread",
|
|
|
|
metavar="THREAD",
|
|
|
|
help="Download only THREAD. If THREAD is only an ID, "
|
|
|
|
"CATEGORY must also be set. Otherwise, no problem :-)")
|
|
|
|
parser.add_option("-w",
|
|
|
|
"--wizard",
|
|
|
|
action="store_true",
|
|
|
|
dest="wizard",
|
|
|
|
help="I'll put on my robe and wizard hat and help you "
|
|
|
|
"get some of those pictures you like")
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file can not be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sorter, to sort out old images, running python sorter.py has the same effect, but this seemed pretties.
theoretically multiple categories could now be entered into the configuration file seperated by ',', but this hasn't been tested yet.
mutliple resolutions could be entered into the configuration file, seperated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file, if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
parser.add_option("-s",
|
|
|
|
"--sort",
|
|
|
|
action="store_true",
|
|
|
|
dest="sort",
|
|
|
|
help="Sort downloaded images, most handy if you've used "
|
|
|
|
"older versions which didn't sort yet")
|
2010-03-17 23:11:18 +01:00
|
|
|
(options, args) = parser.parse_args()
|
|
|
|
|
|
|
|
if options.confval and (options.tempcat
|
|
|
|
or options.thread
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file can not be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sorter, to sort out old images, running python sorter.py has the same effect, but this seemed pretties.
theoretically multiple categories could now be entered into the configuration file seperated by ',', but this hasn't been tested yet.
mutliple resolutions could be entered into the configuration file, seperated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file, if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
or options.wizard
|
|
|
|
or options.sort):
|
2010-03-17 23:11:18 +01:00
|
|
|
print "Can't configure something and do something else too."
|
2010-02-09 01:32:42 +01:00
|
|
|
exit(1)
|
2010-03-17 23:11:18 +01:00
|
|
|
|
|
|
|
if options.wizard and (options.tempcat
|
|
|
|
or options.thread
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file can not be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sorter, to sort out old images, running python sorter.py has the same effect, but this seemed pretties.
theoretically multiple categories could now be entered into the configuration file seperated by ',', but this hasn't been tested yet.
mutliple resolutions could be entered into the configuration file, seperated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file, if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
or options.confval
|
|
|
|
or options.sort):
|
2010-03-17 23:11:18 +01:00
|
|
|
print "Can't take a walk with the wizard and do something else too."
|
|
|
|
exit(1)
|
|
|
|
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file can not be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sorter, to sort out old images, running python sorter.py has the same effect, but this seemed pretties.
theoretically multiple categories could now be entered into the configuration file seperated by ',', but this hasn't been tested yet.
mutliple resolutions could be entered into the configuration file, seperated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file, if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
if options.sort:
|
|
|
|
sort = sorter.Sorter()
|
|
|
|
for item in os.listdir(conf.get_download_location()):
|
|
|
|
sort.act(item)
|
|
|
|
exit(0)
|
|
|
|
|
2010-03-17 23:11:18 +01:00
|
|
|
if options.confval:
|
|
|
|
if not conf.option_exists(options.confval[0]):
|
|
|
|
print ("%s: error: %s is not a valid configuration option"
|
|
|
|
% (sys.argv[0], options.confval[0]))
|
2010-03-12 21:50:10 +01:00
|
|
|
exit(1)
|
2010-03-17 23:11:18 +01:00
|
|
|
print "Setting", options.confval[0], "to", options.confval[1]
|
|
|
|
conf.set_option(options.confval[0],
|
|
|
|
options.confval[1])
|
|
|
|
conf.save()
|
|
|
|
exit(0)
|
|
|
|
|
|
|
|
elif options.wizard:
|
|
|
|
try:
|
|
|
|
walk_with_wizard(base_url)
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
print
|
|
|
|
print "Alright, no more wizard hat and robe then. Goodbye"
|
|
|
|
exit(0)
|
|
|
|
|
|
|
|
elif options.thread:
|
|
|
|
try:
|
|
|
|
if options.thread[:7] == "http://":
|
|
|
|
t = downloader.get_image_links("", [options.thread])
|
|
|
|
elif options.tempcat:
|
|
|
|
url = "%s%s/res/" % (base_url, options.tempcat)
|
|
|
|
t = downloader.get_image_links(url, [options.thread])
|
|
|
|
else:
|
|
|
|
print ("if THREAD is not an absolute URL, "
|
|
|
|
"CATEGORY must also be specified")
|
|
|
|
exit(1)
|
|
|
|
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
|
|
|
print "Downloaded: ", downloaded
|
|
|
|
print "Skipped: ", skipped
|
|
|
|
print "Failed: ", failed
|
|
|
|
print "Total: ", total
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
print
|
|
|
|
print "Goodbye"
|
|
|
|
exit(0)
|
|
|
|
|
|
|
|
elif options.tempcat:
|
|
|
|
conf.set_categories([options.tempcat])
|
|
|
|
|
|
|
|
#base_url = "%s%s/" % (base_url, conf.get_categories())
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
conf = config.Configuration()
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file can not be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sorter, to sort out old images, running python sorter.py has the same effect, but this seemed pretties.
theoretically multiple categories could now be entered into the configuration file seperated by ',', but this hasn't been tested yet.
mutliple resolutions could be entered into the configuration file, seperated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file, if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
sort = sorter.Sorter()
|
2010-03-17 23:11:18 +01:00
|
|
|
parse_commands()
|
Sorting, multi category, multi resolution
After a file has been downloaded a callback function can now be called.
The callback function I call checks to see if the resolution of the image appears in the collection of resolutions that has been entered in the configuration file and deletes/moves accordingly.
If a file can not be read (which I have noticed happens sometimes), it is removed, not copied and not archived so that it can be retried later.
4grab got a new command-line option, -s --sorter, to sort out old images, running python sorter.py has the same effect, but this seemed pretties.
theoretically multiple categories could now be entered into the configuration file seperated by ',', but this hasn't been tested yet.
mutliple resolutions could be entered into the configuration file, seperated by ',' like so: 1680x1050,1920x1200.
Configuration now checks to see if all the necessary properties are available in the configuration file, if one is missing, it tries to create it.
2010-03-19 00:18:04 +01:00
|
|
|
|
|
|
|
downloader.set_on_downloaded(sort.act)
|
2010-03-17 23:11:18 +01:00
|
|
|
for category in conf.get_categories():
|
|
|
|
base_url = "%s%s/" % (base_url, category)
|
|
|
|
try:
|
|
|
|
t = downloader.get_thread_links(base_url)
|
|
|
|
t = downloader.get_image_links(base_url, t)
|
|
|
|
(skipped, failed, downloaded, total) = downloader.get_images(t)
|
|
|
|
print "Downloaded: ", downloaded
|
|
|
|
print "Skipped: ", skipped
|
|
|
|
print "Failed: ", failed
|
|
|
|
print "Total: ", total
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
print
|
|
|
|
print "So you don't want these images? Fine! I'll stop then."
|