Danbooru Robot
Join the DZone community and get the full member experience.
Join For Free. Download the newest version here: http://untu.ms/danbooru/
'''works with http://danbooru.donmai.us/
automated content downloading by tags
released as public domain
author reinis ivanovs
http://untu.ms/'''
from xml.dom import minidom
from sys import stdout
from optparse import OptionParser
import os, urllib
class SpiderOpener(urllib.FancyURLopener):
    '''URL opener that reports a Firefox user-agent string.

    Some mirrors refuse requests from the default Python user agent;
    masquerading as a regular browser avoids that.
    '''
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1'
# Install the disguised opener so every urllib request uses our user agent.
urllib._urlopener = SpiderOpener()

# Command line: -l result limit, -o starting offset, -s mirror index.
parser = OptionParser(add_help_option=False)
parser.add_option('-l', dest='limit', metavar='NUM', default=1000, type='int')
parser.add_option('-o', dest='offset', metavar='NUM', default=0, type='int')
parser.add_option('-s', dest='server', metavar='NUM', default=0, type='int')
options, args = parser.parse_args()

# Filenames found by spider() that are still waiting to be downloaded.
queue = []

# Positional arguments are danbooru tags; '+' joins them for the query string.
tags = '+'.join([urllib.quote(tag).replace('%2B', '+') for tag in args])

# Known mirror hosts paired with the path prefix their image data lives under.
servers = [
    ('danboofoo.macrochan.org', 'data'),
    ('dan.paramnesiac.net', 'data'),
    ('danbooru.imouto.org', 'data'),
    ('danbooru.darkmirage.com', 'data'),
    ('danbooru.desudesu.org', 'data'),
    ('danbooru.fumbari.com', 'data'),
    ('danbooru.sr3r.net', 'data'),
    ('danbooru.ichijou.org', 'data'),
    ('danbooru.neoragod.net', 'data'),
    ('saguratus.com', 'danbooru/data'),
    ('danbooru.chiisai.net', 'data'),
    ('danbooru.lolitron.org', 'data'),
    ('danbooru.zomgwtf.net', 'data'),
]
server, path = servers[options.server]

api = 'http://danbooru.donmai.us/api/find_posts'

# Downloads are collected in a directory named after the tag query.
if not os.path.exists(tags):
    os.mkdir(tags)
os.chdir(tags)
def reporthook(blocks, blocksize, filesize):
    '''Progress callback for urllib.urlretrieve.

    Rewrites the current terminal line ('\r') with the percentage of the
    file transferred so far, capped at 100 because the final block is
    usually short of a full blocksize.
    '''
    if filesize <= 0:
        # urlretrieve passes -1 when the server sends no Content-Length,
        # and 0 would divide by zero -- no sensible percentage either way.
        return
    stdout.write('\r%d%%' % min(round(float(blocks) * blocksize / filesize, 2) * 100, 100))
def spider(url):
print url
document = minidom.parse(urllib.urlopen(url))
global queue
for child in document.documentElement.childNodes:
if not child.nodeType is 1: continue
queue.append(child.attributes['file_name'].value)
print len(queue)
if len(queue) == 0:
exit()
for i in range(len(queue)):
download()
def download():
global server, path
filename = queue.pop()
url = 'http://%s/%s/%s/%s/%s' % (server, path, filename[0:2], filename[2:4], filename)
print url
if os.path.exists(filename):
print '+'
return
urllib.urlretrieve(url, filename, reporthook)
stdout.write('\n')
def main():
    '''Walk the search results in pages of 100 and download each batch.'''
    global queue, tags, options
    offset, limit = options.offset, options.limit
    # Template with offset/limit slots left open for each page request.
    query = '%s?tags=%s&offset=%%d&limit=%%d' % (api, tags)
    for start in range(offset, limit - 100, 100):
        spider(query % (start, start + 100))
    # Final request covers the remainder past the last full page of 100.
    spider(query % (offset + limit - limit % 100, limit))

if __name__ == '__main__':
    main()
Opinions expressed by DZone contributors are their own.
Comments