#!/usr/bin/python # vim:fileencoding=utf-8:ft=python # # Download files from filehosters # # For end-users: # PyGTK is needed for captchas. If PyGTK is not present, this script will use # ImageMagick's "display" command to show you the captcha image. # # Configuration: # Edit the section "configuration" # If you've got no clue about python: Simply insert your own commands # between the parentheses of the system("") command. They'll be executed when # the specific event (def XXX(..):) occurs. # # For developers: # See the classes (ie. rapidShare). Each class has a canHandle function to # check if the class can handle an URL and a handle function to actually # download the file. Those functions should return True when the download was # successfull. Note that all functions are static! # # import urllib2 import urllib import urlparse import re import os import threading import sys import random import time import getopt import cookielib import optparse import termios import tty import fcntl import select from htmlentitydefs import name2codepoint ## CONFIGURATION {{{ ########################################################### def restartRouter(): """ Restart router to get a new IP """ os.system("routerReset") return True def doneHook(flags): """ Called when a download is finished See handler.HANDLER_* for values of flags """ return if not flags & handler.HANDLER_WAS_REDIRECTED: os.system("aplay -q /usr/share/sounds/k3b_success1.wav 2>&1 >/dev/null &") def errorHook(): """ Called when an error occurred """ return os.system("aplay -q /usr/share/sounds/k3b_error1.wav 2>&1 >/dev/null &") def captchaHook(data, caller): """ Called when a captcha is to be deciphered data holds the raw data of the image! Use caller to check which handler is calling this Return False or the code """ # Anticaptcha does not work ATM if False and caller == "rapidshare" and os.system("which anticaptcha 2>&1 >/dev/null") == 0: # Call anticaptcha fileName = os.path.abspath(impFilename("captcha.jpg")) file = open(fileName, "w") file.write(data) file.close() try: captcha = os.popen("anticaptcha %r --method 22a" % fileName).read().strip() except: os.unlink(fileName) return False os.unlink(fileName) if len(captcha) != 4: return False ui.info("Anticaptcha detected captcha successfully: %s" % captcha) return captcha else: # Play sound to inform the user that interaction is needed os.system("aplay -q /usr/share/sounds/k3b_wait_media1.wav 2>&1 >/dev/null &") return False # Do not edit from here on RSDOWNLOD_VERSION = "1.0" # Install cookie handler for urllib2 {{{ useReferer = "" class HTTPCookieAndRefererProcessor(urllib2.BaseHandler): def __init__(self, cookiejar=None): import cookielib if cookiejar is None: cookiejar = cookielib.CookieJar() self.cookiejar = cookiejar def http_request(self, request): global useReferer if useReferer and not request.has_header("Referer"): request.add_header("Referer", useReferer) self.cookiejar.add_cookie_header(request) return request def http_response(self, request, response): self.cookiejar.extract_cookies(response, request) return response https_request = http_request https_response = http_response cookieJar = cookielib.CookieJar() opener = urllib2.build_opener(HTTPCookieAndRefererProcessor(cookieJar)) opener.addheaders = [('User-agent', 'Mozilla/5.0')] urllib2.install_opener(opener) # }}} # Initialize file list fileList = [] doneList = [] ## }}} ######################################################################### ## USER INTERFACE {{{ ########################################################## class ui(object): hasTermWidth = True _COLOR = { "error": "\033[00;31m", "info": "\033[00;32m", "warn": "\033[00;33m", "info2": "\033[00;34m", "def": "\033[0m" } @staticmethod def _getTerminalWidth(): tsize = 80 if ui.hasTermWidth and sys.stdin.isatty(): oldSettings = termios.tcgetattr(sys.stdin.fileno()) tty.setraw(sys.stdin.fileno()) mfcntl = fcntl.fcntl(sys.stdin.fileno(), fcntl.F_GETFL) fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, os.O_NONBLOCK | mfcntl) print "\033[18t", sys.stdout.flush() size = "" iter = 0 while "t" not in size: fp, foo, foo = select.select([sys.stdin], [], [], 0.2) if sys.stdin in fp: size += sys.stdin.read(10) else: iter += 1 if iter > 10: break if "t" in size: tsize = int(size[size.find(";") + 1:-1]) else: ui.hasTermWidth = False fcntl.fcntl(sys.stdin.fileno(), fcntl.F_SETFL, mfcntl) termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN, oldSettings) return tsize @staticmethod def endProgress(): """ Erase the current console line (End progress()) """ print "\033[2K\r", # Erase line #print "\033[1A", # Move line up #print "\033[2K\r", # Erase line sys.stdout.flush() _progressData = {} @staticmethod def startProgress(): """ Start progress() """ ui._progressData = { 'start': time.time(), 'last': time.time(), 'data': 0, 'lastDsp': time.time() - 10 } @staticmethod def _formatSize(size): units = [ "b", "Kb", "Mb", "Gb" ] ret = units.pop(0) while size > 1024 and len(units) > 0: size /= 1024.0 ret = units.pop(0) return "%03.2f%s" % (size, ret) @staticmethod def _formatTime(seconds): hours = int(seconds / 3600) seconds %= 3600 minutes = int(seconds / 60) seconds %= 60 return "%02d:%02d:%02d" % (hours, minutes, seconds) @staticmethod def progress(done, length): """ Diplay a progress message """ timeNow = time.time() timeElapsed = timeNow - ui._progressData["start"] if timeElapsed == 0: avgSpeed = 0 else: avgSpeed = done * 1.0 / timeElapsed timeLast = ui._progressData["last"] if timeNow - timeLast == 0: speed = 0 else: speed = (done - ui._progressData["data"]) * 1.0 / (timeNow - timeLast) ui._progressData["last"] = time.time() ui._progressData["data"] = done if length > 0: pdone = done * 1.0 / length timeToGo = 0 if pdone > 0: timeToGo = (timeElapsed / pdone) - timeElapsed info = " %02d%% %s/%s %8s/s %s" % (pdone * 100, ui._formatSize(done), ui._formatSize(length), ui._formatSize(speed), ui._formatTime(timeToGo)) if sys.stdout.isatty(): if ui._progressData["lastDsp"] + 0.5 > timeNow: return ui._progressData["lastDsp"] = timeNow print "\033[2K\r", # Erase line if length > 0: barWidth = ui._getTerminalWidth() - len(info) - 5 - 2 barFilled = int(barWidth * pdone) bar = "" if barFilled > 0: bar = "-" * (barFilled - 1) + ">" bar += " " * (barWidth - len(bar)) bar = "[%s]" % bar print " %s%s" % (bar, info), else: print " %s downloaded, %8s/s" % (ui._formatSize(done), ui._formatSize(speed)), else: if ui._progressData["lastDsp"] + 5 < timeNow: ui._progressData["lastDsp"] = timeNow if length > 0: print info else: print " %s downloaded, %8s/s" % (ui._formatSize(done), ui._formatSize(speed)) sys.stdout.flush() @staticmethod def _gt(color): """ Return a colored ">>>" string """ if sys.stdout.isatty(): return "%s>>>%s" % (ui._COLOR[color], ui._COLOR["def"]) else: return ">>>" @staticmethod def error(message): """ Output error message """ print >> sys.stderr, ui._gt("error"), message @staticmethod def info(message, type=1): """ Output info message Don't use the type parameter, it's meant for use in this script's main function only! """ if type == 1: print ui._gt("info"), message else: print ui._gt("info2"), message @staticmethod def warn(message): """ Output warning message """ print ui._gt("warn"), message @staticmethod def wait(message, seconds): """ Wait for something. Message should contain a %d (which will be replaced by the remaining seconds) """ if sys.stdout.isatty(): while seconds > 0: print "\033[2K\r", # Erase line print ui._gt("warn"), message % seconds, sys.stdout.flush() seconds -= 1 time.sleep(1) print "\033[2K\r", # Erase line else: print ui._gt("warn"), message % seconds sys.stdout.flush() while seconds > 0: seconds -= 1 time.sleep(1) sys.stdout.flush() @staticmethod def getCaptcha(url, caller=None): """ Decipher the captcha from the given URL. Will execute the function captchaHook or, if it fails, display a GTK+ dialog, asking for the code (with console / imagemagick as a fallback) """ try: dataObject = urllib2.urlopen(url) data = dataObject.read() except: ui.error("Failed to load captcha ") return False # Try hook first response = captchaHook(data, caller) if response: return response # Then try GTK+ try: import gtk loader = gtk.gdk.PixbufLoader() try: loader.write(data) loader.close() except: ui.error("Failed to load captcha ") return False dialog = gtk.Dialog(u"Get captcha", buttons=(gtk.STOCK_OK, gtk.RESPONSE_ACCEPT)) image = gtk.Image() image.set_from_pixbuf(loader.get_pixbuf()) dialog.vbox.add(image) entry = gtk.Entry() def returnOnReturn(widget, event): if event.keyval == gtk.keysyms.Return: dialog.response(gtk.RESPONSE_ACCEPT) entry.connect("key-press-event", returnOnReturn) dialog.vbox.add(entry) dialog.show_all() dialog.run() dialog.hide() gtk.main_iteration() gtk.main_iteration() if entry.get_text() == "": return False return entry.get_text() except: return False # Try console finally ui.info("Please enter the captcha from the following image") display = os.popen("display -", "w") display.write(data) display.close() return raw_input(" Captcha: ").strip() ## }}} ######################################################################### ## BASE CLASSES {{{ ############################################################ class handler(object): HANDLER_WAS_REDIRECTED = 2 ** 0 options = {} # Will be set by __main__ NAME = "Base handler" @staticmethod def canHandle(url): """ Return if this handler is able to handle this url """ pass @staticmethod def handle(url): """ Handles an URL May append files to the global download list Returns either - True: Successful download - False: Download failed - bool, bitmask: Additional information, see constants above """ pass ## }}} ######################################################################### ## HELPER FUNCTIONS {{{ ######################################################## def matchGet(rex, string): """ Match regular expresion rex on string Returns - False if rex does not match anywhere in string - The matched part of string if rex does not contain parentheses - The first group if only one group exists - A touple with all group values elsewise """ match = re.search(rex, string) if match: if len(match.groups()) == 0: return string[match.span()[0]:match.span()[1]] if len(match.groups()) == 1: return match.groups()[0] else: return match.groups() return False class WouldDispositException(Exception): pass def GET(url, changeReferer=False): """ Place a GET request on URL, return data changeReferer will store the URL and use it as a referer header for all further requests (during this download) """ try: dataObject = urllib2.urlopen(url) if "Content-Disposition" in dataObject.info(): raise WouldDispositException() if changeReferer: global useReferer useReferer = url return dataObject.read() except WouldDispositException: raise WouldDispositException() except: ui.error("Failed to download %s" % url) return False def POST(url, data, changeReferer=False): """ Place a POST request with data on URL, return data changeReferer will store the URL and use it as a referer header for all further requests (during this download) """ try: if changeReferer: global useReferer useReferer = url return urllib2.urlopen(url, data).read() except: ui.error("Failed to download %s" % url) return False def addDownload(url, redirected=False): """ Add a download to the download list """ global fileList fileList.insert(0, url) if not redirected: ui.info("Added new download %s" % url) def download(url, postData = None, targetFileName = None): """ Download url to targetFileName (will be extracted from url / the content-disposition header if omitted) Returns False on failure and the file name elsewise. """ filename = False targetFile = None targetFileName = None if 1: #try: request = urllib2.Request(url) if postData: request.add_data(postData) urlobject = urllib2.urlopen(request) headers = urlobject.info() if "Content-Disposition" in headers: dispositionHeader = headers["Content-Disposition"] targetFileName = matchGet("filename=(.)(.+)\\1$", dispositionHeader) if targetFileName: targetFileName = targetFileName[1] if not targetFileName: targetFileName = os.path.basename(urlobject.geturl()) if "?" in targetFileName: targetFileName = targetFileName[:targetFileName.index("?")] targetFileName = impFilename(targetFileName) targetFile = open(targetFileName, "w") if "Content-Length" in headers: fileLength = int(headers["Content-Length"]) else: fileLength = -1 ui.startProgress() size = 0 while True: data = urlobject.read(1024 * 8) if not data: break size += len(data) targetFile.write(data) ui.progress(size, fileLength) ui.endProgress() del targetFile return targetFileName #except: try: if targetFile: del targetFile if targetFileName and os.access(targetFileName, os.F_OK): os.unlink(targetFileName) except: pass print ui.error("Download failed: %s" % sys.exc_info()[1]) return False def impFilename(fileName): """ Return fileName or, if this file does already exists, fileName~nnn. """ i = 0 if os.access(fileName, os.F_OK): fileName = "%s~%%d" % fileName while os.access(fileName % i, os.F_OK): i += 1 fileName = fileName % i return fileName def getFileSize(name): return os.stat(name)[6] def htmlEntityDecode(text): """ Decode HTML entities """ def entSubst(arg): if arg.group(1) == "#": return unichr(int(arg.group(2))) else: cp = name2codepoint.get(arg.group(2)) if cp: cp = unichr(cp) else: cp = arg.group(2) return cp return re.sub("&(#?)(\d{1,5}|\w{1,8});", entSubst, text) def getScriptPath(): return os.path.abspath(os.path.dirname(sys.argv[0])) ## }}} ######################################################################### ## BLOG REDIRECTOR {{{ ######################################################### class blogRedirector(handler): NAME = "Blog Redirector Service" @staticmethod def canHandle(url): return matchGet("^http://[^/]+/(?:nl|ff|rc|ul).+", url) != False @staticmethod def handle(url): page = GET(url) frame = matchGet('src="(.+?)"', page) try: redirector = urllib2.urlopen(urlparse.urljoin(url, frame)) redirectUrl = redirector.geturl() redirector.close() if "error_traffic_exceeded_free" in redirectUrl: ui.warn("Download limit exceeded. Restarting router...") routerReset() return blogRedirector.handle(url) except: return False if url: addDownload(redirectUrl, True) return True, handler.HANDLER_WAS_REDIRECTED return False ## }}} ######################################################################### ## EASY-SHARE.COM {{{ ########################################################## class easyShareCom(handler): NAME = "easy-share.com" @staticmethod def canHandle(url): return matchGet("^http://(?:www\.|w[0-9]+\.)?easy-share.com/[0-9]+", url) != False @staticmethod def handle(url): data = GET(url, changeReferer=True) if not 'src="/kaptcha' in data: wait = matchGet("w='([0-9]+)'", data) captchaAjax = matchGet("u='([^']+)'", data) if not wait or not captchaAjax: print data ui.error("Failed to get download information") return False ui.wait("Waiting %d seconds for download", int(wait)) data = GET(urlparse.urljoin(url, captchaAjax)) downloadUrl = matchGet('action="([^"]+)"', data) captcha = matchGet('img src="(/kaptcha[^"]+)', data) if not downloadUrl or not captcha: ui.error("Failed to get download information from AJAX form") return False captcha = urlparse.urljoin(url, captcha) print captcha form = { 'id': matchGet('name="id" value="([^"]+)"', data) } form["captcha"] = ui.getCaptcha(captcha) fileName = download(downloadUrl, urllib.urlencode(form)) if fileName and getFileSize(fileName) < 1024*25: content = open(fileName).read() if "and earn money." in content: ui.error("Download failed. Captcha wrong?") os.unlink(fileName) return False return fileName ## }}} ######################################################################### ## LETITBIT.NET {{{ ############################################################ class letItBitNet(handler): NAME = "letitbit.net" @staticmethod def canHandle(url): return matchGet("^http://(?:www\.)?letitbit.net/download/", url) != False @staticmethod def handle(url): data = GET(url) form = {} form["uid"] = matchGet('name="uid" value="([^"]{13}[^"]+)"', data) form["frameset"] = "Download file" form["fix"] = 1 data = POST("http://letitbit.net/download3.php", urllib.urlencode(form)) url2 = matchGet('src="(/tmpl/tmpl_frame_top[^"]+)', data) if not url2: ui.error("Page syntax error") return False data = GET('http://letitbit.net%s' % url2) downloadUrl = matchGet( '([0-9]+)<', data) if wait: ui.wait("Waiting %d seconds for download", int(wait)) return download(downloadUrl) ## }}} ######################################################################### ## FILES.AG {{{ ################################################################ class filesAg(handler): NAME = "files.ag" @staticmethod def canHandle(url): return matchGet("^http://(?:www\.)?files.ag/files/[0-9]+/.+", url) != False @staticmethod def handle(url): data = POST(url, "start=Free") downloadUrl = matchGet('action="([^"]+)"', data) if not downloadUrl: ui.error("Failed to get download url") return False wait = matchGet('download-timeout">([^<]+)<', data) if wait: ui.wait("Waiting %d seconds for download slot", int(wait)) return download("http://files.ag" + downloadUrl) ## }}} ######################################################################### ## ANONYM.TO {{{ ############################################################### class anonymTo(handler): NAME = "Anonym.To Redirector" @staticmethod def canHandle(url): return matchGet("^http://(?:www\.)?anonym.to", url) != False @staticmethod def handle(url): match = matchGet("^http://(?:www\.)?anonym.to/\?(.+)$", url) if match: addDownload(urllib.unquote(match), True) return True, handler.HANDLER_WAS_REDIRECTED return False ## }}} ######################################################################### ## DEPOSITFILES.COM {{{ ######################################################## class depositfilesCom(handler): NAME = "depositfiles.com" @staticmethod def canHandle(url): return matchGet("^http://(?:www\.)?depositfiles.com", url) != False @staticmethod def handle(url): data = GET(url) target = matchGet(']+action="(.+)" [^>]+ id="gateway_form"', data) if not target: ui.error('Failed to get download form') return False data = POST("http://depositfiles.com" + target, "gateway_result=1") wait = int(matchGet('id="download_waiter_remain">([^<]+)<\/', data)) formData = {} for field in ( "file_password", "gateway_result", "icid", "go" ): formData[field] = matchGet('name="%s" value="([^"]+)"' % field, data) if formData[field] == None: ui.error("Failed to get download field '%s'" % field) return False if matchGet("var img_code_url = '([^']+)'", data): captcha = "http://depositfiles.com" + matchGet("var img_code_url = '([^']+)'", data) + "&rnd=1211011667" formData["img_code"] = ui.getCaptcha(captcha, "depositfilesCom") ui.wait("Waiting %d seconds for download", wait + 2) alt = matchGet('dwnsrc = "([^"]+)"', data) if alt: return download(alt) else: return download("http://depositfiles.com" + target, urllib.urlencode(formData)) ## }}} ######################################################################### ## BINLOAD.TO {{{ ############################################################## class binloadTo(handler): NAME = "binload.to" @staticmethod def canHandle(url): return matchGet("^http://(www\.)?binload.to/file/[a-z0-9]+", url) != False @staticmethod def handle(url): data = GET(url) if not data: ui.error("Failed to load %s" % url) return False urlData = matchGet('action="(http://[^.]+.binload.to/download.php[^"]+)"', data) dlSession = matchGet('dlSession"\s+value="([^"]+)"', data) fileName = matchGet('Download: (.+?)<\/h3>', data) if not urlData or not fileName or not dlSession: ui.error("Failed to get download link from binload.to page") return False wait = matchGet('cdLength = ([0-9]+)', data) if wait: # Ugly but needed - netload adds some time to the time given in the JS oO ui.wait("Waiting %d seconds for download ticket.", int(wait) * 2) postData = "dlSession=%s&down=Download&login_nickname=&login_passwort=" % dlSession downloadedFile = download(urlData, postData, fileName) if not downloadedFile: return False if getFileSize(downloadedFile) < 1024 ** 2: data = open(downloadedFile).read() msg = matchGet('^([^<]+)<\/font>', data) if not fid: ui.error("Failed to get download link") return False captcha = ui.getCaptcha("http://www.fast-load.net//includes/captcha.php", "fastloadNet") if not captcha: return False downloadedFile = download("http://www.fast-load.net//download.php", "fid=%s&captcha_code=%s" % (fid, captcha), file) if not downloadedFile: return False if getFileSize(downloadedFile) < 1024 ** 2: data = open(downloadedFile).read() msg = matchGet('^wrong captcha', data) ui.error("Wrong captcha") return False return True ## }}} ######################################################################### ## DATENSCHLEUDER.CC {{{ ####################################################### class datenschleuderCC(handler): NAME = "datenschleuder.cc Redirector" @staticmethod def canHandle(url): return matchGet("^http://(?:www\.)?datenschleuder.cc", url) != False @staticmethod def handle(url): data = GET(url) if not data: ui.error("Failed to load %s" % url) return False if handler.options.prefer == "rapidshare": mirror = "rapidshare" dlUrl = 'http://rapidshare.com/[^"]+' elif handler.options.prefer == "uploaded.to": mirror = "uploaded" dlUrl = 'http://(?:www\.)?uploaded.to/[^"]+' elif handler.options.prefer == "netload.in": mirror = "netload" dlUrl = 'http://netload.in/[^"]+' redir = matchGet('http://www.datenschleuder.cc/redir.php\?\S+mirror=%s[^"]+' % mirror, data) if not redir: ui.error("Failed to get download frame link") return False data = GET(redir) if not data: ui.error("Failed to load %s" % redir) return False redir = matchGet(dlUrl, data) if not redir: ui.error("Failed to get download link") return False addDownload(redir) return True, handler.HANDLER_WAS_REDIRECTED ## }}} ######################################################################### ## FREE-CLIPS.CH {{{ ########################################################### class freeClipsCh(handler): NAME = "free-clips.ch Redirector" @staticmethod def canHandle(url): return matchGet("^http://(?:www.)?free-clips.ch/", url) != False @staticmethod def handle(url): data = GET(url) links = matchGet('