Pberndt V4

Direkt zum Inhalt springen


Quellcode store.http.downloads.py

Beschreibung

Eine Spielerei aus der nice-2-have Ecke: Speichert ALLE per HTTP auf Port 80 übertragenen Dateien in Echtzeit. Zum Beispiel FLV-Filme und MP3-Dateien.

Sourcecode

#!/usr/bin/env python2
import os
import re
import socket
import subprocess
import sys
import tempfile
import urllib2
from StringIO import StringIO

import dpkt
from pcap import pcap

# Command line handling
if len(sys.argv) < 2:
    print "http.store.py - Use PCAP to store all HTTP downloads to disc."
    print "Copyright (c) 2009, Phillip Berndt"
    print
    print "Syntax: http.store.py [-n] <interface|dumpFile> [filter-regex]"
    print
    print "Note: I will store all data in memory, even if it will be filtered"
    print " Don't download large files while running this program."
    print
    sys.exit(0)

dumpFile = sys.argv[1]
if not os.access(dumpFile, os.F_OK):
    print "File %s not found. Assuming it's a device's name" % dumpFile
    print "Starting dumpcap (from wireshark). Press Ctrl-c to end capturing."
    print "You should wait ~15 seconds after your last download before ending it!"
    print
    device = dumpFile
    dumpFile = tempfile.mktemp()
    dumpcap = os.popen("dumpcap -i %s -f 'tcp port 80' -w %r" % (device, dumpFile))
    try:
        dumpcap.read()
    except:
        dumpcap.close()

if len(sys.argv) > 2:
    FILTER = sys.argv[2]
else:
    FILTER = False

# urllib2 is reused purely as an HTTP response parser. For that it is handed
# the stand-in socket class below, which never touches the network: reads are
# served from whatever file-like object is currently stored in the
# module-level fakeSocketReaderFile.
fakeSocketReaderFile = False


class fakeSocket():
    """Socket look-alike whose reads come from fakeSocketReaderFile.

    Every constructor argument is ignored. All socket operations other than
    makefile() and recv() are replaced by a do-nothing method.
    """

    def _foo(*_ignored):
        # Accepts anything, does nothing
        return None

    def __init__(self, *_ignored):
        stubbedNames = (
            'accept', 'bind', 'close', 'connect', 'connect_ex', 'dup',
            'family', 'fileno', 'getpeername', 'getsockname', 'getsockopt',
            'gettimeout', 'listen', 'proto', 'recv_into', 'recvfrom',
            'recvfrom_into', 'send', 'sendall', 'sendto', 'setblocking',
            'setsockopt', 'settimeout', 'shutdown',
        )
        noop = self._foo
        for stubbedName in stubbedNames:
            setattr(self, stubbedName, noop)

    def makefile(*_ignored):
        # Hand out the currently installed reader, whatever the mode arguments
        return fakeSocketReaderFile

    def recv(self, toRead):
        data = fakeSocketReaderFile.read(toRead)
        return data
# Swap the socket class in the socket module that urllib2 imported, so that
# the connection urllib2 opens is a fakeSocket instead of a real one.
urllib2.socket.socket = fakeSocket

# Process a connection
def process(conn):
    global fakeSocketReaderFile
    fromData = reduce(lambda x,y: x + y[1], sorted(conn["from"].items()), "")
    toData  = reduce(lambda x,y: x + y[1], sorted(conn["to"].items()), "")

    file = re.search("(?:GET|POST) (/[^ ]*) HTTP", toData)
    host = re.search("Host: (.+)", toData)
    if not (file and host):
        print "Received something weird: "
        print toData
        print "---"
        return
    uri = "http://" + host.group(1).strip() + re.sub("%([0-9A-F]{2})", lambda x: chr(int(x.group(1), 16)), file.group(1)).strip()

    print "Processing URI ", uri, ": ",
    sys.stdout.flush()
    if not re.search("HTTP.+ 200", fromData):
        print "Replied non 200. Ignoring"
        return
    if FILTER:
        if not re.search(FILTER, uri):
            print "filtered"
            return
   
    fileName = os.path.basename(uri)
    if not fileName:
        fileName = "index"
    fileName = fileName[:250]
    if os.access(fileName, os.F_OK):
        add = 0
        while(os.access("%s~%d" % (fileName, add), os.F_OK)): add += 1
        fileName = "%s~%d" % (fileName, add)
    fakeSocketReaderFile = StringIO(fromData)
    open(fileName, "w").write(urllib2.urlopen("http://127.0.0.1/" + fileName).read())
    fileIsA = os.popen("file %r" % fileName).read()
    if "gzip" in fileIsA:
        os.rename(fileName, fileName + ".gz")
        os.system("gzip -d %r" % fileName)
    if "bzip" in fileIsA:
        os.rename(fileName, fileName + ".bz")
        os.system("bzip2 -d %r" % fileName)
    fakeSocketReaderFile = False

    print "ok -> ", fileName

# Open data stream
print "Opening dumpFile %s" % dumpFile
listener = pcap(dumpFile)
listener.setfilter("tcp port 80")
os.unlink(dumpFile)
connections = {}

shift = 0
for ts, pkt in listener:
    # Extract TCP packet
    # Heuristic to avoid having to implement all transport layer formats
    firstTry = True
    tcp = None
    while True:
        try:
            ip = dpkt.ip.IP(pkt[shift:])
            assert(ip.p == 6)
            tcp = ip.data
            tcp.dport == 1
        except:
            tcp = None
        if firstTry:
            shift = 0
            firstTry = False
            continue
        if tcp and (tcp.dport == 80 or tcp.sport == 80): break
        if shift > len(pkt): break
        shift += 1
    if not tcp or tcp.dport != 80 and tcp.sport != 80:
        continue

    # Generate a unique identifier
    identifier = (tcp.sport if tcp.sport != 80 else tcp.dport)

    # Store data in connections
    if tcp.data:
        if identifier not in connections:
            connections[identifier] = { "from": {}, "to": {}, "state": 0, "fromlen": 0 }
        #if tcp.seq in connections[identifier][("from" if tcp.sport == 80 else "to")]:
        #    print "WARNING: Double content"
        #connections[identifier][("from" if tcp.sport == 80 else "to")][tcp.seq] = tcp.data
        if tcp.sport == 80:
            connections[identifier]["fromlen"] += len(tcp.data)
        if tcp.seq not in connections[identifier][("from" if tcp.sport == 80 else "to")]:
            connections[identifier][("from" if tcp.sport == 80 else "to")][tcp.seq] = ""
        connections[identifier][("from" if tcp.sport == 80 else "to")][tcp.seq] += tcp.data

    if identifier not in connections:
        continue
   
    # If the connection is closed, process it
    if tcp.flags & dpkt.tcp.TH_ACK and connections[identifier]["state"] & (2 if tcp.sport == 80 else 1):
        connections[identifier]["state"] |= (1 if tcp.sport == 80 else 2) << 2
    if tcp.flags & (dpkt.tcp.TH_FIN | dpkt.tcp.TH_RST):
        connections[identifier]["state"] |= 1 if tcp.sport == 80 else 2
    if connections[identifier]["state"] == 15:
        print "Received %d bytes" % connections[identifier]["fromlen"]
        process(connections[identifier])
        del connections[identifier]

Download

Dateiname
store.http.downloads.py
Größe
5.11kb