apache log extractor

io.kent · February 19, 2014

#!/usr/bin/python
# coding: utf-8

# Apache Log Extractor
#
# Chris John Riley
# blog.c22.cc
#
# 27/07/2011
#
# Version: 0.4 Alpha
#
# PoC script... use at your own risk...
#
# 0.1 Initial Version
# 0.2 Added wordlist support
# 0.3 Added verbose, filter ... directories
# 0.4 Added Basic Auth username extraction where present

import sys, os, re

# Start-up banner printed by main(): an ASCII-art title followed by the
# version, tool name, author and blog lines.  The \x1B[34;40m ... \x1B[0m
# sequences are ANSI colour escapes (set colours, then reset).  This is a
# regular (non-raw) string, so \x1B, \\ and \n are processed as escapes.
logo = '''

                            _           _                 ______      _               _          
      /\                    | |       | |                |  ____|   | |             | |        
     /  \  _ __  __ _  ___| |__  ___  | |    ___  __ _  | |__  __  _| |_ _ __ __ _  ___| |_ ___  _ __
    / /\ \ | '_ \ / _` |/ __| '_ \ / _ \ | |    / _ \ / _` | |  __| \ \/ / __| '__/ _` |/ __| __/ _ \| '__|
  / ____ \| |_) | (_| | (__| | | |  __/ | |___| (_) | (_| | | |____ >  <| |_| | | (_| | (__| || (_) | |
  /_/   \_\ .__/ \__,_|\___|_| |_|\___| |______\___/ \__, | |______/_/\_\\__|_|  \__,_|\___|\__\___/|_|
          | |                                        __/ |                                             
          |_|                                       |___/                                              

                                                    [\x1B[34;40mv0.4\x1B[0m]

                                                                      _/ Apache Log Extractor \x1B[34;40m?\x1B[0m
                                                                              _/ ChrisJohnRiley \x1B[34;40m?\x1B[0m
                                                                                _/ blog.c22.cc \x1B[34;40m?\x1B[0m\n'''


def _ordered_unique(items):
    """Return the distinct values of *items*, keeping first-seen order."""
    seen = set()
    distinct = []
    for item in items:
        if item not in seen:
            seen.add(item)
            distinct.append(item)
    return distinct


def _parse_log_lines(lines):
    """Collect requested URLs and basic-auth usernames from log lines.

    For every line, the text following the first GET/POST token is taken as
    the URL.  The third space-separated field before the first "[" is taken
    as the username, but only when it is neither "-" nor empty and the first
    field is a dotted-quad IPv4 address.

    Returns a ``(urls, users)`` tuple of lists in file order, duplicates kept.
    """
    request_re = re.compile(r'(GET|POST)\s(.+?)\s', re.IGNORECASE)
    ip_re = re.compile(
        r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}"
        r"([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
    )

    urls = []
    users = []
    for line in lines:
        request = request_re.search(line)
        if request:
            urls.append(request.group(2))

        # Everything before the first "[" (the timestamp) holds the
        # host / ident / user fields.
        fields = line.split("[")[0].split(" ")
        if len(fields) > 2 and fields[2] not in ("-", "") and ip_re.match(fields[0]):
            users.append(fields[2])
    return urls, users


def _abort_on_io_failure():
    """Print the I/O failure notice and exit with status 1."""
    print(" [\x1B[34;40m!\x1B[0m] Failed to open input/output files \n [\x1B[34;40m!\x1B[0m] Exiting!\n")
    sys.exit(1)


def _write_lines_or_abort(path, lines):
    """Write each entry of *lines* to *path*, one per line; abort on I/O error."""
    try:
        with open(path, 'w') as handle:
            handle.writelines(entry + '\n' for entry in lines)
    except (IOError, OSError):
        _abort_on_io_failure()


def main():
    """Extract URLs, directory names and basic-auth users from a log file.

    Command line: ``<script> log_file.log [-v]``.  Results are written next
    to the log file as ``<log>.output`` (unique URLs), ``<log>.wordlist``
    (unique directory names) and, only when usernames were found,
    ``<log>.users``.

    Exits with status 1 when no log file is given, when ``<log>.output``
    already exists, or when a file cannot be opened.
    """
    print(logo)
    if len(sys.argv) < 2:
        print(" [\x1B[34;40m!\x1B[0m] Use " + sys.argv[0] + " log_file.log \n [\x1B[34;40m!\x1B[0m] Use -v for verbose mode")
        sys.exit(1)
    print(" [\x1B[34;40m \x1B[0m] Analysing the log file....\n")

    logfile = sys.argv[1]
    outfile = logfile + ".output"
    wordfile = logfile + ".wordlist"
    userfile = logfile + ".users"

    # Always bound, even when a second argument other than -v is supplied.
    verbose = len(sys.argv) > 2 and "-v" in sys.argv[2]
    if verbose:
        print(" [\x1B[34;40m+\x1B[0m] Verbose mode active\n")

    # Refuse to overwrite the results of an earlier run.
    if os.path.exists(outfile):
        print(" [\x1B[34;40m!\x1B[0m] Output file already exists \n [\x1B[34;40m!\x1B[0m] Exiting!\n")
        sys.exit(1)

    # Scan the logfile and extract the required sections
    try:
        with open(logfile, 'r') as log_handle:
            urls, users = _parse_log_lines(log_handle)
    except (IOError, OSError):
        _abort_on_io_failure()

    print(" [\x1B[34;40m \x1B[0m] Extracting URLs from logfile : " + logfile + "\n")
    if verbose:
        print("\n")

    # The .../ check ignores incomplete (truncated) paths in the logfile.
    unique = _ordered_unique(
        url for url in urls if url != "*" and ".../" not in url
    )
    if verbose:
        for url in unique:
            print(" [\x1B[34;40m \x1B[0m] Extracted URL :  " + url)
    _write_lines_or_abort(outfile, unique)

    if verbose:
        print("\n")
    print(" [\x1B[34;40m \x1B[0m] Extracting directory names from logfile\n")

    words = []
    for url in unique:
        path = url.split('?')[0]            # Strip off parameters
        # The last path component is the file name, not a directory.
        words.extend(
            part for part in path.split('/')[:-1]
            if part != "" and "..." not in part
        )
    uniqueword = _ordered_unique(words)
    if verbose:
        for word in uniqueword:
            print(" [\x1B[34;40m \x1B[0m] Extracted Word :  " + word)
    _write_lines_or_abort(wordfile, uniqueword)

    if verbose:
        print("\n")
    print(" [\x1B[34;40m \x1B[0m] Extracting basic auth usernames from logfile : " + logfile + "\n")

    unique2 = _ordered_unique(user for user in users if user != " ")
    if verbose:
        for user in unique2:
            print(" [\x1B[34;40m \x1B[0m] Extracted basic auth username :  " + user)

    # The users file is only created when at least one username was found.
    if unique2:
        _write_lines_or_abort(userfile, unique2)

    if verbose:
        print("\n")
    print(" [\x1B[34;40m+\x1B[0m] Extracted paths to : \x1B[34;40m" + outfile + "\x1B[0m [" + str(len(unique)) + "]\n")
    print(" [\x1B[34;40m+\x1B[0m] Extracted directory names to : \x1B[34;40m" + wordfile + "\x1B[0m [" + str(len(uniqueword)) + "]\n")
    if unique2:
        print(" [\x1B[34;40m+\x1B[0m] Extracted basic auth usernames to : \x1B[34;40m" + userfile + "\x1B[0m [" + str(len(unique2)) + "]\n")

    print("\n [\x1B[34;40m \x1B[0m] Thanks for flying \x1B[34;40mC22\x1B[0m airways: Your ticket to the skies!\n")

# Run the extractor only when this file is executed as a script.
if __name__ == "__main__":
    main()

Sign In

apache log extractor

Recommended Posts

io.kent

Join the conversation

Browse

Activity

Pages