Jump to content
io.kent

apache log extractor

Recommended Posts

Posted
#!/usr/bin/python
# coding: utf-8

# Apache Log Extractor
#
# Chris John Riley
# blog.c22.cc
#
# 27/07/2011
#
# Version: 0.4 Alpha
#
# PoC script... use at your own risk...
#
# 0.1 Initial Version
# 0.2 Added wordlist support
# 0.3 Added verbose, filter ... directories
# 0.4 Added Basic Auth username extraction where present

import sys, os, re

# Start-up banner printed by main() before any other output.
# The \x1B[34;40m ... \x1B[0m sequences are ANSI SGR escapes (blue foreground on
# a black background, then reset), so the string must stay a normal (non-raw)
# literal for them to be interpreted.
# NOTE(review): the '?' characters after each "_/" line look like glyphs that
# were lost in an encoding conversion, and the art's column alignment appears
# to have been collapsed -- confirm against the upstream script.
logo = '''

_ _ ______ _ _
/\ | | | | | ____| | | | |
/ \ _ __ __ _ ___| |__ ___ | | ___ __ _ | |__ __ _| |_ _ __ __ _ ___| |_ ___ _ __
/ /\ \ | '_ \ / _` |/ __| '_ \ / _ \ | | / _ \ / _` | | __| \ \/ / __| '__/ _` |/ __| __/ _ \| '__|
/ ____ \| |_) | (_| | (__| | | | __/ | |___| (_) | (_| | | |____ > <| |_| | | (_| | (__| || (_) | |
/_/ \_\ .__/ \__,_|\___|_| |_|\___| |______\___/ \__, | |______/_/\_\\__|_| \__,_|\___|\__\___/|_|
| | __/ |
|_| |___/

[\x1B[34;40mv0.4\x1B[0m]

_/ Apache Log Extractor \x1B[34;40m?\x1B[0m
_/ ChrisJohnRiley \x1B[34;40m?\x1B[0m
_/ blog.c22.cc \x1B[34;40m?\x1B[0m\n'''


# Request line inside a log entry, e.g. "GET /path HTTP/1.1"; group 2 is the path.
_REQUEST_PATTERN = re.compile(r'(GET|POST)\s(.+?)\s', re.IGNORECASE)

# Dotted-quad IPv4 address with every octet limited to 0-255.
_VALID_IP_PATTERN = re.compile(
    r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}"
    r"([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
)

# Message shown whenever one of the input/output files cannot be opened.
_OPEN_FAILED = " [\x1B[34;40m!\x1B[0m] Failed to open input/output files \n [\x1B[34;40m!\x1B[0m] Exiting!\n"


def _say(text):
    """Write *text* followed by a newline to stdout (same on Python 2 and 3)."""
    sys.stdout.write(text + "\n")


def _dedupe(items):
    """Return *items* as a list without duplicates, keeping first-seen order."""
    seen = set()
    ordered = []
    for item in items:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered


def _auth_user(line):
    """Return the basic-auth user name of a log *line*, or "" if there is none.

    The part of the line before the first "[" is split on single spaces; the
    third field is taken as the user name, and it is only accepted when the
    first field is a valid IPv4 address.
    """
    fields = line.split("[")[0].split(" ")
    if len(fields) > 2 and fields[2] not in ("-", "") and _VALID_IP_PATTERN.match(fields[0]):
        return fields[2]
    return ""


def _scan_log(lines):
    """Collect (request paths, auth user names) from an iterable of log lines."""
    paths = []
    users = []
    for line in lines:
        request = _REQUEST_PATTERN.search(line)
        if request:
            paths.append(request.group(2))
        # Computed afresh for every line so no value leaks in from an earlier
        # line (and nothing is left unbound on the first one).
        user = _auth_user(line)
        if user:
            users.append(user)
    return paths, users


def _directory_names(urls):
    """Return the unique directory components of *urls* in first-seen order."""
    names = []
    for url in urls:
        path = url.split('?')[0]            # Strip off parameters
        for part in path.split('/')[:-1]:   # Last component is the file name
            if part != "" and "..." not in part:
                names.append(part)
    return _dedupe(names)


def main():
    """Extract URLs, directory names and basic-auth users from an Apache log.

    Command line: ``script log_file.log [-v]``.  Results are written next to
    the log file as ``<log>.output`` (unique request paths), ``<log>.wordlist``
    (unique directory names) and, only when user names were found,
    ``<log>.users``.

    Exits with status 1 when no log file is given or a file cannot be opened,
    and with status 0 (after a warning) when ``<log>.output`` already exists.
    """
    _say(logo)
    if len(sys.argv) < 2:
        _say(" [\x1B[34;40m!\x1B[0m] Use " + sys.argv[0] + " log_file.log \n [\x1B[34;40m!\x1B[0m] Use -v for verbose mode")
        sys.exit(1)
    _say(" [\x1B[34;40m \x1B[0m] Analysing the log file....\n")

    logfile = sys.argv[1]
    outfile = logfile + ".output"
    wordfile = logfile + ".wordlist"
    userfile = logfile + ".users"

    # Always bound, so a second argument other than -v no longer raises
    # NameError further down.
    verbose = len(sys.argv) > 2 and "-v" in sys.argv[2]
    if verbose:
        _say(" [\x1B[34;40m+\x1B[0m] Verbose mode active\n")

    if os.path.exists(outfile):
        _say(" [\x1B[34;40m!\x1B[0m] Output file already exists \n [\x1B[34;40m!\x1B[0m] Exiting!\n")
        sys.exit()

    # Open the input first so no output file is created for an unreadable log.
    handles = []
    try:
        for path, mode in ((logfile, 'r'), (outfile, 'w'), (wordfile, 'w')):
            handles.append(open(path, mode))
    except IOError:
        for handle in handles:
            handle.close()
        _say(_OPEN_FAILED)
        sys.exit(1)
    inputfile_handle, outputfile_handle, wordfile_handle = handles

    # Scan the logfile and extract the required sections
    with inputfile_handle:
        matches, matches2 = _scan_log(inputfile_handle)

    with outputfile_handle, wordfile_handle:
        _say(" [\x1B[34;40m \x1B[0m] Extracting URLs from logfile : " + logfile + "\n")
        if verbose:
            _say("\n")

        # The ".../" test ignores incomplete paths in the logfile
        unique = [m for m in _dedupe(matches) if m != "*" and ".../" not in m]
        for m in unique:
            if verbose:
                _say(" [\x1B[34;40m \x1B[0m] Extracted URL :  " + m)
            outputfile_handle.write(m + '\n')

        if verbose:
            _say("\n")
        _say(" [\x1B[34;40m \x1B[0m] Extracting directory names from logfile\n")

        uniqueword = _directory_names(unique)
        for x in uniqueword:
            if verbose:
                _say(" [\x1B[34;40m \x1B[0m] Extracted Word :  " + x)
            wordfile_handle.write(x + '\n')

    if verbose:
        _say("\n")
    _say(" [\x1B[34;40m \x1B[0m] Extracting basic auth usernames from logfile : " + logfile + "\n")

    unique2 = [m for m in _dedupe(matches2) if m != " "]
    if verbose:
        for m in unique2:
            _say(" [\x1B[34;40m \x1B[0m] Extracted basic auth username :  " + m)

    # The users file is only created when there is something to put in it.
    if unique2:
        try:
            with open(userfile, 'w') as userfile_handle:
                userfile_handle.writelines(each + '\n' for each in unique2)
        except IOError:
            _say(_OPEN_FAILED)
            sys.exit(1)

    if verbose:
        _say("\n")
    _say(" [\x1B[34;40m+\x1B[0m] Extracted paths to : \x1B[34;40m" + outfile + "\x1B[0m [" + str(len(unique)) + "]\n")
    _say(" [\x1B[34;40m+\x1B[0m] Extracted directory names to : \x1B[34;40m" + wordfile + "\x1B[0m [" + str(len(uniqueword)) + "]\n")
    if unique2:
        _say(" [\x1B[34;40m+\x1B[0m] Extracted basic auth usernames to : \x1B[34;40m" + userfile + "\x1B[0m [" + str(len(unique2)) + "]\n")

    _say("\n [\x1B[34;40m \x1B[0m] Thanks for flying \x1B[34;40mC22\x1B[0m airways: Your ticket to the skies!\n")

# Run the extractor only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...