#!/usr/bin/perl
##########################################################################
# $Id: http,v 1.9 2004/02/03 02:45:26 kirk Exp $
##########################################################################
# $Log: http,v $
# Revision 1.9  2004/02/03 02:45:26  kirk
# Tons of patches, and new 'oidentd' and 'shaperd' filters from
# Pawe? Go?aszewski" <blues@ds.pg.gda.pl>
#
##########################################################################

########################################################
# This was written and is maintained by:
#    Michael Romeo <michaelromeo@mromeo.com>
#
# Please send all comments, suggestions, bug reports,
#    etc, to kirk@kaybee.org.
########################################################


use strict;
# use re "debug";
#
#  parse httpd access_log
#
#  Get the detail level, then build the field and format tables that
#  describe the log layout so each line can be split into named fields
#

#
# Run-time configuration, taken from the environment.
#
#   LOGWATCH_DETAIL_LEVEL    report verbosity (sections below test > 4 and > 9)
#   http_fields              space-separated names for the captured fields
#   http_format              space-separated format tokens (space/brace/quote)
#   http_ignore_error_hacks  when true, unidentified requests are only
#                            reported if their response code is below 400
#
# Each default below is kept unless the matching environment variable is
# actually set.  Assigning straight from %ENV would replace the default
# with undef whenever the variable is missing.
#
my $detail = 10;
my @log_fields = ();
my @log_format = ();
my $ignore_error_hacks = 0;

$detail = $ENV{'LOGWATCH_DETAIL_LEVEL'}
   if defined $ENV{'LOGWATCH_DETAIL_LEVEL'};
@log_fields = split(" ", $ENV{'http_fields'})
   if defined $ENV{'http_fields'};
@log_format = split(" ", $ENV{'http_format'})
   if defined $ENV{'http_format'};
$ignore_error_hacks = $ENV{'http_ignore_error_hacks'}
   if defined $ENV{'http_ignore_error_hacks'};

#
# Initialization etc.
#

# Whole-log totals.  $failed_requests is declared here but is not
# referenced by the rest of the visible script.
my ($byte_summary, $failed_requests, $total_hack_count) = (0) x 3;

# Working state for the line currently being processed.
my %field   = ();        # field name => value for the current line
my $pattern = "";        # regex assembled from the configured log format
my ($flag, $isahack) = (0) x 2;

# Findings accumulated over the whole log.
my %hacks        = ();   # exploit pattern => { client ip => hit count }
my %hack_success = ();   # probed url => response code (when code < 400)
my %needs_exam   = ();   # request => space-separated response codes
my %ban_ip       = ();   # client ip => " " for every client that probed
my %robots       = ();   # user agent => number of robots.txt requests

# Responses tallied by status class.
my ($a1xx_resp, $a2xx_resp, $a3xx_resp, $a4xx_resp, $a5xx_resp) = (0) x 5;

# Request count and byte count for each kind of requested object.
my ($image_count,    $image_bytes)    = (0, 0);
my ($docs_count,     $docs_bytes)     = (0, 0);
my ($archive_count,  $archive_bytes)  = (0, 0);
my ($sound_count,    $sound_bytes)    = (0, 0);
my ($movie_count,    $movie_bytes)    = (0, 0);
my ($winexec_count,  $winexec_bytes)  = (0, 0);
my ($content_count,  $content_bytes)  = (0, 0);
my ($redirect_count, $redirect_bytes) = (0, 0);
my ($other_count,    $other_bytes)    = (0, 0);
my ($wpad_count,     $wpad_bytes)     = (0, 0);
my ($src_count,      $src_bytes)      = (0, 0);
my ($images_count,   $images_bytes)   = (0, 0);
# Turns a list of bare extensions into a parenthesised alternation of
# escaped-dot suffixes, e.g. (gif, png) => '(\.gif|\.png)'.
my $suffix_group = sub {
   return '(' . join('|', map { '\\.' . $_ } @_) . ')';
};

# Regex fragments used to classify a requested URL by its ending.
my $image_types   = $suffix_group->(qw(bmp emz gif ico jpeg jpg png swf tif tiff wmf wmz xdm));

# Content pages mix path-style entries with suffixes, so the alternatives
# are spelled out in full.  The bare '\/' entry appears three times.
my $content_types = '(' . join('|',
   '\/server-status', '\/server-info',
   '\.htm', '\.html', '\.jhtml', '\.phtml', '\.shtml', '\/',
   '\.php', '\.php3', '\.asp', '\.pl', '\.wml', '\/',
   '\.css', '\.js', '\.cgi', '\.txt', '\/',
   '\.class', '\.jsp', '\.jar', '\.java',
) . ')';

my $docs_types    = $suffix_group->(qw(doc dvi gnumeric mcd mso pdf pps ppt ps rtf tex text xls xml));
my $archive_types = $suffix_group->(qw(ace bz2 cab deb gz hqx rar rpm tbz2 zip));
my $sound_types   = $suffix_group->(qw(mp3 ram raw rm wav wma mid));
my $movie_types   = $suffix_group->(qw(asf avi mid mpg mpeg mov));
my $winexec_types = $suffix_group->(qw(com exe dll));

# Proxy auto-configuration files are matched by whole file name.
my $wpad_files    = '(wpad\.dat|wspad\.dat|proxy\.pac)';

my $program_src   = $suffix_group->(qw(bas c diff f h patch cpp));
my $images_types  = $suffix_group->(qw(bin cue iso run));

#
#   what to look for as an attack  USE LOWER CASE!!!!!!
#
# Patterns written as hex escapes.
my @hex_escape_patterns = (
   '\\x90\\x02\\xb1\\x02\\xb1',
   '\\x02\\xb1\\x02\\xb1',
   '\\x90\\x90\\x90\\x90',
   '\\x04\\x01',
   '\\x05\\x01',
);

# Patterns written as plain URL text.
my @url_text_patterns = (
   '\/c\+dir',
   'cmd.exe',
   'default.ida',
   'nsiislog.dll',
   'phpmyadmin',
   'root.exe',
   'win.ini',
);

# Every entry is applied as a regex to the lower-cased URL of each request,
# and the entry text itself is used as the key when hits are reported.
my @exploits = (@hex_escape_patterns, @url_text_patterns);

#
#  Define some useful RE patterns
#

# Capture fragment for each token that may appear in the log format:
# a bare field, a [bracketed] field and a "quoted" field.
my %re_pattern = (
   space => '(.*)',
   brace => '\[(.*)\]',
   quote => '\"(.*)\"',
);

#
#  Build the regex to parse the line: one fragment per format token,
#  with a whitespace match (\s) between neighbours and none after the last.
#
$pattern .= join('\\s', map { $re_pattern{$_} } @log_format);

#################   print "RE pattern     = $pattern \n";

#
#  Process log file on stdin
#

#
# Main loop: read the access log from STDIN one line at a time, split each
# line into named fields, and update the exploit, file-type and
# response-class tallies that the report sections below print.
#
while (my $line = <STDIN>) {
   chomp($line);

   #
   # Parse the line per the input spec.  A line the pattern cannot match
   # yields no captures.  Skip it, otherwise it would be tallied as an
   # "Other" request with an empty response code, recorded under an empty
   # request in %needs_exam, and counted as a 1xx response.
   #
   my @parsed_line = $line =~ /$pattern/o;
   next unless @parsed_line;

   # Store the captures under the configured field names so the rest of
   # the loop can refer to them symbolically.
   for my $i (0 .. $#log_fields) {
      $field{$log_fields[$i]} = $parsed_line[$i];
   }

   ##
   ## Do the default stuff
   ##

   # Break up the request into method, url and protocol.  The lower-cased
   # url is what the exploit and file-type patterns are matched against.
   ($field{method}, $field{url}, $field{protocol}) = split(/ /, $field{"request"});
   $field{lc_url} = lc $field{url};

   # Bytes sent summary.  A "-" in the byte field is counted as 0 bytes.
   if ($field{bytes_transfered} eq "-") {
      $field{bytes_transfered} = 0;
   }
   $byte_summary += $field{bytes_transfered};

   #
   # Check the url against every known exploit pattern.  A request may
   # match several patterns, and each match is counted.
   #
   $isahack = 0;
   foreach my $exploit (@exploits) {
      next unless $field{lc_url} =~ /$exploit/;

      $hacks{$exploit}{$field{client_ip}} += 1;
      $total_hack_count += 1;
      $ban_ip{$field{client_ip}} = " ";

      # A response code below 400 means the probe may have succeeded.
      if ($field{http_rc} < 400) {
         $hack_success{$field{url}} = $field{http_rc};
      }
      $isahack = 1;
   }

   #
   # Count types and bytes.
   #
   # The totals are only printed if detail > 4, but this also collects the
   # unidentified requests, so it has to run always.
   #
   # Classification looks at the url with any query string removed, so
   # that "/pic.gif?x=1" is an image and "/page.php?img=a.gif" is not.
   #
   ($field{base_url}, $field{url_parms}) = split(/\?/, $field{"lc_url"});

   if ($field{base_url} =~ /$image_types$/) {
      $image_count += 1;
      $image_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$docs_types$/) {
      $docs_count += 1;
      $docs_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$archive_types$/) {
      $archive_count += 1;
      $archive_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$sound_types$/) {
      $sound_count += 1;
      $sound_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$movie_types$/) {
      $movie_count += 1;
      $movie_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$winexec_types$/) {
      $winexec_count += 1;
      $winexec_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$content_types$/) {
      $content_count += 1;
      $content_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$wpad_files$/) {
      $wpad_count += 1;
      $wpad_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$program_src$/) {
      $src_count += 1;
      $src_bytes += $field{bytes_transfered};
   } elsif ($field{base_url} =~ /$images_types$/) {
      $images_count += 1;
      $images_bytes += $field{bytes_transfered};
   } elsif ($field{http_rc} =~ /^3\d\d$/) {
      # Anchored so that only a three-digit 3xx code counts as a redirect.
      $redirect_count += 1;
      $redirect_bytes += $field{bytes_transfered};
   } else {
      $other_count += 1;
      $other_bytes += $field{bytes_transfered};

      # Remember unidentified requests that were not already flagged as an
      # exploit attempt.  With http_ignore_error_hacks set, only those
      # answered with a code below 400 are kept.
      if (!$isahack && (!$ignore_error_hacks || $field{http_rc} < 400)) {
         $needs_exam{$field{request}} .= $field{http_rc}." ";
      }
   }

   ##
   ## Do the > 4 stuff
   ##

   # Response summary by status class.  Anything below 200 lands in 1xx.
   if ($field{http_rc} > 499) {
      $a5xx_resp += 1;
   } elsif ($field{http_rc} > 399) {
      $a4xx_resp += 1;
   } elsif ($field{http_rc} > 299) {
      $a3xx_resp += 1;
   } elsif ($field{http_rc} > 199) {
      $a2xx_resp += 1;
   } else {
      $a1xx_resp += 1;
   }

   # Count the robots who actually ask for the robots.txt file.  The dot
   # is escaped so that only the literal file name matches.
   if ($field{lc_url} =~ m{^/robots\.txt$}) {
      $robots{$field{agent}} += 1;
   }

} ## End of while loop

##
##   output the results 
##

# Traffic summary: total volume, responses per status class, then one
# line per object category.  Printed only when detail is above 4.
if ($detail > 4) {
   my $response_total =
      $a1xx_resp + $a2xx_resp + $a3xx_resp + $a4xx_resp + $a5xx_resp;

   printf "%.2f MB transfered ", $byte_summary / (1024 * 1024);
   print "in ", $response_total, " responses ";
   print " (1xx $a1xx_resp, 2xx $a2xx_resp, 3xx $a3xx_resp,",
         " 4xx $a4xx_resp, 5xx $a5xx_resp) \n";

   # [ label, request count, byte count ], in report order.
   my @category_rows = (
      [ 'Images',                    $image_count,    $image_bytes    ],
      [ 'Documents',                 $docs_count,     $docs_bytes     ],
      [ 'Archives',                  $archive_count,  $archive_bytes  ],
      [ 'Sound files',               $sound_count,    $sound_bytes    ],
      [ 'Movies files',              $movie_count,    $movie_bytes    ],
      [ 'Windows executable files',  $winexec_count,  $winexec_bytes  ],
      [ 'Content pages',             $content_count,  $content_bytes  ],
      [ 'Redirects',                 $redirect_count, $redirect_bytes ],
      [ 'Proxy Configuration Files', $wpad_count,     $wpad_bytes     ],
      [ 'Program source files',      $src_count,      $src_bytes      ],
      [ 'CD Images',                 $images_count,   $images_bytes   ],
   );

   foreach my $row (@category_rows) {
      my ($label, $count, $bytes) = @$row;
      print " $count $label ($bytes bytes),\n";
   }

   # The final line ends the list, so it carries no trailing comma.
   print " $other_count Other ($other_bytes bytes) \n";
}

#
#  List attempted exploits
#

# Report every exploit pattern that matched at least one request.  Shown
# only when detail is above 4.  Above 9 the per-client hit counts are
# listed under each pattern as well.
#
# Keys are sorted so the report comes out in the same order on every run,
# and the nested hash is read through its reference instead of being
# copied into a temporary hash.
if ($detail > 4 && %hacks) {
   print "\nAttempts to use ".scalar(keys %hacks)." known hacks were logged $total_hack_count time(s)\n";

   foreach my $exploit (sort keys %hacks) {
      print "  $exploit  ";

      if ($detail > 9) {
         print " by \n";
         my $hits_by_client = $hacks{$exploit};
         foreach my $client (sort keys %$hits_by_client) {
            print "          $client $hits_by_client->{$client} time(s) \n";
         }
      } else {
         print "\n";
      }
   }
}

#
#  List (wannabe) blackhat sites
#

# Clients that triggered at least one exploit pattern.  The heading is
# printed once, and only when there is something to list.  This section
# is not gated on the detail level.
if ((my @probing_sites = keys %ban_ip)) {
   print "\nA total of ".scalar(@probing_sites)." sites probed the server \n";
   print "  $_  \n" foreach @probing_sites;
}

#
#  List possible successful probes
#

# Probed urls that were answered with a response code below 400, shown
# with the code recorded for them.  Nothing is printed when there are none.
if (my $probe_total = keys %hack_success) {
   print "\n!!!! ".$probe_total." possible successful probes \n";
   while (my ($probed_url, $response_code) = each %hack_success) {
      print " $probed_url HTTP Response $response_code \n";
   }
}

#
#  List 'others' that are not known attacks 
#

# Requests that fell into the 'other' category without matching a known
# exploit, each followed by the response codes collected for it.
my @unidentified_requests = keys %needs_exam;
if (@unidentified_requests) {
   print "\nA total of ".scalar(@unidentified_requests)." unidentified \'other\' records logged\n";
   print map { "  $_ with response code(s) $needs_exam{$_}\n" } @unidentified_requests;
}

#
#  List robots that identified themselves
#

# User agents that requested /robots.txt.  The total is shown when detail
# is above 4, and the individual agents with their request counts only
# when detail is above 9.
if ($detail > 4) {
   my @robot_agents = keys %robots;
   if (@robot_agents) {
      print "\nA total of ".scalar(@robot_agents)." ROBOTS were logged \n";
      if ($detail > 9) {
         print "      $_ $robots{$_} time(s) \n" foreach @robot_agents;
      }
   }
}

exit(0);

# vi: shiftwidth=3 tabstop=3 et

