#!/usr/bin/perl
#
# Summarize search activity recorded in an NCSA-format web server access log.
#
# Every log line containing "query=... HTTP" is treated as one search request.
# The report printed to STDOUT contains:
#   * the number of queries having n search terms, and the average term count
#   * the number of queries per day, and the daily average
#   * the most frequent search terms and search phrases
#   * how often each search form (the "mss" request parameter) was used
#   * the total number of queries
#
# Usage: perl <this script> [access_log]
# With no argument the default log location below is read.

use strict;
use warnings;

# Log file read when no path is given on the command line.
my $DEFAULT_LOG = "/usr/local/apache/logs/access_log";

# Maximum number of rows printed in each "Top ..." section.
my $TOP_N = 500;

# Month abbreviations as they appear in NCSA log timestamps.
my %MONTH_NUMBER = (
    "Jan" => 1, "Feb" => 2,  "Mar" => 3,  "Apr" => 4,
    "May" => 5, "Jun" => 6,  "Jul" => 7,  "Aug" => 8,
    "Sep" => 9, "Oct" => 10, "Nov" => 11, "Dec" => 12,
);

# URL escapes (matched case-insensitively) that are turned back into the
# character they stand for when a search phrase is cleaned up.
my %ESCAPED_CHAR = (
    '2B' => '+', '2A' => '*', '27' => "'", '3A' => ':',
    '2E' => '.', '2F' => '/', '22' => '"',
);

# Read the access log and print the whole report.
#   $log_path - optional path of the log to analyze (defaults to $DEFAULT_LOG)
# Exits with status 1 when the log cannot be opened.
sub main {
    my ($log_path) = @_;
    $log_path = $DEFAULT_LOG unless defined $log_path;

    my $log_fh;
    if (!open($log_fh, '<', $log_path)) {
        print STDERR "Can't open specified log $log_path: $!\n";
        exit(1);
    }

    my $totalqueries = 0;
    my (%date, %mss, %searchstrings, %termcount, %words);

    while (my $entry = <$log_fh>) {
        next unless $entry =~ m/query=(.*) HTTP/;
        my $rawquery = $1;
        $totalqueries++;

        # Tally another hit for this day.  The match is checked so that a
        # line without a timestamp cannot leak a stale capture into %date.
        if ($entry =~ m{\[(\d\d/\w\w\w/\d\d\d\d):}) {
            $date{$1}++;
        }

        # Split "text&key=value&key=value" into a hash; whatever precedes the
        # first "&key=" is stored under FRONTSTUFF.  The -1 limit keeps
        # trailing empty values so keys and values always pair up.
        my %query = ('FRONTSTUFF', split(/&([-\w]+)=/, '&' . $rawquery, -1));

        my $cleanq = clean_query($query{'FRONTSTUFF'});

        # Tally another use of this query form ('' when no mss was sent).
        $mss{ defined $query{mss} ? $query{mss} : '' }++;

        # Tally another search for this search string.
        $searchstrings{$cleanq}++;

        # split ' ' ignores leading whitespace and runs of whitespace, so
        # empty strings are never counted as search terms.
        my @terms = split(' ', $cleanq);
        $termcount{ scalar @terms }++;

        # Take each word and increment its counter.
        $words{$_}++ foreach @terms;
    }
    close($log_fh);

    # Count of queries for each number of search terms, and the average.
    my $total   = 0;
    my $queries = 0;
    print "\nNumber of queries with n search terms:\n";
    foreach my $n (sort numerically keys %termcount) {
        print $n, '=', $termcount{$n}, "\n";
        $total   += $termcount{$n} * $n;    # number of words in this class
        $queries += $termcount{$n};
    }
    print "Average search terms per query: ",
        ($queries ? $total / $queries : 0), "\n";

    # Queries per day, and the daily average.
    print "\nQueries per day:\n";
    $total = 0;
    my $days = 0;
    foreach my $day (sort logdatewise keys %date) {
        print $day, '=', $date{$day}, "\n";
        $total += $date{$day};
        $days++;
    }
    print "Average queries per day: ", ($days ? $total / $days : 0), "\n";

    # The most used search terms and how often each was used.
    print "\nTop Search Terms: \n";
    print_top_tallies(\%words, $TOP_N);

    # The most used search phrases and how often each was used.
    print "\nTop Search Phrases: \n";
    print_top_tallies(\%searchstrings, $TOP_N);

    # Each type of search form and the number of times it was used.
    print "\nForm Usage: \n";
    foreach my $form (sort keys %mss) {
        my $cleankey = $form;
        # NOTE(review): the pattern previously read s/&/&/ (a no-op);
        # presumably it was meant to decode the HTML entity "&amp;" - confirm.
        $cleankey =~ s/&amp;/&/gi;
        print $cleankey, " form = ", $mss{$form}, "\n";
    }

    print "Total queries: ", $totalqueries;
    return;
}

# Turn the raw leading part of a query string into a readable search phrase.
#   $text - raw (still URL-encoded) text; undef is treated as ''
# Returns the cleaned phrase.
sub clean_query {
    my ($text) = @_;
    $text = '' unless defined $text;

    $text =~ s/\++/ /g;        # runs of +'s become a single space
    $text =~ s/&subject//;     # drop the first "&subject" marker
    $text =~ s/^&//;           # drop the leading separator
    $text =~ s/%3D//;          # drop the first encoded "="
    $text =~ s/%20/ /g;
    $text =~ s/%60/ /g;
    $text =~ s/%(2B|2A|27|3A|2E|2F|22)/$ESCAPED_CHAR{uc $1}/gi;

    return $text;
}

# Print up to $limit "key=count" lines from a tally hash, highest count first;
# keys with equal counts are printed in alphabetical order.
#   $tally - reference to a hash mapping key => count
#   $limit - maximum number of lines to print
sub print_top_tallies {
    my ($tally, $limit) = @_;

    my $printed = 0;
    foreach my $key (sort { $tally->{$b} <=> $tally->{$a} or $a cmp $b }
                     keys %$tally) {
        last if $printed >= $limit;
        print $key, '=', $tally->{$key}, "\n";
        $printed++;
    }
    return;
}

# Split a "dd/Mon/yyyy" date into a numeric (year, month, day) list.
# Unparseable dates and unknown month names yield 0 in the affected fields.
sub logdate_parts {
    my ($stamp) = @_;

    if (defined $stamp && $stamp =~ m{(\d\d)/(\w\w\w)/(\d\d\d\d)}) {
        return ($3, $MONTH_NUMBER{$2} || 0, $1);
    }
    return (0, 0, 0);
}

# A subroutine for sort: compares $a and $b numerically.
sub numerically {
    return $a <=> $b;
}

# A sort-compatible subroutine that compares $a and $b as dates in the form
# found in NCSA format web logs (dd/Mon/yyyy): by year, then month, then day.
sub logdatewise {
    my @left  = logdate_parts($a);
    my @right = logdate_parts($b);

    return $left[0] <=> $right[0]
        || $left[1] <=> $right[1]
        || $left[2] <=> $right[2];
}

# Run the report only when executed as a script, so the subroutines above can
# also be loaded from another file without side effects.
main(@ARGV) unless caller;

1;