#!/usr/local/bin/perl -w

########################################################################################################################
# anaml.pl - converts Analog's HTML-2 logfile reports into XML, mapping each report onto a HTML-style table
# Version 1.0: 23.1.99 15:39 - 25.1.99
# chocolateboy@usa.net - mail me with bugfixes, requests, marriage proposals &c.
#
# Do anything you want with this script, but please keep this header, and let me know if you use it for anything cool
########################################################################################################################
#
# Overview of what the script does:
#   1. Slurps the HTML report ($SOURCE) and loads the language file ($LOCALE) into @lingo.
#   2. Splits the report into chunks; the first and last chunks together form the "Meta Report".
#   3. Turns every chunk in between into a <$TABLE> element (the General Summary gets special treatment).
#   4. Writes each table to "$OUTDIR/<short name>.xml".

use strict;

#########################################################################################################################
# You'll need to set the following three variables to get things working - the rest can be adjusted according to taste
#########################################################################################################################

my $SOURCE = "/export/htdocs/myreport.html";     # HTML Analog report to be processed
my $OUTDIR = "/export/htdocs/anaml";             # Full path of xml output directory (can be a new directory if you have permissions)
my $LOCALE = "/usr/local/analog/lang/uk.lng";    # Make sure you specify the html language file ie *h.lng (if available)

#########################################################################################################################

my $GENERATOR = "Generator";      # Localize this (Analog Version/Platform)
my $HOSTNAME  = "Hostname";       # And this (Server Name)
my $META      = "Meta Report";    # And this (Analog Uber Report)

my $TABLE   = "REPORT";           # Fiddle with these to roll your own tags
my $TR      = "RR";               # Row delimiter
my $TH      = "RH";               # Column heading
my $TD      = "RD";               # Report element
my $NAME    = "NAME";             # Report name
my $SUMMARY = "SUMMARY";          # Some reports have one line summaries (busiest month &c.)

# my $DEBUG = " BORDER=1";        # Use this to place additional (e.g formatting/debug) tags in the top level ($TABLE) tag
my $DEBUG = "";                   # Make sure $DEBUG is defined (even if it's just "")

#########################################################################################################################

# Build one <$TR> row in which every cell is wrapped in $cell_tag ($TH or $TD).
# Returns the row without a trailing newline.
sub xml_row {
    my ($cell_tag, @cells) = @_;
    return "<$TR>" . join('', map { "<$cell_tag>$_</$cell_tag>" } @cells) . "</$TR>";
}

# Open both inputs up front so that a missing file is reported before any work is done.
open(my $source_fh, '<', $SOURCE) or die("Can't open $SOURCE: $!");    # Open the file to be xmlised
open(my $locale_fh, '<', $LOCALE) or die("Can't open $LOCALE: $!");    # And the language file (needed to correctly parse reports)

# Slurp the whole report; $/ is only undefined inside the do-block.
# NOTE(review): the original read expression had been stripped to "()"; reading the report handle is inferred.
my $slurp = do { local $/; <$source_fh> };
close($source_fh);
$slurp = '' unless defined $slurp;

# Load the language file into a list for easy access (skip comments).
# Blank lines are kept on purpose: the numeric indices used below rely on every non-comment line counting.
# NOTE(review): the original loop condition had been stripped to "while ()"; reading the language handle is inferred.
my @lingo;
while (my $line = <$locale_fh>) {
    next if $line =~ /^#/;
    chomp $line;    # chomp rather than chop, so a final line without a newline keeps its last character
    push @lingo, $line;
}
close($locale_fh);

# This hash maps verbose report names onto their short version - used to name the output file
my %names = (
    $lingo[61]  => 'general',        # General Summary
    $lingo[62]  => 'monthly',        # Monthly Report
    $lingo[64]  => 'weekly',         # Weekly Report
    $lingo[67]  => 'fulldaily',      # Daily Report
    $lingo[66]  => 'daily',          # Daily Summary
    $lingo[69]  => 'fullhourly',     # Hourly Report
    $lingo[70]  => 'hourly',         # Hourly Summary
    $lingo[72]  => 'quarter',        # Quarter-Hour Report
    $lingo[74]  => 'five',           # Five-Minute Report
    $lingo[76]  => 'host',           # Host Report
    $lingo[136] => 'domain',         # Domain Report
    $lingo[88]  => 'request',        # Request Report
    $lingo[80]  => 'directory',      # Directory Report
    $lingo[84]  => 'filetype',       # File Type Report
    $lingo[144] => 'size',           # File Size Report
    $lingo[92]  => 'redir',          # Redirection Report
    $lingo[96]  => 'failure',        # Failure Report
    $lingo[100] => 'referrer',       # Referrer Report
    $lingo[104] => 'refsite',        # Referring Site Report
    $lingo[108] => 'redirref',       # Redirected Referrer Report
    $lingo[112] => 'failref',        # Failed Referrer Report
    $lingo[132] => 'fullbrowser',    # Browser Report
    $lingo[128] => 'browser',        # Browser Summary
    $lingo[116] => 'vhost',          # Virtual Host Report
    $lingo[120] => 'user',           # User Report
    $lingo[124] => 'failuser',       # Failed User Report
    $lingo[140] => 'status',         # Status Code Report
    $META       => 'meta',           # Meta Report
);

# Set up the hash containing the xml to be exported (short report name => xml text)
my %exports;

# Now grab a couple of pertinent fields: the localized labels used to pick the meta report apart,
# plus the localized name of the General Summary.
my ($timestamp_label, $from_label, $to_label, $days_label, $generator_label, $runtime_label, $general)
    = (@lingo[156 .. 158], $lingo[23], @lingo[145 .. 146], $lingo[61]);

# Cut the page into chunks: $reports[0] is the page header, $reports[-1] the footer,
# and everything in between is one report per chunk.
# NOTE(review): the separator tag had been stripped from the original pattern; <hr> is inferred - confirm against a real report.
my @reports = split(/<hr>/i, $slurp);

#########################################################################################################################
# Meta Report - built from the stuff at the top (before the proper reports) and the stuff at the bottom
#########################################################################################################################

my $header = $reports[0];

# Captures: page title, the hostname anchor that closes the <h1>, (anchor text - unused), and the stats after the <h1>.
# NOTE(review): the literal "<title>" had been stripped from the original pattern and is restored here.
my ($title, $hostname, undef, $stats)
    = ($header =~ /^.*?<title>(.+?)<\/title>.+?(<a href=[^>]+>([^<]+)<\/a>)<\/h1>(.+)$/is);

$stats .= $reports[-1];       # The stuff at the bottom - this and the stats at the top make up the Meta Report
$stats =~ s/<\/i>.+$//s;      # Remove the tail
$stats =~ s/\n+//g;           # Death to newlines
$stats =~ s/ +/ /g;           # Double spaces -> single spaces

# The labels come from the language file, so they are quoted (\Q...\E) to be matched literally.
my ($timestamp, $from, $to, $days, $generator, $runtime)
    = ($stats =~ /^\Q$timestamp_label\E (.+?)\.<br>\Q$from_label\E (.+?) \Q$to_label\E (.+?) \((.+?) \Q$days_label\E\)\.<i>\Q$generator_label\E (<a href.+?<\/a>).+?\Q$runtime_label\E:<\/b> (.+?)\.$/i);

my $xml = "<$TABLE $NAME=\"$META\" $SUMMARY=\"$title\"$DEBUG>\n"
    . xml_row($TH, $HOSTNAME, $timestamp_label, $from_label, $to_label, $days_label, $GENERATOR, $runtime_label) . "\n"
    . xml_row($TD, $hostname, $timestamp, $from, $to, $days, $generator, $runtime) . "\n"
    . "</$TABLE>";
$exports{ $names{$META} } = $xml;

#########################################################################################################################
# The proper reports - every chunk between the header and the footer
#########################################################################################################################

REPORT:
for my $index (1 .. $#reports - 1) {
    my $report = $reports[$index];

    my ($name) = ($report =~ m/<h2><.*?>(.+)<.*?><\/h2>/i);

    # A chunk without a heading, or with a heading that is not in %names, has no output file name;
    # skip it instead of exporting it to "$OUTDIR/.xml".
    unless (defined $name) {
        warn "Skipping chunk $index of $SOURCE: no report name found\n";
        next REPORT;
    }
    my $short_name = $names{$name};
    unless (defined $short_name) {
        warn "Skipping report '$name': no short name known for it\n";
        next REPORT;
    }

    if ($name =~ /\Q$general\E/) {    # General summary needs special treatment
        my ($dope, $lastseven);
        if (($dope) = ($report =~ /<p><b>(.+)$/si)) {    # No summary for the last seven days
            $lastseven = "";
        }
        else {
            # Extract the summary and the stats
            ($lastseven, $dope) = ($report =~ /<p>\((?!<b>)(.+?)\)\.\n<br><b>(.+)$/is);
            $lastseven = " $SUMMARY=\"$lastseven.\"";    # Format the L7 summary
        }

        $dope =~ s/\n|://g;                 # Kill newlines and colons
        $dope =~ s/(<\/b>)( +)/$1/ig;       # Drop the spaces that follow a closing </b>

        # Splitting on the bold tags yields an alternating label/value list.
        my @fields = split(/(?:<br>)?<\/?b>/i, $dope);

        # Pair the fields up, remembering the order in which labels first appear so that the
        # columns come out in report order (a repeated label keeps its last value).
        my (%summaries, @labels);
        while (@fields) {
            my ($label, $value) = splice(@fields, 0, 2);
            push @labels, $label unless exists $summaries{$label};
            $summaries{$label} = $value;
        }

        $xml = "<$TABLE $NAME=\"$general\"$lastseven$DEBUG>\n"
            . xml_row($TH, @labels) . "\n"
            . xml_row($TD, @summaries{@labels}) . "\n"
            . "</$TABLE>";
        $exports{$short_name} = $xml;
        next REPORT;
    }

    # Extract the meat and the summary (if present)
    my ($meat, $summary) = ($report =~ /<pre><tt>(.+)<\/tt><\/pre>(.*)$/is);

    $summary =~ s/\n//g;                     # Strip newlines from summary
    $meat    =~ s/^\s+//g;                   # Strip opening spaces
    $meat    =~ s/: +<img .+//ig;            # Lose the graphics
    $meat    =~ s/(?:: +)?\n+ */\n/gs;       # Clean out double newlines and orphaned delimiters
    # NOTE(review): there is no /g here, so only the first run of spaces is touched even though the
    # original comment says "squash multiple spaces" - left unchanged; confirm the intent.
    $meat    =~ s/ +/ /;

    # First line is the column headings, the second line is discarded, the rest is the table body.
    my ($head, $body) = ($meat =~ /^([^\n]+)\n[^\n]+\n(.+?)\n$/s);

    $head =~ s/: +/<\/$TH><$TH>/g;                          # Colons delimit the column headings
    $head = "<$TR><$TH>$head<\/$TH><\/$TR>";

    $body =~ s/\n/<\/$TD><\/$TR>\n<$TR><$TD>/g;             # One row per line
    $body =~ s/: +/<\/$TD><$TD>/g;                          # Colons delimit the cells
    $body =~ s/(\[.+?)<\/$TD><$TD>(.+?\])/$1: $2/gi;        # Colons inside square brackets can be left alone
    $body = "<$TR><$TD>$body<\/$TD><\/$TR>";

    $summary = $summary ? " $SUMMARY=\"$summary\"" : "";
    $xml     = "<$TABLE $NAME=\"$name\"$summary$DEBUG>\n$head\n$body\n<\/$TABLE>";
    $exports{$short_name} = $xml;
}

#########################################################################################################################
# Output - one xml file per report
#########################################################################################################################

if (!-e $OUTDIR) {    # If it doesn't exist, make it
    mkdir($OUTDIR, 0755) or die("Can't create $OUTDIR: $!");
}
if (!-w $OUTDIR) {    # If it does exist, make sure it's writable
    # The -w test does not reliably set $!, so name the directory instead of printing a stale error.
    die "Can't write to output directory: $OUTDIR";
}

foreach my $short_name (sort keys %exports) {    # Let's go
    my $path = "$OUTDIR/$short_name.xml";
    open(my $xml_fh, '>', $path) or die("Can't open $path for writing: $!");
    print {$xml_fh} $exports{$short_name} or die("Can't write to $path: $!");
    close($xml_fh) or die("Can't close $path: $!");    # Buffered write errors only surface at close
}

exit(0);