# BETA! $Id$
####################
# w3c2isa.pl by Shawn Quillman - squillman@att.net
# adapted by Peter STEVENINCK to convert ISA logfiles for awstats
#
# Converts ISA Server log files (tab separated, described by a "#Fields:"
# directive) to a W3C Extended Log Format file that awstats can read.
# Provided as is with no warranty or guarantee
#
# Usage: isa2w3c.pl [-qv] [-h count] -i inputfile [-o outputfile] [-host list]
# Options:
#   -q             quiet mode, no output
#   -v             overwrite existing output file without confirming
#   -h count       print hash marks (#) to monitor progress, one mark every
#                  count lines
#   -t offset      accepted for backward compatibility, the value is not used
#   -i inputfile   name of the ISA log file to convert. If inputfile is a
#                  directory all files in the directory are processed and
#                  each output file is saved as AWSTATS/<name>.
#   -o outputfile  name of the W3C output file. If not specified the output
#                  is saved as isa/<name> next to inputfile.
#                  Ignored if inputfile is a directory.
#   -host list     comma separated awstats configurations to update after
#                  each converted file
####################
# IMPORTANT: w3c date format in one field, CS-METHOD, CS-REFERER are
# compulsory fields

use strict;
use warnings;

my $webserverIP = "123.123.123.123";    # this is the IP address of the webserver

####################
# Settings filled in from the command line
####################
my $inputfile;                  # value of -i
my $outputfile;                 # value of -o (or derived below)
my $hostconfigfilelist = '';    # raw value of -host
my $quiet              = 0;     # set to 1 if q option is specified
my $overwrite          = 0;     # set to 1 if v option is specified
my $hashon             = 0;     # set to 1 if h option is specified
my $hashcount          = 0;     # set to count when h option is specified

####################
# Parse command line arguments
# Exit if an argument is not valid
####################
# "-i inputfile" is the smallest valid command line, so two arguments are
# enough; the old lower bound of four rejected it although -o is optional.
if (scalar(@ARGV) < 2) {
    die(usage_message());
}

my %takes_value = map { $_ => 1 } ('-h', '-t', '-i', '-o', '-host');
my @pending = @ARGV;
while (@pending) {
    my $arg = shift(@pending);

    if ($takes_value{$arg}) {
        if (!@pending) {
            die("\nMissing value for argument: $arg\n");
        }
        my $value = shift(@pending);

        if ($arg eq '-h') {
            # A zero or non numeric count would break the modulo test used
            # to decide when to print a hash mark.
            if ($value !~ /^[1-9][0-9]*$/) {
                die("\nInvalid hash mark count: $value\n");
            }
            $hashon    = 1;
            $hashcount = $value;
        }
        elsif ($arg eq '-i') {
            $inputfile = $value;
        }
        elsif ($arg eq '-o') {
            $outputfile = $value;
        }
        elsif ($arg eq '-host') {
            $hostconfigfilelist = $value;
        }
        # -t: the GMT offset is consumed but never used by this converter.
    }
    elsif ($arg =~ /^-[qv]+$/) {
        # -q, -v and the combined -qv / -vq forms.
        $quiet     = 1 if $arg =~ /q/;
        $overwrite = 1 if $arg =~ /v/;
    }
    else {
        die("\nInvalid argument: $arg\n");
    }
}

if (!defined($inputfile)) {
    die(usage_message());
}

# splitting in different configfiles
my @hostconfigfile = split(",", $hostconfigfilelist);
if (!$quiet) {
    foreach my $hostconfig (@hostconfigfile) {
        print($hostconfig . "\n > hosts.log \n");
    }
}

####################
# Confirm existence of input file
# Exit if not found
####################
if (!(-e $inputfile)) {
    die("\nCannot find input file or directory\n");
}

####################
# Examine input file
# If it's a directory get the list of contents
# Otherwise create a single item list
####################
my $is_directory = (-d $inputfile) ? 1 : 0;
my @infiles;
if ($is_directory) {
    opendir(my $dir, $inputfile)
        or die("\nCannot read directory $inputfile: $!\n");
    @infiles = sort(readdir($dir));
    closedir($dir);
}
else {
    @infiles = ($inputfile);
    if (!defined($outputfile) || $outputfile eq "") {
        # Put the output in an "isa" sub directory next to the input file.
        # Both path separators are accepted; the last component is the file
        # name (the old code indexed one past the end of the list).
        my @path = split(/[\\\/]/, $inputfile);
        $path[-1] = "isa/" . $path[-1];
        $outputfile = join("/", @path);
        if (!$quiet) {
            print("Output:$outputfile");
        }
    }
}

if (!$quiet) {
    foreach my $entry (@infiles) {
        print($entry . "\n");
    }
}

####################
# Convert each file in the list
####################
foreach my $entry (@infiles) {
    my $infile = $entry;
    if ($is_directory) {
        # don't have an output file name yet since the input was a
        # directory, so make one for each file in the directory
        $outputfile = "AWSTATS/" . $entry;
        $infile     = $inputfile . "/" . $entry;
    }

    # ignore . , .. and any subdirectories
    next if -d $infile;

    if (!$quiet) {
        print("Converting ", $infile,     "\n");
        print("Saving as ",  $outputfile, "\n");
    }

    # confirm overwrite of existing output file if not explicitly told to
    # by arg.  The answer is kept in its own variable so that it cannot
    # switch the -v setting on for the files that follow.
    if (!$overwrite && -e $outputfile) {
        print("\nOutput file ", $outputfile, " exists, overwrite (y/n)? ");
        my $answer = readline(*STDIN);
        $answer = "" unless defined($answer);
        $answer =~ s/\r?\n\z//;
        if (($answer eq "n") || ($answer eq "N")) {
            if (!$quiet) {
                print("Skipping...\n");
            }
            next;
        }
        print("\n");
    }

    my $linecount = doConvert($infile, $outputfile);

    if (!$quiet) {
        # print total lines processed for the file
        print("\n", $linecount, " lines processed \n");
    }
}

####################
# Build the usage message shown when the command line is incomplete
####################
sub usage_message {
    return join("",
        "\nUsage: isa2w3c.pl [-qv] [-h count] -i inputfile [-o outputfile] [-host list]\n",
        "Options:\n",
        "\t-q\t\tquiet mode, no output\n",
        "\t-v\t\toverwrite existing output file without confirming\n",
        "\t-h count\tprint hash marks to monitor progress, one mark every\n",
        "\t\t\tcount lines\n",
        "\t-i inputfile\tname of ISA log input file. If\n",
        "\t\t\tinputfile is a directory all files\n",
        "\t\t\tin the directory will be processed and output files\n",
        "\t\t\twill be saved in the AWSTATS directory.\n",
        "\t-o outputfile\tname of W3C Extended Log Format output file. If not specified\n",
        "\t\t\tthe output file will be saved in the isa directory\n",
        "\t\t\tnext to inputfile.\n",
        "\t\t\tIgnored if inputfile is a directory.\n",
        "\t-host host for awstats.[host].conf\n\t\tseparate them by ','\n",
    );
}

####################
# Return one field of a parsed ISA record, or "" when the field was not
# logged for that record.
####################
sub _field {
    my ($record, $name) = @_;
    return defined($record->{$name}) ? $record->{$name} : "";
}

####################
# Begin conversion of file
#
# Reads the ISA log $inputfile, writes one W3C line per record to
# $outputfile and then runs awstats.pl once for every configuration given
# with -host.  Dies when either file cannot be opened.
# Returns the number of records written.
####################
sub doConvert {
    my ($inputfile, $outputfile) = @_;

    if (!$quiet) {
        print("inputfile:$inputfile \n");
        print("outputfile:$outputfile \n");
    }

    # Open the input first so an unreadable input does not truncate an
    # existing output file.
    open(my $in, '<', $inputfile)
        or die("\nCannot open input file $inputfile: $!\n");
    open(my $out, '>', $outputfile)
        or die("\nCannot open output file $outputfile: $!\n");

    if (!$quiet) {
        print("we are reading the file\n");
    }

    my $starttime = gmtime;
    my $header = "# Software: isa2w3c.pl\n# Version: 1.0\n#Date: 2002-09-04 00:00:14\n# Started at:$starttime\n# ";
    # NOTE(review): the statements that completed and wrote this header are
    # missing from the source this was rebuilt from; the header is written
    # as-is and the dangling "# " is closed with a newline -- confirm.
    print {$out} ($header . "\n");

    # Field names in the order they appear in each record, taken from the
    # "#Fields:" directive, e.g.
    #Fields: c-ip cs-username c-agent date time s-computername cs-referred r-host r-ip r-port time-taken cs-bytes sc-bytes cs-protocol s-operation cs-uri s-object-source sc-status
    # NOTE(review): the original code that filled the key list is missing
    # from the source; using the directive's names is what the lookups
    # below ("date", "c-ip", ...) require -- confirm.
    my @loggedFieldKey;
    my $linecount = 0;

    while (defined(my $line = readline($in))) {
        $line =~ s/\r?\n\z//;
        next if $line eq "";

        if ($line =~ /^#/) {
            # Directive line.  Looking for "#Fields:" here instead of
            # counting header lines also copes with headers repeated in the
            # middle of a log.
            if ($line =~ /^#Fields:\s*(.*)$/) {
                @loggedFieldKey = split(/\t/, $1);
            }
            next;
        }

        if (!@loggedFieldKey) {
            warn("No #Fields: directive found before data in $inputfile, stopping\n");
            last;
        }

        # Start from an empty record for every line so that a short line
        # cannot inherit values from the previous one.
        my %isalogline;
        my @values = split(/\t/, $line);
        @isalogline{@loggedFieldKey} = @values;

        #cs-uri-stem in w3c is r-host combined with a part of cs-uri
        # ISA Logfiles split r-host and uri, so we need to replace the IP by www.website.com
        my $csuristem = _field(\%isalogline, "r-host") . _field(\%isalogline, "cs-uri");
        $csuristem =~ s/http:\/\/\Q$webserverIP\E//g;

        #LogFormat = "%time2 %host %method %url %code %bytesd "
        my $w3cline = join("\t",
            _field(\%isalogline, "date") . " " . _field(\%isalogline, "time"),
            _field(\%isalogline, "c-ip"),
            _field(\%isalogline, "cs-username"),
            _field(\%isalogline, "s-operation"),    # cs-method in W3C is s-operation in ISA
            "http://" . $csuristem,
            _field(\%isalogline, "sc-status"),
            _field(\%isalogline, "sc-bytes"),
            "HTTP/1.1",
            _field(\%isalogline, "c-agent"),
            "UNKNOWN",
        );
        print {$out} ($w3cline . "\t\t\n");
        $linecount++;

        # print hash character (#) if requested
        if ($hashon && $hashcount > 0 && $linecount % $hashcount == 0) {
            print("#");
        }
    }

    my $endtime = gmtime;
    print {$out} ("# ended at: $endtime in sec");

    # Close before awstats runs so it reads the complete, flushed file.
    close($out) or die("\nCannot write output file $outputfile: $!\n");
    close($in);

    if (!$quiet) {
        print("export $outputfile \n");
    }

    #execution of awstats.pl
    foreach my $hostconfig (@hostconfigfile) {
        my $logname = $hostconfig . $outputfile . ".log";
        $logname =~ s/\///g;
        # NOTE(review): the command goes through the shell because of the
        # redirection; configuration names and paths containing spaces or
        # shell metacharacters are not quoted.
        my $command = "perl awstats.pl -config=$hostconfig -logfile=$outputfile -update > " . $logname;
        if (!$quiet) {
            print($command . " \n******\n");
        }
        if (system($command) != 0) {
            warn("awstats update failed for $hostconfig (status $?)\n");
        }
    }

    if (!$quiet) {
        print("Lines: $linecount \nEND\n");
    }

    return $linecount;
}

####################
# Seconds elapsed between two time stamps that contain " hh:mm:ss " (such
# as the strings returned by gmtime in scalar context).
# Only minutes and seconds are compared, so the result is only meaningful
# for intervals shorter than one hour; a negative difference is taken to
# mean the hour rolled over.  Returns nothing when a time stamp cannot be
# parsed.
####################
sub time_diff {
    my ($t1, $t2) = @_;

    my @seconds;
    foreach my $stamp ($t1, $t2) {
        return unless defined($stamp);
        return unless $stamp =~ /^.*\s\d{2,2}:(\d{2,2}):(\d{2,2})\s.*$/;
        push(@seconds, 60 * $1 + $2);
    }

    my $diff = $seconds[1] - $seconds[0];
    $diff = $diff + 3600 unless ($diff >= 0);
    return $diff;
}