#!/usr/bin/perl
# analyze.cgi -- analyzes Web page hits by browser, platform, time of day,
# day of week.
#
# The first line of this script may have to be changed if your system's Perl
# interpreter is not located in /usr/bin/perl.
#
# Written by P. Lutus Ashland, Oregon lutusp@arachnoid.com 3/18/96
# Some of the filter-by-browser and filter-by-platform code was derived from
# public sources.
#
# Input:  every *.log file in the current directory.  Each log is
#         tab-separated; its first line names the columns.  Only the columns
#         named "HTTP_USER_AGENT" and "Time" ("mm/dd/yy hh:mm:ss") are read.
# Output: one HTML page on STDOUT with a report section per log file.

use strict;
use warnings;
use POSIX qw(floor ceil);
use List::Util qw(max);

# Set $show_browsers = 1 if you want a breakdown of the names of the
# browsers -- a really big table!
my $show_browsers = 0;

# Set $show_hours = 1 if you want a breakdown by hour of logon.
my $show_hours = 1;

# Set $show_days = 1 if you want a breakdown by day of logon.
my $show_days = 1;

# $hist_high sets the height (or width in this case) of histogram columns;
# wider gives more detail but takes more space.
my $hist_high = 40;

my @wday_name = ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat');

# Classification rules: [ label shown in the report, predicate on the raw
# User-Agent string ].  A single agent may satisfy several rules.
my @BROWSER_RULES = (
    # watch out for "cloaked" MSIE, which also announces itself as Mozilla
    [ 'Netscape',    sub { $_[0] =~ /Mozilla/ && $_[0] !~ /MSIE/ } ],
    [ 'Mosaic',      sub { $_[0] =~ /Mosaic/ } ],
    [ 'Microsoft',   sub { $_[0] =~ /Microsoft/ || $_[0] =~ /MSIE/ } ],
    [ 'Lynx',        sub { $_[0] =~ /Lynx/ } ],
    [ 'WebExplorer', sub { $_[0] =~ /WebExplorer/ } ],
    [ 'NetCruiser',  sub { $_[0] =~ /NetCruiser/ } ],
);

my @PLATFORM_RULES = (
    [ 'Windows',   sub { $_[0] =~ /Windows|Microsoft|Win32|Quarterdeck|Chameleon|AIR_Mosaic|SPRY_Mosaic|Win16|WinNT|WindowsNT|Win95/ } ],
    [ 'UnixGUI',   sub { $_[0] =~ /X11|X Window/ } ],
    [ 'UnixText',  sub { $_[0] =~ /Lynx/ } ],
    [ 'Macintosh', sub { $_[0] =~ /Macintosh/ } ],
    [ 'OS/2',      sub { $_[0] =~ /WebExplorer/ } ],
);

# main() -- emit the CGI header and page frame, then one report per *.log
# file found in the current directory.
sub main {
    print "content-type: text/html\n\n";
    print "<html><head><title>Browser/Platform/Time Statistics</title></head>";
    print "<body>\n";
    print "<center><h2>Browser/Platform/Time Statistics</h2></center>";
    print "<hr>\n";
    for my $logfile (glob '*.log') {
        analyze($logfile);
    }
    print "</body></html>\n";
    return;
}

# analyze($filename) -- read one log file and print its HTML report.
# For compatibility with older callers the file name is taken from $_ when
# no argument is passed.
sub analyze {
    my $filename = @_ ? $_[0] : $_;
    print_report($filename, read_log($filename));
    return;
}

# read_log($filename) -- tally one log file.
# Returns a hash ref with keys:
#   browser, platform  label => count, per the rule tables above
#   agent              raw User-Agent string => count
#   hour               two-hour slot (0..11) => count
#   day                weekday (0 = Sunday)  => count
#   error              system error text if the file could not be opened
sub read_log {
    my ($filename) = @_;

    my %stats = (
        browser  => { map { ($_->[0] => 0) } @BROWSER_RULES },
        platform => { map { ($_->[0] => 0) } @PLATFORM_RULES },
        agent    => {},
        hour     => {},
        day      => {},
        error    => undef,
    );

    my $fh;
    unless (open $fh, '<', $filename) {
        $stats{error} = "$!";
        return \%stats;
    }

    my @column;            # column names taken from the first line
    my $seen_header = 0;
    my %weekday_for;       # date string => weekday, so each date is converted once

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;
        $line =~ s/\t +/\t/g;     # remove leading spaces in a field
        $line =~ s/ {2,}/ /g;     # collapse runs of spaces

        if (!$seen_header) {
            @column      = split /\t/, $line;
            $seen_header = 1;
            next;
        }

        my @field = split /\t/, $line;
        for my $i (0 .. $#field) {
            my $name = defined $column[$i] ? $column[$i] : '';
            if ($name eq 'HTTP_USER_AGENT') {
                tally_agent(\%stats, $field[$i]);
            }
            elsif ($name eq 'Time') {
                tally_time(\%stats, $field[$i], \%weekday_for);
            }
        }
    }
    close $fh;

    return \%stats;
}

# tally_agent($stats, $agent) -- count one User-Agent string against every
# browser and platform rule it satisfies, and against its own raw value.
sub tally_agent {
    my ($stats, $agent) = @_;

    for my $rule (@BROWSER_RULES) {
        my ($label, $matches) = @{$rule};
        $stats->{browser}{$label}++ if $matches->($agent);
    }
    for my $rule (@PLATFORM_RULES) {
        my ($label, $matches) = @{$rule};
        $stats->{platform}{$label}++ if $matches->($agent);
    }
    $stats->{agent}{$agent}++;
    return;
}

# tally_time($stats, $stamp, $weekday_for) -- count one "mm/dd/yy hh:mm:ss"
# value into the hour and weekday histograms.  Parts that do not parse are
# skipped rather than counted in a wrong slot.
sub tally_time {
    my ($stats, $stamp, $weekday_for) = @_;
    my ($date, $time) = split / /, $stamp;

    if ($show_hours && defined $time && $time =~ /\A(\d{1,2}):/ && $1 < 24) {
        $stats->{hour}{ int($1 / 2) }++;    # 12 two-hour slots
    }

    if ($show_days && defined $date && $date =~ m{\A(\d+)/(\d+)/(\d+)\z}) {
        my ($month, $day, $year) = ($1, $2, $3);
        if (!exists $weekday_for->{$date}) {
            $year = expand_year($year);
            $weekday_for->{$date} = dow(mdy_jd("$month/$day/$year 00:00:00"));
        }
        $stats->{day}{ $weekday_for->{$date} }++;
    }
    return;
}

# expand_year($year) -- map a two-digit log year onto 1970..2069; years of
# 100 and above are returned unchanged.
sub expand_year {
    my ($year) = @_;
    return $year if $year >= 100;
    return $year + ($year < 70 ? 2000 : 1900);
}

# print_report($filename, $stats) -- print the HTML section for one log.
# "Total Logged Hits" is the number of records that carried a User-Agent
# field; every percentage is relative to that total.
sub print_report {
    my ($filename, $stats) = @_;

    my $hits = 0;
    $hits += $_ for values %{ $stats->{agent} };

    print "<hr>\n<h3>Statistics for ", escape_html($filename), "</h3>\n";
    if (defined $stats->{error}) {
        print "<p>Log file could not be opened: ",
            escape_html($stats->{error}), "</p>\n";
        return;
    }
    print "<p>Total Logged Hits: $hits</p>\n";
    return if $hits <= 0;    # also keeps the percentage divisions safe

    print "<table cellpadding=\"8\"><tr valign=\"top\">\n";

    print "<td>\n";
    print_summary_table('Browser summary', 'Browser',
        [ map { [ $_->[0], $stats->{browser}{ $_->[0] } ] } @BROWSER_RULES ],
        $hits);
    print "</td>\n";

    print "<td>\n";
    print_summary_table('Platform summary', 'Platform',
        [ map { [ $_->[0], $stats->{platform}{ $_->[0] } ] } @PLATFORM_RULES ],
        $hits);
    print "</td>\n";

    my $maxday = max(0, values %{ $stats->{day} });
    if ($show_days && $maxday > 0) {
        print "<td>\n<table border=\"1\">\n";
        print "<caption>Daily Hits</caption>\n";
        print "<tr><th>Day</th><th>Data</th></tr>\n";
        for my $wd (0 .. 6) {
            printf "<tr><td>%s</td><td><tt>%s</tt></td></tr>\n",
                $wday_name[$wd],
                histogram_bar($stats->{day}{$wd} || 0, $maxday);
        }
        print "</table>\n</td>\n";
    }

    my $maxhour = max(0, values %{ $stats->{hour} });
    if ($show_hours && $maxhour > 0) {
        print "<td>\n<table border=\"1\">\n";
        print "<caption>Hourly Hits</caption>\n";
        print "<tr><th>From</th><th>To</th><th>Data</th></tr>\n";
        for my $slot (0 .. 11) {
            printf "<tr><td>%02d:00</td><td>%02d:59</td><td><tt>%s</tt></td></tr>\n",
                $slot * 2, $slot * 2 + 1,
                histogram_bar($stats->{hour}{$slot} || 0, $maxhour);
        }
        print "</table>\n</td>\n";
    }

    print "</tr></table>\n";

    if ($show_browsers) {
        print "<table border=\"1\">\n";
        print "<caption>Detail by Browser Type</caption>\n";
        print "<tr><th>Browser</th><th>Hits</th><th>%</th></tr>\n";
        my $count_of = $stats->{agent};
        # Busiest agents first; ties broken by name so output is stable.
        my @agents = sort {
            $count_of->{$b} <=> $count_of->{$a} or $a cmp $b
        } keys %{$count_of};
        for my $agent (@agents) {
            # Agent strings are supplied by the client: escape before output.
            printf "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%.2f</td></tr>\n",
                escape_html($agent), $count_of->{$agent},
                100 * $count_of->{$agent} / $hits;
        }
        print "</table>\n";
    }
    return;
}

# print_summary_table($caption, $heading, $rows, $hits) -- print a table of
# [label, count] rows with percentages of $hits, followed by an
# "Accounted for" row holding the column total.  $hits must be non-zero.
sub print_summary_table {
    my ($caption, $heading, $rows, $hits) = @_;
    my $row_format =
        "<tr><td>%s</td><td align=\"right\">%d</td><td align=\"right\">%.2f</td></tr>\n";

    print "<table border=\"1\">\n";
    print "<caption>$caption</caption>\n";
    print "<tr><th>$heading</th><th>Hits</th><th>%</th></tr>\n";

    my $accounted = 0;
    for my $row (@{$rows}) {
        my ($label, $count) = @{$row};
        printf $row_format, $label, $count, 100 * $count / $hits;
        $accounted += $count;
    }
    printf $row_format, 'Accounted for', $accounted, 100 * $accounted / $hits;
    print "</table>\n";
    return;
}

# histogram_bar($count, $max) -- a leading space followed by '|' marks
# scaled so that $count == $max yields exactly $hist_high marks.
sub histogram_bar {
    my ($count, $max) = @_;
    return ' ' if $max <= 0 || $count <= 0;
    # Multiply before dividing so a full-height bar is not pushed one mark
    # over by floating-point rounding of $hist_high / $max.
    return ' ' . ('|' x ceil($count * $hist_high / $max));
}

# escape_html($text) -- return $text with the HTML metacharacters
# & < > " replaced by entities; undef is treated as the empty string.
sub escape_html {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# mdy_jd("mm/dd/yyyy hh:mm:ss") -- convert a calendar date and time to a
# Julian Day number (fractional; a day starts at .5).  Dates before
# 15 October 1582 are taken as Julian calendar, later ones as Gregorian.
# Missing parts count as 0.  The year is used literally -- callers holding
# two-digit years must expand them first.
sub mdy_jd {
    my ($stamp) = @_;
    $stamp = '' unless defined $stamp;

    my ($date, $time) = split ' ', $stamp;
    my ($month, $day, $year) =
        map { defined $_ && length $_ ? $_ : 0 }
        (split m{/}, defined $date ? $date : '')[0 .. 2];
    my ($hour, $minute, $second) =
        map { defined $_ && length $_ ? $_ : 0 }
        (split /:/, defined $time ? $time : '')[0 .. 2];

    # Treat January and February as months 13 and 14 of the previous year.
    if ($month < 3) {
        $year  -= 1;
        $month += 12;
    }

    my $correction = 0;    # stays 0 for the Julian calendar
    if ($day + $month * 100 + $year * 10000 >= 15821015) {    # Gregorian
        my $century = int($year / 100);
        $correction = 2 - $century + int($century / 4);
    }

    my $jd = int(365.25 * ($year + 4716))
           + int(30.6001 * ($month + 1))
           + $day + $correction - 1524.5;
    $jd += $hour / 24 + $minute / 1440 + $second / 86400;
    return $jd;
}

# dow($jd) -- day of week for a Julian Day number: 0 = Sunday .. 6 = Saturday.
# Civil days run from .5 to .5, hence the 1.5 offset before flooring; this
# gives the right answer for any time of day, not only for midnight.
sub dow {
    my ($jd) = @_;
    return floor($jd + 1.5) % 7;
}

# Run as a script; stay quiet when loaded with require (e.g. by a test).
main() unless caller;

1;