#!/usr/local/bin/perl5
# ------------------------------------------------------------------------
#
# readlogII - a tool for reading part of the www-servers access log
#             and displaying it in html
#
# Usage:  readlogII filename > log.html   (writes to std out)
# in WWW: HREF = ".../readlogII?filename"
#         (an optional "&machine" suffix restricts the report to one host)
#
# copyright (c) 1994 readlog.cgi by Brigitte Jellinek
# this program is available under the GNU General Public License
# e-mail : bjelli@cosy.sbg.ac.at
# HREF = "http://www.cosy.sbg.ac.at/people/bjelli.html"
#
# copyright (c) 1998 readlogII by Christian rossi
# this program is available under the GNU General Public License
# e-mail : rossi@loria.fr
# HREF = "http://www.loria.fr/~rossi"
# country name, web link, traceroute...
# based on readlog.cgi by Brigitte Jellinek
# Version 4.0
#
# version 3.0 - cgi conform, no regex (those are handled by mreadlog)
# version 2.6 - sorts by date, backlinks to files
#
# use at your own risk ! - if you enhance it, please give me a copy
#
# ------------------------------------------------------------------------
# This script scans the access log for the requested filename (entries of
# the current month only) and prints a summary of the hosts and files found.
#
# NOTE(review): the copy of this script that was reviewed had its HTML tags
# and its <FILEHANDLE> read operators stripped.  The visible text of every
# message is preserved; the markup around it (H1/H2/UL/LI/A/ISINDEX) and the
# loops marked "reconstructed" below are a best-effort restoration and should
# be compared with an undamaged copy if one exists.

use strict;
use warnings;

our $SERVER_ROOT = "/usr/www/apache-httpd/logs";
our $LOG_FILE    = $SERVER_ROOT . "/access_log";
#$SERVER_ROOT = "/local/www/texte/statistiques/logs";
#$LOG_FILE    = $SERVER_ROOT . "/access_log.0899";
#$SERVER_ROOT = "/local/www/cgi-bin/rossi";
#$LOG_FILE    = $SERVER_ROOT . "/toto.txt";

$| = 1;    # unbuffered output, so the page appears while the log is read

# a few tables for storing the information we find:
our @last         = ();    # last access ("Mon DD") by host
our @count        = ();    # no of accesses by host
our %index        = ();    # key: host (as found in log), value: index into the arrays above
our %indexcountry = ();    # domain suffix -> country name, e.g. fr -> France
our %indexwww     = ();    # domains whose sub-sites run their own www server
our %fichier      = ();    # requested file -> number of accesses

# timestamp of the last log entry counted; printed in the footer
our ($day_of_month, $month, $year, $time_of_day) = ('', '', '', '');

# for sorting by date
our %no_of_month = (
    Jan => 1, Feb => 2,  Mar => 3,  Apr => 4,
    May => 5, Jun => 6,  Jul => 7,  Aug => 8,
    Sep => 9, Oct => 10, Nov => 11, Dec => 12,
);

my @MONTH_NAMES = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

# Sort comparator: orders hosts (keys of %index) by their last access date.
# Only month and day are compared, exactly as stored in @last ("Mon DD").
sub bydate
{
    my ($amonth, $ano) = split / /, $last[ $index{$a} ];
    my ($bmonth, $bno) = split / /, $last[ $index{$b} ];
    return $ano <=> $bno if $amonth eq $bmonth;
    return $no_of_month{$amonth} <=> $no_of_month{$bmonth};
}

# Escape the characters that are special in HTML text and attribute values.
# Host names and file names come from the log / the query string and must
# never be printed raw.
sub escape_html
{
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Load %indexcountry from a file of alternating lines: domain code, then
# country name.  The path defaults to "listef.txt" in the current directory.
# A missing file is reported on STDERR and leaves the table empty.
sub init_country
{
    my ($path) = @_;
    $path = "listef.txt" unless defined $path;

    my $fh;
    unless (open $fh, '<', $path) {
        warn "readlogII: cannot open $path: $!\n";
        return;
    }
    while (defined(my $code = <$fh>)) {
        chomp $code;
        my $country = <$fh>;
        last unless defined $country;    # odd number of lines: ignore the dangling code
        chomp $country;
        $indexcountry{$code} = $country;
    }
    close $fh;
    return;
}

# Load %indexwww from a file with one domain per line.  The path defaults
# to "listewww.txt" in the current directory.
sub init_www
{
    my ($path) = @_;
    $path = "listewww.txt" unless defined $path;

    my $fh;
    unless (open $fh, '<', $path) {
        warn "readlogII: cannot open $path: $!\n";
        return;
    }
    while (defined(my $site = <$fh>)) {
        chomp $site;
        $indexwww{$site} = 1;
    }
    close $fh;
    return;
}

# Return the country (or site) name for a host: the host name is looked up
# in %indexcountry, then the same name with its leftmost label removed, and
# so on.  Returns "Unknown" when no suffix is listed.
sub witchcountry
{
    my ($caller) = @_;
    my $name = defined $caller ? $caller : '';

    while ($name ne '') {
        my $country = $indexcountry{$name};
        return $country if defined $country && $country ne '';
        last unless $name =~ s/^[^.]*\.//;    # drop the leftmost label
    }
    return "Unknown";
}

# Return the address (without "http://") to link a host to.
#  - hosts ending in a digit (unresolved IP addresses) get a traceroute query;
#  - otherwise leading labels are dropped until the remainder is listed in
#    %indexwww (then the label just dropped is kept: host.iecn.u-nancy.fr with
#    u-nancy.fr listed gives www.iecn.u-nancy.fr) or only two labels remain.
#http://beacon.webtv.net/cgi-bin/nph-traceroute.cgi?
#http://hplyot.obspm.fr/cgi-bin/nph-traceroute?
sub witchwww
{
    my ($caller) = @_;

    if ($caller =~ /[0-9]$/) {
        #return "hplyot.obspm.fr/cgi-bin/nph-traceroute?$caller";
        #return "www.freenix.fr/cgi-bin/nph-traceroute?$caller";
        return "www.nic.fr/cgi-bin/nph-prtraceroute?query=$caller&submit=Envoyer";
    }

    my @labels = split /\./, $caller;
    while (@labels > 2) {
        my $dropped = shift @labels;
        if ($indexwww{ join(".", @labels) }) {
            unshift @labels, $dropped;
            last;
        }
    }
    return "www." . join(".", @labels);
}

# two little subroutines for formatting output

# Print one host line: name, number of accesses and last access date.
# NOTE(review): the anchor is reconstructed from the on-page legend (machine
# names link to the site's web server / a traceroute, and show the country).
# The original used the browser status bar; TITLE is used here instead.
# The legend also mentions a link on the request count; its target URL is not
# recoverable from the reviewed copy, so it is not emitted.
sub pr
{
    my ($caller) = @_;
    my $i = $index{$caller};

    my $host    = escape_html($caller);
    my $url     = escape_html("http://" . witchwww($caller));
    my $country = escape_html(witchcountry($caller));

    print qq{<A HREF="$url" TITLE="$country">$host</A> called };
    if ($count[$i] == 1) {
        print "once, on $last[$i]\n";
    }
    elsif ($count[$i] == 2) {
        print "twice, last on $last[$i]\n";
    }
    else {
        print "$count[$i] times, last on $last[$i]\n";
    }
    return;
}

# Print a headed list of all hosts still in %index that match $pattern
# (a regular expression), sorted by last access, and remove them from
# %index so that later, broader categories do not list them again.
# Prints nothing when no host matches.
# NOTE(review): the list loop is reconstructed; only the heading and the
# surrounding prints survived in the reviewed copy.
sub prli
{
    my ($heading, $pattern) = @_;
    my @l = grep { /$pattern/ } sort bydate keys %index;
    return unless @l;

    my $nombre = @l;
    print "<H2>$heading ($nombre sites)</H2>\n";
    print "<UL>\n";
    foreach my $host (@l) {
        print "<LI>";
        pr($host);
        delete $index{$host};
    }
    print "</UL>\n";
    return;
}

# Print the introduction page shown when no filename was given.
# NOTE(review): <ISINDEX> is reconstructed; the page asks the user to enter a
# document name and the script reads it from $ARGV[0], which is how ISINDEX
# queries reach a CGI program.
sub print_explanation
{
    print <<'EndOfExplanation';
Content-type: text/html

<HTML><HEAD><TITLE>Read Log plus</TITLE></HEAD>
<BODY>
<H1>Read Log</H1>
<ISINDEX>
This is a tool for reading the httpd access log of documents. Enter the name of the document whos log you want to see. (the whole server specifiv part of the URL: e.g. '/people/bjelli.html'
<P>
This only works if the document is on our local Server The source code of this script is available.
<P>
share and enjoy !
EndOfExplanation
    return;
}

# Read the access log and fill @last, @count, %index and %fichier with the
# entries of the current month whose requested path contains $filename and,
# when $machine is not empty, whose host contains $machine.
# Both are matched as literal substrings; nothing is passed to a shell.
# Returns the number of matching entries, or undef if the log is unreadable.
sub scan_log
{
    my ($filename, $machine) = @_;

    my $fh;
    unless (open $fh, '<', $LOG_FILE) {
        warn "readlogII: cannot open $LOG_FILE: $!\n";
        return;
    }

    my $current_month = $MONTH_NAMES[ (localtime)[4] ];
    my $global_count  = 0;    # whole no of accesses to $filename

    while (my $line = <$fh>) {
        chomp $line;

        # common log format: host ident user [date] "request" status bytes
        my ($caller, $date, $request) =
            $line =~ /(\S*) \S* \S* \[([^\]]*)\] "([^"]*)" \S* \S*/
            or next;
        my ($d, $m, $y, $t) = $date =~ m!^(..)/(...)/(....):(..:..):..!
            or next;
        next unless $m eq $current_month;
        next if $machine ne '' && index($caller, $machine) < 0;

        my (undef, $file) = split / /, $request;
        next unless defined $file && index($file, $filename) >= 0;

        ($day_of_month, $month, $year, $time_of_day) = ($d, $m, $y, $t);
        $global_count++;

        # unqualified host names are local machines
        $caller .= ".loria.fr" unless $caller =~ /\./;

        if (defined $index{$caller}) {    # caller already in array
            my $i = $index{$caller};
            $last[$i] = "$m $d";
            $count[$i]++;
        }
        else {
            push @last,  "$m $d";
            push @count, 1;
            $index{$caller} = $#count;
        }
        $fichier{$file}++;
    }
    close $fh;
    return $global_count;
}

# Print the report page for one query string ("filename" or
# "filename&machine", possibly URL-encoded).
sub report
{
    my ($query) = @_;

    #A { text-decoration: none }
    print <<"EOM";
Content-type: text/html

<HTML><HEAD><TITLE>CCH Read Log</TITLE></HEAD>
<BODY>
EOM

    my $filenamemach = $query;                              # that's what we're looking for
    $filenamemach =~ s/\\//g;                               # unescape characters
    $filenamemach =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;   # unescape characters
    my ($filename, $machine) = split /&/, $filenamemach;
    $filename = '' unless defined $filename;
    $machine  = '' unless defined $machine;

    my $filename_html = escape_html($filename);
    my $machine_html  = escape_html($machine);

    if ($machine eq "") {
        print "<H1>Log d'activité du serveur WWW pour la partie '${filename_html}'</H1>\n";
    }
    else {
        print "<H1>Log d'activité du serveur WWW pour la partie '${filename_html}'<BR>\n";
        print "et la machine '${machine_html}'</H1>\n";
    }

    print "<UL>\n";
    print "<LI>le nom du site ou du pays se trouve dans la barre d'état lorsque l'on se place sur un nom de machine\n";
    print "<LI>le serveur WEB des sites référencés est accessible en cliquant sur les noms de machine\n";
    print "<LI>pour les machines sans nom un traceroute est effectué depuis Paris\n";
    print "<LI>en cliquant sur le nombre de requête l'on obtient la liste de tous les fichier accédés par cette machine\n";
    print "</UL>\n";
    print "<UL>\n";
    print "<LI>les fichiers accédés sont aussi disponibles, il suffit de cliquer dessus\n";
    print "<LI>en cliquant sur le nombre après le nom du fichier l'on obtient la liste de toutes les machines y ayant accédé\n";
    print "</UL>\n";

    init_country();
    init_www();

    my $global_count = scan_log($filename, $machine);
    unless (defined $global_count) {
        print "<P>Impossible de lire le fichier de log.\n";
        return;
    }

    my $nombre = keys %index;
    print "<P>Trouvé $global_count références et $nombre sites.\n";

    # Order matters: each call removes the hosts it lists from %index, so the
    # narrow categories must come before the broad ones (e.g. LORIA before .fr).
    prli("Le LORIA", '\.loria\.fr$');
    #prli("Nancy",'\..*nancy.*\.fr$|\.ciril\.fr$|\.nancy\.inra\.fr$|\.nancy\.inserm\.fr$|\.inist\.fr$');
    prli("Nancy", '.*inalf\.cnrs\.fr$|.*nancy.*\.fr$|\.ciril\.fr$|\.inist\.fr$|\.crai.*\.fr$');
    prli("La région", '.*metz.*\.fr$|\.*strasbg*\.fr$');
    prli("L'INRIA", '\.inria.*\.fr$|\.irisa\.fr$');
    prli("La France", '\.fr$');
    prli("L'Europe", '\.bg$|\.dk$|\.hr$|\.tr$|\.lt$|\.gb$|\.ro$|\.ie$|\.gr$|\.hu$|\.lu$|\.uk$|\.de$|\.at$|\.ch$|\.fi$|\.se$|\.es$|\.it$|\.no$|\.be$|\.pt$|\.pl$|\.ru$|\.nl$');
    prli("Les USA", '\.us$|\.mil$|\.edu$|\.gov$');
    prli("Canada", '\.ca$');
    prli("Japon", '\.jp$');
    prli("Les organisations", '\.org$');
    prli("Sites commerciaux", '\.com$');
    prli("Les networks", '\.net$');
    prli("Sites inconnues", '\.*[0-9]$');

    # whatever is left belongs to no category above
    $nombre = keys %index;
    if ($nombre > 0) {
        print "<H2>Les autres pays ($nombre sites)</H2>\n";
        print "<UL>\n";
        foreach my $host (sort bydate keys %index) {
            print "<LI>";
            pr($host);
        }
        print "</UL>\n";
    }

    # NOTE(review): this loop is reconstructed.  %fichier is filled while the
    # log is read and the legend describes clickable file names followed by a
    # count; the link on the count itself is not recoverable and is omitted.
    print "<H2>Liste des fichiers</H2>\n";
    print "<UL>\n";
    foreach my $file (sort keys %fichier) {
        my $file_html = escape_html($file);
        my $hits      = $fichier{$file};
        print qq{<LI><A HREF="$file_html">$file_html</A> ($hits)\n};
    }
    print "</UL>\n";
    return;
}

# Print the page footer.  The "à jour au" timestamp is that of the last log
# entry counted (empty when nothing was counted).
sub print_footer
{
    # site-local helper that prints the current date in French; a missing
    # helper only leaves the date blank
    my $datefr = `/usr/local/bin/datefr`;
    $datefr = '' unless defined $datefr;

    print <<"EndOfFooter";
<HR>
Script de Brigitte Jellinek, modifié par Christian Rossi<BR>
Cette page est à jour au $day_of_month $month $year $time_of_day<BR>
et a été crée le $datefr
</BODY></HTML>
EndOfFooter
    return;
}

# Main Program.
sub main
{
    my @args = @_;

    if (!defined $args[0] || $args[0] eq '') {
        # no argument (filename to search): introduction.
        print_explanation();
    }
    else {
        report($args[0]);
    }
    print_footer();
    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main(@ARGV) unless caller;

#
# That's all folks share and enjoy !
###############################################################################