#!/usr/cs/bin/perl

# This script makes a lot of calls to Unix to extract a lot of web page
# statistics and email them to the user. It definitely must be modified
# to work for anyone but me. Think of it as a starting point. :)

# Written by David Coppit (david@coppit.org,
# http://www.coppit.org/index.html)
# Please send me any modifications you make. (for the better, that is. :) Feel
# free to modify, adapt, or redistribute, but please leave original authorship
# credit to me.

# Usage: mail_access

# Use an empty list separator: arrays interpolated into strings further down
# (the mail body) are joined with nothing between their elements.
$" = "";

# Preprocess the access log file.
#
# Two scratch files are produced:
#   /tmp/dwc3q.raw - log lines matching "dwc3q.*html" that also contain
#                    today's date in dd/Mon/yyyy form
#   /tmp/dwc3q.non - the same lines minus those matching the mamba/adder/
#                    viper/cobra ".cs" host patterns
#
# The commands live in a non-interpolating here-document so the shell sees
# them exactly as written. In an interpolating (backtick) here-document Perl
# would turn "\." into ".", and the host filters would then match any
# character where a literal dot is intended.
my $preprocess = <<'EOC';
grep "dwc3q.*html" /users/webman/server/httpd_current/logs/access_log | grep "`date '+%d/%b/%Y'`" > /tmp/dwc3q.raw

grep -v "mamba.*\.cs" /tmp/dwc3q.raw | grep -v "adder.*\.cs" | grep -v "viper.*\.cs" | grep -v "cobra.*\.cs" > /tmp/dwc3q.non
EOC
print `$preprocess`;

# A non-zero exit is normal here (grep exits 1 when nothing matches), so only
# the "shell could not be started" case is reported.
warn "Could not run the log preprocessing commands: $!\n" if $? == -1;

# Gather the statistics by piping the scratch files through standard Unix
# tools. The pipeline tails are kept in non-interpolating strings so that
# awk's $N field references can be written without escaping.

# Count of distinct values in the first field (presumably the client host).
my $count_distinct_hosts = q(awk '{print $1}' | sort -u | wc -l | awk '{print $1}');

# First-field values, most frequently seen first, one per line.
my $hosts_by_frequency = q(awk '{print $1}' | sort | uniq -c | sort -rn | awk '{print $2}');

# "count value" lines for the seventh field (presumably the requested page),
# highest count first.
my $pages_by_frequency = q(sort -k 7,7 | awk '{print $7}' | uniq -c | awk '{print $1" "$2}' | sort -rn);

$total   = qx{cat /tmp/dwc3q.raw | $count_distinct_hosts};
$non_uva = qx{cat /tmp/dwc3q.non | $count_distinct_hosts};

# Process Homepage
@visitors         = qx{cat /tmp/dwc3q.non | $hosts_by_frequency};
@popular_homepage = qx{cat /tmp/dwc3q.non | $pages_by_frequency};

# Get the times of access for each visitor.
#
# For every visitor (in the order already established in @visitors) one
# "HH:MM:SS visitor page\n" entry per log line is appended to
# @visitors_homepage.
#
# The filtered log is read once, directly in Perl, and its lines are grouped
# by their first field. Visitor names come from the log itself, so they are
# deliberately never placed on a shell command line: a name containing shell
# metacharacters could otherwise run arbitrary commands. Matching the first
# field exactly also avoids picking up lines that merely contain the name
# somewhere (another host, or a request path).
chomp @visitors;

my %entries_for;    # first field of a log line => list of formatted entries
if (open(my $log, '<', '/tmp/dwc3q.non')) {
  while (my $line = <$log>) {
    my @fields = split ' ', $line;
    next unless @fields;

    # Time of day from the bracketed timestamp; assumes the usual
    # [dd/Mon/yyyy:HH:MM:SS zone] layout of the access log.
    my $time = ($line =~ m{\[[^\]:]*:(\d+:\d+:\d+)}) ? $1 : '';

    # Seventh whitespace-separated field; empty when the line is too short.
    my $page = defined $fields[6] ? $fields[6] : '';

    push @{ $entries_for{$fields[0]} }, "$time $fields[0] $page\n";
  }
  close($log);
}
else {
  # Keep going with no per-visitor detail rather than aborting the report.
  warn "Cannot read /tmp/dwc3q.non: $!\n";
}

foreach my $name (@visitors) {
  push @visitors_homepage, @{ $entries_for{$name} || [] };
}

# Strip the trailing newline from both counts, and from the final entry of
# each list. Only the last entry is trimmed: the others rely on their own
# newline to separate the lines when the list is printed.
chomp($total, $non_uva);
chomp $popular_homepage[-1] if @popular_homepage;
chomp $visitors_homepage[-1] if @visitors_homepage;

# Write the mail message to a file.
#
# The message consists of its own To:/From:/Fcc: headers followed by the
# statistics. The two arrays are interpolated using the current list
# separator ($"), so their elements appear back to back; every element except
# the last still carries its own newline.
#
# Failure to create or flush the file is fatal: continuing would try to mail
# a file that is missing or incomplete.
open(my $mail, '>', '/tmp/dwc3q.mail')
  or die "Cannot open /tmp/dwc3q.mail for writing: $!\n";
print {$mail} <<_EOF_;
To: "David Coppit" <coppit\@cs.virginia.edu>
From: "David Coppit" <coppit\@cs.virginia.edu>
Fcc:

Total visitors: $total
Total visits, non-UVa: $non_uva

Popular Pages on Homepage:
@popular_homepage

Frequent Visitors to Homepage:
@visitors_homepage

_EOF_
# Buffered write errors only surface at close, so check it too.
close($mail)
  or die "Cannot finish writing /tmp/dwc3q.mail: $!\n";

# Send the mail.
#
# The message file is fed to mailx on standard input with the subject
# "Page Stats". NOTE(review): -t presumably makes mailx take the recipients
# from the To: header in the file - confirm against the local mailx.
#
# Anything mailx prints is captured so that it can be shown if the send
# fails; previously a failure went completely unnoticed.
my $mailx_output = `/usr/bin/mailx -s \"Page Stats\" -t < /tmp/dwc3q.mail`;
if ($? == -1) {
  warn "Could not run /usr/bin/mailx: $!\n";
}
elsif ($? != 0) {
  warn "mailx failed (wait status $?): $mailx_output\n";
}

# Remove the scratch files created above. The result is not checked, so a
# file that is already gone is silently ignored.
unlink("/tmp/dwc3q.raw", "/tmp/dwc3q.non", "/tmp/dwc3q.mail");
