[LON-CAPA-cvs] cvs: modules /matthew/activitylog parse_activity_log.pl

matthew lon-capa-cvs@mail.lon-capa.org
Mon, 26 Jul 2004 19:57:39 -0000


This is a MIME encoded message

--matthew1090871859
Content-Type: text/plain

matthew		Mon Jul 26 15:57:39 2004 EDT

  Added files:                 
    /modules/matthew/activitylog	parse_activity_log.pl 
  Log:
  First stab at processing activity logs into MySQL.
  Uses the "loncapa" database.  Currently expects lonmysql to be present in
  @INC, but this will change.
  Creates 5 tables, 4 for storage/creation of ids (student, resource, action, 
  and machines), and 1 to store the actual activity log data.
  
  
--matthew1090871859
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040726155739.txt"


Index: modules/matthew/activitylog/parse_activity_log.pl
+++ modules/matthew/activitylog/parse_activity_log.pl
#!/usr/bin/perl
#
# The LearningOnline Network
#
# $Id: parse_activity_log.pl,v 1.1 2004/07/26 19:57:39 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
###############################################################################
#
# Expects
#
# ../key/$class.key - key file $username:$keynumber
# ../rawdata/$class.log - log file
# ../rawdata/$class.seq - sequence file
# ../data writable
# ------------------------------------------------------------------ Course log

#
# Exit codes
#   0     Everything is okay
#   1     Another copy is running on this course
#   2     Activity log does not exist
#   3     Unable to connect to database
#   4     Unable to create database tables
#   5     Unspecified error?
#

use strict;
use DBI;
use lonmysql();
use Time::HiRes();
use Getopt::Long();

#
# Determine parameters
my ($help,$course,$domain,$drop,$file,$timerun);
&Getopt::Long::GetOptions( "course=s"  => \$course,
                           "domain=s"  => \$domain,
                           "help"      => \$help,
                           "logfile=s" => \$file,
                           "timerun"      => \$timerun,
                           "drop"      => \$drop);
if (! defined($course) || $help) {
    print<<USAGE;
$0
Process a lon-capa activity log into a database.
Parameters:
   course             Required
   domain             Optional
   drop               optional   if present, drop all course 
                                 specific activity log tables.
   file               optional   Specify the file to parse, including path
   time               optional   if present, print out timing data
Examples:
  $0 -course=123456abcdef -domain=msu
  $0 -course=123456abcdef -file=activity.log
USAGE
    exit;
}

my %perlvar;
&initialize_configuration();
if (! defined($domain) || $domain eq '') {
    $domain = $perlvar{'lonDefDomain'};
}

&update_process_name($course.'@'.$domain);

my $filename;
if ($file) {
    $filename = $file;
} else {
    $filename = &get_filename($course,$domain);
}

my $newfilename = $filename.'.processing';
if (! -e $filename) {
    # warn "$filename does not exist";
    exit 1;
}
if (-e $newfilename) {
    # warn "$newfilename exists";
    exit 2;
}

rename($filename,$filename.'.processing');

#
# Table definition
my $prefix = $course.'_'.$domain.'_';
my $student_table = $prefix.'students';
my $student_table_def = 
{ id => $student_table,
  permanent => 'no',
  columns => [
              { name => 'student_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc => 'yes', },
              { name => 'student',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL', },
              ],
      'PRIMARY KEY' => ['student_id',],
          };

my $res_table = $prefix.'resource';
my $res_table_def = 
{ id => $res_table,
  permanent => 'no',
  columns => [{ name => 'res_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'resource',
                type => 'MEDIUMTEXT',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['res_id'],
};

my $action_table = $prefix.'actions';
my $action_table_def =
{ id => $action_table,
  permanent => 'no',
  columns => [{ name => 'act_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'action',
                type => 'VARCHAR(100)',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['act_id',], 
};

my $machine_table = $prefix.'machine_table';
my $machine_table_def =
{ id => $machine_table,
  permanent => 'no',
  columns => [{ name => 'machine_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'machine',
                type => 'VARCHAR(100)',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['machine_id',],
 };

my $activity_table = $prefix.'activity';
my $activity_table_def = 
{ id => $activity_table,
  permanent => 'no',
  columns => [
              { name => 'res_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',},
              { name => 'time',
                type => 'DATETIME',
                restrictions => 'NOT NULL',},
              { name => 'student_id',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL',},
              { name => 'action_id',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL',},
              { name => 'idx',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'machine_id',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL',},
              { name => 'action_values',
                type => 'MEDIUMTEXT', },
              ], 
      'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
};

# 
&Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'},
                                               $perlvar{'lonSqlAccess'});
if (!&Apache::lonmysql::verify_sql_connection()) {
    warn "Unable to connect to MySQL database.";
    exit 3;
}

if ($drop) {
    &drop_tables();
}

if (! &create_tables()) {
    warn "Unable to create tables";
    exit 4;
}

&read_id_tables();

if (!&process_courselog($newfilename)) {
    exit 5;
}

exit 0;   # Everything is okay, so end here before it gets worse.

########################################################
########################################################
##
##                 Various Subroutines
##
########################################################
########################################################
sub process_courselog {
    my ($inputfile) = @_;
    open(IN,$inputfile) or die ('Unable to open '.$inputfile.' for input');
    my ($linecount,$insertcount);
    my $dbh = &Apache::lonmysql::get_dbh();
    #
    # Timing variables
    my %Timing;
    my $starttime;
    
    while (my $line=<IN>){
        last if ($linecount > 500);
        if ($timerun) { $starttime = Time::HiRes::time; }
        chomp($line);
        $linecount++;
        # print $linecount++.$/;
        my ($timestamp,$host,$log)=split(/\:/,$line,3);
        if ($timerun){
            push(@{$Timing{'splitline'}},Time::HiRes::time-$starttime);
        }
        #
        # $log has the actual log entries; currently still escaped, and
        # %26(timestamp)%3a(url)%3a(user)%3a(domain)
        # then additionally
        # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
        # or
        # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
        #
        # get delimiter between timestamped entries to be &&&
        $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
        $log = &unescape($log);
        # now go over all log entries 
        my $machine_id = &get_id($machine_table,'machine',$host);
        foreach (split(/\&\&\&/,$log)) {
	    my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
            if (! defined($res) || $res =~ /^\s*$/) {
                $res = '/adm/roles';
                $action = 'log in';
            }
            if ($res =~ m|^/prtspool/|) {
                $res = '/prtspool/';
            }
            if (! defined($action) || $action eq '') {
                $action = 'view';
            }
            my $student = $uname.':'.$udom;
            if ($timerun) {$starttime = Time::HiRes::time;}
            my $student_id = &get_id($student_table,'student',$student);
            my $res_id = &get_id($res_table,'resource',$res);
            my $action_id = &get_id($action_table,'action',$action);
            my $sql_time = &Apache::lonmysql::sqltime($time);
            my $values = $dbh->quote(join('',@values));
            if ($timerun) {
                push(@{$Timing{'get_id'}},Time::HiRes::time-$starttime);
            }
            #
            if ($timerun) {$starttime = Time::HiRes::time;}
            my $row = [$res_id,
                       $sql_time,
                       $student_id,
                       $action_id,
                       undef,        # idx
                       $machine_id,
                       $values];
            my $result = &Apache::lonmysql::store_row($activity_table,
                                                      $row);
            if ($timerun) {
                push(@{$Timing{'insert'}},Time::HiRes::time-$starttime);
            }
            #
            $insertcount++;
            if (! defined($result)) {
                warn "Got error of ".&Apache::lonmysql::get_error().$/;
                warn 
                    'res_id     => :'.$res_id.':'.$/.
                    'sql_time   => :'.$sql_time.':'.$/.
                    'student_id => :'.$student_id.':'.$/.
                    'action_id  => :'.$action_id.':'.$/.
                    'machine_id => :'.$machine_id.':'.$/.
                    'values     => :'.join(',',@values);
            }
        }
    }
    close IN;
    print "Number of lines: ".$linecount.$/;
    print "Number of inserts: ".$insertcount.$/;
    if ($timerun) {
        print "Timing Data:".$/;
        while (my($k,$v) = each(%Timing)) {
            my $Str .= '  '.$k.'  '.scalar(@$v).'   ';
            my $sum;
            foreach (@$v) {
                $sum+=$_;
            }
            $Str .= $sum.$/;
            print $Str;
        }
    }
}

sub initialize_configuration {
    # Fake it for now:
    $perlvar{'lonSqlUser'} = 'www';
    $perlvar{'lonSqlAccess'} = 'localhostkey';
    $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers';
    $perlvar{'lonDefDomain'} = '103';
}

sub update_process_name {
    my ($text) = @_;
    $0 = 'parse_activity_log.pl: '.$text;
}

sub get_filename {
    my ($course,$domain) = @_;
    my ($a,$b,$c,undef) = split('',$course,4);
    return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
}

sub create_tables {
    foreach my $table ($student_table_def,$res_table_def,
                       $action_table_def,$machine_table_def,
                       $activity_table_def) {
        my $table_id = &Apache::lonmysql::create_table($table);
        if (! defined($table_id)) {
            warn "Unable to create table ".$table->{'id'}.$/;
            warn &Apache::lonmysql::build_table_creation_request($table).$/;
            return 0;
        }
    }
    return 1;
}

sub drop_tables {
    foreach my $table ($student_table_def,$res_table_def,
                       $action_table_def,$machine_table_def,
                       $activity_table_def) {
        my $table_id = $table->{'id'};
        &Apache::lonmysql::drop_table($table_id);
    }
}

#################################################################
#################################################################
##
## Database item id code
##
#################################################################
#################################################################
{ # Scoping for ID lookup code
    my %IDs;

sub read_id_tables {
    foreach my $table ($student_table,$res_table,$action_table,$machine_table){
        my @Data = &Apache::lonmysql::get_rows($table);
        foreach my $row (@Data) {
            $IDs{$table}->{$row->[1]} = $row->[0];
        }
    }
}

sub get_id {
    my ($table,$fieldname,$value) = @_;
    if (exists($IDs{$table}->{$value})) {
        return $IDs{$table}->{$value};
    } else {
        # insert into the table - if the item already exists, that is
        # okay.
        my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
        if (! defined($result)) {
            warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
        }
        # get the id
        my @Data = 
            &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
        if (@Data) {
            $IDs{$table}->{$value}=$Data[0]->[0];
            return $IDs{$table}->{$value};
        } else {
            warn "Unable to retrieve id for $table $fieldname $value".$/;
            return undef;
        }
    }
}

} # End of ID scoping


###############################################################
###############################################################
##
##   The usual suspects
##
###############################################################
###############################################################
sub escape {
    my $str=shift;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}

sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}


--matthew1090871859--