[LON-CAPA-cvs] cvs: modules /matthew/activitylog parse_activity_log.pl
matthew
lon-capa-cvs@mail.lon-capa.org
Mon, 26 Jul 2004 19:57:39 -0000
This is a MIME encoded message
--matthew1090871859
Content-Type: text/plain
matthew Mon Jul 26 15:57:39 2004 EDT
Added files:
/modules/matthew/activitylog parse_activity_log.pl
Log:
First stab at processing activity logs into MySQL.
Uses the "loncapa" database. Currently expects lonmysql to be present in
@INC, but this will change.
Creates 5 tables, 4 for storage/creation of ids (student, resource, action,
and machines), and 1 to store the actual activity log data.
--matthew1090871859
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040726155739.txt"
Index: modules/matthew/activitylog/parse_activity_log.pl
+++ modules/matthew/activitylog/parse_activity_log.pl
#!/usr/bin/perl
#
# The LearningOnline Network
#
# $Id: parse_activity_log.pl,v 1.1 2004/07/26 19:57:39 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
###############################################################################
#
# Expects
#
# ../key/$class.key - key file $username:$keynumber
# ../rawdata/$class.log - log file
# ../rawdata/$class.seq - sequence file
# ../data writable
# ------------------------------------------------------------------ Course log
#
# Exit codes
# 0 Everything is okay
# 1 Another copy is running on this course
# 2 Activity log does not exist
# 3 Unable to connect to database
# 4 Unable to create database tables
# 5 Unspecified error?
#
use strict;
use DBI;
use lonmysql();
use Time::HiRes();
use Getopt::Long();
#
# Determine parameters
my ($help,$course,$domain,$drop,$file,$timerun);
&Getopt::Long::GetOptions( "course=s" => \$course,
"domain=s" => \$domain,
"help" => \$help,
"logfile=s" => \$file,
"timerun" => \$timerun,
"drop" => \$drop);
if (! defined($course) || $help) {
print<<USAGE;
$0
Process a lon-capa activity log into a database.
Parameters:
course Required
domain Optional
drop optional if present, drop all course
specific activity log tables.
file optional Specify the file to parse, including path
time optional if present, print out timing data
Examples:
$0 -course=123456abcdef -domain=msu
$0 -course=123456abcdef -file=activity.log
USAGE
exit;
}
my %perlvar;
&initialize_configuration();
if (! defined($domain) || $domain eq '') {
$domain = $perlvar{'lonDefDomain'};
}
&update_process_name($course.'@'.$domain);
my $filename;
if ($file) {
$filename = $file;
} else {
$filename = &get_filename($course,$domain);
}
my $newfilename = $filename.'.processing';
if (! -e $filename) {
# warn "$filename does not exist";
exit 1;
}
if (-e $newfilename) {
# warn "$newfilename exists";
exit 2;
}
rename($filename,$filename.'.processing');
#
# Table definition
my $prefix = $course.'_'.$domain.'_';
my $student_table = $prefix.'students';
my $student_table_def =
{ id => $student_table,
permanent => 'no',
columns => [
{ name => 'student_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'student',
type => 'VARCHAR(100) BINARY',
restrictions => 'NOT NULL', },
],
'PRIMARY KEY' => ['student_id',],
};
my $res_table = $prefix.'resource';
my $res_table_def =
{ id => $res_table,
permanent => 'no',
columns => [{ name => 'res_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'resource',
type => 'MEDIUMTEXT',
restrictions => 'NOT NULL'},
],
'PRIMARY KEY' => ['res_id'],
};
my $action_table = $prefix.'actions';
my $action_table_def =
{ id => $action_table,
permanent => 'no',
columns => [{ name => 'act_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'action',
type => 'VARCHAR(100)',
restrictions => 'NOT NULL'},
],
'PRIMARY KEY' => ['act_id',],
};
my $machine_table = $prefix.'machine_table';
my $machine_table_def =
{ id => $machine_table,
permanent => 'no',
columns => [{ name => 'machine_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'machine',
type => 'VARCHAR(100)',
restrictions => 'NOT NULL'},
],
'PRIMARY KEY' => ['machine_id',],
};
my $activity_table = $prefix.'activity';
my $activity_table_def =
{ id => $activity_table,
permanent => 'no',
columns => [
{ name => 'res_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',},
{ name => 'time',
type => 'DATETIME',
restrictions => 'NOT NULL',},
{ name => 'student_id',
type => 'VARCHAR(100) BINARY',
restrictions => 'NOT NULL',},
{ name => 'action_id',
type => 'VARCHAR(100) BINARY',
restrictions => 'NOT NULL',},
{ name => 'idx',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'machine_id',
type => 'VARCHAR(100) BINARY',
restrictions => 'NOT NULL',},
{ name => 'action_values',
type => 'MEDIUMTEXT', },
],
'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
};
#
&Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'},
$perlvar{'lonSqlAccess'});
if (!&Apache::lonmysql::verify_sql_connection()) {
warn "Unable to connect to MySQL database.";
exit 3;
}
if ($drop) {
&drop_tables();
}
if (! &create_tables()) {
warn "Unable to create tables";
exit 4;
}
&read_id_tables();
if (!&process_courselog($newfilename)) {
exit 5;
}
exit 0; # Everything is okay, so end here before it gets worse.
########################################################
########################################################
##
## Various Subroutines
##
########################################################
########################################################
sub process_courselog {
my ($inputfile) = @_;
open(IN,$inputfile) or die ('Unable to open '.$inputfile.' for input');
my ($linecount,$insertcount);
my $dbh = &Apache::lonmysql::get_dbh();
#
# Timing variables
my %Timing;
my $starttime;
while (my $line=<IN>){
last if ($linecount > 500);
if ($timerun) { $starttime = Time::HiRes::time; }
chomp($line);
$linecount++;
# print $linecount++.$/;
my ($timestamp,$host,$log)=split(/\:/,$line,3);
if ($timerun){
push(@{$Timing{'splitline'}},Time::HiRes::time-$starttime);
}
#
# $log has the actual log entries; currently still escaped, and
# %26(timestamp)%3a(url)%3a(user)%3a(domain)
# then additionally
# %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
# or
# %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
#
# get delimiter between timestamped entries to be &&&
$log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
$log = &unescape($log);
# now go over all log entries
my $machine_id = &get_id($machine_table,'machine',$host);
foreach (split(/\&\&\&/,$log)) {
my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
if (! defined($res) || $res =~ /^\s*$/) {
$res = '/adm/roles';
$action = 'log in';
}
if ($res =~ m|^/prtspool/|) {
$res = '/prtspool/';
}
if (! defined($action) || $action eq '') {
$action = 'view';
}
my $student = $uname.':'.$udom;
if ($timerun) {$starttime = Time::HiRes::time;}
my $student_id = &get_id($student_table,'student',$student);
my $res_id = &get_id($res_table,'resource',$res);
my $action_id = &get_id($action_table,'action',$action);
my $sql_time = &Apache::lonmysql::sqltime($time);
my $values = $dbh->quote(join('',@values));
if ($timerun) {
push(@{$Timing{'get_id'}},Time::HiRes::time-$starttime);
}
#
if ($timerun) {$starttime = Time::HiRes::time;}
my $row = [$res_id,
$sql_time,
$student_id,
$action_id,
undef, # idx
$machine_id,
$values];
my $result = &Apache::lonmysql::store_row($activity_table,
$row);
if ($timerun) {
push(@{$Timing{'insert'}},Time::HiRes::time-$starttime);
}
#
$insertcount++;
if (! defined($result)) {
warn "Got error of ".&Apache::lonmysql::get_error().$/;
warn
'res_id => :'.$res_id.':'.$/.
'sql_time => :'.$sql_time.':'.$/.
'student_id => :'.$student_id.':'.$/.
'action_id => :'.$action_id.':'.$/.
'machine_id => :'.$machine_id.':'.$/.
'values => :'.join(',',@values);
}
}
}
close IN;
print "Number of lines: ".$linecount.$/;
print "Number of inserts: ".$insertcount.$/;
if ($timerun) {
print "Timing Data:".$/;
while (my($k,$v) = each(%Timing)) {
my $Str .= ' '.$k.' '.scalar(@$v).' ';
my $sum;
foreach (@$v) {
$sum+=$_;
}
$Str .= $sum.$/;
print $Str;
}
}
}
sub initialize_configuration {
# Fake it for now:
$perlvar{'lonSqlUser'} = 'www';
$perlvar{'lonSqlAccess'} = 'localhostkey';
$perlvar{'lonUsersDir'} = '/home/httpd/lonUsers';
$perlvar{'lonDefDomain'} = '103';
}
sub update_process_name {
my ($text) = @_;
$0 = 'parse_activity_log.pl: '.$text;
}
sub get_filename {
my ($course,$domain) = @_;
my ($a,$b,$c,undef) = split('',$course,4);
return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
}
sub create_tables {
foreach my $table ($student_table_def,$res_table_def,
$action_table_def,$machine_table_def,
$activity_table_def) {
my $table_id = &Apache::lonmysql::create_table($table);
if (! defined($table_id)) {
warn "Unable to create table ".$table->{'id'}.$/;
warn &Apache::lonmysql::build_table_creation_request($table).$/;
return 0;
}
}
return 1;
}
sub drop_tables {
foreach my $table ($student_table_def,$res_table_def,
$action_table_def,$machine_table_def,
$activity_table_def) {
my $table_id = $table->{'id'};
&Apache::lonmysql::drop_table($table_id);
}
}
#################################################################
#################################################################
##
## Database item id code
##
#################################################################
#################################################################
{ # Scoping for ID lookup code
my %IDs;
sub read_id_tables {
foreach my $table ($student_table,$res_table,$action_table,$machine_table){
my @Data = &Apache::lonmysql::get_rows($table);
foreach my $row (@Data) {
$IDs{$table}->{$row->[1]} = $row->[0];
}
}
}
sub get_id {
my ($table,$fieldname,$value) = @_;
if (exists($IDs{$table}->{$value})) {
return $IDs{$table}->{$value};
} else {
# insert into the table - if the item already exists, that is
# okay.
my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
if (! defined($result)) {
warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
}
# get the id
my @Data =
&Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
if (@Data) {
$IDs{$table}->{$value}=$Data[0]->[0];
return $IDs{$table}->{$value};
} else {
warn "Unable to retrieve id for $table $fieldname $value".$/;
return undef;
}
}
}
} # End of ID scoping
###############################################################
###############################################################
##
## The usual suspects
##
###############################################################
###############################################################
sub escape {
my $str=shift;
$str =~ s/(\W)/"%".unpack('H2',$1)/eg;
return $str;
}
sub unescape {
my $str=shift;
$str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
return $str;
}
--matthew1090871859--