[LON-CAPA-cvs] cvs: loncom /metadata_database parse_activity_log.pl

matthew lon-capa-cvs@mail.lon-capa.org
Wed, 11 Aug 2004 18:37:23 -0000


This is a MIME encoded message

--matthew1092249443
Content-Type: text/plain

matthew		Wed Aug 11 14:37:23 2004 EDT

  Added files:                 
    /loncom/metadata_database	parse_activity_log.pl 
  Log:
  Moved from modules/matthew to main LONCAPA directory structure.
  
  
--matthew1092249443
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040811143723.txt"


Index: loncom/metadata_database/parse_activity_log.pl
+++ loncom/metadata_database/parse_activity_log.pl
#!/usr/bin/perl
#
# The LearningOnline Network
#
# $Id: parse_activity_log.pl,v 1.1 2004/08/11 18:37:23 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
###############################################################################
#
# Expects
#
# ../key/$class.key - key file $username:$keynumber
# ../rawdata/$class.log - log file
# ../rawdata/$class.seq - sequence file
# ../data writable
# ------------------------------------------------------------------ Course log

#
# Exit codes
#   0     Everything is okay
#   1     Another copy is running on this course
#   2     Activity log does not exist
#   3     Unable to connect to database
#   4     Unable to create database tables
#   5     Unspecified error?
#

use strict;
use DBI;
use lib '/home/httpd/lib/perl/Apache';
use lonmysql();
use Time::HiRes();
use Getopt::Long();

#
# Determine parameters
my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup);
&Getopt::Long::GetOptions( "course=s"  => \$course,
                           "domain=s"  => \$domain,
                           "help"      => \$help,
                           "logfile=s" => \$file,
                           "timerun"   => \$time_run,
                           "nocleanup" => \$nocleanup,
                           "drop"      => \$drop);
if (! defined($course) || $help) {
    print<<USAGE;
parse_activity_log.pl

Process a lon-capa activity log into a database.
Parameters:
   course             Required
   domain             Optional
   drop               optional   if present, drop all course 
                                 specific activity log tables.
   file               optional   Specify the file to parse, including path
   time               optional   if present, print out timing data
   nocleanup          optional   if present, do not remove old files
Examples:
  $0 -course=123456abcdef -domain=msu
  $0 -course=123456abcdef -file=activity.log
USAGE
    exit;
}

##
## Set up timing code
##
my $time_this = \&nothing;
if ($time_run) {
    $time_this = \&time_action;
}
my $initial_time = Time::HiRes::time;

##
## Read in configuration parameters
##
my %perlvar;
&initialize_configuration();
if (! defined($domain) || $domain eq '') {
    $domain = $perlvar{'lonDefDomain'};
}
&update_process_name($course.'@'.$domain);

##
## Determine filenames
##
my $sourcefilename;   # activity log data
my $newfilename;      # $sourcefilename will be renamed to this
my $sql_filename;  # the mysql backup data file name.
if ($file) {
    $sourcefilename = $file;
} else {
    $sourcefilename = &get_filename($course,$domain);
}
$sql_filename = $sourcefilename;
$sql_filename =~ s|[^/]*$|activitylog.sql|;

##
## There will only be a $newfilename file if a copy of this program is already
## running.
my $newfilename = $sourcefilename.'.processing';
if (-e $newfilename) {
    warn "$newfilename exists";
    exit 2;
}

if (-e $sourcefilename) {
    rename($sourcefilename,$newfilename);
}

##
## Table definitions
##
my $prefix = $course.'_'.$domain.'_';
my $student_table = $prefix.'students';
my $student_table_def = 
{ id => $student_table,
  permanent => 'no',
  columns => [
              { name => 'student_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc => 'yes', },
              { name => 'student',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL', },
              ],
      'PRIMARY KEY' => ['student_id',],
          };

my $res_table = $prefix.'resource';
my $res_table_def = 
{ id => $res_table,
  permanent => 'no',
  columns => [{ name => 'res_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'resource',
                type => 'MEDIUMTEXT',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['res_id'],
};

my $action_table = $prefix.'actions';
my $action_table_def =
{ id => $action_table,
  permanent => 'no',
  columns => [{ name => 'action_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'action',
                type => 'VARCHAR(100)',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['action_id',], 
};

my $machine_table = $prefix.'machine_table';
my $machine_table_def =
{ id => $machine_table,
  permanent => 'no',
  columns => [{ name => 'machine_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'machine',
                type => 'VARCHAR(100)',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['machine_id',],
 };

my $activity_table = $prefix.'activity';
my $activity_table_def = 
{ id => $activity_table,
  permanent => 'no',
  columns => [
              { name => 'res_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',},
              { name => 'time',
                type => 'DATETIME',
                restrictions => 'NOT NULL',},
              { name => 'student_id',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL',},
              { name => 'action_id',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL',},
              { name => 'idx',                # This is here in case a student
                type => 'MEDIUMINT UNSIGNED', # has multiple submissions during
                restrictions => 'NOT NULL',   # one second.  It happens, trust
                auto_inc     => 'yes', },     # me.
              { name => 'machine_id',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL',},
              { name => 'action_values',
                type => 'MEDIUMTEXT', },
              ], 
      'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
};
my @Activity_Tables = ($student_table_def,$res_table_def,
                       $action_table_def,$machine_table_def,
                       $activity_table_def);


##
## End of table definitions
##

# 
&Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'},
                                               $perlvar{'lonSqlAccess'});
if (!&Apache::lonmysql::verify_sql_connection()) {
    warn "Unable to connect to MySQL database.";
    exit 3;
}

if ($drop) { &drop_tables(); }
if (-e $sql_filename) {
    # if ANY one of the tables does not exist, load the tables from the
    # backup.
    my @Current_Tables = &Apache::lonmysql::tables_in_db();
    my %Found;
    foreach my $tablename (@Current_Tables) {
        foreach my $table (@Activity_Tables) {
            if ($tablename eq  $table->{'id'}) {
                $Found{$tablename}++;
            }
        }
    }
    foreach my $table (@Activity_Tables) {    
        if (! $Found{$table->{'id'}}) {
            $time_this->();
            &load_backup_tables($sql_filename);
            $time_this->('load backup tables');
            last;
        }
    }
}

# create_tables does not complain if the tables already exist
if (! &create_tables()) {
    warn "Unable to create tables";
    exit 4;
}

&read_id_tables();

##
## Do the main bit of work
if (-e $newfilename) {
    my $result = &process_courselog($newfilename);
    if (! defined($result)) {
        # Something went wrong along the way...
        exit 5;
    } elsif ($result > 0) {
        $time_this->();
        &backup_tables($sql_filename);
        $time_this->('write backup tables');
    }
}

##
## Clean up the filesystem
##
&Apache::lonmysql::disconnect_from_db();
unlink($newfilename) if (! $nocleanup);

if ($time_run) {
    print "Overall time: ".(Time::HiRes::time - $initial_time).$/;
    print &outputtimes();
}

exit 0;   # Everything is okay, so end here before it gets worse.

########################################################
########################################################
##
##                 Process Course Log
##
########################################################
########################################################
#
# Returns the number of lines in the activity.log file that were processed.
sub process_courselog {
    my ($inputfile) = @_;
    if (! open(IN,$inputfile)) {
        warn "Unable to open '$inputfile' for reading";
        return undef;
    }
    my ($linecount,$insertcount);
    my $dbh = &Apache::lonmysql::get_dbh();
    #
    # Timing variables
    my @RowData;
    while (my $line=<IN>){
        # last if ($linecount > 1000);
        #
        # Bulk storage variables
        $time_this->();
        chomp($line);
        $linecount++;
        # print $linecount++.$/;
        my ($timestamp,$host,$log)=split(/\:/,$line,3);
        $time_this->('splitline');
        #
        # $log has the actual log entries; currently still escaped, and
        # %26(timestamp)%3a(url)%3a(user)%3a(domain)
        # then additionally
        # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
        # or
        # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
        #
        # get delimiter between timestamped entries to be &&&
        $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
        $log = &unescape($log);
        $time_this->('translate_and_unescape');
        # now go over all log entries 
        my $machine_id = &get_id($machine_table,'machine',$host);
        foreach (split(/\&\&\&/,$log)) {
            $time_this->();
	    my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
            if (! defined($res) || $res =~ /^\s*$/) {
                $res = '/adm/roles';
                $action = 'log in';
            }
            if ($res =~ m|^/prtspool/|) {
                $res = '/prtspool/';
            }
            if (! defined($action) || $action eq '') {
                $action = 'view';
            }
            my $student = $uname.':'.$udom;
            $time_this->('split_and_error_check');
            my $student_id = &get_id($student_table,'student',$student);
            my $res_id = &get_id($res_table,'resource',$res);
            my $action_id = &get_id($action_table,'action',$action);
            my $sql_time = &Apache::lonmysql::sqltime($time);
            my $values = $dbh->quote(join('',@values));
            $time_this->('get_ids');
            #
            my $row = [$res_id,
                       qq{'$sql_time'},
                       $student_id,
                       $action_id,
                       qq{''},        # idx
                       $machine_id,
                       $values];
            push(@RowData,$row);
            $time_this->('push_row');
            #
        }
        $time_this->();
        if ($linecount % 100 == 0) {
            my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
                                                            undef,
                                                            \@RowData);
            $time_this->('bulk_store_rows');
            if (! defined($result)) {
                warn "Error occured during insert.".
                    &Apache::lonmysql::get_error();
            }
            undef(@RowData);
        }
    }
    if (@RowData) {
        $time_this->();
        my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
                                                        undef,
                                                        \@RowData);
        $time_this->('bulk_store_rows');
        if (! defined($result)) {
            warn "Error occured during insert.".
                &Apache::lonmysql::get_error();
        }
        undef(@RowData);
    }
    close IN;
#    print "Number of lines: ".$linecount.$/;
#    print "Number of inserts: ".$insertcount.$/;
    return $linecount;
}

##
## Timing routines
##
{
    my %Timing;
    my $starttime;

sub nothing {
    return;
}

sub time_action {
    my ($key) = @_;
    if (defined($key)) {
        $Timing{$key}+=Time::HiRes::time-$starttime;
        $Timing{'count_'.$key}++;
    }
    $starttime = Time::HiRes::time;
}

sub outputtimes {
    my $Str;
    if ($time_run) {
        $Str = "Timing Data:".$/;
        while (my($k,$v) = each(%Timing)) {
            next if ($k =~ /^count_/);
            my $count = $Timing{'count_'.$k};
            $Str .= 
                '  '.sprintf("%25.25s",$k).
                '  '.sprintf('% 8d',$count).
                '  '.sprintf('%12.5f',$v).$/;
        }
    }
    return $Str;
}

}


##
## Use mysqldump to store backups of the tables
##
sub backup_tables {
    my ($sql_filename) = @_;
    my $command = qq{mysqldump --opt loncapa };
                             
    foreach my $table (@Activity_Tables) {
        my $tablename = $table->{'id'};
        $command .= $tablename.' ';
    }
    $command .= '>'.$sql_filename;
    warn $command.$/;
    system($command);
}

##
## Load in mysqldumped files
##
sub load_backup_tables {
    my ($sql_filename) = @_;
    return undef if (! -e $sql_filename);
    # Check for .my.cnf
    my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa';
    warn $command.$/;
    system($command);
}

##
## 
##
sub initialize_configuration {
    # Fake it for now:
    $perlvar{'lonSqlUser'} = 'www';
    $perlvar{'lonSqlAccess'} = 'localhostkey';
    $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers';
    $perlvar{'lonDefDomain'} = '103';
}

sub update_process_name {
    my ($text) = @_;
    $0 = 'parse_activity_log.pl: '.$text;
}

sub get_filename {
    my ($course,$domain) = @_;
    my ($a,$b,$c,undef) = split('',$course,4);
    return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
}

sub create_tables {
    foreach my $table (@Activity_Tables) {
        my $table_id = &Apache::lonmysql::create_table($table);
        if (! defined($table_id)) {
            warn "Unable to create table ".$table->{'id'}.$/;
            warn &Apache::lonmysql::build_table_creation_request($table).$/;
            return 0;
        }
    }
    return 1;
}

sub drop_tables {
    foreach my $table (@Activity_Tables) {
        my $table_id = $table->{'id'};
        &Apache::lonmysql::drop_table($table_id);
    }
}

#################################################################
#################################################################
##
## Database item id code
##
#################################################################
#################################################################
{ # Scoping for ID lookup code
    my %IDs;

sub read_id_tables {
    foreach my $table (@Activity_Tables) {
        my @Data = &Apache::lonmysql::get_rows($table->{'id'});
        foreach my $row (@Data) {
            $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
        }
    }
}

sub get_id {
    my ($table,$fieldname,$value) = @_;
    if (exists($IDs{$table}->{$value})) {
        return $IDs{$table}->{$value};
    } else {
        # insert into the table - if the item already exists, that is
        # okay.
        my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
        if (! defined($result)) {
            warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
        }
        # get the id
        my @Data = 
            &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
        if (@Data) {
            $IDs{$table}->{$value}=$Data[0]->[0];
            return $IDs{$table}->{$value};
        } else {
            warn "Unable to retrieve id for $table $fieldname $value".$/;
            return undef;
        }
    }
}

} # End of ID scoping


###############################################################
###############################################################
##
##   The usual suspects
##
###############################################################
###############################################################
sub escape {
    my $str=shift;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}

sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}


--matthew1092249443--