[LON-CAPA-cvs] cvs: modules /matthew/activitylog parse_activity_log.pl
matthew
lon-capa-cvs@mail.lon-capa.org
Wed, 28 Jul 2004 21:37:32 -0000
This is a MIME encoded message
--matthew1091050652
Content-Type: text/plain
matthew Wed Jul 28 17:37:32 2004 EDT
Modified files:
/modules/matthew/activitylog parse_activity_log.pl
Log:
Added 'nocleanup' option.
Modified timing code to be more elegant :)
Added &backup_tables and &load_backup_tables to save and restore tables
using mysqldump. This requires that the user running this script
(probably www) have a proper ~/.my.cnf. See lonsql for how this should
look.
Modified &process_courselog to do bulk inserts to MySQL. The frequency
of inserts should be tuned for speed.
Modified logic to call &backup_tables and &load_backup_tables.
Still untested in a LON-CAPA environment but it works fine on the command
line.
--matthew1091050652
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040728173732.txt"
Index: modules/matthew/activitylog/parse_activity_log.pl
diff -u modules/matthew/activitylog/parse_activity_log.pl:1.2 modules/matthew/activitylog/parse_activity_log.pl:1.3
--- modules/matthew/activitylog/parse_activity_log.pl:1.2 Mon Jul 26 15:58:50 2004
+++ modules/matthew/activitylog/parse_activity_log.pl Wed Jul 28 17:37:32 2004
@@ -2,7 +2,7 @@
#
# The LearningOnline Network
#
-# $Id: parse_activity_log.pl,v 1.2 2004/07/26 19:58:50 matthew Exp $
+# $Id: parse_activity_log.pl,v 1.3 2004/07/28 21:37:32 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -54,12 +54,13 @@
#
# Determine parameters
-my ($help,$course,$domain,$drop,$file,$timerun);
+my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup);
&Getopt::Long::GetOptions( "course=s" => \$course,
"domain=s" => \$domain,
"help" => \$help,
"logfile=s" => \$file,
- "timerun" => \$timerun,
+ "timerun" => \$time_run,
+ "nocleanup" => \$nocleanup,
"drop" => \$drop);
if (! defined($course) || $help) {
print<<USAGE;
@@ -73,6 +74,7 @@
specific activity log tables.
file optional Specify the file to parse, including path
time optional if present, print out timing data
+ nocleanup optional if present, do not remove old files
Examples:
$0 -course=123456abcdef -domain=msu
$0 -course=123456abcdef -file=activity.log
@@ -80,32 +82,51 @@
exit;
}
+##
+## Set up timing code
+##
+my $time_this = \¬hing;
+if ($time_run) {
+ $time_this = \&time_action;
+}
+my $initial_time = Time::HiRes::time;
+
+##
+## Read in configuration parameters
+##
my %perlvar;
&initialize_configuration();
if (! defined($domain) || $domain eq '') {
$domain = $perlvar{'lonDefDomain'};
}
-
&update_process_name($course.'@'.$domain);
-my $filename;
+##
+## Determine filenames
+##
+my $sourcefilename; # activity log data
+my $newfilename; # $sourcefilename will be renamed to this
+my $sql_filename; # the mysql backup data file name.
if ($file) {
- $filename = $file;
+ $sourcefilename = $file;
} else {
- $filename = &get_filename($course,$domain);
+ $sourcefilename = &get_filename($course,$domain);
}
+$sql_filename = $sourcefilename;
+$sql_filename =~ s|[^/]*$|activitylog.sql|;
-my $newfilename = $filename.'.processing';
-if (! -e $filename) {
- # warn "$filename does not exist";
- exit 1;
-}
+##
+## There will only be a $newfilename file if a copy of this program is already
+## running.
+my $newfilename = $sourcefilename.'.processing';
if (-e $newfilename) {
- # warn "$newfilename exists";
+ warn "$newfilename exists";
exit 2;
}
-rename($filename,$filename.'.processing');
+if (-e $sourcefilename) {
+ rename($sourcefilename,$newfilename);
+}
##
## Table definitions
@@ -201,6 +222,11 @@
],
'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
};
+my @Activity_Tables = ($student_table_def,$res_table_def,
+ $action_table_def,$machine_table_def,
+ $activity_table_def);
+
+
##
## End of table definitions
##
@@ -213,10 +239,30 @@
exit 3;
}
-if ($drop) {
- &drop_tables();
+if ($drop) { &drop_tables(); }
+if (-e $sql_filename) {
+ # if ANY one of the tables does not exist, load the tables from the
+ # backup.
+ my @Current_Tables = &Apache::lonmysql::tables_in_db();
+ my %Found;
+ foreach my $tablename (@Current_Tables) {
+ foreach my $table (@Activity_Tables) {
+ if ($tablename eq $table->{'id'}) {
+ $Found{$tablename}++;
+ }
+ }
+ }
+ foreach my $table (@Activity_Tables) {
+ if (! $Found{$table->{'id'}}) {
+ $time_this->();
+ &load_backup_tables($sql_filename);
+ $time_this->('load backup tables');
+ last;
+ }
+ }
}
+# create_tables does not complain if the tables already exist
if (! &create_tables()) {
warn "Unable to create tables";
exit 4;
@@ -224,8 +270,29 @@
&read_id_tables();
-if (!&process_courselog($newfilename)) {
- exit 5;
+##
+## Do the main bit of work
+if (-e $newfilename) {
+ my $result = &process_courselog($newfilename);
+ if (! defined($result)) {
+ # Something went wrong along the way...
+ exit 5;
+ } elsif ($result > 0) {
+ $time_this->();
+ &backup_tables($sql_filename);
+ $time_this->('write backup tables');
+ }
+}
+
+##
+## Clean up the filesystem
+##
+&Apache::lonmysql::disconnect_from_db();
+unlink($newfilename) if (! $nocleanup);
+
+if ($time_run) {
+ print "Overall time: ".(Time::HiRes::time - $initial_time).$/;
+ print &outputtimes();
}
exit 0; # Everything is okay, so end here before it gets worse.
@@ -233,30 +300,33 @@
########################################################
########################################################
##
-## Various Subroutines
+## Process Course Log
##
########################################################
########################################################
+#
+# Returns the number of lines in the activity.log file that were processed.
sub process_courselog {
my ($inputfile) = @_;
- open(IN,$inputfile) or die ('Unable to open '.$inputfile.' for input');
+ if (! open(IN,$inputfile)) {
+ warn "Unable to open '$inputfile' for reading";
+ return undef;
+ }
my ($linecount,$insertcount);
my $dbh = &Apache::lonmysql::get_dbh();
#
# Timing variables
- my %Timing;
- my $starttime;
-
+ my @RowData;
while (my $line=<IN>){
- last if ($linecount > 500);
- if ($timerun) { $starttime = Time::HiRes::time; }
+ # last if ($linecount > 1000);
+ #
+ # Bulk storage variables
+ $time_this->();
chomp($line);
$linecount++;
# print $linecount++.$/;
my ($timestamp,$host,$log)=split(/\:/,$line,3);
- if ($timerun){
- push(@{$Timing{'splitline'}},Time::HiRes::time-$starttime);
- }
+ $time_this->('splitline');
#
# $log has the actual log entries; currently still escaped, and
# %26(timestamp)%3a(url)%3a(user)%3a(domain)
@@ -268,9 +338,11 @@
# get delimiter between timestamped entries to be &&&
$log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
$log = &unescape($log);
+ $time_this->('translate_and_unescape');
# now go over all log entries
my $machine_id = &get_id($machine_table,'machine',$host);
foreach (split(/\&\&\&/,$log)) {
+ $time_this->();
my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
if (! defined($res) || $res =~ /^\s*$/) {
$res = '/adm/roles';
@@ -283,60 +355,126 @@
$action = 'view';
}
my $student = $uname.':'.$udom;
- if ($timerun) {$starttime = Time::HiRes::time;}
+ $time_this->('split_and_error_check');
my $student_id = &get_id($student_table,'student',$student);
my $res_id = &get_id($res_table,'resource',$res);
my $action_id = &get_id($action_table,'action',$action);
my $sql_time = &Apache::lonmysql::sqltime($time);
my $values = $dbh->quote(join('',@values));
- if ($timerun) {
- push(@{$Timing{'get_id'}},Time::HiRes::time-$starttime);
- }
+ $time_this->('get_ids');
#
- if ($timerun) {$starttime = Time::HiRes::time;}
my $row = [$res_id,
- $sql_time,
+ qq{'$sql_time'},
$student_id,
$action_id,
- undef, # idx
+ qq{''}, # idx
$machine_id,
$values];
- my $result = &Apache::lonmysql::store_row($activity_table,
- $row);
- if ($timerun) {
- push(@{$Timing{'insert'}},Time::HiRes::time-$starttime);
- }
+ push(@RowData,$row);
+ $time_this->('push_row');
#
- $insertcount++;
+ }
+ $time_this->();
+ if ($linecount % 100 == 0) {
+ my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
+ undef,
+ \@RowData);
+ $time_this->('bulk_store_rows');
if (! defined($result)) {
- warn "Got error of ".&Apache::lonmysql::get_error().$/;
- warn
- 'res_id => :'.$res_id.':'.$/.
- 'sql_time => :'.$sql_time.':'.$/.
- 'student_id => :'.$student_id.':'.$/.
- 'action_id => :'.$action_id.':'.$/.
- 'machine_id => :'.$machine_id.':'.$/.
- 'values => :'.join(',',@values);
+ warn "Error occured during insert.".
+ &Apache::lonmysql::get_error();
}
+ undef(@RowData);
+ }
+ }
+ if (@RowData) {
+ $time_this->();
+ my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
+ undef,
+ \@RowData);
+ $time_this->('bulk_store_rows');
+ if (! defined($result)) {
+ warn "Error occured during insert.".
+ &Apache::lonmysql::get_error();
}
+ undef(@RowData);
}
close IN;
- print "Number of lines: ".$linecount.$/;
- print "Number of inserts: ".$insertcount.$/;
- if ($timerun) {
- print "Timing Data:".$/;
+# print "Number of lines: ".$linecount.$/;
+# print "Number of inserts: ".$insertcount.$/;
+ return $linecount;
+}
+
+##
+## Timing routines
+##
+{
+ my %Timing;
+ my $starttime;
+
+sub nothing {
+ return;
+}
+
+sub time_action {
+ my ($key) = @_;
+ if (defined($key)) {
+ $Timing{$key}+=Time::HiRes::time-$starttime;
+ $Timing{'count_'.$key}++;
+ }
+ $starttime = Time::HiRes::time;
+}
+
+sub outputtimes {
+ my $Str;
+ if ($time_run) {
+ $Str = "Timing Data:".$/;
while (my($k,$v) = each(%Timing)) {
- my $Str .= ' '.$k.' '.scalar(@$v).' ';
- my $sum;
- foreach (@$v) {
- $sum+=$_;
- }
- $Str .= $sum.$/;
- print $Str;
+ next if ($k =~ /^count_/);
+ my $count = $Timing{'count_'.$k};
+ $Str .=
+ ' '.sprintf("%25.25s",$k).
+ ' '.sprintf('% 8d',$count).
+ ' '.sprintf('%12.5f',$v).$/;
}
}
+ return $Str;
+}
+
+}
+
+
+##
+## Use mysqldump to store backups of the tables
+##
+sub backup_tables {
+ my ($sql_filename) = @_;
+ my $command = qq{mysqldump --opt loncapa };
+
+ foreach my $table (@Activity_Tables) {
+ my $tablename = $table->{'id'};
+ $command .= $tablename.' ';
+ }
+ $command .= '>'.$sql_filename;
+ warn $command.$/;
+ system($command);
}
+##
+## Load in mysqldumped files
+##
+sub load_backup_tables {
+ my ($sql_filename) = @_;
+ return undef if (! -e $sql_filename);
+ # Check for .my.cnf
+ my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa';
+ warn $command.$/;
+ system($command);
+}
+
+##
+##
+##
sub initialize_configuration {
# Fake it for now:
$perlvar{'lonSqlUser'} = 'www';
@@ -357,9 +495,7 @@
}
sub create_tables {
- foreach my $table ($student_table_def,$res_table_def,
- $action_table_def,$machine_table_def,
- $activity_table_def) {
+ foreach my $table (@Activity_Tables) {
my $table_id = &Apache::lonmysql::create_table($table);
if (! defined($table_id)) {
warn "Unable to create table ".$table->{'id'}.$/;
@@ -371,9 +507,7 @@
}
sub drop_tables {
- foreach my $table ($student_table_def,$res_table_def,
- $action_table_def,$machine_table_def,
- $activity_table_def) {
+ foreach my $table (@Activity_Tables) {
my $table_id = $table->{'id'};
&Apache::lonmysql::drop_table($table_id);
}
@@ -390,10 +524,10 @@
my %IDs;
sub read_id_tables {
- foreach my $table ($student_table,$res_table,$action_table,$machine_table){
- my @Data = &Apache::lonmysql::get_rows($table);
+ foreach my $table (@Activity_Tables) {
+ my @Data = &Apache::lonmysql::get_rows($table->{'id'});
foreach my $row (@Data) {
- $IDs{$table}->{$row->[1]} = $row->[0];
+ $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
}
}
}
--matthew1091050652--