[LON-CAPA-cvs] cvs: loncom /metadata_database parse_activity_log.pl
matthew
lon-capa-cvs@mail.lon-capa.org
Thu, 31 Mar 2005 22:09:40 -0000
This is a MIME encoded message
--matthew1112306980
Content-Type: text/plain
matthew Thu Mar 31 17:09:40 2005 EDT
Modified files:
/loncom/metadata_database parse_activity_log.pl
Log:
Saves data in "xml" format. More to come.
Reworked process_courselog SQL insert code to use an accumulator so we can
use the same code to do inserts when loading xml data.
--matthew1112306980
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20050331170940.txt"
Index: loncom/metadata_database/parse_activity_log.pl
diff -u loncom/metadata_database/parse_activity_log.pl:1.13 loncom/metadata_database/parse_activity_log.pl:1.14
--- loncom/metadata_database/parse_activity_log.pl:1.13 Wed Feb 9 16:54:15 2005
+++ loncom/metadata_database/parse_activity_log.pl Thu Mar 31 17:09:40 2005
@@ -2,7 +2,7 @@
#
# The LearningOnline Network
#
-# $Id: parse_activity_log.pl,v 1.13 2005/02/09 21:54:15 matthew Exp $
+# $Id: parse_activity_log.pl,v 1.14 2005/03/31 22:09:40 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -61,6 +61,7 @@
use IO::File;
use File::Copy;
use Fcntl qw(:flock);
+use HTML::TokeParser;
#
# Determine parameters
@@ -141,7 +142,6 @@
##
my $sourcefilename; # activity log data
my $newfilename; # $sourcefilename will be renamed to this
-my $gz_sql_filename; # the gzipped mysql backup data file name.
my $error_filename; # Errors in parsing the activity log will be written here
if ($srcfile) {
$sourcefilename = $srcfile;
@@ -150,12 +150,16 @@
}
my $sql_filename = $sourcefilename;
$sql_filename =~ s|[^/]*$|activity.log.sql|;
-$gz_sql_filename = $sql_filename.'.gz';
+my $gz_sql_filename = $sql_filename.'.gz';
+#
+my $xml_filename = $sourcefilename;
+$xml_filename =~ s|[^/]*$|activity.log.xml|;
+my $gz_xml_filename = $xml_filename.'.gz';
+#
$error_filename = $sourcefilename;
$error_filename =~ s|[^/]*$|activity.log.errors|;
$logthis->('Beginning logging '.time);
-
#
# Wait for a lock on the lockfile to avoid collisions
my $lockfilename = $sourcefilename.'.lock';
@@ -192,10 +196,9 @@
##
## Table definitions
##
-my $prefix = $course.'_'.$domain.'_';
-my $student_table = &Apache::lonmysql::fix_table_name($prefix.'students');
+my %tables = &table_names($course,$domain);
my $student_table_def =
-{ id => $student_table,
+{ id => $tables{'student'},
permanent => 'no',
columns => [
{ name => 'student_id',
@@ -209,9 +212,8 @@
'PRIMARY KEY' => ['student_id',],
};
-my $res_table = &Apache::lonmysql::fix_table_name($prefix.'resource');
my $res_table_def =
-{ id => $res_table,
+{ id => $tables{'res'},
permanent => 'no',
columns => [{ name => 'res_id',
type => 'MEDIUMINT UNSIGNED',
@@ -224,7 +226,6 @@
'PRIMARY KEY' => ['res_id'],
};
-#my $action_table = &Apache::lonmysql::fix_table_name($prefix.'actions');
#my $action_table_def =
#{ id => $action_table,
# permanent => 'no',
@@ -239,9 +240,8 @@
# 'PRIMARY KEY' => ['action_id',],
#};
-my $machine_table = &Apache::lonmysql::fix_table_name($prefix.'machine_table');
my $machine_table_def =
-{ id => $machine_table,
+{ id => $tables{'machine'},
permanent => 'no',
columns => [{ name => 'machine_id',
type => 'MEDIUMINT UNSIGNED',
@@ -254,9 +254,8 @@
'PRIMARY KEY' => ['machine_id',],
};
-my $activity_table = &Apache::lonmysql::fix_table_name($prefix.'activity');
my $activity_table_def =
-{ id => $activity_table,
+{ id => $tables{'activity'},
permanent => 'no',
columns => [
{ name => 'res_id',
@@ -288,9 +287,11 @@
my @Activity_Table = ($activity_table_def);
my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def);
+
##
## End of table definitions
##
+$logthis->('tables = '.join(',',keys(%tables)));
$logthis->('Connectiong to mysql');
&Apache::lonmysql::set_mysql_user_and_password('www',
@@ -302,53 +303,46 @@
}
$logthis->('SQL connection is up');
-if (-s $gz_sql_filename) {
+my $missing_table = &check_for_missing_tables(values(%tables));
+if (-s $gz_sql_filename && ! -s $gz_xml_filename) {
my $backup_modification_time = (stat($gz_sql_filename))[9];
$logthis->($gz_sql_filename.' was last modified '.
localtime($backup_modification_time).
'('.$backup_modification_time.')');
- # Check for missing tables
- my @Current_Tables = &Apache::lonmysql::tables_in_db();
- $logthis->(join(',',@Current_Tables));
- my %Found;
- foreach my $tablename (@Current_Tables) {
- foreach my $table (@Activity_Table,@ID_Tables) {
- if ($tablename eq $table->{'id'}) {
- $Found{$tablename}++;
- }
- }
- }
- $logthis->('Found tables '.join(',',keys(%Found)));
- my $missing_a_table = 0;
- foreach my $table (@Activity_Table,@ID_Tables) {
- # Hmmm, should I dump the tables?
- if (! $Found{$table->{'id'}}) {
- $logthis->('Missing table '.$table->{'id'});
- $missing_a_table = 1;
- last;
+ if ($missing_table) {
+ # If the backup happened prior to the last table modification,
+ # we need to save the tables.
+ if (&latest_table_modification_time() > $backup_modification_time) {
+ # Save the current tables in case we need them another time.
+ $logthis->('Backing existing tables up');
+ &backup_tables_as_xml($gz_xml_filename.'.save_'.time,\%tables);
}
+ $time_this->();
+ &load_backup_sql_tables($gz_sql_filename);
+ &backup_tables_as_xml($gz_xml_filename,\%tables);
+ $time_this->('load backup tables');
}
- if ($missing_a_table) {
+} elsif (-s $gz_xml_filename) {
+ my $backup_modification_time = (stat($gz_xml_filename))[9];
+ $logthis->($gz_xml_filename.' was last modified '.
+ localtime($backup_modification_time).
+ '('.$backup_modification_time.')');
+ if ($missing_table) {
my $table_modification_time = $backup_modification_time;
# If the backup happened prior to the last table modification,
- foreach my $table (@Activity_Table,@ID_Tables) {
- my %tabledata = &Apache::lonmysql::table_information($table->{'id'});
- next if (! scalar(keys(%tabledata))); # table does not exist
- if ($table_modification_time < $tabledata{'Update_time'}) {
- $table_modification_time = $tabledata{'Update_time'};
- }
- }
- $logthis->("Table modification time = ".$table_modification_time);
- if ($table_modification_time > $backup_modification_time) {
+ # we need to save the tables.
+ if (&latest_table_modification_time() > $backup_modification_time) {
# Save the current tables in case we need them another time.
- my $backup_name = $gz_sql_filename.'.'.time;
- $logthis->('Backing existing tables up in '.$backup_name);
- &backup_tables($backup_name);
+ $logthis->('Backing existing tables up');
+ &backup_tables_as_xml($gz_xml_filename.'.save_'.time,\%tables);
}
$time_this->();
- &load_backup_tables($gz_sql_filename);
+ # We have to make our own tables for the xml format
+ &drop_tables();
+ &create_tables();
+ &load_backup_xml_tables($gz_xml_filename,\%tables);
$time_this->('load backup tables');
- }
+ }
}
##
@@ -375,7 +369,7 @@
## Parse the course log
$logthis->('processing course log');
if (-s $newfilename) {
- my $result = &process_courselog($newfilename,$error_fh);
+ my $result = &process_courselog($newfilename,$error_fh,\%tables);
if (! defined($result)) {
# Something went wrong along the way...
$logthis->('process_courselog returned undef');
@@ -383,7 +377,7 @@
} elsif ($result > 0) {
$time_this->();
$logthis->('process_courselog returned '.$result.' backing up tables');
- &backup_tables($gz_sql_filename);
+ &backup_tables_as_xml($gz_xml_filename,\%tables);
$time_this->('write backup tables');
}
if ($drop_when_done) { &drop_tables(); $logthis->('dropped tables'); }
@@ -416,11 +410,25 @@
}
}
-
exit 0; # Everything is okay, so end here before it gets worse.
########################################################
########################################################
+sub table_names {
+ my ($course,$domain) = @_;
+ my $prefix = $course.'_'.$domain.'_';
+ #
+ my %tables =
+ ( student =>&Apache::lonmysql::fix_table_name($prefix.'students'),
+ res =>&Apache::lonmysql::fix_table_name($prefix.'resource'),
+ machine =>&Apache::lonmysql::fix_table_name($prefix.'machine_table'),
+ activity=>&Apache::lonmysql::fix_table_name($prefix.'activity'),
+ );
+ return %tables;
+}
+
+########################################################
+########################################################
##
## Process Course Log
##
@@ -429,7 +437,7 @@
#
# Returns the number of lines in the activity.log file that were processed.
sub process_courselog {
- my ($inputfile,$error_fh) = @_;
+ my ($inputfile,$error_fh,$tables) = @_;
if (! open(IN,$inputfile)) {
warn "Unable to open '$inputfile' for reading";
$logthis->("Unable to open '$inputfile' for reading");
@@ -438,8 +446,7 @@
my ($linecount,$insertcount);
my $dbh = &Apache::lonmysql::get_dbh();
#
- # Timing variables
- my @RowData;
+ &store_entry();
while (my $line=<IN>){
# last if ($linecount > 1000);
#
@@ -449,7 +456,6 @@
$linecount++;
# print $linecount++.$/;
my ($timestamp,$host,$log)=split(/\:/,$line,3);
- $time_this->('splitline');
#
# $log has the actual log entries; currently still escaped, and
# %26(timestamp)%3a(url)%3a(user)%3a(domain)
@@ -461,16 +467,12 @@
# get delimiter between timestamped entries to be &&&
$log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
$log = &unescape($log);
- $time_this->('translate_and_unescape');
# now go over all log entries
if (! defined($host)) { $host = 'unknown'; }
- my $machine_id = &get_id($machine_table,'machine',$host);
my $prevchunk = 'none';
foreach my $chunk (split(/\&\&\&/,$log)) {
my $warningflag = '';
- $time_this->();
my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk);
- my $student = $uname.':'.$udom;
if (! defined($res) || $res =~ /^\s*$/) {
$res = '/adm/roles';
$action = 'LOGIN';
@@ -495,114 +497,44 @@
'@values = '.join('&',@values));
next; #skip it if we cannot understand what is happening.
}
- if (! defined($student) || $student eq ':') {
- $student = 'unknown';
- $warningflag .= 'student';
- }
- if (! defined($res) || $res =~ /^\s*$/) {
- $res = 'unknown';
- $warningflag .= 'res';
- }
- if (! defined($action) || $action =~ /^\s*$/) {
- $action = 'unknown';
- $warningflag .= 'action';
- }
- if (! defined($time) || $time !~ /^\d+$/) {
- $time = 0;
- $warningflag .= 'time';
- }
- #
- $time_this->('split_and_error_check');
- my $student_id = &get_id($student_table,'student',$student);
- my $res_id = &get_id($res_table,'resource',$res);
-# my $action_id = &get_id($action_table,'action',$action);
- my $sql_time = &Apache::lonmysql::sqltime($time);
- #
- if (! defined($student_id) || $student_id eq '') {
- $warningflag.='student_id';
- }
- if (! defined($res_id) || $res_id eq '') {
- $warningflag.='res_id';
- }
-# if (! defined($action_id) || $action_id eq '') {
-# $warningflag.='action_id';
-# }
- if ($warningflag ne '') {
- print $error_fh 'full log entry:'.$log.$/;
- print $error_fh 'error on chunk:'.$chunk.$/;
- $logthis->('warningflag ('.$warningflag.') on chunk '.
- $/.$chunk.$/.'prevchunk = '.$/.$prevchunk);
- $prevchunk .= $chunk;
- next; # skip this chunk
- }
#
- my $store_values;
+ my %data = (student => $uname.':'.$udom,
+ resource => $res,
+ machine => $host,
+ action => $action,
+ time => &Apache::lonmysql::sqltime($time));
if ($action eq 'POST') {
- $store_values =
+ $data{'action_values'} =
$dbh->quote(join('&',map { &escape($_); } @values));
} else {
- $store_values = $dbh->quote(join('&',@values));
+ $data{'action_values'} = $dbh->quote(join('&',@values));
}
- $time_this->('get_ids');
- #
- my $row = [$res_id,
- qq{'$sql_time'},
- $student_id,
- "'".$action."'",
-# $action_id,
- qq{''}, # idx
- $machine_id,
- $store_values];
- push(@RowData,$row);
- $time_this->('push_row');
- $prevchunk = $chunk;
- #
- }
- $time_this->();
- if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) {
- my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
- undef,
- \@RowData);
- # $logthis->('result = '.$result);
- $time_this->('bulk_store_rows');
- if (! defined($result)) {
- my $error = &Apache::lonmysql::get_error();
- warn "Error occured during insert.".$error;
- $logthis->('error = '.$error);
+ my $error = &store_entry($dbh,$tables,\%data);
+ if ($error) {
+ $logthis->('error store_entry:'.$error." on %data");
}
- undef(@RowData);
+ $prevchunk = $chunk;
}
}
- if (@RowData) {
- $time_this->();
- $logthis->('storing '.$linecount);
- my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
- undef,
- \@RowData);
- $logthis->('result = '.$result);
- $time_this->('bulk_store_rows');
- if (! defined($result)) {
- my $error = &Apache::lonmysql::get_error();
- warn "Error occured during insert.".$error;
- $logthis->('error = '.$error);
- }
- undef(@RowData);
+ my $result = &store_entry($dbh,$tables);
+ if (! defined($result)) {
+ my $error = &Apache::lonmysql::get_error();
+ warn "Error occured during insert.".$error;
+ $logthis->('error = '.$error);
}
close IN;
-# print "Number of lines: ".$linecount.$/;
-# print "Number of inserts: ".$insertcount.$/;
return $linecount;
}
##
-## Somtimes, instead of doing something, doing nothing is appropriate.
+## default value for $logthis and $time_this
sub nothing {
return;
}
##
-## Logging routine
+## Logging routine (look for $log)
##
sub log_to_file {
my ($input)=@_;
@@ -643,11 +575,47 @@
}
+sub latest_table_modification_time {
+ my $latest_time;
+ foreach my $table (@Activity_Table,@ID_Tables) {
+ my %tabledata = &Apache::lonmysql::table_information($table->{'id'});
+ next if (! scalar(keys(%tabledata))); # table does not exist
+ if (! defined($latest_time) ||
+ $latest_time < $tabledata{'Update_time'}) {
+ $latest_time = $tabledata{'Update_time'};
+ }
+ }
+ return $latest_time;
+}
+
+sub check_for_missing_tables {
+ my @wanted_tables = @_;
+ # Check for missing tables
+ my @Current_Tables = &Apache::lonmysql::tables_in_db();
+ my %Found;
+ foreach my $tablename (@Current_Tables) {
+ foreach my $table (@wanted_tables) {
+ if ($tablename eq $table) {
+ $Found{$tablename}++;
+ }
+ }
+ }
+ $logthis->('Found tables '.join(',',keys(%Found)));
+ my $missing_a_table = 0;
+ foreach my $table (@wanted_tables) {
+ if (! $Found{$table}) {
+ $logthis->('Missing table '.$table);
+ $missing_a_table = 1;
+ last;
+ }
+ }
+ return $missing_a_table;
+}
##
## Use mysqldump to store backups of the tables
##
-sub backup_tables {
+sub backup_tables_as_sql {
my ($gz_sql_filename) = @_;
my $command = qq{mysqldump --quote-names --opt loncapa };
foreach my $table (@ID_Tables,@Activity_Table) {
@@ -663,7 +631,7 @@
##
## Load in mysqldumped files
##
-sub load_backup_tables {
+sub load_backup_sql_tables {
my ($gz_sql_filename) = @_;
if (-s $gz_sql_filename) {
$logthis->('loading data from gzipped sql file');
@@ -756,6 +724,173 @@
} # End of ID scoping
+###############################################################
+###############################################################
+##
+## Save as XML
+##
+###############################################################
+###############################################################
+sub backup_tables_as_xml {
+ my ($filename,$tables) = @_;
+ open(XMLFILE,"|gzip - > $filename") || return ('error:unable to write '.$filename);
+ my $query = qq{
+ SELECT B.resource,
+ A.time,
+ A.idx,
+ C.student,
+ A.action,
+ E.machine,
+ A.action_values
+ FROM $tables->{'activity'} AS A
+ LEFT JOIN $tables->{'res'} AS B ON B.res_id=A.res_id
+ LEFT JOIN $tables->{'student'} AS C ON C.student_id=A.student_id
+ LEFT JOIN $tables->{'machine'} AS E ON E.machine_id=A.machine_id
+ ORDER BY A.time DESC
+ };
+ $query =~ s/\s+/ /g;
+ my $dbh = &Apache::lonmysql::get_dbh();
+ my $sth = $dbh->prepare($query);
+ if (! $sth->execute()) {
+ $logthis->('<font color="blue">'.
+ 'WARNING: Could not retrieve from database:'.
+ $sth->errstr().'</font>');
+ return undef;
+ } else {
+ my ($res,$sqltime,$idx,$student,$action,$machine,$action_values);
+ if ($sth->bind_columns(\$res,\$sqltime,\$idx,\$student,\$action,
+ \$machine,\$action_values)) {
+
+ while ($sth->fetch) {
+ print XMLFILE '<row>'.
+ qq{<resource>$res</resource>}.
+ qq{<time>$sqltime</time>}.
+ qq{<idx>$idx</idx>}.
+ qq{<student>$student</student>}.
+ qq{<action>$action</action>}.
+ qq{<machine>$machine</machine>}.
+ qq{<action_values>$action_values</action_values>}.
+ '</row>'.$/;
+ }
+ } else {
+ warn "Unable to bind to columns.\n";
+ return undef;
+ }
+ }
+ close XMLFILE;
+ return;
+}
+
+###############################################################
+###############################################################
+##
+## load as xml
+##
+###############################################################
+###############################################################
+sub load_backup_xml_tables {
+ my ($filename,$tables) = @_;
+ my $xmlfh;
+ open($xmlfh,"cat $filename | gzip -d - |");
+ if (! defined($xmlfh)) {
+ return ('error:unable to read '.$filename);
+ }
+ my $dbh = &Apache::lonmysql::get_dbh();
+ my $parser = HTML::TokeParser->new($xmlfh);
+ $parser->xml_mode('1');
+ &store_entry();
+ my %data;
+ while (my $token = $parser->get_token()) {
+ if ($token->[0] eq 'S' && $token->[1] eq 'row') {
+ undef(%data);
+ }
+ foreach my $tag ('resource','time','idx',
+ 'student','action','machine','action_values') {
+ if ($token->[0] eq 'S' && $token->[1] eq $tag) {
+ my $text = $parser->get_text("/$tag");
+ $data{$tag} = $text;
+ }
+ }
+ if ($token->[0] eq 'E' && $token->[1] eq 'row') {
+ $data{'action_values'} =qq{'$data{'action_values'}'};
+ my $error = &store_entry($dbh,$tables,\%data);
+ }
+ }
+ &store_entry($dbh,$tables);
+ return;
+}
+
+
+#######################################################################
+#######################################################################
+##
+## store_entry - accumulate data to be inserted into the database
+##
+## Pass no values in to clear accumulator
+## Pass ($dbh,\%tables) to initiate storage of values
+## Pass ($dbh,\%tables,\%data) to use normally
+##
+#######################################################################
+#######################################################################
+
+{
+ my @rows;
+ my $max_row_count = 100;
+
+sub store_entry {
+ if (! @_) {
+ undef(@rows);
+ return '';
+ }
+ my ($dbh,$tables,$data) = @_;
+ return if (! defined($tables));
+ if (defined($data)) {
+ my $error;
+ foreach my $field ('student','resource','action','time') {
+ if (! defined($data->{$field}) || $data->{$field} eq ':' ||
+ $data->{$field}=~ /^\s*$/) {
+ $error.=$field.',';
+ }
+ }
+ if ($error) { $error=~s/,$//; return $error; }
+ #
+ my $student_id = &get_id($tables->{'student'},'student',
+ $data->{'student'});
+ my $res_id = &get_id($tables->{'res'},
+ 'resource',$data->{'resource'});
+ my $machine_id = &get_id($tables->{'machine'},
+ 'machine',$data->{'machine'});
+ my $idx = $data->{'idx'}; if (! $idx) { $idx = "''"; }
+ #
+ push(@rows,[$res_id,
+ qq{'$data->{'time'}'},
+ $student_id,
+ qq{'$data->{'action'}'},
+ $idx,
+ $machine_id,
+ $data->{'action_values'}]);
+ }
+ if (defined($tables) &&
+ ( (! defined($data) && scalar(@rows)) || scalar(@rows)>$max_row_count)
+ ){
+ # Store the rows
+ my $result =
+ &Apache::lonmysql::bulk_store_rows($tables->{'activity'},
+ undef,
+ \@rows);
+ if (! defined($result)) {
+ my $error = &Apache::lonmysql::get_error();
+ warn "Error occured during insert.".$error;
+ return $error;
+ }
+ undef(@rows);
+ return $result if (! defined($data));
+ }
+
+ return '';
+}
+
+} # end of scope for &store_entry
###############################################################
###############################################################
--matthew1112306980--