[LON-CAPA-cvs] cvs: loncom /metadata_database parse_activity_log.pl

matthew lon-capa-cvs@mail.lon-capa.org
Thu, 31 Mar 2005 22:09:40 -0000


This is a MIME encoded message

--matthew1112306980
Content-Type: text/plain

matthew		Thu Mar 31 17:09:40 2005 EDT

  Modified files:              
    /loncom/metadata_database	parse_activity_log.pl 
  Log:
  Saves data in "xml" format.  More to come.
  Reworked process_courselog SQL insert code to use an accumulator so we can
  use the same code to do inserts when loading xml data.
  
  
--matthew1112306980
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20050331170940.txt"

Index: loncom/metadata_database/parse_activity_log.pl
diff -u loncom/metadata_database/parse_activity_log.pl:1.13 loncom/metadata_database/parse_activity_log.pl:1.14
--- loncom/metadata_database/parse_activity_log.pl:1.13	Wed Feb  9 16:54:15 2005
+++ loncom/metadata_database/parse_activity_log.pl	Thu Mar 31 17:09:40 2005
@@ -2,7 +2,7 @@
 #
 # The LearningOnline Network
 #
-# $Id: parse_activity_log.pl,v 1.13 2005/02/09 21:54:15 matthew Exp $
+# $Id: parse_activity_log.pl,v 1.14 2005/03/31 22:09:40 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -61,6 +61,7 @@
 use IO::File;
 use File::Copy;
 use Fcntl qw(:flock);
+use HTML::TokeParser;
 
 #
 # Determine parameters
@@ -141,7 +142,6 @@
 ##
 my $sourcefilename;   # activity log data
 my $newfilename;      # $sourcefilename will be renamed to this
-my $gz_sql_filename;  # the gzipped mysql backup data file name.
 my $error_filename;   # Errors in parsing the activity log will be written here
 if ($srcfile) {
     $sourcefilename = $srcfile;
@@ -150,12 +150,16 @@
 }
 my $sql_filename = $sourcefilename;
 $sql_filename =~ s|[^/]*$|activity.log.sql|;
-$gz_sql_filename = $sql_filename.'.gz';
+my $gz_sql_filename = $sql_filename.'.gz';
+#
+my $xml_filename = $sourcefilename;
+$xml_filename =~ s|[^/]*$|activity.log.xml|;
+my $gz_xml_filename = $xml_filename.'.gz';
+#
 $error_filename = $sourcefilename;
 $error_filename =~ s|[^/]*$|activity.log.errors|;
 $logthis->('Beginning logging '.time);
 
-
 #
 # Wait for a lock on the lockfile to avoid collisions
 my $lockfilename = $sourcefilename.'.lock';
@@ -192,10 +196,9 @@
 ##
 ## Table definitions
 ##
-my $prefix = $course.'_'.$domain.'_';
-my $student_table = &Apache::lonmysql::fix_table_name($prefix.'students');
+my %tables = &table_names($course,$domain);
 my $student_table_def = 
-{ id => $student_table,
+{ id => $tables{'student'},
   permanent => 'no',
   columns => [
               { name => 'student_id',
@@ -209,9 +212,8 @@
       'PRIMARY KEY' => ['student_id',],
           };
 
-my $res_table = &Apache::lonmysql::fix_table_name($prefix.'resource');
 my $res_table_def = 
-{ id => $res_table,
+{ id => $tables{'res'},
   permanent => 'no',
   columns => [{ name => 'res_id',
                 type => 'MEDIUMINT UNSIGNED',
@@ -224,7 +226,6 @@
   'PRIMARY KEY' => ['res_id'],
 };
 
-#my $action_table = &Apache::lonmysql::fix_table_name($prefix.'actions');
 #my $action_table_def =
 #{ id => $action_table,
 #  permanent => 'no',
@@ -239,9 +240,8 @@
 #  'PRIMARY KEY' => ['action_id',], 
 #};
 
-my $machine_table = &Apache::lonmysql::fix_table_name($prefix.'machine_table');
 my $machine_table_def =
-{ id => $machine_table,
+{ id => $tables{'machine'},
   permanent => 'no',
   columns => [{ name => 'machine_id',
                 type => 'MEDIUMINT UNSIGNED',
@@ -254,9 +254,8 @@
   'PRIMARY KEY' => ['machine_id',],
  };
 
-my $activity_table = &Apache::lonmysql::fix_table_name($prefix.'activity');
 my $activity_table_def = 
-{ id => $activity_table,
+{ id => $tables{'activity'},
   permanent => 'no',
   columns => [
               { name => 'res_id',
@@ -288,9 +287,11 @@
 
 my @Activity_Table = ($activity_table_def);
 my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def);
+               
 ##
 ## End of table definitions
 ##
+$logthis->('tables = '.join(',',keys(%tables)));
 
 $logthis->('Connectiong to mysql');
 &Apache::lonmysql::set_mysql_user_and_password('www',
@@ -302,53 +303,46 @@
 }
 $logthis->('SQL connection is up');
 
-if (-s $gz_sql_filename) {
+my $missing_table = &check_for_missing_tables(values(%tables));
+if (-s $gz_sql_filename && ! -s $gz_xml_filename) {
     my $backup_modification_time = (stat($gz_sql_filename))[9];
     $logthis->($gz_sql_filename.' was last modified '.
                localtime($backup_modification_time).
                '('.$backup_modification_time.')');
-    # Check for missing tables
-    my @Current_Tables = &Apache::lonmysql::tables_in_db();
-    $logthis->(join(',',@Current_Tables));
-    my %Found;
-    foreach my $tablename (@Current_Tables) {
-        foreach my $table (@Activity_Table,@ID_Tables) {
-            if ($tablename eq  $table->{'id'}) {
-                $Found{$tablename}++;
-            }
-        }
-    }
-    $logthis->('Found tables '.join(',',keys(%Found)));
-    my $missing_a_table = 0;
-    foreach my $table (@Activity_Table,@ID_Tables) {    
-        # Hmmm, should I dump the tables?
-        if (! $Found{$table->{'id'}}) {
-            $logthis->('Missing table '.$table->{'id'});
-            $missing_a_table = 1;
-            last;
+    if ($missing_table) {
+        # If the backup happened prior to the last table modification,
+        # we need to save the tables.
+        if (&latest_table_modification_time() > $backup_modification_time) {
+            # Save the current tables in case we need them another time.
+            $logthis->('Backing existing tables up');
+            &backup_tables_as_xml($gz_xml_filename.'.save_'.time,\%tables);
         }
+        $time_this->();
+        &load_backup_sql_tables($gz_sql_filename);
+        &backup_tables_as_xml($gz_xml_filename,\%tables);
+        $time_this->('load backup tables');
     }
-    if ($missing_a_table) {
+} elsif (-s $gz_xml_filename) {
+    my $backup_modification_time = (stat($gz_xml_filename))[9];
+    $logthis->($gz_xml_filename.' was last modified '.
+               localtime($backup_modification_time).
+               '('.$backup_modification_time.')');
+    if ($missing_table) {
         my $table_modification_time = $backup_modification_time;
         # If the backup happened prior to the last table modification,
-        foreach my $table (@Activity_Table,@ID_Tables) {    
-            my %tabledata = &Apache::lonmysql::table_information($table->{'id'});
-            next if (! scalar(keys(%tabledata))); # table does not exist
-            if ($table_modification_time < $tabledata{'Update_time'}) {
-                $table_modification_time = $tabledata{'Update_time'};
-            }
-        }
-        $logthis->("Table modification time = ".$table_modification_time);
-        if ($table_modification_time > $backup_modification_time) {
+        # we need to save the tables.
+        if (&latest_table_modification_time() > $backup_modification_time) {
             # Save the current tables in case we need them another time.
-            my $backup_name = $gz_sql_filename.'.'.time;
-            $logthis->('Backing existing tables up in '.$backup_name);
-            &backup_tables($backup_name);
+            $logthis->('Backing existing tables up');
+            &backup_tables_as_xml($gz_xml_filename.'.save_'.time,\%tables);
         }
         $time_this->();
-        &load_backup_tables($gz_sql_filename);
+        # We have to make our own tables for the xml format
+        &drop_tables();
+        &create_tables();
+        &load_backup_xml_tables($gz_xml_filename,\%tables);
         $time_this->('load backup tables');
-    }
+    }    
 }
 
 ##
@@ -375,7 +369,7 @@
 ## Parse the course log
 $logthis->('processing course log');
 if (-s $newfilename) {
-    my $result = &process_courselog($newfilename,$error_fh);
+    my $result = &process_courselog($newfilename,$error_fh,\%tables);
     if (! defined($result)) {
         # Something went wrong along the way...
         $logthis->('process_courselog returned undef');
@@ -383,7 +377,7 @@
     } elsif ($result > 0) {
         $time_this->();
         $logthis->('process_courselog returned '.$result.' backing up tables');
-        &backup_tables($gz_sql_filename);
+        &backup_tables_as_xml($gz_xml_filename,\%tables);
         $time_this->('write backup tables');
     }
     if ($drop_when_done) { &drop_tables(); $logthis->('dropped tables'); }
@@ -416,11 +410,25 @@
     }
 }
 
-
 exit 0;   # Everything is okay, so end here before it gets worse.
 
 ########################################################
 ########################################################
+sub table_names {
+    my ($course,$domain) = @_;
+    my $prefix = $course.'_'.$domain.'_';
+    #
+    my %tables = 
+        ( student =>&Apache::lonmysql::fix_table_name($prefix.'students'),
+          res     =>&Apache::lonmysql::fix_table_name($prefix.'resource'),
+          machine =>&Apache::lonmysql::fix_table_name($prefix.'machine_table'),
+          activity=>&Apache::lonmysql::fix_table_name($prefix.'activity'),
+          );
+    return %tables;
+}
+
+########################################################
+########################################################
 ##
 ##                 Process Course Log
 ##
@@ -429,7 +437,7 @@
 #
 # Returns the number of lines in the activity.log file that were processed.
 sub process_courselog {
-    my ($inputfile,$error_fh) = @_;
+    my ($inputfile,$error_fh,$tables) = @_;
     if (! open(IN,$inputfile)) {
         warn "Unable to open '$inputfile' for reading";
         $logthis->("Unable to open '$inputfile' for reading");
@@ -438,8 +446,7 @@
     my ($linecount,$insertcount);
     my $dbh = &Apache::lonmysql::get_dbh();
     #
-    # Timing variables
-    my @RowData;
+    &store_entry();
     while (my $line=<IN>){
         # last if ($linecount > 1000);
         #
@@ -449,7 +456,6 @@
         $linecount++;
         # print $linecount++.$/;
         my ($timestamp,$host,$log)=split(/\:/,$line,3);
-        $time_this->('splitline');
         #
         # $log has the actual log entries; currently still escaped, and
         # %26(timestamp)%3a(url)%3a(user)%3a(domain)
@@ -461,16 +467,12 @@
         # get delimiter between timestamped entries to be &&&
         $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
         $log = &unescape($log);
-        $time_this->('translate_and_unescape');
         # now go over all log entries 
         if (! defined($host)) { $host = 'unknown'; }
-        my $machine_id = &get_id($machine_table,'machine',$host);
         my $prevchunk = 'none';
         foreach my $chunk (split(/\&\&\&/,$log)) {
             my $warningflag = '';
-            $time_this->();
 	    my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk);
-            my $student = $uname.':'.$udom;
             if (! defined($res) || $res =~ /^\s*$/) {
                 $res = '/adm/roles';
                 $action = 'LOGIN';
@@ -495,114 +497,44 @@
                          '@values = '.join('&',@values));
                 next; #skip it if we cannot understand what is happening.
             }
-            if (! defined($student) || $student eq ':') {
-                $student = 'unknown';
-                $warningflag .= 'student';
-            }
-            if (! defined($res) || $res =~ /^\s*$/) {
-                $res = 'unknown';
-                $warningflag .= 'res';
-            }
-            if (! defined($action) || $action =~ /^\s*$/) {
-                $action = 'unknown';
-                $warningflag .= 'action';
-            }
-            if (! defined($time) || $time !~ /^\d+$/) {
-                $time = 0;
-                $warningflag .= 'time';
-            }
-            #
-            $time_this->('split_and_error_check');
-            my $student_id = &get_id($student_table,'student',$student);
-            my $res_id     = &get_id($res_table,'resource',$res);
-#            my $action_id  = &get_id($action_table,'action',$action);
-            my $sql_time   = &Apache::lonmysql::sqltime($time);
-            #
-            if (! defined($student_id) || $student_id eq '') { 
-                $warningflag.='student_id'; 
-            }
-            if (! defined($res_id) || $res_id eq '') { 
-                $warningflag.='res_id'; 
-            }
-#            if (! defined($action_id) || $action_id eq '') { 
-#                $warningflag.='action_id'; 
-#            }
-            if ($warningflag ne '') {
-                print $error_fh 'full log entry:'.$log.$/;
-                print $error_fh 'error on chunk:'.$chunk.$/;
-                $logthis->('warningflag ('.$warningflag.') on chunk '.
-                           $/.$chunk.$/.'prevchunk = '.$/.$prevchunk);
-                $prevchunk .= $chunk;
-                next; # skip this chunk
-            }
             #
-            my $store_values;
+            my %data = (student  => $uname.':'.$udom,
+                        resource => $res,
+                        machine  => $host,
+                        action   => $action,
+                        time => &Apache::lonmysql::sqltime($time));
             if ($action eq 'POST') {
-                $store_values = 
+                $data{'action_values'} =
                     $dbh->quote(join('&',map { &escape($_); } @values));
             } else {
-                $store_values = $dbh->quote(join('&',@values));
+                $data{'action_values'} = $dbh->quote(join('&',@values));
             }
-            $time_this->('get_ids');
-            #
-            my $row = [$res_id,
-                       qq{'$sql_time'},
-                       $student_id,
-                       "'".$action."'",
-#                       $action_id,
-                       qq{''},        # idx
-                       $machine_id,
-                       $store_values];
-            push(@RowData,$row);
-            $time_this->('push_row');
-            $prevchunk = $chunk;
-            #
-        }
-        $time_this->();
-        if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) {
-            my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
-                                                            undef,
-                                                            \@RowData);
-            # $logthis->('result = '.$result);
-            $time_this->('bulk_store_rows');
-            if (! defined($result)) {
-                my $error = &Apache::lonmysql::get_error();
-                warn "Error occured during insert.".$error;
-                $logthis->('error = '.$error);
+            my $error = &store_entry($dbh,$tables,\%data);
+            if ($error) {
+                $logthis->('error store_entry:'.$error." on %data");
             }
-            undef(@RowData);
+            $prevchunk = $chunk;
         }
     }
-    if (@RowData) {
-        $time_this->();
-        $logthis->('storing '.$linecount);
-        my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
-                                                        undef,
-                                                        \@RowData);
-        $logthis->('result = '.$result);
-        $time_this->('bulk_store_rows');
-        if (! defined($result)) {
-            my $error = &Apache::lonmysql::get_error();
-            warn "Error occured during insert.".$error;
-            $logthis->('error = '.$error);
-        }
-        undef(@RowData);
+    my $result = &store_entry($dbh,$tables);
+    if (! defined($result)) {
+        my $error = &Apache::lonmysql::get_error();
+        warn "Error occured during insert.".$error;
+        $logthis->('error = '.$error);
     }
     close IN;
-#    print "Number of lines: ".$linecount.$/;
-#    print "Number of inserts: ".$insertcount.$/;
     return $linecount;
 }
 
 
 ##
-## Somtimes, instead of doing something, doing nothing is appropriate.
+## default value for $logthis and $time_this
 sub nothing {
     return;
 }
 
 ##
-## Logging routine
+## Logging routine (look for $log)
 ##
 sub log_to_file {
     my ($input)=@_;
@@ -643,11 +575,47 @@
 
 }
 
+sub latest_table_modification_time {
+    my $latest_time;
+    foreach my $table (@Activity_Table,@ID_Tables) {    
+        my %tabledata = &Apache::lonmysql::table_information($table->{'id'});
+        next if (! scalar(keys(%tabledata))); # table does not exist
+        if (! defined($latest_time) ||
+            $latest_time < $tabledata{'Update_time'}) {
+            $latest_time = $tabledata{'Update_time'};
+        }
+    }
+    return $latest_time;
+}
+
+sub check_for_missing_tables {
+    my @wanted_tables = @_;
+    # Check for missing tables
+    my @Current_Tables = &Apache::lonmysql::tables_in_db();
+    my %Found;
+    foreach my $tablename (@Current_Tables) {
+        foreach my $table (@wanted_tables) {
+            if ($tablename eq  $table) {
+                $Found{$tablename}++;
+            }
+        }
+    }
+    $logthis->('Found tables '.join(',',keys(%Found)));
+    my $missing_a_table = 0;
+    foreach my $table (@wanted_tables) {
+        if (! $Found{$table}) {
+            $logthis->('Missing table '.$table);
+            $missing_a_table = 1;
+            last;
+        }
+    }
+    return $missing_a_table;
+}
 
 ##
 ## Use mysqldump to store backups of the tables
 ##
-sub backup_tables {
+sub backup_tables_as_sql {
     my ($gz_sql_filename) = @_;
     my $command = qq{mysqldump --quote-names --opt loncapa };
     foreach my $table (@ID_Tables,@Activity_Table) {
@@ -663,7 +631,7 @@
 ##
 ## Load in mysqldumped files
 ##
-sub load_backup_tables {
+sub load_backup_sql_tables {
     my ($gz_sql_filename) = @_;
     if (-s $gz_sql_filename) {
         $logthis->('loading data from gzipped sql file');
@@ -756,6 +724,173 @@
 
 } # End of ID scoping
 
+###############################################################
+###############################################################
+##
+##   Save as XML
+##
+###############################################################
+###############################################################
+sub backup_tables_as_xml {
+    my ($filename,$tables) = @_;
+    open(XMLFILE,"|gzip - > $filename") || return ('error:unable to write '.$filename);
+    my $query = qq{
+        SELECT B.resource,
+               A.time,
+               A.idx,
+               C.student,
+               A.action,
+               E.machine,
+               A.action_values 
+            FROM $tables->{'activity'} AS A
+            LEFT JOIN $tables->{'res'}      AS B ON B.res_id=A.res_id 
+            LEFT JOIN $tables->{'student'}  AS C ON C.student_id=A.student_id 
+            LEFT JOIN $tables->{'machine'}  AS E ON E.machine_id=A.machine_id
+            ORDER BY A.time DESC
+        };
+    $query =~ s/\s+/ /g;
+    my $dbh = &Apache::lonmysql::get_dbh();
+    my $sth = $dbh->prepare($query);
+    if (! $sth->execute()) {
+        $logthis->('<font color="blue">'.
+                   'WARNING: Could not retrieve from database:'.
+                   $sth->errstr().'</font>');
+        return undef;
+    } else {
+        my ($res,$sqltime,$idx,$student,$action,$machine,$action_values);
+        if ($sth->bind_columns(\$res,\$sqltime,\$idx,\$student,\$action,
+                               \$machine,\$action_values)) {
+            
+            while ($sth->fetch) {
+                print XMLFILE '<row>'.
+                    qq{<resource>$res</resource>}.
+                    qq{<time>$sqltime</time>}.
+                    qq{<idx>$idx</idx>}.
+                    qq{<student>$student</student>}.
+                    qq{<action>$action</action>}.
+                    qq{<machine>$machine</machine>}.
+                    qq{<action_values>$action_values</action_values>}.
+                    '</row>'.$/;
+            }
+        } else {
+            warn "Unable to bind to columns.\n";
+            return undef;
+        }
+    }
+    close XMLFILE;
+    return;
+}
+
+###############################################################
+###############################################################
+##
+##   load as xml
+##
+###############################################################
+###############################################################
+sub load_backup_xml_tables {
+    my ($filename,$tables) = @_;
+    my $xmlfh;
+    open($xmlfh,"cat $filename | gzip -d - |");
+    if (! defined($xmlfh)) {
+        return ('error:unable to read '.$filename);
+    }
+    my $dbh = &Apache::lonmysql::get_dbh();
+    my $parser = HTML::TokeParser->new($xmlfh);
+    $parser->xml_mode('1');
+    &store_entry();
+    my %data;
+    while (my $token = $parser->get_token()) {
+        if ($token->[0] eq 'S' && $token->[1] eq 'row') {
+            undef(%data);
+        }
+        foreach my $tag ('resource','time','idx',
+                         'student','action','machine','action_values') {
+            if ($token->[0] eq 'S' && $token->[1] eq $tag) {
+                my $text = $parser->get_text("/$tag");
+                $data{$tag} = $text;
+            }
+        }
+        if ($token->[0] eq 'E' && $token->[1] eq 'row') {
+            $data{'action_values'} =qq{'$data{'action_values'}'};
+            my $error = &store_entry($dbh,$tables,\%data);
+        }
+    }
+    &store_entry($dbh,$tables);
+    return;
+}
+
+
+#######################################################################
+#######################################################################
+##
+## store_entry - accumulate data to be inserted into the database
+## 
+## Pass no values in to clear accumulator
+## Pass ($dbh,\%tables) to initiate storage of values
+## Pass ($dbh,\%tables,\%data) to use normally
+##
+#######################################################################
+#######################################################################
+
+{
+    my @rows;
+    my $max_row_count = 100;
+
+sub store_entry {
+    if (! @_) {
+        undef(@rows);
+        return '';
+    }
+    my ($dbh,$tables,$data) = @_;
+    return if (! defined($tables));
+    if (defined($data)) {
+        my $error;
+        foreach my $field ('student','resource','action','time') {
+            if (! defined($data->{$field}) || $data->{$field} eq ':' ||
+                $data->{$field}=~ /^\s*$/) {
+                $error.=$field.',';
+            }
+        }
+        if ($error) { $error=~s/,$//; return $error; }
+        #
+        my $student_id = &get_id($tables->{'student'},'student',
+                                 $data->{'student'});
+        my $res_id     = &get_id($tables->{'res'},
+                                 'resource',$data->{'resource'});
+        my $machine_id = &get_id($tables->{'machine'},
+                                 'machine',$data->{'machine'});
+        my $idx = $data->{'idx'}; if (! $idx) { $idx = "''"; }
+        #
+        push(@rows,[$res_id,
+                    qq{'$data->{'time'}'},
+                    $student_id,
+                    qq{'$data->{'action'}'},
+                    $idx,
+                    $machine_id,
+                    $data->{'action_values'}]);
+    }
+    if (defined($tables) && 
+        ( (! defined($data) && scalar(@rows)) || scalar(@rows)>$max_row_count)
+        ){
+        # Store the rows
+        my $result = 
+            &Apache::lonmysql::bulk_store_rows($tables->{'activity'},
+                                               undef,
+                                               \@rows);
+        if (! defined($result)) {
+            my $error = &Apache::lonmysql::get_error();
+            warn "Error occured during insert.".$error;
+            return $error;
+        }
+        undef(@rows);
+        return $result if (! defined($data));
+    }
+
+    return '';
+}
+
+} # end of scope for &store_entry
 
 ###############################################################
 ###############################################################

--matthew1112306980--