[LON-CAPA-cvs] cvs: modules /matthew/activitylog parse_activity_log.pl

matthew lon-capa-cvs@mail.lon-capa.org
Wed, 28 Jul 2004 21:37:32 -0000


This is a MIME encoded message

--matthew1091050652
Content-Type: text/plain

matthew		Wed Jul 28 17:37:32 2004 EDT

  Modified files:              
    /modules/matthew/activitylog	parse_activity_log.pl 
  Log:
  Added 'nocleanup' option.
  Modified timing code to be more elegant :)
  Added &backup_tables and &load_backup_tables to save and restore tables
  using mysqldump.  This requires that the user running this script
  (probably www) have a proper ~/.my.cnf.  See lonsql for how this should
  look.
  Modified &process_courselog to do bulk inserts to MySQL.  The frequency
  of inserts should be tuned for speed.
  Modified logic to call &backup_tables and &load_backup_tables.
  
  Still untested in a LON-CAPA environment but it works fine on the command
  line.
  
  
--matthew1091050652
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040728173732.txt"

Index: modules/matthew/activitylog/parse_activity_log.pl
diff -u modules/matthew/activitylog/parse_activity_log.pl:1.2 modules/matthew/activitylog/parse_activity_log.pl:1.3
--- modules/matthew/activitylog/parse_activity_log.pl:1.2	Mon Jul 26 15:58:50 2004
+++ modules/matthew/activitylog/parse_activity_log.pl	Wed Jul 28 17:37:32 2004
@@ -2,7 +2,7 @@
 #
 # The LearningOnline Network
 #
-# $Id: parse_activity_log.pl,v 1.2 2004/07/26 19:58:50 matthew Exp $
+# $Id: parse_activity_log.pl,v 1.3 2004/07/28 21:37:32 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -54,12 +54,13 @@
 
 #
 # Determine parameters
-my ($help,$course,$domain,$drop,$file,$timerun);
+my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup);
 &Getopt::Long::GetOptions( "course=s"  => \$course,
                            "domain=s"  => \$domain,
                            "help"      => \$help,
                            "logfile=s" => \$file,
-                           "timerun"      => \$timerun,
+                           "timerun"   => \$time_run,
+                           "nocleanup" => \$nocleanup,
                            "drop"      => \$drop);
 if (! defined($course) || $help) {
     print<<USAGE;
@@ -73,6 +74,7 @@
                                  specific activity log tables.
    file               optional   Specify the file to parse, including path
    time               optional   if present, print out timing data
+   nocleanup          optional   if present, do not remove old files
 Examples:
   $0 -course=123456abcdef -domain=msu
   $0 -course=123456abcdef -file=activity.log
@@ -80,32 +82,51 @@
     exit;
 }
 
+##
+## Set up timing code
+##
+my $time_this = \&nothing;
+if ($time_run) {
+    $time_this = \&time_action;
+}
+my $initial_time = Time::HiRes::time;
+
+##
+## Read in configuration parameters
+##
 my %perlvar;
 &initialize_configuration();
 if (! defined($domain) || $domain eq '') {
     $domain = $perlvar{'lonDefDomain'};
 }
-
 &update_process_name($course.'@'.$domain);
 
-my $filename;
+##
+## Determine filenames
+##
+my $sourcefilename;   # activity log data
+my $newfilename;      # $sourcefilename will be renamed to this
+my $sql_filename;  # the mysql backup data file name.
 if ($file) {
-    $filename = $file;
+    $sourcefilename = $file;
 } else {
-    $filename = &get_filename($course,$domain);
+    $sourcefilename = &get_filename($course,$domain);
 }
+$sql_filename = $sourcefilename;
+$sql_filename =~ s|[^/]*$|activitylog.sql|;
 
-my $newfilename = $filename.'.processing';
-if (! -e $filename) {
-    # warn "$filename does not exist";
-    exit 1;
-}
+##
+## There will only be a $newfilename file if a copy of this program is already
+## running.
+my $newfilename = $sourcefilename.'.processing';
 if (-e $newfilename) {
-    # warn "$newfilename exists";
+    warn "$newfilename exists";
     exit 2;
 }
 
-rename($filename,$filename.'.processing');
+if (-e $sourcefilename) {
+    rename($sourcefilename,$newfilename);
+}
 
 ##
 ## Table definitions
@@ -201,6 +222,11 @@
               ], 
       'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
 };
+my @Activity_Tables = ($student_table_def,$res_table_def,
+                       $action_table_def,$machine_table_def,
+                       $activity_table_def);
+
+
 ##
 ## End of table definitions
 ##
@@ -213,10 +239,30 @@
     exit 3;
 }
 
-if ($drop) {
-    &drop_tables();
+if ($drop) { &drop_tables(); }
+if (-e $sql_filename) {
+    # if ANY one of the tables does not exist, load the tables from the
+    # backup.
+    my @Current_Tables = &Apache::lonmysql::tables_in_db();
+    my %Found;
+    foreach my $tablename (@Current_Tables) {
+        foreach my $table (@Activity_Tables) {
+            if ($tablename eq  $table->{'id'}) {
+                $Found{$tablename}++;
+            }
+        }
+    }
+    foreach my $table (@Activity_Tables) {    
+        if (! $Found{$table->{'id'}}) {
+            $time_this->();
+            &load_backup_tables($sql_filename);
+            $time_this->('load backup tables');
+            last;
+        }
+    }
 }
 
+# create_tables does not complain if the tables already exist
 if (! &create_tables()) {
     warn "Unable to create tables";
     exit 4;
@@ -224,8 +270,29 @@
 
 &read_id_tables();
 
-if (!&process_courselog($newfilename)) {
-    exit 5;
+##
+## Do the main bit of work
+if (-e $newfilename) {
+    my $result = &process_courselog($newfilename);
+    if (! defined($result)) {
+        # Something went wrong along the way...
+        exit 5;
+    } elsif ($result > 0) {
+        $time_this->();
+        &backup_tables($sql_filename);
+        $time_this->('write backup tables');
+    }
+}
+
+##
+## Clean up the filesystem
+##
+&Apache::lonmysql::disconnect_from_db();
+unlink($newfilename) if (! $nocleanup);
+
+if ($time_run) {
+    print "Overall time: ".(Time::HiRes::time - $initial_time).$/;
+    print &outputtimes();
 }
 
 exit 0;   # Everything is okay, so end here before it gets worse.
@@ -233,30 +300,33 @@
 ########################################################
 ########################################################
 ##
-##                 Various Subroutines
+##                 Process Course Log
 ##
 ########################################################
 ########################################################
+#
+# Returns the number of lines in the activity.log file that were processed.
 sub process_courselog {
     my ($inputfile) = @_;
-    open(IN,$inputfile) or die ('Unable to open '.$inputfile.' for input');
+    if (! open(IN,$inputfile)) {
+        warn "Unable to open '$inputfile' for reading";
+        return undef;
+    }
     my ($linecount,$insertcount);
     my $dbh = &Apache::lonmysql::get_dbh();
     #
     # Timing variables
-    my %Timing;
-    my $starttime;
-    
+    my @RowData;
     while (my $line=<IN>){
-        last if ($linecount > 500);
-        if ($timerun) { $starttime = Time::HiRes::time; }
+        # last if ($linecount > 1000);
+        #
+        # Bulk storage variables
+        $time_this->();
         chomp($line);
         $linecount++;
         # print $linecount++.$/;
         my ($timestamp,$host,$log)=split(/\:/,$line,3);
-        if ($timerun){
-            push(@{$Timing{'splitline'}},Time::HiRes::time-$starttime);
-        }
+        $time_this->('splitline');
         #
         # $log has the actual log entries; currently still escaped, and
         # %26(timestamp)%3a(url)%3a(user)%3a(domain)
@@ -268,9 +338,11 @@
         # get delimiter between timestamped entries to be &&&
         $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
         $log = &unescape($log);
+        $time_this->('translate_and_unescape');
         # now go over all log entries 
         my $machine_id = &get_id($machine_table,'machine',$host);
         foreach (split(/\&\&\&/,$log)) {
+            $time_this->();
 	    my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
             if (! defined($res) || $res =~ /^\s*$/) {
                 $res = '/adm/roles';
@@ -283,60 +355,126 @@
                 $action = 'view';
             }
             my $student = $uname.':'.$udom;
-            if ($timerun) {$starttime = Time::HiRes::time;}
+            $time_this->('split_and_error_check');
             my $student_id = &get_id($student_table,'student',$student);
             my $res_id = &get_id($res_table,'resource',$res);
             my $action_id = &get_id($action_table,'action',$action);
             my $sql_time = &Apache::lonmysql::sqltime($time);
             my $values = $dbh->quote(join('',@values));
-            if ($timerun) {
-                push(@{$Timing{'get_id'}},Time::HiRes::time-$starttime);
-            }
+            $time_this->('get_ids');
             #
-            if ($timerun) {$starttime = Time::HiRes::time;}
             my $row = [$res_id,
-                       $sql_time,
+                       qq{'$sql_time'},
                        $student_id,
                        $action_id,
-                       undef,        # idx
+                       qq{''},        # idx
                        $machine_id,
                        $values];
-            my $result = &Apache::lonmysql::store_row($activity_table,
-                                                      $row);
-            if ($timerun) {
-                push(@{$Timing{'insert'}},Time::HiRes::time-$starttime);
-            }
+            push(@RowData,$row);
+            $time_this->('push_row');
             #
-            $insertcount++;
+        }
+        $time_this->();
+        if ($linecount % 100 == 0) {
+            my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
+                                                            undef,
+                                                            \@RowData);
+            $time_this->('bulk_store_rows');
             if (! defined($result)) {
-                warn "Got error of ".&Apache::lonmysql::get_error().$/;
-                warn 
-                    'res_id     => :'.$res_id.':'.$/.
-                    'sql_time   => :'.$sql_time.':'.$/.
-                    'student_id => :'.$student_id.':'.$/.
-                    'action_id  => :'.$action_id.':'.$/.
-                    'machine_id => :'.$machine_id.':'.$/.
-                    'values     => :'.join(',',@values);
+                warn "Error occured during insert.".
+                    &Apache::lonmysql::get_error();
             }
+            undef(@RowData);
+        }
+    }
+    if (@RowData) {
+        $time_this->();
+        my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
+                                                        undef,
+                                                        \@RowData);
+        $time_this->('bulk_store_rows');
+        if (! defined($result)) {
+            warn "Error occured during insert.".
+                &Apache::lonmysql::get_error();
         }
+        undef(@RowData);
     }
     close IN;
-    print "Number of lines: ".$linecount.$/;
-    print "Number of inserts: ".$insertcount.$/;
-    if ($timerun) {
-        print "Timing Data:".$/;
+#    print "Number of lines: ".$linecount.$/;
+#    print "Number of inserts: ".$insertcount.$/;
+    return $linecount;
+}
+
+##
+## Timing routines
+##
+{
+    my %Timing;
+    my $starttime;
+
+sub nothing {
+    return;
+}
+
+sub time_action {
+    my ($key) = @_;
+    if (defined($key)) {
+        $Timing{$key}+=Time::HiRes::time-$starttime;
+        $Timing{'count_'.$key}++;
+    }
+    $starttime = Time::HiRes::time;
+}
+
+sub outputtimes {
+    my $Str;
+    if ($time_run) {
+        $Str = "Timing Data:".$/;
         while (my($k,$v) = each(%Timing)) {
-            my $Str .= '  '.$k.'  '.scalar(@$v).'   ';
-            my $sum;
-            foreach (@$v) {
-                $sum+=$_;
-            }
-            $Str .= $sum.$/;
-            print $Str;
+            next if ($k =~ /^count_/);
+            my $count = $Timing{'count_'.$k};
+            $Str .= 
+                '  '.sprintf("%25.25s",$k).
+                '  '.sprintf('% 8d',$count).
+                '  '.sprintf('%12.5f',$v).$/;
         }
     }
+    return $Str;
+}
+
+}
+
+
+##
+## Use mysqldump to store backups of the tables
+##
+sub backup_tables {
+    my ($sql_filename) = @_;
+    my $command = qq{mysqldump --opt loncapa };
+                             
+    foreach my $table (@Activity_Tables) {
+        my $tablename = $table->{'id'};
+        $command .= $tablename.' ';
+    }
+    $command .= '>'.$sql_filename;
+    warn $command.$/;
+    system($command);
 }
 
+##
+## Load in mysqldumped files
+##
+sub load_backup_tables {
+    my ($sql_filename) = @_;
+    return undef if (! -e $sql_filename);
+    # Check for .my.cnf
+    my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa';
+    warn $command.$/;
+    system($command);
+}
+
+##
+## 
+##
 sub initialize_configuration {
     # Fake it for now:
     $perlvar{'lonSqlUser'} = 'www';
@@ -357,9 +495,7 @@
 }
 
 sub create_tables {
-    foreach my $table ($student_table_def,$res_table_def,
-                       $action_table_def,$machine_table_def,
-                       $activity_table_def) {
+    foreach my $table (@Activity_Tables) {
         my $table_id = &Apache::lonmysql::create_table($table);
         if (! defined($table_id)) {
             warn "Unable to create table ".$table->{'id'}.$/;
@@ -371,9 +507,7 @@
 }
 
 sub drop_tables {
-    foreach my $table ($student_table_def,$res_table_def,
-                       $action_table_def,$machine_table_def,
-                       $activity_table_def) {
+    foreach my $table (@Activity_Tables) {
         my $table_id = $table->{'id'};
         &Apache::lonmysql::drop_table($table_id);
     }
@@ -390,10 +524,10 @@
     my %IDs;
 
 sub read_id_tables {
-    foreach my $table ($student_table,$res_table,$action_table,$machine_table){
-        my @Data = &Apache::lonmysql::get_rows($table);
+    foreach my $table (@Activity_Tables) {
+        my @Data = &Apache::lonmysql::get_rows($table->{'id'});
         foreach my $row (@Data) {
-            $IDs{$table}->{$row->[1]} = $row->[0];
+            $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
         }
     }
 }

--matthew1091050652--