[LON-CAPA-cvs] cvs: loncom /debugging_tools activity_to_accesscount.pl

matthew lon-capa-cvs@mail.lon-capa.org
Fri, 14 Nov 2003 19:41:38 -0000


matthew		Fri Nov 14 14:41:38 2003 EDT

  Modified files:              
    /loncom/debugging_tools	activity_to_accesscount.pl 
  Log:
  Now updates a target db with access count data.  Keeps track of when a source
  activity.log file was parsed (in the target db file) and skips entries already
  recorded.  Given how large the activity.log files get, this script can take a 
  long time to run.
  
  
Index: loncom/debugging_tools/activity_to_accesscount.pl
diff -u loncom/debugging_tools/activity_to_accesscount.pl:1.1 loncom/debugging_tools/activity_to_accesscount.pl:1.2
--- loncom/debugging_tools/activity_to_accesscount.pl:1.1	Fri Nov 14 13:24:40 2003
+++ loncom/debugging_tools/activity_to_accesscount.pl	Fri Nov 14 14:41:38 2003
@@ -1,6 +1,7 @@
 #!/usr/bin/perl -w
 #
 use strict;
+use GDBM_File;
 
 sub unescape {
     my $str=shift;
@@ -12,11 +13,28 @@
 
 sub main {
     my $file=$ARGV[0];
-    print STDERR "Using $file\n";
+    my ($path) = ($file =~ m:(.*)/activity\.log$:);
+    my $target = $path.'/nohist_accesscount.db';
+    print STDERR "source: $file\ntarget: $target\n";
+    my %accessDB;
+    my $accesstime = 0;
+    my $starttime = time;
+    if (-e $target) {
+        if (! tie(%accessDB,'GDBM_File',$target,&GDBM_READER,0640)) {
+            warn "Unable to tie to $target";
+            return;
+        }
+        #
+        if (exists($accessDB{'tabulated '.$file})) {
+            $accesstime = $accessDB{'tabulated '.$file};
+        }
+        untie(%accessDB);
+    }
+    #
     my $line;
     open FILEID,'<'.$file;
     my @allaccess;
-    print STDERR "Access by resource\n\n";
+    print STDERR "Access by resource after $accesstime\n\n";
     my $numlines = 0;
     while ($line=<FILEID>) {
         $numlines++;
@@ -38,30 +56,43 @@
         shift(@accesses);
 	while (@accesses) {
             my $date = shift(@accesses);
+            next if ($date =~ /\D/ || $date < $accesstime);
             my $access = shift(@accesses);
             next if (! defined($access) || $access eq '' || 
                      ! defined($date)   || $date   eq '');
             $access =~ s/(\&$|^:)//g;
             my ($resource,$who,$domain,$post,@posts)=split(':',$access);
-	    if (!$resource) {
+	    if (!$resource || $resource eq '') {
                 next; 
             }
             $resource = &unescape($resource);
-            if ($resource !~ m:/: || $resource =~ m:/prtspool/:) {
+            if ($resource !~ m:(.*)/(.*)/: || $resource =~ m:/prtspool/:) {
                 next;
             }
             if ($resource =~ /___\d+___/) {
                 (undef,$resource) = split(/___\d+___/,$resource);
             }
             next if ($resource =~ m:^/(res/adm|adm)/:);
-            $resource =~ s:^/?res/?::;
+            $resource =~ s:^/?res/::;
             $resourceaccess{$resource}++;            
 	}
     }
-    print STDERR 'done.'.$/;
+    print STDERR 'done.  Updating '.$target.$/;
+    if (! tie(%accessDB,'GDBM_File',$target,&GDBM_WRCREAT,0640)) {
+        warn "Unable to open $target to store data".$/;
+        return;
+    }
+    #
     while (my ($resource,$count) = each(%resourceaccess)) {
+        if (exists($accessDB{$resource})) {
+            $accessDB{$resource}+=$count;
+        } else {
+            $accessDB{$resource} = $count;
+        }
         print sprintf("%10.0f",$count).':'.$resource."\n";
     }
+    $accessDB{'tabulated '.$file} = $starttime;
+    untie(%accessDB);
 }
 
 main;