[LON-CAPA-cvs] cvs: loncom /metadata_database searchcat.pl

matthew lon-capa-cvs@mail.lon-capa.org
Fri, 09 Apr 2004 22:04:53 -0000


This is a MIME encoded message

--matthew1081548293
Content-Type: text/plain

matthew		Fri Apr  9 18:04:53 2004 EDT

  Modified files:              
    /loncom/metadata_database	searchcat.pl 
  Log:
  Added command line option handling and logging routine.
  Added &process_dynamic_metadata and &get_dynamic_metadata.  Removed
  &dynamicmeta subroutine.  
  
  
--matthew1081548293
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040409180453.txt"

Index: loncom/metadata_database/searchcat.pl
diff -u loncom/metadata_database/searchcat.pl:1.55 loncom/metadata_database/searchcat.pl:1.56
--- loncom/metadata_database/searchcat.pl:1.55	Thu Apr  8 11:57:32 2004
+++ loncom/metadata_database/searchcat.pl	Fri Apr  9 18:04:53 2004
@@ -2,7 +2,7 @@
 # The LearningOnline Network
 # searchcat.pl "Search Catalog" batch script
 #
-# $Id: searchcat.pl,v 1.55 2004/04/08 15:57:32 matthew Exp $
+# $Id: searchcat.pl,v 1.56 2004/04/09 22:04:53 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -71,12 +71,51 @@
 use LONCAPA::Configuration;
 use LONCAPA::lonmetadata;
 
+use Getopt::Long;
 use IO::File;
 use HTML::TokeParser;
 use GDBM_File;
 use POSIX qw(strftime mktime);
+
 use File::Find;
 
+#
+# Command line options, parsed with Getopt::Long (see the -help text below)
+my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
+GetOptions (
+            'help'     => \$help,
+            'simulate' => \$simulate,
+            'only=s'   => \$oneuser,
+            'verbose=i'  => \$verbose,  # integer: compared numerically in &log
+            'debug' => \$debug,
+            );
+
+if ($help) {
+    print <<"ENDHELP";
+$0
+Rebuild and update the LON-CAPA metadata database. 
+Options:
+    -help          Print this help
+    -simulate      Do not modify the database.
+    -only=user     Only compute for the given user.  Implies -simulate   
+    -verbose=val   Sets logging level, val must be a number
+    -debug         Turns on debugging output
+ENDHELP
+    exit 0;
+}
+
+if (! defined($debug)) {        # -debug not given
+    $debug = 0;
+}
+
+if (! defined($verbose)) {      # -verbose missing (or rejected as non-numeric)
+    $verbose = 0;
+}
+
+if (defined($oneuser)) {        # -only implies -simulate, as the help text says
+    $simulate=1;
+}
+
 ##
 ## Use variables for table names so we can test this routine a little easier
 my $oldname = 'metadata';
@@ -104,13 +143,18 @@
 #
 # Let people know we are running
 open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
-print LOG '==== Searchcat Run '.localtime()."====\n";
+&log(0,'==== Searchcat Run '.localtime()."====");
+if ($debug) {
+    &log(0,'simulating') if ($simulate);
+    &log(0,'only processing user '.$oneuser) if ($oneuser);
+    &log(0,'verbosity level = '.$verbose);
+}
 #
 # Connect to database
 my $dbh;
 if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},
                           { RaiseError =>0,PrintError=>0}))) {
-    print LOG "Cannot connect to database!\n";
+    &log(0,"Cannot connect to database!");
     die "MySQL Error: Cannot connect to database!\n";
 }
 # This can return an error and still be okay, so we do not bother checking.
@@ -122,24 +166,29 @@
 $dbh->do($request);
 if ($dbh->err) {
     $dbh->disconnect();
-    print LOG "\nMySQL Error Create: ".$dbh->errstr."\n";
+    &log(0,"MySQL Error Create: ".$dbh->errstr);
     die $dbh->errstr;
 }
 #
 # find out which users we need to examine
-opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
+my $dom = $perlvar{'lonDefDomain'};
+opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$dom");
 my @homeusers = 
     grep {
-        &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_");
+        &ishome("$perlvar{'lonDocRoot'}/res/$dom/$_");
     } grep { 
         !/^\.\.?$/;
     } readdir(RESOURCES);
 closedir RESOURCES;
 #
+if ($oneuser) {
+    @homeusers=($oneuser);
+}
+#
 # Loop through the users
 foreach my $user (@homeusers) {
-    print LOG "=== User: ".$user."\n";
-    my $prodir=&propath($perlvar{'lonDefDomain'},$user);
+    &log(0,"=== User: ".$user);
+    &process_dynamic_metadata($user,$dom);
     #
     # Use File::Find to get the files we need to read/modify
     find(
@@ -152,18 +201,23 @@
 }
 #
 # Rename the table
-$dbh->do('DROP TABLE IF EXISTS '.$oldname);
-if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) {
-    print LOG "MySQL Error Rename: ".$dbh->errstr."\n";
-    die $dbh->errstr;
+if (! $simulate) {
+    $dbh->do('DROP TABLE IF EXISTS '.$oldname);
+    if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) {
+        &log(0,"MySQL Error Rename: ".$dbh->errstr);
+        die $dbh->errstr;
+    } else {
+        &log(1,"MySQL table rename successful.");
+    }
 }
+
 if (! $dbh->disconnect) {
-    print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n";
+    &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
     die $dbh->errstr;
 }
 ##
 ## Finished!
-print LOG "==== Searchcat completed ".localtime()." ====\n";
+&log(0,"==== Searchcat completed ".localtime()." ====");
 close(LOG);
 
 &write_type_count();
@@ -171,6 +225,23 @@
 
 exit 0;
 
+##
+## Status logging routine.  Inputs: $level, $message
+##
+## $message is written to LOG only if $level <= $verbose (-verbose=val).
+## $level 0 is for normal output and error messages; undef counts as 0.
+## $message does not need to end with \n.  In the case of errors
+## the message should contain as much information as possible to
+## help in diagnosing the problem.
+##
+sub log {
+    my ($level,$message)=@_;
+    $level = 0 if (! defined($level));
+    if ($verbose >= $level) {
+        print LOG $message."\n";   # "\n", not $/ (that is the input separator)
+    }
+}
+
 ########################################################
 ########################################################
 ###                                                  ###
@@ -205,10 +276,12 @@
 sub print_filename {
     my ($file) = $_;
     my $fullfilename = $File::Find::name;
-    if (-d $file) {
-        print LOG " Got directory ".$fullfilename."\n";
-    } else {
-        print LOG " Got file ".$fullfilename."\n";
+    if ($debug) {                 # only with -debug; &log also needs -verbose >= 5
+        if (-d $file) {
+            &log(5," Got directory ".$fullfilename);
+        } else {
+            &log(5," Got file ".$fullfilename);
+        }
     }
     $_=$file;
 }
@@ -217,16 +290,18 @@
     my ($file) = $_;
     my $fullfilename = $File::Find::name;
     return if (-d $fullfilename); # No need to do anything here for directories
-    print LOG $fullfilename."\n";
-    my $ref=&metadata($fullfilename);
-    if (! defined($ref)) {
-        print LOG "    No data\n";
-        return;
-    }
-    while (my($key,$value) = each(%$ref)) {
-        print LOG "    ".$key." => ".$value."\n";
+    if ($debug) {
+        &log(6,$fullfilename);
+        my $ref=&metadata($fullfilename);
+        if (! defined($ref)) {
+            &log(6,"    No data");
+            return;
+        }
+        while (my($key,$value) = each(%$ref)) {
+            &log(6,"    ".$key." => ".$value);
+        }
+        &count_copyright($ref->{'copyright'});
     }
-    &count_copyright($ref->{'copyright'});
     $_=$file;
 }
 
@@ -237,26 +312,26 @@
 ##   Only input is the filename in $_.  
 sub process_meta_file {
     my ($file) = $_;
-    my $filename = $File::Find::name;
+    my $filename = $File::Find::name; # full filename
     return if (-d $filename); # No need to do anything here for directories
     #
-    print LOG $filename."\n";
+    &log(3,$filename) if ($debug);
     #
     my $ref=&metadata($filename);
     #
     # $url is the original file url, not the metadata file
     my $url='/res/'.&declutter($filename);
     $url=~s/\.meta$//;
-    print LOG "    ".$url."\n";
+    &log(3,"    ".$url) if ($debug);
     #
     # Ignore some files based on their metadata
     if ($ref->{'obsolete'}) { 
-        print LOG "obsolete\n"; 
+        &log(3,"obsolete") if ($debug);
         return; 
     }
     &count_copyright($ref->{'copyright'});
     if ($ref->{'copyright'} eq 'private') { 
-        print LOG "private\n"; 
+        &log(3,"private") if ($debug);
         return; 
     }
     #
@@ -264,8 +339,10 @@
     my %dyn;
     if ($url=~ m:/default$:) {
         $url=~ s:/default$:/:;
+        &log(3,"Skipping dynamic data") if ($debug);
     } else {
-        # %dyn=&dynamicmeta($url);
+        &log(3,"Retrieving dynamic data") if ($debug);
+        %dyn=&get_dynamic_metadata($url);
         &count_type($url);
     }
     #
@@ -276,17 +353,17 @@
                 %dyn,
                 'url'=>$url,
                 'version'=>'current');
-    my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname,
-                                                             \%Data);
-    if ($err) {
-        print LOG "\nMySQL Error Insert: ".$err."\n";
-        die $err;
-    }
-    if ($count < 1) {
-        print LOG "Unable to insert record into MySQL database for $url\n";
-        die "Unable to insert record into MySQl database for $url";
-    } else {
-        print LOG "Count = ".$count."\n";
+    if (! $simulate) {
+        my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname,
+                                                                 \%Data);
+        if ($err) {
+            &log(0,"MySQL Error Insert: ".$err);
+            die $err;
+        }
+        if ($count < 1) {
+            &log(0,"Unable to insert record into MySQL database for $url");
+            die "Unable to insert record into MySQl database for $url";
+        }
     }
     #
     # Reset $_ before leaving
@@ -366,122 +443,184 @@
 ###                                                  ###
 ########################################################
 ########################################################
-sub dynamicmeta {
-    my $url = &declutter(shift());
-    $url =~ s/\.meta$//;
-    my %data = ('count'         => 0,
-                'course'        => 0,
-                'course_list'   => '',
-                'avetries'      => 'NULL',
-                'avetries_list' => '',
-                'stdno'         => 0,
-                'stdno_list'    => '',
-                'usage'         => 0,
-                'usage_list'    => '',
-                'goto'          => 0,
-                'goto_list'     => '',
-                'comefrom'      => 0,
-                'comefrom_list' => '',
-                'difficulty'    => 'NULL',
-                'difficulty_list' => '',
-                'sequsage'      => '0',
-                'sequsage_list' => '',
-                'clear'         => 'NULL',
-                'technical'     => 'NULL',
-                'correct'       => 'NULL',
-                'helpful'       => 'NULL',
-                'depth'         => 'NULL',
-                'comments'      => '',                
-                );
-    my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//);
-    my $prodir=&propath($dom,$auth);
+##
+## Dynamic metadata description
+##
+##   Field             Type
+##-----------------------------------------------------------
+##   count             integer
+##   course            integer
+##   course_list       comma separated list of course ids
+##   avetries          real                                
+##   avetries_list     comma separated list of real numbers
+##   stdno             real
+##   stdno_list        comma separated list of real numbers
+##   usage             integer   
+##   usage_list        comma separated list of resources
+##   goto              scalar
+##   goto_list         comma separated list of resources
+##   comefrom          scalar
+##   comefrom_list     comma separated list of resources
+##   difficulty        real
+##   difficulty_list   comma separated list of real numbers
+##   sequsage          scalar
+##   sequsage_list     comma separated list of resources
+##   clear             real
+##   technical         real
+##   correct           real
+##   helpful           real
+##   depth             real
+##   comments          html of all the comments made
+##
+{
+
+my %DynamicData;
+my %Counts;
+
+sub process_dynamic_metadata {
+    my ($user,$dom) = @_;
+    undef(%DynamicData);
+    undef(%Counts);
+    # Refill %DynamicData and %Counts for $user; returns 1, or 0 if a tie fails
+    my $prodir = &propath($dom,$user);
     #
-    # Get metadata except counts
+    # Read in the dynamic metadata
     my %evaldata;
     if (! tie(%evaldata,'GDBM_File',
               $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
-        return (undef);
+        return 0;
     }
-    my %sum=();
-    my %count=();
-    my %concat=();
-    my %listitems=(
-                   'course'       => 'add',
-                   'goto'         => 'add',
-                   'comefrom'     => 'add',
-                   'avetries'     => 'average',
-                   'stdno'        => 'add',
-                   'difficulty'   => 'average',
-                   'clear'        => 'average',
-                   'technical'    => 'average',
-                   'helpful'      => 'average',
-                   'correct'      => 'average',
-                   'depth'        => 'average',
-                   'comments'     => 'append',
-                   'usage'        => 'count'
-                   );
-    #
-    my $regexp=$url;
-    $regexp=~s/(\W)/\\$1/g;
-    $regexp='___'.$regexp.'___([a-z]+)$';
-    while (my ($esckey,$value)=each %evaldata) {
-        my $key=&unescape($esckey);
-        if ($key=~/$regexp/) {
-            my ($item,$purl,$cat)=split(/___/,$key);
-            $count{$cat}++;
-            if ($listitems{$cat} ne 'append') {
-                if (defined($sum{$cat})) {
-                    $sum{$cat}+=&unescape($value);
-                    $concat{$cat}.=','.$item;
-                } else {
-                    $sum{$cat}=&unescape($value);
-                    $concat{$cat}=$item;
-                }
-            } else {
-                if (defined($sum{$cat})) {
-                    if ($evaldata{$esckey}=~/\w/) {
-                        $sum{$cat}.='<hr />'.&unescape($evaldata{$esckey});
-                    }
-                } else {
-                    $sum{$cat}=''.&unescape($evaldata{$esckey});
-		    }
+    #
+    # Process every stored element; keys are split as source___file___type
+    while (my ($storedkey,$value) = each(%evaldata)) {
+        my ($source,$file,$type) = split('___',$storedkey);
+        $source = &unescape($source);
+        $file = &unescape($file);
+        $value = &unescape($value);
+        &log(8,"    got ".$file."\n        ".$type." ".$source) if ($debug);
+        if ($type =~ /^(avetries|count|difficulty|stdno|timestamp)$/) {
+            #
+            # Statistics: $source is course id
+            $DynamicData{$file}->{'statistics'}->{$source}->{$type}=$value;
+        } elsif ($type =~ /^(clear|comments|correct|depth|technical|helpful)$/){
+            #
+            # Evaluation $source is username, check if they evaluated it
+            # more than once.  If so, pad the entry with a space.
+            while(exists($DynamicData{$file}->{'evaluation'}->{$type}->{$source})) {
+                $source .= ' ';
             }
+            $DynamicData{$file}->{'evaluation'}->{$type}->{$source}=$value;
+        } elsif ($type =~ /^(course|comefrom|goto|usage)$/) {
+            #
+            # Context $source is course id or resource (already unescaped above)
+            push(@{$DynamicData{$file}->{$type}},$source);
+        } else {
+            &log(0,"   ".$user."@".$dom.":Process metadata: Unable to decode ".$type);
         }
     }
     untie(%evaldata);
-    # transfer gathered data to returnhash, calculate averages where applicable
-    my %returnhash;
-    while (my $cat=each(%count)) {
-        if ($count{$cat} eq 'nan') { next; }
-        if ($sum{$cat} eq 'nan') { next; }
-        if ($listitems{$cat} eq 'average') {
-            if ($count{$cat}) {
-                $returnhash{$cat}=int(($sum{$cat}/$count{$cat})*100.0+0.5)/100.0;
-            } else {
-                $returnhash{$cat}='NULL';
+    #
+    # Read in the access count data
+    &log(7,'Reading access count data') if ($debug);
+    my %countdata;
+    if (! tie(%countdata,'GDBM_File',
+              $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
+        return 0;
+    }
+    while (my ($key,$count) = each(%countdata)) {
+        $key = &unescape($key);            # compare the decoded key, not the stored form
+        next if ($key !~ /^\Q$dom\E/);     # \Q..\E: match the domain literally
+        &log(8,'    Count '.$key.' = '.$count) if ($debug);
+        $Counts{$key}=$count;
+    }
+    untie(%countdata);
+    if ($debug) {
+        &log(7,scalar(keys(%Counts)).
+             " Counts read for ".$user."@".$dom);
+        &log(7,scalar(keys(%DynamicData)).
+             " Dynamic metadata read for ".$user."@".$dom);
+    }
+    #
+    return 1;
+}
+
+sub get_dynamic_metadata {      # ($url) => list of field => value pairs
+    my ($url) = @_;
+    $url =~ s:^/res/::;         # look the resource up without the leading /res/
+    if (! exists($DynamicData{$url})) {
+        &log(7,'    No dynamic data for '.$url) if ($debug);
+        return ();
+    }
+    my %data;
+    my $resdata = $DynamicData{$url};
+    #
+    # Get the statistical data: average over courses, plus the raw value list
+    foreach my $type (qw/avetries difficulty stdno/) {
+        my $count;
+        my $sum;
+        my @Values;
+        foreach my $coursedata (values(%{$resdata->{'statistics'}})) {
+            if (ref($coursedata) eq 'HASH' && exists($coursedata->{$type})) {
+                $count++;
+                $sum += $coursedata->{$type};
+                push(@Values,$coursedata->{$type});
             }
-        } elsif ($listitems{$cat} eq 'count') {
-            $returnhash{$cat}=$count{$cat};
-        } else {
-            $returnhash{$cat}=$sum{$cat};
         }
-        $returnhash{$cat.'_list'}=$concat{$cat};
+        if ($count) {
+            $data{$type} = $sum/$count;
+            $data{$type.'_list'} = join(',',@Values);
+        }
+    }
+    # find the count
+    $data{'count'} = $Counts{$url};   # undef when no access count was read
+    #
+    # Get the context data
+    foreach my $type (qw/course goto comefrom/) {
+        if (defined($resdata->{$type}) && 
+            ref($resdata->{$type}) eq 'ARRAY') {
+            $data{$type} = scalar(@{$resdata->{$type}});
+            $data{$type.'_list'} = join(',',@{$resdata->{$type}});
+        }
+    }
+    if (defined($resdata->{'usage'}) && 
+        ref($resdata->{'usage'}) eq 'ARRAY') {
+        $data{'sequsage'} = scalar(@{$resdata->{'usage'}});
+        $data{'sequsage_list'} = join(',',@{$resdata->{'usage'}});
+    }
+    #
+    # Get the evaluation data
+    foreach my $type (qw/clear technical correct helpful depth/) {
+        my $count;
+        my $sum;
+        foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{$type}})){
+            $sum += $resdata->{'evaluation'}->{$type}->{$evaluator};
+            $count++;
+        }
+        if ($count > 0) {
+            $data{$type}=$sum/$count;
+        }
     }
     #
-    # get count
-    if (tie(my %evaldata,'GDBM_File',
-            $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
-	my $escurl=&escape($url);
-	if (! exists($evaldata{$escurl})) {
-	    $returnhash{'count'}=0;
-	} else {
-	    $returnhash{'count'}=$evaldata{$escurl};
-	}
-	untie %evaldata;
+    # put together comments
+    my $comments = '<div class="LCevalcomments">';
+    foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{'comments'}})){
+        $comments .= $evaluator.':'.
+            $resdata->{'evaluation'}->{'comments'}->{$evaluator}.'<hr />';
+    }
+    $data{'comments'} = $comments.'</div>';  # store it: was built but never returned
+    #
+    # Log the dynamic metadata
+    if ($debug) {
+        while (my($k,$v)=each(%data)) {
+            &log(8,"    ".$k." => ".$v);
+        }
     }
-    return %returnhash;
+    # Only fields for which data was found are present in the returned list
+    return %data;
 }
 
+} # End of %DynamicData and %Counts scope
+
 ########################################################
 ########################################################
 ###                                                  ###
@@ -593,8 +732,10 @@
         $TimeData[5]+=1900;
         $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                              @TimeData[5,4,3,2,1,0]);
+    } elsif (! defined($time) || $time == 0) {
+        $mysqltime = 0;
     } else {
-        print LOG "    Unable to decode time ".$time."\n";
+        &log(0,"    sqltime:Unable to decode time ".$time);
         $mysqltime = 0;
     }
     return $mysqltime;

--matthew1081548293--