[LON-CAPA-cvs] cvs: loncom /metadata_database searchcat.pl
matthew
lon-capa-cvs@mail.lon-capa.org
Fri, 09 Apr 2004 22:04:53 -0000
This is a MIME encoded message
--matthew1081548293
Content-Type: text/plain
matthew Fri Apr 9 18:04:53 2004 EDT
Modified files:
/loncom/metadata_database searchcat.pl
Log:
Added command line option handling and logging routine.
Added &process_dynamic_metadata and &get_dynamic_metadata. Removed
&dynamicmetadata subroutine.
--matthew1081548293
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040409180453.txt"
Index: loncom/metadata_database/searchcat.pl
diff -u loncom/metadata_database/searchcat.pl:1.55 loncom/metadata_database/searchcat.pl:1.56
--- loncom/metadata_database/searchcat.pl:1.55 Thu Apr 8 11:57:32 2004
+++ loncom/metadata_database/searchcat.pl Fri Apr 9 18:04:53 2004
@@ -2,7 +2,7 @@
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
-# $Id: searchcat.pl,v 1.55 2004/04/08 15:57:32 matthew Exp $
+# $Id: searchcat.pl,v 1.56 2004/04/09 22:04:53 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -71,12 +71,51 @@
use LONCAPA::Configuration;
use LONCAPA::lonmetadata;
+use Getopt::Long;
use IO::File;
use HTML::TokeParser;
use GDBM_File;
use POSIX qw(strftime mktime);
+
use File::Find;
+#
+# Set up configuration options
+my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
+GetOptions (
+ 'help' => \$help,
+ 'simulate' => \$simulate,
+ 'only=s' => \$oneuser,
+ 'verbose=s' => \$verbose,
+ 'debug' => \$debug,
+ );
+
+if ($help) {
+ print <<"ENDHELP";
+$0
+Rebuild and update the LON-CAPA metadata database.
+Options:
+ -help Print this help
+ -simulate Do not modify the database.
+ -only=user Only compute for the given user. Implies -simulate
+ -verbose=val Sets logging level, val must be a number
+ -debug Turns on debugging output
+ENDHELP
+ exit 0;
+}
+
+if (! defined($debug)) {
+ $debug = 0;
+}
+
+if (! defined($verbose)) {
+ $verbose = 0;
+}
+
+if (defined($oneuser)) {
+ $simulate=1;
+}
+
##
## Use variables for table names so we can test this routine a little easier
my $oldname = 'metadata';
@@ -104,13 +143,18 @@
#
# Let people know we are running
open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
-print LOG '==== Searchcat Run '.localtime()."====\n";
+&log(0,'==== Searchcat Run '.localtime()."====");
+if ($debug) {
+ &log(0,'simulating') if ($simulate);
+ &log(0,'only processing user '.$oneuser) if ($oneuser);
+ &log(0,'verbosity level = '.$verbose);
+}
#
# Connect to database
my $dbh;
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},
{ RaiseError =>0,PrintError=>0}))) {
- print LOG "Cannot connect to database!\n";
+ &log(0,"Cannot connect to database!");
die "MySQL Error: Cannot connect to database!\n";
}
# This can return an error and still be okay, so we do not bother checking.
@@ -122,24 +166,29 @@
$dbh->do($request);
if ($dbh->err) {
$dbh->disconnect();
- print LOG "\nMySQL Error Create: ".$dbh->errstr."\n";
+ &log(0,"MySQL Error Create: ".$dbh->errstr);
die $dbh->errstr;
}
#
# find out which users we need to examine
-opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
+my $dom = $perlvar{'lonDefDomain'};
+opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$dom");
my @homeusers =
grep {
- &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_");
+ &ishome("$perlvar{'lonDocRoot'}/res/$dom/$_");
} grep {
!/^\.\.?$/;
} readdir(RESOURCES);
closedir RESOURCES;
#
+if ($oneuser) {
+ @homeusers=($oneuser);
+}
+#
# Loop through the users
foreach my $user (@homeusers) {
- print LOG "=== User: ".$user."\n";
- my $prodir=&propath($perlvar{'lonDefDomain'},$user);
+ &log(0,"=== User: ".$user);
+ &process_dynamic_metadata($user,$dom);
#
# Use File::Find to get the files we need to read/modify
find(
@@ -152,18 +201,23 @@
}
#
# Rename the table
-$dbh->do('DROP TABLE IF EXISTS '.$oldname);
-if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) {
- print LOG "MySQL Error Rename: ".$dbh->errstr."\n";
- die $dbh->errstr;
+if (! $simulate) {
+ $dbh->do('DROP TABLE IF EXISTS '.$oldname);
+ if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) {
+ &log(0,"MySQL Error Rename: ".$dbh->errstr);
+ die $dbh->errstr;
+ } else {
+ &log(1,"MySQL table rename successful.");
+ }
}
+
if (! $dbh->disconnect) {
- print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n";
+ &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
die $dbh->errstr;
}
##
## Finished!
-print LOG "==== Searchcat completed ".localtime()." ====\n";
+&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);
&write_type_count();
@@ -171,6 +225,23 @@
exit 0;
+##
+## Status logging routine. Inputs: $level, $message
+##
+## $level 0 should be used for normal output and error messages
+##
+## $message does not need to end with \n. In the case of errors
+## the message should contain as much information as possible to
+## help in diagnosing the problem.
+##
+sub log {
+ my ($level,$message)=@_;
+ $level = 0 if (! defined($level));
+ if ($verbose >= $level) {
+ print LOG $message.$/;
+ }
+}
+
########################################################
########################################################
### ###
@@ -205,10 +276,12 @@
sub print_filename {
my ($file) = $_;
my $fullfilename = $File::Find::name;
- if (-d $file) {
- print LOG " Got directory ".$fullfilename."\n";
- } else {
- print LOG " Got file ".$fullfilename."\n";
+ if ($debug) {
+ if (-d $file) {
+ &log(5," Got directory ".$fullfilename);
+ } else {
+ &log(5," Got file ".$fullfilename);
+ }
}
$_=$file;
}
@@ -217,16 +290,18 @@
my ($file) = $_;
my $fullfilename = $File::Find::name;
return if (-d $fullfilename); # No need to do anything here for directories
- print LOG $fullfilename."\n";
- my $ref=&metadata($fullfilename);
- if (! defined($ref)) {
- print LOG " No data\n";
- return;
- }
- while (my($key,$value) = each(%$ref)) {
- print LOG " ".$key." => ".$value."\n";
+ if ($debug) {
+ &log(6,$fullfilename);
+ my $ref=&metadata($fullfilename);
+ if (! defined($ref)) {
+ &log(6," No data");
+ return;
+ }
+ while (my($key,$value) = each(%$ref)) {
+ &log(6," ".$key." => ".$value);
+ }
+ &count_copyright($ref->{'copyright'});
}
- &count_copyright($ref->{'copyright'});
$_=$file;
}
@@ -237,26 +312,26 @@
## Only input is the filename in $_.
sub process_meta_file {
my ($file) = $_;
- my $filename = $File::Find::name;
+ my $filename = $File::Find::name; # full filename
return if (-d $filename); # No need to do anything here for directories
#
- print LOG $filename."\n";
+ &log(3,$filename) if ($debug);
#
my $ref=&metadata($filename);
#
# $url is the original file url, not the metadata file
my $url='/res/'.&declutter($filename);
$url=~s/\.meta$//;
- print LOG " ".$url."\n";
+ &log(3," ".$url) if ($debug);
#
# Ignore some files based on their metadata
if ($ref->{'obsolete'}) {
- print LOG "obsolete\n";
+ &log(3,"obsolete") if ($debug);
return;
}
&count_copyright($ref->{'copyright'});
if ($ref->{'copyright'} eq 'private') {
- print LOG "private\n";
+ &log(3,"private") if ($debug);
return;
}
#
@@ -264,8 +339,10 @@
my %dyn;
if ($url=~ m:/default$:) {
$url=~ s:/default$:/:;
+ &log(3,"Skipping dynamic data") if ($debug);
} else {
- # %dyn=&dynamicmeta($url);
+ &log(3,"Retrieving dynamic data") if ($debug);
+ %dyn=&get_dynamic_metadata($url);
&count_type($url);
}
#
@@ -276,17 +353,17 @@
%dyn,
'url'=>$url,
'version'=>'current');
- my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname,
- \%Data);
- if ($err) {
- print LOG "\nMySQL Error Insert: ".$err."\n";
- die $err;
- }
- if ($count < 1) {
- print LOG "Unable to insert record into MySQL database for $url\n";
- die "Unable to insert record into MySQl database for $url";
- } else {
- print LOG "Count = ".$count."\n";
+ if (! $simulate) {
+ my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname,
+ \%Data);
+ if ($err) {
+ &log(0,"MySQL Error Insert: ".$err);
+ die $err;
+ }
+ if ($count < 1) {
+ &log(0,"Unable to insert record into MySQL database for $url");
+ die "Unable to insert record into MySQl database for $url";
+ }
}
#
# Reset $_ before leaving
@@ -366,122 +443,184 @@
### ###
########################################################
########################################################
-sub dynamicmeta {
- my $url = &declutter(shift());
- $url =~ s/\.meta$//;
- my %data = ('count' => 0,
- 'course' => 0,
- 'course_list' => '',
- 'avetries' => 'NULL',
- 'avetries_list' => '',
- 'stdno' => 0,
- 'stdno_list' => '',
- 'usage' => 0,
- 'usage_list' => '',
- 'goto' => 0,
- 'goto_list' => '',
- 'comefrom' => 0,
- 'comefrom_list' => '',
- 'difficulty' => 'NULL',
- 'difficulty_list' => '',
- 'sequsage' => '0',
- 'sequsage_list' => '',
- 'clear' => 'NULL',
- 'technical' => 'NULL',
- 'correct' => 'NULL',
- 'helpful' => 'NULL',
- 'depth' => 'NULL',
- 'comments' => '',
- );
- my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//);
- my $prodir=&propath($dom,$auth);
+##
+## Dynamic metadata description
+##
+## Field Type
+##-----------------------------------------------------------
+## count integer
+## course integer
+## course_list comma separated list of course ids
+## avetries real
+## avetries_list comma separated list of real numbers
+## stdno real
+## stdno_list comma separated list of real numbers
+## usage integer
+## usage_list comma separated list of resources
+## goto scalar
+## goto_list comma separated list of resources
+## comefrom scalar
+## comefrom_list comma separated list of resources
+## difficulty real
+## difficulty_list comma separated list of real numbers
+## sequsage scalar
+## sequsage_list comma separated list of resources
+## clear real
+## technical real
+## correct real
+## helpful real
+## depth real
+## comments html of all the comments made
+##
+{
+
+my %DynamicData;
+my %Counts;
+
+sub process_dynamic_metadata {
+ my ($user,$dom) = @_;
+ undef(%DynamicData);
+ undef(%Counts);
+ #
+ my $prodir = &propath($dom,$user);
#
- # Get metadata except counts
+ # Read in the dynamic metadata
my %evaldata;
if (! tie(%evaldata,'GDBM_File',
$prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
- return (undef);
+ return 0;
}
- my %sum=();
- my %count=();
- my %concat=();
- my %listitems=(
- 'course' => 'add',
- 'goto' => 'add',
- 'comefrom' => 'add',
- 'avetries' => 'average',
- 'stdno' => 'add',
- 'difficulty' => 'average',
- 'clear' => 'average',
- 'technical' => 'average',
- 'helpful' => 'average',
- 'correct' => 'average',
- 'depth' => 'average',
- 'comments' => 'append',
- 'usage' => 'count'
- );
- #
- my $regexp=$url;
- $regexp=~s/(\W)/\\$1/g;
- $regexp='___'.$regexp.'___([a-z]+)$';
- while (my ($esckey,$value)=each %evaldata) {
- my $key=&unescape($esckey);
- if ($key=~/$regexp/) {
- my ($item,$purl,$cat)=split(/___/,$key);
- $count{$cat}++;
- if ($listitems{$cat} ne 'append') {
- if (defined($sum{$cat})) {
- $sum{$cat}+=&unescape($value);
- $concat{$cat}.=','.$item;
- } else {
- $sum{$cat}=&unescape($value);
- $concat{$cat}=$item;
- }
- } else {
- if (defined($sum{$cat})) {
- if ($evaldata{$esckey}=~/\w/) {
- $sum{$cat}.='<hr />'.&unescape($evaldata{$esckey});
- }
- } else {
- $sum{$cat}=''.&unescape($evaldata{$esckey});
- }
+ #
+ # Process every stored element
+ while (my ($storedkey,$value) = each(%evaldata)) {
+ my ($source,$file,$type) = split('___',$storedkey);
+ $source = &unescape($source);
+ $file = &unescape($file);
+ $value = &unescape($value);
+ " got ".$file."\n ".$type." ".$source."\n";
+ if ($type =~ /^(avetries|count|difficulty|stdno|timestamp)$/) {
+ #
+ # Statistics: $source is course id
+ $DynamicData{$file}->{'statistics'}->{$source}->{$type}=$value;
+ } elsif ($type =~ /^(clear|comments|depth|technical|helpful)$/){
+ #
+ # Evaluation: $source is username. Check whether they evaluated it
+ # more than once; if so, pad the entry with a space.
+ while(exists($DynamicData{$file}->{'evaluation'}->{$type}->{$source})) {
+ $source .= ' ';
}
+ $DynamicData{$file}->{'evaluation'}->{$type}->{$source}=$value;
+ } elsif ($type =~ /^(course|comefrom|goto|usage)$/) {
+ #
+ # Context: $source is course id or resource
+ push(@{$DynamicData{$file}->{$type}},&unescape($source));
+ } else {
+ &log(0," ".$user."@".$dom.":Process metadata: Unable to decode ".$type);
}
}
untie(%evaldata);
- # transfer gathered data to returnhash, calculate averages where applicable
- my %returnhash;
- while (my $cat=each(%count)) {
- if ($count{$cat} eq 'nan') { next; }
- if ($sum{$cat} eq 'nan') { next; }
- if ($listitems{$cat} eq 'average') {
- if ($count{$cat}) {
- $returnhash{$cat}=int(($sum{$cat}/$count{$cat})*100.0+0.5)/100.0;
- } else {
- $returnhash{$cat}='NULL';
+ #
+ # Read in the access count data
+ &log(7,'Reading access count data') if ($debug);
+ my %countdata;
+ if (! tie(%countdata,'GDBM_File',
+ $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
+ return 0;
+ }
+ while (my ($key,$count) = each(%countdata)) {
+ next if ($key !~ /^$dom/);
+ $key = &unescape($key);
+ &log(8,' Count '.$key.' = '.$count) if ($debug);
+ $Counts{$key}=$count;
+ }
+ untie(%countdata);
+ if ($debug) {
+ &log(7,scalar(keys(%Counts)).
+ " Counts read for ".$user."@".$dom);
+ &log(7,scalar(keys(%DynamicData)).
+ " Dynamic metadata read for ".$user."@".$dom);
+ }
+ #
+ return 1;
+}
+
+sub get_dynamic_metadata {
+ my ($url) = @_;
+ $url =~ s:^/res/::;
+ if (! exists($DynamicData{$url})) {
+ &log(7,' No dynamic data for '.$url) if ($debug);
+ return ();
+ }
+ my %data;
+ my $resdata = $DynamicData{$url};
+ #
+ # Get the statistical data
+ foreach my $type (qw/avetries difficulty stdno/) {
+ my $count;
+ my $sum;
+ my @Values;
+ foreach my $coursedata (values(%{$resdata->{'statistics'}})) {
+ if (ref($coursedata) eq 'HASH' && exists($coursedata->{$type})) {
+ $count++;
+ $sum += $coursedata->{$type};
+ push(@Values,$coursedata->{$type});
}
- } elsif ($listitems{$cat} eq 'count') {
- $returnhash{$cat}=$count{$cat};
- } else {
- $returnhash{$cat}=$sum{$cat};
}
- $returnhash{$cat.'_list'}=$concat{$cat};
+ if ($count) {
+ $data{$type} = $sum/$count;
+ $data{$type.'_list'} = join(',',@Values);
+ }
+ }
+ # find the count
+ $data{'count'} = $Counts{$url};
+ #
+ # Get the context data
+ foreach my $type (qw/course goto comefrom/) {
+ if (defined($resdata->{$type}) &&
+ ref($resdata->{$type}) eq 'ARRAY') {
+ $data{$type} = scalar(@{$resdata->{$type}});
+ $data{$type.'_list'} = join(',',@{$resdata->{$type}});
+ }
+ }
+ if (defined($resdata->{'usage'}) &&
+ ref($resdata->{'usage'}) eq 'ARRAY') {
+ $data{'sequsage'} = scalar(@{$resdata->{'usage'}});
+ $data{'sequsage_list'} = join(',',@{$resdata->{'usage'}});
+ }
+ #
+ # Get the evaluation data
+ foreach my $type (qw/clear technical correct helpful depth/) {
+ my $count;
+ my $sum;
+ foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{$type}})){
+ $sum += $resdata->{'evaluation'}->{$type}->{$evaluator};
+ $count++;
+ }
+ if ($count > 0) {
+ $data{$type}=$sum/$count;
+ }
}
#
- # get count
- if (tie(my %evaldata,'GDBM_File',
- $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
- my $escurl=&escape($url);
- if (! exists($evaldata{$escurl})) {
- $returnhash{'count'}=0;
- } else {
- $returnhash{'count'}=$evaldata{$escurl};
- }
- untie %evaldata;
+ # put together comments
+ my $comments = '<div class="LCevalcomments">';
+ foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{'comments'}})){
+ $comments .= $evaluator.':'.
+ $resdata->{'evaluation'}->{'comments'}->{$evaluator}.'<hr />';
+ }
+ $comments .= '</div>';
+ #
+ # Log the dynamic metadata
+ if ($debug) {
+ while (my($k,$v)=each(%data)) {
+ &log(8," ".$k." => ".$v);
+ }
}
- return %returnhash;
+ #
+ return %data;
}
+} # End of %DynamicData and %Counts scope
+
########################################################
########################################################
### ###
@@ -593,8 +732,10 @@
$TimeData[5]+=1900;
$mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
@TimeData[5,4,3,2,1,0]);
+ } elsif (! defined($time) || $time == 0) {
+ $mysqltime = 0;
} else {
- print LOG " Unable to decode time ".$time."\n";
+ &log(0," sqltime:Unable to decode time ".$time);
$mysqltime = 0;
}
return $mysqltime;
--matthew1081548293--