[LON-CAPA-cvs] cvs: loncom /metadata_database searchcat.pl
www
lon-capa-cvs@mail.lon-capa.org
Mon, 03 Feb 2003 17:01:55 -0000
www Mon Feb 3 12:01:55 2003 EDT
Modified files:
/loncom/metadata_database searchcat.pl
Log:
Trying to make sense of the diffs with previous versions, i.e., 1.27 (sniff)
Index: loncom/metadata_database/searchcat.pl
diff -u loncom/metadata_database/searchcat.pl:1.29 loncom/metadata_database/searchcat.pl:1.30
--- loncom/metadata_database/searchcat.pl:1.29 Mon Feb 3 08:42:16 2003
+++ loncom/metadata_database/searchcat.pl Mon Feb 3 12:01:55 2003
@@ -2,7 +2,7 @@
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
-# $Id: searchcat.pl,v 1.29 2003/02/03 13:42:16 albertel Exp $
+# $Id: searchcat.pl,v 1.30 2003/02/03 17:01:55 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -374,13 +374,22 @@
B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
-Dynamic metadata is stored in a nohist_resevaldata GDBM database.
-The only thing that this subroutine really makes happen is adjusting
-a 'count' value inside the F<nohist_new_resevaldata.db> as well
-as updating F<nohist_new_resevaldata.db> with information from
-F<nohist_resevaldata.db>.
+Returns the dynamic metadata for an author, which will later be added to the
+MySQL database (not yet implemented).
+
+The vast majority of entries in F<nohist_resevaldata.db>, which contains
+the dynamic metadata for an author's resources, are "count" entries, which
+make the file really large and evaluation really slow.
+
+While computing the current value of all dynamic metadata
+for later insertion into the MySQL metadata cache (not yet implemented),
+this routine also simply adds up all "count" type fields and replaces them by
+one new field with the to-date count.
+
+Only after work on one author has completed successfully is the new file
+copied to the original file. Copying to a tmp-"new"-db-file was necessary
+since the db-file size would not shrink after a "delete" of a key.
-It may need optmization, but since it gets called once a week. . .
=over 4
Parameters:
@@ -399,16 +408,14 @@
=cut
-sub build_on_the_fly_dynamic_metadata ($)
- {
- # some elements in here maybe non-obvious
+sub build_on_the_fly_dynamic_metadata {
# Need to compute the user's directory.
- my $url = &declutter(shift(@_));
- $url =~ s/\.meta$//;
- my %returnhash = ();
- my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
- my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
+ my $url=&declutter(shift);
+ $url=~s/\.meta$//;
+ my %returnhash=();
+ my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
+ my $user_directory=&construct_path_to_user_directory($adomain,$aauthor);
# Attempt a GDBM database instantiation inside users directory and proceed.
if ((tie(%evaldata,'GDBM_File',
@@ -416,14 +423,13 @@
'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
(tie(%newevaldata,'GDBM_File',
$user_directory.
- '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
- {
+ '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
# For different variables, track the running sum and counts.
- my %sum = ();
- my %cnt = ();
+ my %sum=();
+ my %cnt=();
# Define computed items as a sum (add) or an average (avg) or a raw
- # count (cnt) or 'app'?
+ # count (cnt) or append (app)?
my %listitems=('count' => 'add',
'course' => 'add',
'avetries' => 'avg',
@@ -439,93 +445,70 @@
);
# Untaint the url and use as part of a regular expression.
- my $regexp = $url;
- $regexp =~ s/(\W)/\\$1/g;
- $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs
-
- # Check existing nohist database for this url.
- # this is modfying the 'count' entries
- # and copying all othe entries over
- foreach (keys %evaldata)
- {
- my $key = &unescape($_);
- if ($key =~ /$regexp/) # If url-based entry exists.
- {
- my $ctype = $1; # Set to specific category type.
+ my $regexp=$url;
+ $regexp=~s/(\W)/\\$1/g;
+ $regexp='___'.$regexp.'___([a-z]+)$'; #' emacs
+
+ # Check existing database for this author.
+ # this is modifying the 'count' entries
+ # and copying all other entries over
+
+ foreach (keys %evaldata) {
+ my $key=&unescape($_);
+ if ($key=~/$regexp/) { # If url-based entry exists.
+ my $ctype=$1; # Set to specific category type.
# Do an increment for this category type.
- if (defined($cnt{$ctype}))
- {
+ if (defined($cnt{$ctype})) {
$cnt{$ctype}++;
- }
- else
- {
- $cnt{$ctype} = 1;
- }
- unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
- {
+ } else {
+ $cnt{$ctype}=1;
+ }
+ unless ($listitems{$ctype} eq 'app') { # append comments
# Increment the sum based on the evaluated data in the db.
- if (defined($sum{$ctype}))
- {
- $sum{$ctype} += $evaldata{$_};
- }
- else
- {
- $sum{$ctype} = $evaldata{$_};
- }
- }
- else # 'app' mode, means to use '<hr />' as a separator
- {
- if (defined($sum{$ctype}))
- {
- if ($evaldata{$_})
- {
- $sum{$ctype} .= '<hr />'.$evaldata{$_};
- }
- }
- else
- {
- $sum{$ctype} = ''.$evaldata{$_};
- }
- }
- if ($ctype ne 'count')
- {
+ if (defined($sum{$ctype})) {
+ $sum{$ctype}+=$evaldata{$_};
+ } else {
+ $sum{$ctype}=$evaldata{$_};
+ }
+ } else { # 'app' mode, means to use '<hr />' as a separator
+ if (defined($sum{$ctype})) {
+ if ($evaldata{$_}) {
+ $sum{$ctype}.='<hr />'.$evaldata{$_};
+ }
+ } else {
+ $sum{$ctype}=''.$evaldata{$_};
+ }
+ }
+ if ($ctype ne 'count') {
# this is copying all data except 'count' attributes
- $newevaldata{$_} = $evaldata{$_};
- }
- }
- }
-
- # the only other time this loop is useful is for the 'count' hash
- # element
- foreach (keys %cnt)
- {
- if ($listitems{$_} eq 'avg')
- {
- $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
- }
- elsif ($listitems{$_} eq 'cnt')
- {
- $returnhash{$_} = $cnt{$_};
- }
- else
- {
- $returnhash{$_} = $sum{$_};
- }
- }
-
- # seems to be doing something useful
- if ($returnhash{'count'})
- {
- my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
- $newevaldata{$newkey} = $returnhash{'count'};
- }
+ $newevaldata{$_}=$evaldata{$_};
+ }
+ }
+ }
+
+ # these values will be returned (currently still unused)
+ foreach (keys %cnt) {
+ if ($listitems{$_} eq 'avg') {
+ $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
+ } elsif ($listitems{$_} eq 'cnt') {
+ $returnhash{$_}=$cnt{$_};
+ } else {
+ $returnhash{$_}=$sum{$_};
+ }
+ }
+
+ # generate new count key in resevaldata, insert sum
+ if ($returnhash{'count'}) {
+ my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
+ $newevaldata{$newkey}=$returnhash{'count'};
+ }
untie(%evaldata); # Close/release the original nohist database.
untie(%newevaldata); # Close/release the new nohist database.
- }
- return(%returnhash);
- }
+ }
+ return %returnhash;
+}
=pod