[LON-CAPA-cvs] cvs: loncom /metadata_database searchcat.pl

Mon, 03 Feb 2003 17:01:55 -0000

www		Mon Feb  3 12:01:55 2003 EDT

  Modified files:              
    /loncom/metadata_database	searchcat.pl 
  Log:
  Trying to make sense of the diffs with previous versions, i.e., 1.27 (sniff)
  
  
Index: loncom/metadata_database/searchcat.pl
diff -u loncom/metadata_database/searchcat.pl:1.29 loncom/metadata_database/searchcat.pl:1.30

--- loncom/metadata_database/searchcat.pl:1.29	Mon Feb  3 08:42:16 2003
+++ loncom/metadata_database/searchcat.pl	Mon Feb  3 12:01:55 2003
@@ -2,7 +2,7 @@
 # The LearningOnline Network
 # searchcat.pl "Search Catalog" batch script
 #
-# $Id: searchcat.pl,v 1.29 2003/02/03 13:42:16 albertel Exp $
+# $Id: searchcat.pl,v 1.30 2003/02/03 17:01:55 www Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -374,13 +374,22 @@
 
 B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
 
-Dynamic metadata is stored in a nohist_resevaldata GDBM database.
-The only thing that this subroutine really makes happen is adjusting
-a 'count' value inside the F<nohist_new_resevaldata.db> as well
-as updating F<nohist_new_resevaldata.db> with information from
-F<nohist_resevaldata.db>.
+Returns the dynamic metadata for an author, which will later be added to the
+MySQL database (not yet implemented).
+
+The vast majority of entries in F<nohist_resevaldata.db>, which contains
+the dynamic metadata for an author's resources, are "count" entries, which
+makes the file really large and its evaluation really slow.
+
+While computing the current value of all dynamic metadata
+for later insertion into the MySQL metadata cache (not yet implemented),
+this routine also adds up all "count" type fields and replaces them with a
+single new field holding the to-date count.
+
+The new file is to be copied over the original file only after one author has
+been processed successfully. Writing to a temporary "new" db file is necessary
+because the size of a db file does not shrink after a key is deleted from it.
 
-It may need optmization, but since it gets called once a week. . .
 =over 4
 
 Parameters:
@@ -399,16 +408,14 @@
 
 =cut
 
-sub build_on_the_fly_dynamic_metadata ($)
-  {
-    # some elements in here maybe non-obvious
+sub build_on_the_fly_dynamic_metadata {
 
     # Need to compute the user's directory.
-    my $url = &declutter(shift(@_));
-    $url =~ s/\.meta$//;
-    my %returnhash = ();
-    my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
-    my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
+    my $url=&declutter(shift);
+    $url=~s/\.meta$//;
+    my %returnhash=();
+    my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
+    my $user_directory=&construct_path_to_user_directory($adomain,$aauthor);
 
     # Attempt a GDBM database instantiation inside users directory and proceed.
     if ((tie(%evaldata,'GDBM_File',
@@ -416,14 +423,13 @@
 	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
         (tie(%newevaldata,'GDBM_File',
 	     $user_directory.
-	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
-      {
+	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
 	# For different variables, track the running sum and counts.
-	my %sum = ();
-	my %cnt = ();
+	my %sum=();
+	my %cnt=();
 
 	# Define computed items as a sum (add) or an average (avg) or a raw
-	# count (cnt) or 'app'?
+	# count (cnt) or append (app)?
 	my %listitems=('count'        => 'add',
 		       'course'       => 'add',
 		       'avetries'     => 'avg',
@@ -439,93 +445,70 @@
 		       );
 	
 	# Untaint the url and use as part of a regular expression.
-	my $regexp = $url;
-	$regexp =~ s/(\W)/\\$1/g;
-	$regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs
-
-	# Check existing nohist database for this url.
-        # this is modfying the 'count' entries
-        # and copying all othe entries over
-	foreach (keys %evaldata)
-	  {
-	    my $key = &unescape($_);
-	    if ($key =~ /$regexp/) # If url-based entry exists.
-	      {
-		my $ctype = $1; # Set to specific category type.
+	my $regexp=$url;
+	$regexp=~s/(\W)/\\$1/g;
+	$regexp='___'.$regexp.'___([a-z]+)$'; #' emacs
+
+	# Check existing database for this author.
+        # this is modifying the 'count' entries
+        # and copying all other entries over
+
+	foreach (keys %evaldata) {
+	    my $key=&unescape($_);
+	    if ($key=~/$regexp/) { # If url-based entry exists.
+		my $ctype=$1; # Set to specific category type.
 
 		# Do an increment for this category type.
-		if (defined($cnt{$ctype}))
-		  {
+		if (defined($cnt{$ctype})) {
 		    $cnt{$ctype}++; 
-		  }
-		else
-		  {
-		    $cnt{$ctype} = 1; 
-		  }
-                unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
-		  {
+		} else {
+		    $cnt{$ctype}=1; 
+		}
+                unless ($listitems{$ctype} eq 'app') { # append comments
 		    # Increment the sum based on the evaluated data in the db.
-		    if (defined($sum{$ctype}))
-		      {
-			$sum{$ctype} += $evaldata{$_};
-		      }
-		    else
-		      {
-			$sum{$ctype} = $evaldata{$_};
-		      }
- 		  }
-		else # 'app' mode, means to use '<hr />' as a separator
-		  {
-		    if (defined($sum{$ctype}))
-		      {
-			if ($evaldata{$_})
-			  {
-			    $sum{$ctype} .= '<hr />'.$evaldata{$_};
-			  }
-		      }
-		    else
-		      {
-			$sum{$ctype} = ''.$evaldata{$_};
-		      }
-		  }
-		if ($ctype ne 'count')
-		  {
+		    if (defined($sum{$ctype})) {
+			$sum{$ctype}+=$evaldata{$_};
+		    } else {
+			$sum{$ctype}=$evaldata{$_};
+		    }
+ 		} else { # 'app' mode, means to use '<hr />' as a separator
+		    if (defined($sum{$ctype})) {
+			if ($evaldata{$_}) {
+			    $sum{$ctype}.='<hr />'.$evaldata{$_};
+			}
+		    } else {
+			$sum{$ctype}=''.$evaldata{$_};
+		    }
+	        }
+		if ($ctype ne 'count') {
                     # this is copying all data except 'count' attributes
-		    $newevaldata{$_} = $evaldata{$_};
-		  }
-	      }
-	  }
-
-        # the only other time this loop is useful is for the 'count' hash
-        # element
-	foreach (keys %cnt)
-	  {
-	    if ($listitems{$_} eq 'avg')
-	      {
-		$returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
-	      }
-	    elsif ($listitems{$_} eq 'cnt')
-	      {
-		$returnhash{$_} = $cnt{$_};
-	      }
-	    else
-	      {
-		$returnhash{$_} = $sum{$_};
-	      }
-	  }
-
-        # seems to be doing something useful
-	if ($returnhash{'count'})
-	  {
-	    my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
-	    $newevaldata{$newkey} = $returnhash{'count'};
-	  }
+		    $newevaldata{$_}=$evaldata{$_};
+	        }
+	    }
+	}
+
+        # these values will be returned (currently still unused)
+	foreach (keys %cnt) {
+	    if ($listitems{$_} eq 'avg') {
+		$returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
+	    } elsif ($listitems{$_} eq 'cnt') {
+		$returnhash{$_}=$cnt{$_};
+	    } else {
+		$returnhash{$_}=$sum{$_};
+	    }
+	}
+
+        # generate new count key in resevaldata, insert sum
+	if ($returnhash{'count'}) {
+	    my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
+	    $newevaldata{$newkey}=$returnhash{'count'};
+	}
 
 	untie(%evaldata); # Close/release the original nohist database.
 	untie(%newevaldata); # Close/release the new nohist database.
-      }
-    return(%returnhash);
-  }
+    }
+    return %returnhash;
+}
 
 =pod