[LON-CAPA-cvs] cvs: loncom /metadata_database searchcat.pl

harris41 lon-capa-cvs@mail.lon-capa.org
Mon, 03 Feb 2003 05:39:37 -0000


harris41		Mon Feb  3 00:39:37 2003 EDT

  Modified files:              
    /loncom/metadata_database	searchcat.pl 
  Log:
  beautified; BUG 1215 FIXED: the previously horrible documentation is now
  non-horrible and exists in every place that it should; BUG 1216 FIXED: stale
  records that are not accounted for on the system are now deleted; BUG 1217
  FIXED: the loncapa:metadata table can now change structure, and this script
  will verify and alter the table structure if necessary; BUG 1218 PARTIALLY
  FIXED (a small solid step toward The Great Metadata Overhaul); also now
  using File::Find instead of find.pl
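
  For reference, the BUG 1217 fix boils down to building a throw-away
  CONTROL_metadata table from the same CREATE statement, comparing the
  DESCRIBE output of both tables, and regenerating the live table only when
  the two descriptions differ.  Below is a minimal standalone sketch of that
  idea; the column list and connection credentials are placeholders rather
  than the real loncapa schema or password (the full statement is in the
  diff that follows).

    #!/usr/bin/perl
    use strict;
    use DBI;

    # Placeholder connection; the real script reads its credentials
    # from loncapa.conf.
    my $dbh = DBI->connect('DBI:mysql:loncapa','www','PASSWORD',
                           { RaiseError => 0, PrintError => 0 })
        or die("cannot connect to database\n");

    # Placeholder schema; the real CREATE statement lists all metadata columns.
    my $create = 'CREATE TABLE IF NOT EXISTS metadata '.
                 '(title TEXT, url TEXT, utilitysemaphore BOOL) TYPE=MYISAM';
    $dbh->do($create); # Make sure the live table exists at all.

    # Build a positive control table from the very same CREATE statement.
    (my $create_control = $create) =~
        s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
    $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
    $dbh->do($create_control);

    # Flatten the DESCRIBE output of a table into one comparable string.
    sub describe_as_string {
        my ($dbh, $table) = @_;
        my $rows = $dbh->selectall_arrayref('DESCRIBE '.$table);
        my $description = '';
        foreach my $row (@{$rows}) {
            $description .= join(',', map { defined($_) ? $_ : '' } @{$row})."\n";
        }
        return($description);
    }

    # Regenerate the live table only when its layout no longer matches.
    if (describe_as_string($dbh,'metadata') ne
        describe_as_string($dbh,'CONTROL_metadata')) {
        $dbh->do('DROP TABLE IF EXISTS metadata');
        $dbh->do($create);
    }
    $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Done with the control.
    $dbh->disconnect();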
  
  

Index: loncom/metadata_database/searchcat.pl
diff -u loncom/metadata_database/searchcat.pl:1.27 loncom/metadata_database/searchcat.pl:1.28
--- loncom/metadata_database/searchcat.pl:1.27	Sat Jan  4 14:23:31 2003
+++ loncom/metadata_database/searchcat.pl	Mon Feb  3 00:39:37 2003
@@ -2,29 +2,30 @@
 # The LearningOnline Network
 # searchcat.pl "Search Catalog" batch script
 #
-# $Id: searchcat.pl,v 1.27 2003/01/04 19:23:31 www Exp $
+# $Id: searchcat.pl,v 1.28 2003/02/03 05:39:37 harris41 Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
-# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+# This file is part of the LearningOnline Network with a
+# Computer assisted personalized approach (loncapa).
 #
-# LON-CAPA is free software; you can redistribute it and/or modify
+# Loncapa is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
-# LON-CAPA is distributed in the hope that it will be useful,
+# Loncapa is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with LON-CAPA; if not, write to the Free Software
+# along with loncapa; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #
 # /home/httpd/html/adm/gpl.txt
 #
-# http://www.lon-capa.org/
+# http://www.loncapa.org/
 #
 # YEAR=2001
 # 04/14/2001, 04/16/2001 Scott Harrison
@@ -32,11 +33,51 @@
 # YEAR=2002
 # 05/11/2002 Scott Harrison
 #
+# YEAR=2003
+# Scott Harrison
+#
 ###
 
-# This script goes through a LON-CAPA resource
-# directory and gathers metadata.
-# The metadata is entered into a SQL database.
+=pod
+
+=head1 NAME
+
+B<searchcat.pl> - put authoritative filesystem data into the SQL database.
+
+=head1 SYNOPSIS
+
+Ordinarily this script is to be called from a loncapa cron job
+(CVS source location: F<loncapa/loncom/cron/loncapa>; typical
+filesystem installation location: F</etc/cron.d/loncapa>).
+
+Here is the cron job entry.
+
+C<# Repopulate and refresh the metadata database used for the search catalog.>
+
+C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
+
+This script only allows itself to be run as the user C<www>.
+
+=head1 DESCRIPTION
+
+This script goes through a loncapa resource directory and gathers metadata.
+The metadata is entered into a SQL database.
+
+This script also does general database maintenance such as reformatting
+the C<loncapa:metadata> table if its structure is out of date.
+
+This script also builds dynamic temporal metadata and stores this inside
+a F<nohist_resevaldata.db> database file.
+
+This script is playing an increasingly important role for a loncapa
+library server.  The proper operation of this script is critical for a smooth
+and correct user experience.
+
+=cut
+
+# ========================================================== Setting things up.
+
+# ------------------------------------------------------  Use external modules.
 
 use lib '/home/httpd/lib/perl/';
 use LONCAPA::Configuration;
@@ -47,346 +88,891 @@
 use GDBM_File;
 use POSIX qw(strftime mktime);
 
+# ----------------- Code to enable 'find' subroutine listing of the .meta files
+use File::Find;
+
+# List of .meta files (used on a per-user basis).
 my @metalist;
 
+# ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
+my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
+my %perlvar = %{$perlvarref};
+undef($perlvarref); # Remove since sensitive and not needed.
+delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
+
+# ------------------------------------- Only run if machine is a library server
+if ($perlvar{'lonRole'} ne 'library')
+  {
+    exit(0);
+  }
+
+# ------------------------------ Make sure this process is running as user=www.
+my $wwwid = getpwnam('www');
+if ($wwwid != $<)
+  {
+    $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+    $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
+    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
+	   "mailto $emailto -s '$subj' > /dev/null");
+    exit(1);
+  }
+
+# ------------------------------------------------------ Initialize log output.
+open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
+print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
+
+my $dbh; # Database object reference handle.
+
+# ----------------------------- Verify connection to loncapa:metadata database.
+unless (
+	$dbh = DBI->connect('DBI:mysql:loncapa','www',
+			    $perlvar{'lonSqlAccess'},
+			    { RaiseError => 0,PrintError => 0})
+	)
+  { 
+    print(LOG '**** ERROR **** Cannot connect to database!'."\n");
+    exit(0);
+  }
+
+# ------------------------------ Create loncapa:metadata table if non-existent.
+my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
+    'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
+    'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
+    'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
+    'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
+    'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
+    'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
+    'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
+    'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
+    'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
+    'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
+
+$dbh->do($make_metadata_table); # Generate the table.
+
+# ----------------------------- Verify format of the loncapa:metadata database.
+#                               (delete and recreate database if necessary).
+
+# Make a positive control for verifying table structure.
+my $make_metadata_table_CONTROL = $make_metadata_table;
+$make_metadata_table_CONTROL =~
+    s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
+
+$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
+$dbh->do($make_metadata_table_CONTROL);
+
+my $table_description; # selectall reference to the table description.
+
+my $CONTROL_table_string; # What the table description should look like.
+my $table_string; # What the table description does look like.
+
+# Calculate the CONTROL table description (what it should be).
+$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
+foreach my $table_row (@{$table_description})
+  {
+    $CONTROL_table_string .= join(',',@{$table_row})."\n";
+  }
+
+# Calculate the current table description (what it currently looks like).
+$table_description = $dbh->selectall_arrayref('describe metadata');
+foreach my $table_row (@{$table_description})
+  {
+    $table_string .= join(',',@{$table_row})."\n";
+  }
+
+if ($table_string ne $CONTROL_table_string)
+  {
+    # Log this incident.
+    print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
+	  '.'."\n");
+    # Delete the table.
+    $dbh->do('DROP TABLE IF EXISTS metadata');
+    # Generate the table.
+    $dbh->do($make_metadata_table);
+  }
 
-# ----------------------------------------------------- Un-Escape Special Chars
+$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
 
-sub unescape {
-    my $str=shift;
+# ----------------------------------------------- Set utilitysemaphore to zero.
+$dbh->do('UPDATE metadata SET utilitysemaphore = 0');
+
+# ========================================================= Main functionality.
+
+# - Determine home authors on this server based on resources dir and user tree.
+
+# RESOURCES: the resources directory (subdirs correspond to author usernames).
+opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
+    (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
+     and exit(0));
+
+# query_home_server_status will look for user home directories on this machine.
+my @homeusers =
+    grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
+				    $perlvar{'lonDefDomain'}.'/'.$_)
+	  } grep {!/^\.\.?$/} readdir(RESOURCES);
+closedir(RESOURCES);
+
+unless (@homeusers)
+  {
+    print(LOG '=== No home users found on this server.'."\n");
+  }
+
+# Consider each author individually.
+foreach my $user (@homeusers)
+  {
+    # Make a log entry.
+    print(LOG "\n".'=== User: '.$user."\n\n");
+
+    # Get filesystem path to this user's directory.
+    my $user_directory =
+	&construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
+
+    # Remove left-over db-files from a potentially crashed searchcat run.
+    unlink($user_directory.'/nohist_new_resevaldata.db');
+
+    # Cleanup the metalist array.
+    undef(@metalist);
+    @metalist = ();
+
+    # This will add entries to the @metalist array.
+    &File::Find::find(\&wanted,
+		      $perlvar{'lonDocRoot'}.'/res/'.
+		      $perlvar{'lonDefDomain'}.'/'.$user);
+
+    # -- process file to get metadata and put into search catalog SQL database
+    # Also, build and store dynamic metadata.
+    # Also, delete record entries before refreshing.
+    foreach my $m (@metalist)
+      {
+	# Log this action.
+	print(LOG "- ".$m."\n");
+
+	# Get metadata from the file.
+	my $ref = get_metadata_from_file($m);
+
+	# Make a datarecord identifier for this resource.
+	my $m2 = '/res/'.declutter($m);
+	$m2 =~ s/\.meta$//;
+
+	# Build and store dynamic metadata inside nohist_resevaldata.db.
+	build_on_the_fly_dynamic_metadata($m2);
+
+	# Delete record if it already exists.
+	my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
+	my $sth = $dbh->prepare($q2);
+	$sth->execute();
+	my $r1 = $sth->fetchall_arrayref;
+	if (@$r1)
+	  {
+	    $sth = 
+		$dbh->prepare('delete from metadata where url like binary '.
+			      "'".$m2."'");
+	    $sth->execute();
+	  }
+
+	# Add new/replacement record into the loncapa:metadata table.
+	$sth = $dbh->prepare('insert into metadata values ('.
+			     '"'.delete($ref->{'title'}).'"'.','.
+			     '"'.delete($ref->{'author'}).'"'.','.
+			     '"'.delete($ref->{'subject'}).'"'.','.
+			     '"'.$m2.'"'.','.
+			     '"'.delete($ref->{'keywords'}).'"'.','.
+			     '"'.'current'.'"'.','.
+			     '"'.delete($ref->{'notes'}).'"'.','.
+			     '"'.delete($ref->{'abstract'}).'"'.','.
+			     '"'.delete($ref->{'mime'}).'"'.','.
+			     '"'.delete($ref->{'language'}).'"'.','.
+			     '"'.sql_formatted_time(
+				       delete($ref->{'creationdate'})).'"'.','.
+			     '"'.sql_formatted_time(
+				   delete($ref->{'lastrevisiondate'})).'"'.','.
+			     '"'.delete($ref->{'owner'}).'"'.','.
+			     '"'.delete($ref->{'copyright'}).'"'.','.
+			     '1'.')');
+	$sth->execute();
+      }
+
+# ----------------------- Clean up database, remove stale SQL database records.
+    $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
+
+# -------------------------------------------------- Copy over the new db-files
+    system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
+	         $user_directory.'/nohist_resevaldata.db');
+  }
+
+# --------------------------------------------------- Close database connection
+$dbh->disconnect;
+print LOG "\n==== Searchcat completed ".localtime()." ====\n";
+close(LOG);
+exit(0);
+
+# ================================================================ Subroutines.
+
+=pod
+
+=head1 SUBROUTINES
+
+=cut
+
+=pod
+
+B<unescape> - translate unstrange escaped syntax back to strange characters.
+
+=over 4
+
+Parameters:
+
+=item I<$str> - string with unweird characters.
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<string> - string with potentially weird characters.
+
+=back
+
+=cut
+
+sub unescape ($)
+  {
+    my $str = shift(@_);
     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
-    return $str;
-}
+    return($str);
+  }
+
+=pod
+
+B<escape> - translate strange characters to unstrange escaped syntax.
+
+=over 4
+
+Parameters:
 
-# -------------------------------------------------------- Escape Special Chars
+=item I<$str> - string with potentially weird characters to unweird-ify.
 
-sub escape {
-    my $str=shift;
+=back
+
+=over 4
+
+Returns:
+
+=item C<string> - unweird-ified string.
+
+=back
+
+=cut
+
+sub escape ($)
+  {
+    my $str = shift(@_);
     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
-    return $str;
-}
+    return($str);
+  }
+
+=pod
+
+B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
+
+Dynamic metadata is stored in a nohist_resevaldata GDBM database.
+Most of the calculations in this subroutine are totally pointless
+and not useful for anything that this subroutine does.
+(THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.)
+The only thing that this subroutine really makes happen is adjusting
+a 'count' value inside the F<nohist_new_resevaldata.db> as well
+as updating F<nohist_new_resevaldata.db> with information from
+F<nohist_resevaldata.db>.
+
+=over 4
+
+Parameters:
+
+=item I<$url> - the filesystem path (url may be a misnomer...)
+
+=back
+
+=over 4
+
+Returns:
 
+=item C<hash> - key-value table of dynamically evaluated metadata.
 
-# ------------------------------------------- Code to evaluate dynamic metadata
+=back
 
-sub dynamicmeta {
+=cut
 
-    my $url=&declutter(shift);
-    $url=~s/\.meta$//;
-    my %returnhash=();
-    my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
-    my $prodir=&propath($adomain,$aauthor);
+sub build_on_the_fly_dynamic_metadata ($)
+  {
+    # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE.
+      
+    # Do all sorts of mumbo-jumbo to compute the user's directory.
+    my $url = &declutter(shift(@_));
+    $url =~ s/\.meta$//;
+    my %returnhash = ();
+    my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
+    my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
+
+    # Attempt a GDBM database instantiation inside users directory and proceed.
     if ((tie(%evaldata,'GDBM_File',
-            $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
+            $user_directory.
+	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
         (tie(%newevaldata,'GDBM_File',
-            $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
-       my %sum=();
-       my %cnt=();
-       my %listitems=('count'        => 'add',
-                      'course'       => 'add',
-                      'avetries'     => 'avg',
-                      'stdno'        => 'add',
-                      'difficulty'   => 'avg',
-                      'clear'        => 'avg',
-                      'technical'    => 'avg',
-                      'helpful'      => 'avg',
-                      'correct'      => 'avg',
-                      'depth'        => 'avg',
-                      'comments'     => 'app',
-                      'usage'        => 'cnt'
-                      );
-       my $regexp=$url;
-       $regexp=~s/(\W)/\\$1/g;
-       $regexp='___'.$regexp.'___([a-z]+)$';
-       foreach (keys %evaldata) {
-	 my $key=&unescape($_);
-	 if ($key=~/$regexp/) {
-	    my $ctype=$1;
-            if (defined($cnt{$ctype})) { 
-               $cnt{$ctype}++; 
-            } else { 
-               $cnt{$ctype}=1; 
-            }
-            unless ($listitems{$ctype} eq 'app') {
-               if (defined($sum{$ctype})) {
-                  $sum{$ctype}+=$evaldata{$_};
-   	       } else {
-                  $sum{$ctype}=$evaldata{$_};
-	       }
-            } else {
-               if (defined($sum{$ctype})) {
-                  if ($evaldata{$_}) {
-                     $sum{$ctype}.='<hr>'.$evaldata{$_};
-	          }
- 	       } else {
-	             $sum{$ctype}=''.$evaldata{$_};
-	       }
-	    }
-	    if ($ctype ne 'count') {
-	       $newevaldata{$_}=$evaldata{$_};
-	   }
-	 }
+	     $user_directory.
+	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
+      {
+	# For different variables, track the running sum and counts.
+	my %sum = ();
+	my %cnt = ();
+
+	# Define computed items as a sum (add) or an average (avg) or a raw
+	# count (cnt) or 'app'?
+	my %listitems=('count'        => 'add',
+		       'course'       => 'add',
+		       'avetries'     => 'avg',
+		       'stdno'        => 'add',
+		       'difficulty'   => 'avg',
+		       'clear'        => 'avg',
+		       'technical'    => 'avg',
+		       'helpful'      => 'avg',
+		       'correct'      => 'avg',
+		       'depth'        => 'avg',
+		       'comments'     => 'app',
+		       'usage'        => 'cnt'
+		       );
+	
+	# Escape regexp metacharacters in the url before using it in a regular expression.
+	my $regexp = $url;
+	$regexp =~ s/(\W)/\\$1/g;
+	$regexp = '___'.$regexp.'___([a-z]+)$';
+
+	# Check existing nohist database for this url.
+	# THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE
+	# IS FOR 'count' ENTRIES
+	# AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE.
+	foreach (keys %evaldata)
+	  {
+	    my $key = &unescape($_);
+	    if ($key =~ /$regexp/) # If url-based entry exists.
+	      {
+		my $ctype = $1; # Set to specific category type.
+
+		# Do an increment for this category type.
+		if (defined($cnt{$ctype}))
+		  {
+		    $cnt{$ctype}++; 
+		  }
+		else
+		  {
+		    $cnt{$ctype} = 1; 
+		  }
+                unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
+		  {
+		    # Increment the sum based on the evaluated data in the db.
+		    if (defined($sum{$ctype}))
+		      {
+			$sum{$ctype} += $evaldata{$_};
+		      }
+		    else
+		      {
+			$sum{$ctype} = $evaldata{$_};
+		      }
+ 		  }
+		else # 'app' mode, means to use '<hr />' as a separator
+		  {
+		    if (defined($sum{$ctype}))
+		      {
+			if ($evaldata{$_})
+			  {
+			    $sum{$ctype} .= '<hr />'.$evaldata{$_};
+			  }
+		      }
+		    else
+		      {
+			$sum{$ctype} = ''.$evaldata{$_};
+		      }
+		  }
+		if ($ctype ne 'count')
+		  {
+		    # ALERT!  THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING
+		    # USEFUL!
+		    $newevaldata{$_} = $evaldata{$_};
+		  }
+	      }
+	  }
+
+	# THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH
+	# ELEMENT.
+	foreach (keys %cnt)
+	  {
+	    if ($listitems{$_} eq 'avg')
+	      {
+		$returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
+	      }
+	    elsif ($listitems{$_} eq 'cnt')
+	      {
+		$returnhash{$_} = $cnt{$_};
+	      }
+	    else
+	      {
+		$returnhash{$_} = $sum{$_};
+	      }
+	  }
+
+	# A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS
+	# BLEEPING SUBROUTINE.
+	if ($returnhash{'count'})
+	  {
+	    my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
+	    $newevaldata{$newkey} = $returnhash{'count'};
+	  }
+
+	untie(%evaldata); # Close/release the original nohist database.
+	untie(%newevaldata); # Close/release the new nohist database.
       }
-      foreach (keys %cnt) {
-         if ($listitems{$_} eq 'avg') {
-	     $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
-         } elsif ($listitems{$_} eq 'cnt') {
-             $returnhash{$_}=$cnt{$_};
-         } else {
-             $returnhash{$_}=$sum{$_};
-         }
-     }
-     if ($returnhash{'count'}) {
-         my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
-         $newevaldata{$newkey}=$returnhash{'count'};
-     }
-     untie(%evaldata);
-     untie(%newevaldata);
-   }
-   return %returnhash;
-}
-  
-# ----------------- Code to enable 'find' subroutine listing of the .meta files
-require "find.pl";
-sub wanted {
+    return(%returnhash);
+    # Celebrate!  We have now accomplished some simple calculations using
+    # 1000% bloated functionality in our subroutine.  Go wash your eyeballs
+    # out now.
+  }
+
+=pod
+
+B<wanted> - used by B<File::Find::find> subroutine.
+
+This evaluates whether a file is wanted, and pushes it onto the
+I<@metalist> array.  This subroutine was, for the most part, auto-generated
+by the B<find2perl> command.
+
+=over 4
+
+Parameters:
+
+=item I<$_> - the current file name, as set by B<File::Find::find>.
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<boolean> - true or false based on logical statement.
+
+=back
+
+=cut
+
+sub wanted ($)
+  {
     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
-    -f _ &&
+    -f $_ &&
     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
-    push(@metalist,"$dir/$_");
-}
+    push(@metalist,$File::Find::dir.'/'.$_);
+  }
 
-# ---------------  Read loncapa_apache.conf and loncapa.conf and get variables
-my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
-my %perlvar=%{$perlvarref};
-undef $perlvarref; # remove since sensitive and not needed
-delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
+=pod
 
-# ------------------------------------- Only run if machine is a library server
-exit unless $perlvar{'lonRole'} eq 'library';
+B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
 
-# ----------------------------- Make sure this process is running from user=www
+I<Note that this is significantly altered from a subroutine present in lonnet.>
 
-my $wwwid=getpwnam('www');
-if ($wwwid!=$<) {
-   $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
-   $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
-   system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
- mailto $emailto -s '$subj' > /dev/null");
-   exit 1;
-}
+=over 4
 
+Parameters:
 
-# ---------------------------------------------------------- We are in business
+=item I<$file> - a path to the file.
 
-open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
-print LOG '==== Searchcat Run '.localtime()."====\n\n";
-my $dbh;
-# ------------------------------------- Make sure that database can be accessed
-{
-    unless (
-	    $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
-	    ) { 
-	print LOG "Cannot connect to database!\n";
-	exit;
-    }
-    my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
-        "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
-        "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
-        "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".
-        "copyright TEXT, FULLTEXT idx_title (title), ".
-        "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".
-        "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".
-        "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".
-        "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".
-        "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".
-        "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
-    # It would sure be nice to have some logging mechanism.
-    $dbh->do($make_metadata_table);
-}
+=back
 
-# ------------------------------------------------------------- get .meta files
-opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
-my @homeusers=grep
-          {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
-          grep {!/^\.\.?$/} readdir(RESOURCES);
-closedir RESOURCES;
-foreach my $user (@homeusers) {
-    print LOG "\n=== User: ".$user."\n\n";
-# Remove left-over db-files from potentially crashed searchcat run
-    my $prodir=&propath($perlvar{'lonDefDomain'},$user);
-    unlink($prodir.'/nohist_new_resevaldata.db');
-# Use find.pl
-    undef @metalist;
-    @metalist=();
-    &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
-
-# -- process each file to get metadata and put into search catalog SQL database
-# Also, check to see if already there.
-# I could just delete (without searching first), but this works for now.
-foreach my $m (@metalist) {
-    print LOG "- ".$m."\n";
-    my $ref=&metadata($m);
-    my $m2='/res/'.&declutter($m);
-    $m2=~s/\.meta$//;
-    &dynamicmeta($m2);
-    my $q2="select * from metadata where url like binary '$m2'";
-    my $sth = $dbh->prepare($q2);
-    $sth->execute();
-    my $r1=$sth->fetchall_arrayref;
-    if (@$r1) {
-	$sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
-        $sth->execute();
-    }
-    $sth=$dbh->prepare('insert into metadata values ('.
-			  '"'.delete($ref->{'title'}).'"'.','.
-			  '"'.delete($ref->{'author'}).'"'.','.
-			  '"'.delete($ref->{'subject'}).'"'.','.
-			  '"'.$m2.'"'.','.
-			  '"'.delete($ref->{'keywords'}).'"'.','.
-			  '"'.'current'.'"'.','.
-			  '"'.delete($ref->{'notes'}).'"'.','.
-			  '"'.delete($ref->{'abstract'}).'"'.','.
-			  '"'.delete($ref->{'mime'}).'"'.','.
-			  '"'.delete($ref->{'language'}).'"'.','.
-			  '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
-			  '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
-			  '"'.delete($ref->{'owner'}).'"'.','.
-			  '"'.delete($ref->{'copyright'}).'"'.')');
-    $sth->execute();
-}
-
-# ----------------------------------------------------------- Clean up database
-# Need to, perhaps, remove stale SQL database records.
-# ... not yet implemented
+=over 4
 
+Returns:
 
-# -------------------------------------------------- Copy over the new db-files
-    system('mv '.$prodir.'/nohist_new_resevaldata.db '.
-	         $prodir.'/nohist_resevaldata.db');
-}
-# --------------------------------------------------- Close database connection
-$dbh->disconnect;
-print LOG "\n==== Searchcat completed ".localtime()." ====\n";
-close(LOG);
-exit 0;
-# =============================================================================
+=item C<hash reference> - a reference to a hash (keys and values).
+
+=back
 
-# ---------------------------------------------------------------- Get metadata
-# significantly altered from subroutine present in lonnet
-sub metadata {
-    my ($uri,$what)=@_;
-    my %metacache;
-    $uri=&declutter($uri);
-    my $filename=$uri;
-    $uri=~s/\.meta$//;
-    $uri='';
-    unless ($metacache{$uri.'keys'}) {
-        unless ($filename=~/\.meta$/) { $filename.='.meta'; }
-	my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
-        my $parser=HTML::TokeParser->new(\$metastring);
-        my $token;
-        while ($token=$parser->get_token) {
-           if ($token->[0] eq 'S') {
-	      my $entry=$token->[1];
-              my $unikey=$entry;
-              if (defined($token->[2]->{'part'})) { 
-                 $unikey.='_'.$token->[2]->{'part'}; 
+=cut
+
+sub get_metadata_from_file ($)
+  {
+    my ($filename) = @_;
+    my %metatable; # Used to store return value of hash-tabled metadata.
+    $filename = &declutter($filename); # Remove non-identifying filesystem info
+    my $uri = ''; # The URI is not relevant in this scenario.
+    unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
+      {
+	$filename .= '.meta'; # Append a .meta suffix.
+      }
+    # Get the file contents.
+    my $metadata_string =
+	&get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
+
+    # Parse the file based on its XML tags.
+    my $parser = HTML::TokeParser->new(\$metadata_string);
+    my $token;
+    while ($token = $parser->get_token) # Loop through tokens.
+      {
+	if ($token->[0] eq 'S') # If it is a start token.
+	  {
+	    my $entry = $token->[1];
+	    my $unikey = $entry; # A unique identifier for this xml tag key.
+	    if (defined($token->[2]->{'part'}))
+	      { 
+		$unikey .= '_'.$token->[2]->{'part'}; 
 	      }
-              if (defined($token->[2]->{'name'})) { 
-                 $unikey.='_'.$token->[2]->{'name'}; 
+	    if (defined($token->[2]->{'name'}))
+	      { 
+		$unikey .= '_'.$token->[2]->{'name'}; 
 	      }
-              if ($metacache{$uri.'keys'}) {
-                 $metacache{$uri.'keys'}.=','.$unikey;
-              } else {
-                 $metacache{$uri.'keys'}=$unikey;
+	    # Append $unikey to metatable's keys entry.
+	    if ($metatable{$uri.'keys'})
+	      {
+		$metatable{$uri.'keys'} .= ','.$unikey;
 	      }
-              map {
-		  $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
-              } @{$token->[3]};
-              unless (
-                 $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
-		      ) { $metacache{$uri.''.$unikey}=
-			      $metacache{$uri.''.$unikey.'.default'};
-		      }
-          }
-       }
-    }
-    return \%metacache;
-}
-
-# ------------------------------------------------------------ Serves up a file
-# returns either the contents of the file or a -1
-sub getfile {
-  my $file=shift;
-  if (! -e $file ) { return -1; };
-  my $fh=IO::File->new($file);
-  my $a='';
-  while (<$fh>) { $a .=$_; }
-  return $a
-}
-
-# ------------------------------------------------------------- Declutters URLs
-sub declutter {
-    my $thisfn=shift;
-    $thisfn=~s/^$perlvar{'lonDocRoot'}//;
-    $thisfn=~s/^\///;
-    $thisfn=~s/^res\///;
-    return $thisfn;
-}
-
-# --------------------------------------- Is this the home server of an author?
-# (copied from lond, modification of the return value)
-sub ishome {
-    my $author=shift;
-    $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
-    my ($udom,$uname)=split(/\//,$author);
-    my $proname=propath($udom,$uname);
-    if (-e $proname) {
-	return 1;
-    } else {
-        return 0;
-    }
-}
-
-# -------------------------------------------- Return path to profile directory
-# (copied from lond)
-sub propath {
-    my ($udom,$uname)=@_;
-    $udom=~s/\W//g;
-    $uname=~s/\W//g;
-    my $subdir=$uname.'__';
-    $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
-    my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
-    return $proname;
-} 
+	    else
+	      {
+		$metatable{$uri.'keys'} = $unikey;
+	      }
+	    # Insert contents into metatable entry for the unikey.
+	    foreach my $t3 (@{$token->[3]})
+	      {
+		$metatable{$uri.''.$unikey.'.'.$t3} = $token->[2]->{$t3};
+	      }
+	    # If there was no text contained inside the tags, set = default.
+	    unless
+	      (
+	        $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
+	      )
+	      {
+		$metatable{$uri.''.$unikey} =
+		    $metatable{$uri.''.$unikey.'.default'};
+	      }
+	  }
+      }
+    # Return with a key-value table of XML tags and their tag contents.
+    return(\%metatable);
+  }
 
-# ---------------------------- convert 'time' format into a datetime sql format
-sub sqltime {
+=pod
+
+B<get_file_contents> - returns either the contents of the file or a -1.
+
+=over 4
+
+Parameters:
+
+=item I<$file> - a complete filesystem path to the file.
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<string> - file contents or a -1.
+
+=back
+
+=cut
+
+sub get_file_contents ($)
+  {
+    my $file = shift(@_);
+
+    # If file does not exist, then return a -1 value.
+    unless (-e $file)
+      {
+	return(-1);
+      }
+
+    # Read in file contents.
+    my $file_handle = IO::File->new($file);
+    my $file_contents = '';
+    while (<$file_handle>)
+      {
+	$file_contents .= $_;
+      }
+
+    # Return file contents.
+    return($file_contents);
+  }
+
+=pod
+
+B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
+
+=over 4
+
+Parameters:
+
+=item I<$filesystem_path> - a complete filesystem path.
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<string> - remnants of the filesystem path (beginning portion removed).
+
+=back
+
+=cut
+
+sub declutter
+  {
+    my $filesystem_path = shift(@_);
+
+    # Remove beginning portions of the filesystem path.
+    $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
+    $filesystem_path =~ s!^/!!;
+    $filesystem_path =~ s!^res/!!;
+
+    # Return what is remaining for the filesystem path.
+    return($filesystem_path);
+  }
+
+=pod
+
+B<query_home_server_status> - Is this the home server of an author's directory?
+
+=over 4
+
+Parameters:
+
+=item I<$author_filesystem_path> - directory path for a user.
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<boolean> - 1 if true; 0 if false.
+
+=back
+
+=cut
+
+sub query_home_server_status ($)
+  {
+    my $author_filesystem_path = shift(@_);
+
+    # Remove beginning portion of this filesystem path.
+    $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
+
+    # Construct path to the author's ordinary user directory.
+    my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
+    my $user_directory_path = construct_path_to_user_directory($user_domain,
+							       $username);
+
+    # Return status of whether the user directory path is defined.
+    if (-e $user_directory_path)
+      {
+	return(1); # True.
+      }
+    else
+      {
+        return(0); # False.
+      }
+  }
+
+=pod
+
+B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
+
+=over 4
+
+Parameters:
+
+=item I<$user_domain> - the loncapa domain of the user.
+
+=item I<$username> - the unique username (user id) of the user.
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<string> - representing the path on the filesystem.
+
+=back
+
+=cut
+
+sub construct_path_to_user_directory ($$)
+  {
+    my ($user_domain,$username) = @_;
+
+    # Untaint.
+    $user_domain =~ s/\W//g;
+    $username =~ s/\W//g;
+
+    # Create three levels of sub-directoried filesystem path
+    # based on the first three characters of the username.
+    my $sub_filesystem_path = $username.'__';
+    $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3!;
+
+    # Use the sub-directoried levels and other variables to generate
+    # the complete filesystem path.
+    my $complete_filesystem_path =
+	join('/',($perlvar{'lonUsersDir'},
+		  $user_domain,
+		  $sub_filesystem_path,
+		  $username));
+
+    # Return the complete filesystem path.
+    return($complete_filesystem_path);
+  }
+
+=pod
+
+B<sql_formatted_time> ($) - turns seconds since epoch into datetime sql format.
+
+=over 4
+
+Parameters:
+
+=item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<string> - datetime sql formatted string.
+
+=back
+
+=cut
+
+sub sql_formatted_time ($)
+  {
+    # Sanitize the time argument and convert to localtime array.
     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
-	localtime(&unsqltime(@_[0]));
-    $mon++; $year+=1900;
-    return "$year-$mon-$mday $hour:$min:$sec";
-}
-
-sub maketime {
-    my %th=@_;
-    return POSIX::mktime(
-        ($th{'seconds'},$th{'minutes'},$th{'hours'},
-         $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'}));
-}
-
-
-#########################################
-#
-# Retro-fixing of un-backward-compatible time format
-
-sub unsqltime {
-    my $timestamp=shift;
-    if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) {
-       $timestamp=&maketime(
-	   'year'=>$1,'month'=>$2,'day'=>$3,
-           'hours'=>$4,'minutes'=>$5,'seconds'=>$6);
-    }
-    return $timestamp;
-}
+	localtime(&sanitize_time(shift(@_)));
+
+    # Convert month from (0..11) to (1..12).
+    $mon += 1;
+
+    # Make the year compatible with A.D. specification.
+    $year += 1900;
+
+    # Return a date which is compatible with MySQL's "DATETIME" format.
+    return(join('-',($year,$mon,$mday)).
+	   ' '.
+	   join(':',($hour,$min,$sec))
+	   );
+  }
+
+
+# ==================================== The following two subroutines are needed
+#                 for accommodating incorrect time formats inside the metadata.
+
+=pod
+
+B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
+
+=over 4
+
+Parameters:
+
+=item I<%time_metadata> - a key-value listing characterizing month, year, etc.
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<integer> - seconds since epoch.
+
+=back
+
+=cut
+
+sub make_seconds_since_epoch (@)
+  {
+    # Keytable of time metadata.
+    my %time_metadata = @_;
+
+    # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
+    return(POSIX::mktime(
+			 ($time_metadata{'seconds'},
+			  $time_metadata{'minutes'},
+			  $time_metadata{'hours'},
+			  $time_metadata{'day'},
+			  $time_metadata{'month'}-1,
+			  $time_metadata{'year'}-1900,
+			  0,
+			  0,
+			  $time_metadata{'dlsav'})
+			 )
+	   );
+  }
+
+=pod
+
+B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
+
+Somebody described this subroutine as
+"retro-fixing of un-backward-compatible time format".
+
+What this means is that part of this code expects to get UTC seconds
+since the epoch (beginning of 1970).  Yet, some of the .meta files have
+sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
+integers (e.g. 1044147435).  These time strings do not encode the timezone
+and, in this sense, can be considered "un-backwards-compatible".
+
+=over 4
+
+Parameters:
+
+=item I<$potentially_badformat_string> - string to "retro-fix".
+
+=back
+
+=over 4
+
+Returns:
+
+=item C<integer> - seconds since epoch.
+
+=back
+
+=cut
+
+sub sanitize_time ($)
+  {
+    my $timestamp = shift(@_);
+    # If timestamp is in this unexpected format....
+    if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
+      {
+	# then convert into seconds since epoch (the expected format).
+	$timestamp = &make_seconds_since_epoch(
+					       'year' => $1,
+					       'month' => $2,
+					       'day' => $3,
+					       'hours' => $4,
+					       'minutes' => $5,
+					       'seconds' => $6
+					       );
+      }
+    # Otherwise we assume timestamp to be as expected.
+    return($timestamp);
+  }
+
+=pod
+
+=head1 AUTHOR
+
+Written to help the loncapa project.
+
+Scott Harrison, sharrison@users.sourceforge.net
+
+This is distributed under the same terms as loncapa (i.e. the GNU General Public License).
 
+=cut
