[LON-CAPA-cvs] cvs: loncom /metadata_database searchcat.pl
www
lon-capa-cvs@mail.lon-capa.org
Mon, 18 Nov 2002 20:44:15 -0000
www Mon Nov 18 15:44:15 2002 EDT
Modified files:
/loncom/metadata_database searchcat.pl
Log:
Previously this script used a lot of memory, because it first stored ALL filenames in a single array.
It now processes the metadata files in per-author chunks instead.
It also tries to condense the nohist_resevaldata.db files, so the first run will take a long time.
Index: loncom/metadata_database/searchcat.pl
diff -u loncom/metadata_database/searchcat.pl:1.24 loncom/metadata_database/searchcat.pl:1.25
--- loncom/metadata_database/searchcat.pl:1.24 Fri Oct 18 09:54:31 2002
+++ loncom/metadata_database/searchcat.pl Mon Nov 18 15:44:15 2002
@@ -2,7 +2,7 @@
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
-# $Id: searchcat.pl,v 1.24 2002/10/18 13:54:31 www Exp $
+# $Id: searchcat.pl,v 1.25 2002/11/18 20:44:15 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -70,22 +70,16 @@
# ------------------------------------------- Code to evaluate dynamic metadata
sub dynamicmeta {
-#
-#
-# Do nothing for now ...
-#
-#
- return;
-#
-# ..., but stuff below already works
-#
+
my $url=&declutter(shift);
$url=~s/\.meta$//;
my %returnhash=();
my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
my $prodir=&propath($adomain,$aauthor);
- if (tie(%evaldata,'GDBM_File',
- $prodir.'/nohist_resevaldata.db',&GDBM_WRCREAT(),0640)) {
+ if ((tie(%evaldata,'GDBM_File',
+ $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
+ (tie(%newevaldata,'GDBM_File',
+ $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
my %sum=();
my %cnt=();
my %listitems=('count' => 'add',
@@ -128,9 +122,9 @@
$sum{$ctype}=''.$evaldata{$_};
}
}
- if ($ctype eq 'count') {
- delete($evaldata{$_});
- }
+ if ($ctype ne 'count') {
+ $newevaldata{$_}=$evaldata{$_};
+ }
}
}
foreach (keys %cnt) {
@@ -144,9 +138,10 @@
}
if ($returnhash{'count'}) {
my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
- $evaldata{$newkey}=$returnhash{'count'};
+ $newevaldata{$newkey}=$returnhash{'count'};
}
untie(%evaldata);
+ untie(%newevaldata);
}
return %returnhash;
}
@@ -169,13 +164,17 @@
# ------------------------------------- Only run if machine is a library server
exit unless $perlvar{'lonRole'} eq 'library';
+# ---------------------------------------------------------- We are in business
+
+open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
+print LOG '==== Searchcat Run '.localtime()."====\n\n";
my $dbh;
# ------------------------------------- Make sure that database can be accessed
{
unless (
$dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
) {
- print "Cannot connect to database!\n";
+ print LOG "Cannot connect to database!\n";
exit;
}
my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
@@ -200,13 +199,20 @@
grep {!/^\.\.?$/} readdir(RESOURCES);
closedir RESOURCES;
foreach my $user (@homeusers) {
+ print LOG "\n=== User: ".$user."\n\n";
+# Remove left-over db-files from potentially crashed searchcat run
+ my $prodir=&propath($perlvar{'lonDefDomain'},$user);
+ unlink($prodir.'/nohist_new_resevaldata.db');
+# Use find.pl
+ undef @metalist;
+ @metalist=();
&find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
-}
# -- process each file to get metadata and put into search catalog SQL database
# Also, check to see if already there.
# I could just delete (without searching first), but this works for now.
foreach my $m (@metalist) {
+ print LOG "- ".$m."\n";
my $ref=&metadata($m);
my $m2='/res/'.&declutter($m);
$m2=~s/\.meta$//;
@@ -241,8 +247,17 @@
# Need to, perhaps, remove stale SQL database records.
# ... not yet implemented
+
+# -------------------------------------------------- Copy over the new db-files
+ system('mv '.$prodir.'/nohist_new_resevaldata.db '.
+ $prodir.'/nohist_resevaldata.db');
+}
# --------------------------------------------------- Close database connection
$dbh->disconnect;
+print LOG "\n==== Searchcat completed ".localtime()." ====\n";
+close(LOG);
+exit 0;
+# =============================================================================
# ---------------------------------------------------------------- Get metadata
# significantly altered from subroutine present in lonnet