[LON-CAPA-cvs] cvs: nsdl /nsdlloncapaorg harvester.pl

www lon-capa-cvs@mail.lon-capa.org
Wed, 10 May 2006 16:28:59 -0000


www		Wed May 10 12:28:59 2006 EDT

  Modified files:              
    /nsdl/nsdlloncapaorg	harvester.pl 
  Log:
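  More NSDLy: add harvard.lon-capa.org to the server list; skip records
  that have no subject/keywords, no abstract, or a copyright other than
  'public'; emit a single <subject> built from subject and keywords; and
  use $author for <creator>.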
  
  
Index: nsdl/nsdlloncapaorg/harvester.pl
diff -u nsdl/nsdlloncapaorg/harvester.pl:1.8 nsdl/nsdlloncapaorg/harvester.pl:1.9
--- nsdl/nsdlloncapaorg/harvester.pl:1.8	Fri Nov 25 14:29:56 2005
+++ nsdl/nsdlloncapaorg/harvester.pl	Wed May 10 12:28:56 2006
@@ -61,7 +61,8 @@
 'meitner.physics.hope.edu',
 'loncapa.vcu.edu',
 'lon-capa.ucsc.edu',
-'lon-capa.bsu.edu'
+'lon-capa.bsu.edu',
+'harvard.lon-capa.org'
 );
 
 foreach (@servers) {
@@ -141,9 +142,16 @@
 	my $version = $tkline[5];
 	my $notes = $tkline[6];
 	my $abstract = $tkline[7];
-	unless ($abstract) { $abstract=$subject; }
-	unless ($abstract) { $abstract=$title; }
-	unless ($abstract) { $abstract=$keywords; }
+        $abstract=~s/ s / /gs;
+        $abstract=~s/\s+/ /gs;
+        my $postsubject=$subject;
+        unless ($postsubject) {
+           $postsubject=$keywords;
+        } else {
+           $postsubject.=' ('.$keywords.')';
+        }
+        unless ($postsubject=~/\w/) { $knockout{'nosubject_'.$rawtype}++; next; }
+        unless ($abstract) { $knockout{'noabstract_'.$rawtype}++; next; }
 	my $type = $rawtype;
         if ($type=~/htm/) { $type='htm'; }
 
@@ -204,9 +212,7 @@
 	# Domain means restricted to a particular LON-CAPA domain
 	# Defaults mean access open to any registered LON-CAPA user
 	# Private means open only to author of material
-	if ( $copyright eq 'private') { $knockout{'private_'.$rawtype}++; next; } 
-	if ( $copyright eq 'domain') { $knockout{'domain_'.$rawtype}++; next; }
-        if ( $copyright eq 'custom') { $knockout{'custom_'.$rawtype}++; next; }
+        unless ($copyright eq 'public') { $knockout{'notpublic_'.$rawtype}++; next; }
 	my $platform = "5";     # HTML Browser (not specified but construed from metadata)
 #
 # We actually do this
@@ -231,10 +237,9 @@
                               http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
 >
     <title>$title</title>
-    <creator>$author_fname $author_lname</creator>
+    <creator>$author</creator>
     <identifier>$resourceurl</identifier>
-    <subject>$keywords</subject>
-    <subject>$subject</subject>
+    <subject>$postsubject</subject>
     <language>$primary_language</language>
     <description>$abstract</description>
     <date>$rev_year-$rev_month-$rev_day</date>