[LON-CAPA-cvs] cvs: nsdl /nsdlloncapaorg harvester.pl
www
lon-capa-cvs@mail.lon-capa.org
Wed, 10 May 2006 16:28:59 -0000
www Wed May 10 12:28:59 2006 EDT
Modified files:
/nsdl/nsdlloncapaorg harvester.pl
Log:
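More NSDLy: add harvard.lon-capa.org to the server list; skip records that have no subject/keywords or no abstract; harvest only resources whose copyright is 'public'; emit a single <creator> and one combined <subject> (subject plus keywords) in the oai_dc record.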
Index: nsdl/nsdlloncapaorg/harvester.pl
diff -u nsdl/nsdlloncapaorg/harvester.pl:1.8 nsdl/nsdlloncapaorg/harvester.pl:1.9
--- nsdl/nsdlloncapaorg/harvester.pl:1.8 Fri Nov 25 14:29:56 2005
+++ nsdl/nsdlloncapaorg/harvester.pl Wed May 10 12:28:56 2006
@@ -61,7 +61,8 @@
'meitner.physics.hope.edu',
'loncapa.vcu.edu',
'lon-capa.ucsc.edu',
-'lon-capa.bsu.edu'
+'lon-capa.bsu.edu',
+'harvard.lon-capa.org'
);
foreach (@servers) {
@@ -141,9 +142,16 @@
my $version = $tkline[5];
my $notes = $tkline[6];
my $abstract = $tkline[7];
- unless ($abstract) { $abstract=$subject; }
- unless ($abstract) { $abstract=$title; }
- unless ($abstract) { $abstract=$keywords; }
+ $abstract=~s/ s / /gs;
+ $abstract=~s/\s+/ /gs;
+ my $postsubject=$subject;
+ unless ($postsubject) {
+ $postsubject=$keywords;
+ } else {
+ $postsubject.=' ('.$keywords.')';
+ }
+ unless ($postsubject=~/\w/) { $knockout{'nosubject_'.$rawtype}++; next; }
+ unless ($abstract) { $knockout{'noabstract_'.$rawtype}++; next; }
my $type = $rawtype;
if ($type=~/htm/) { $type='htm'; }
@@ -204,9 +212,7 @@
# Domain means restricted to a particular LON-CAPA domain
# Defaults mean access open to any registered LON-CAPA user
# Private means open only to author of material
- if ( $copyright eq 'private') { $knockout{'private_'.$rawtype}++; next; }
- if ( $copyright eq 'domain') { $knockout{'domain_'.$rawtype}++; next; }
- if ( $copyright eq 'custom') { $knockout{'custom_'.$rawtype}++; next; }
+ unless ($copyright eq 'public') { $knockout{'notpublic_'.$rawtype}++; next; }
my $platform = "5"; # HTML Browser (not specified but construed from metadata)
#
# We actually do this
@@ -231,10 +237,9 @@
http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
>
<title>$title</title>
- <creator>$author_fname $author_lname</creator>
+ <creator>$author</creator>
<identifier>$resourceurl</identifier>
- <subject>$keywords</subject>
- <subject>$subject</subject>
+ <subject>$postsubject</subject>
<language>$primary_language</language>
<description>$abstract</description>
<date>$rev_year-$rev_month-$rev_day</date>