[LON-CAPA-cvs] cvs: nsdl /nsdlloncapaorg harvester.pl
www
lon-capa-cvs@mail.lon-capa.org
Tue, 29 Jul 2003 14:37:51 -0000
www Tue Jul 29 10:37:51 2003 EDT
Modified files:
/nsdl/nsdlloncapaorg harvester.pl
Log:
This actually does something.
Index: nsdl/nsdlloncapaorg/harvester.pl
diff -u nsdl/nsdlloncapaorg/harvester.pl:1.3 nsdl/nsdlloncapaorg/harvester.pl:1.4
--- nsdl/nsdlloncapaorg/harvester.pl:1.3 Tue Jul 29 10:13:36 2003
+++ nsdl/nsdlloncapaorg/harvester.pl Tue Jul 29 10:37:51 2003
@@ -12,6 +12,9 @@
use LWP::UserAgent;
use Getopt::Std;
use Digest::MD5 qw(md5_hex);
+use IO::File;
+
+my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data';
my $pub_month;
my $pub_year;
@@ -60,7 +63,6 @@
#}
my %records = ();;
-print '<?xml version="1.0" encoding="UTF-8"?>'."\n\n";
foreach my $metadata (@loncapa) {
chomp $metadata;
@@ -84,8 +86,10 @@
next if ( ($subject eq 'Sample') || ($subject eq 'Something') );
my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3];
my $baseid=$tkline[3];
+ my ($adom,$auname)=($baseid=~/^\/res\/(\w+)\/(\w+)\//);
$baseid=~s/\W/\_/g;
$baseid=~s/^\_res\_//g;
+ my $fileid=md5_hex($baseid);
next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ );
my $keywords = $tkline[4];
@@ -150,7 +154,17 @@
# Private means open only to author of material
next if ( $copyright eq 'private');
my $platform = "5"; # HTML Browser (not specified but construed from metadata)
- print (<<ENDMETA);
+#
+# Create path
+#
+ unless (-e $basepath.'/'.$adom) { mkdir($basepath.'/'.$adom); }
+ unless (-e $basepath.'/'.$adom.'/'.$auname) {
+ mkdir($basepath.'/'.$adom.'/'.$auname) || die 'Could not create '.$basepath.'/'.$adom.'/'.$auname;
+ }
+ open(XML,'>'.$basepath.'/'.$adom.'/'.$auname.'/'.$baseid.'.xml');
+ print XML (<<ENDMETA);
+<?xml version="1.0" encoding="UTF-8"?>
+
<oaidc:dc xmlns="http://purl.org/dc/elements/1.1/"
xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
@@ -166,6 +180,6 @@
<description>$abstract</description>
<date>$rev_year-$rev_month-$rev_day</date>
</oaidc:dc>
-
ENDMETA
+ close (XML);
}