[LON-CAPA-cvs] cvs: nsdl /nsdlloncapaorg harvester.pl

www lon-capa-cvs@mail.lon-capa.org
Tue, 29 Jul 2003 14:37:51 -0000


www		Tue Jul 29 10:37:51 2003 EDT

  Modified files:              
    /nsdl/nsdlloncapaorg	harvester.pl 
  Log:
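  This actually does something: each record's XML is now written to its own
  file, $basepath/$adom/$auname/$baseid.xml, instead of being printed to STDOUT.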
  
  
Index: nsdl/nsdlloncapaorg/harvester.pl
diff -u nsdl/nsdlloncapaorg/harvester.pl:1.3 nsdl/nsdlloncapaorg/harvester.pl:1.4
--- nsdl/nsdlloncapaorg/harvester.pl:1.3	Tue Jul 29 10:13:36 2003
+++ nsdl/nsdlloncapaorg/harvester.pl	Tue Jul 29 10:37:51 2003
@@ -12,6 +12,9 @@
 use LWP::UserAgent;
 use Getopt::Std;
 use Digest::MD5 qw(md5_hex);
+use IO::File;
+
+my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data';
 
 my $pub_month;
 my $pub_year;
@@ -60,7 +63,6 @@
 #}
 
 my %records = ();;
-print '<?xml version="1.0" encoding="UTF-8"?>'."\n\n";
 
 foreach my $metadata (@loncapa) {
 	chomp $metadata;
@@ -84,8 +86,10 @@
 	next if ( ($subject eq 'Sample') || ($subject eq 'Something') );
 	my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3];
         my $baseid=$tkline[3];
+	my ($adom,$auname)=($baseid=~/^\/res\/(\w+)\/(\w+)\//);
 	$baseid=~s/\W/\_/g;
 	$baseid=~s/^\_res\_//g;
+	my $fileid=md5_hex($baseid);
 
 	next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ );
 	my $keywords = $tkline[4];
@@ -150,7 +154,17 @@
 	# Private means open only to author of material
 	next if ( $copyright eq 'private');
 	my $platform = "5";     # HTML Browser (not specified but construed from metadata)
-	print (<<ENDMETA);
+#
+# Create path
+#
+	unless (-e $basepath.'/'.$adom) { mkdir($basepath.'/'.$adom); }
+	unless (-e $basepath.'/'.$adom.'/'.$auname) { 
+	    mkdir($basepath.'/'.$adom.'/'.$auname) || die 'Could not create '.$basepath.'/'.$adom.'/'.$auname;
+	}
+	open(XML,'>'.$basepath.'/'.$adom.'/'.$auname.'/'.$baseid.'.xml');
+	print XML (<<ENDMETA);
+<?xml version="1.0" encoding="UTF-8"?>
+
 <oaidc:dc xmlns="http://purl.org/dc/elements/1.1/" 
           xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/" 
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
@@ -166,6 +180,6 @@
     <description>$abstract</description>
     <date>$rev_year-$rev_month-$rev_day</date>
 </oaidc:dc>
-
 ENDMETA
+      close (XML);
 }