[LON-CAPA-cvs] cvs: modules /gerd/harvesting recomprocs.pl

www www at source.lon-capa.org
Sun Jun 24 19:07:39 EDT 2012


www		Sun Jun 24 23:07:39 2012 EDT

  Modified files:              
    /modules/gerd/harvesting	recomprocs.pl 
  Log:
  Determining resources with a certain taxonomy
  
  
Index: modules/gerd/harvesting/recomprocs.pl
diff -u modules/gerd/harvesting/recomprocs.pl:1.1 modules/gerd/harvesting/recomprocs.pl:1.2
--- modules/gerd/harvesting/recomprocs.pl:1.1	Sun Jun 24 18:13:54 2012
+++ modules/gerd/harvesting/recomprocs.pl	Sun Jun 24 23:07:39 2012
@@ -34,10 +34,134 @@
 }
 
 # =================================================================================
+#
+#  Return the resource keys that are catalogued with a set of taxonomies
+#
+
+sub taxonomy_members {
+   my (@which)=@_;
+   my %taxcut;
+   my %lookup=();
+   my %level=();
+   my $minlevel=3;
+   my %output=();
+# 
+# Find out the taxonomy indices
+# taxo_categories has taxonomy names as keys and taxonomy indices as values
+# also has taxonomy indices as keys and frequency as values
+# minlevel is going to be smallest taxonomy level in the set, %lookup contains what to look up
+# minlevel=1 is the most work
+   my @checkon=();
+   tie(%taxcut, 'GDBM_File', $datapath.'dbfiles/taxo_categories.db',&GDBM_READER(),0640);
+   foreach my $twhich (@which) {
+      my $tcat=$taxcut{$twhich};
+      if ($taxcut{$twhich}) {
+         $lookup{$twhich}=$tcat;
+         my @subcats=split(/\:/,$twhich);
+         $level{$twhich}=$#subcats+1;
+         if ($#subcats==0) { 
+            $minlevel=1;
+         } elsif (($#subcats==1) && ($minlevel==3)) {
+            $minlevel=2;
+         }
+      }
+   }
+   untie(%taxcut);
+   my %taxo1;
+   my %taxo2;
+   my %taxo3;
+# Tie only the levels we need
+   tie(%taxo3, 'GDBM_File', $datapath.'dbfiles/taxo_level3.db',&GDBM_READER(),0640);
+   if ($minlevel<3) {
+      tie(%taxo2, 'GDBM_File', $datapath.'dbfiles/taxo_level2.db',&GDBM_READER(),0640);
+   }
+   if ($minlevel<2) {
+      tie(%taxo1, 'GDBM_File', $datapath.'dbfiles/taxo_level1.db',&GDBM_READER(),0640);
+   }
+# Business logic, finding stuff for each key that is supposed to be looked up
+   foreach my $key (keys(%lookup)) {
+      my $keystr='';
+      if ($level{$key}==3) {
+         $keystr=$taxo3{$lookup{$key}};
+      } elsif ($level{$key}==2) {
+         $keystr=$taxo2{$lookup{$key}};
+      } elsif ($level{$key}==1) {
+         $keystr=$taxo1{$lookup{$key}}
+      }
+# Put into unique output
+      foreach my $c (split(/\s*\,\s*/,$keystr)) {
+         if ($c) { $output{$c}++; }
+      }
+   }
+# Now resolve dependencies, etc
+   my $again=0;
+   if ($minlevel<3) {
+      foreach my $c (keys(%output)) {
+         if ($c=~/\:/) {
+# something to resolve
+            my ($tlevel,$tsub)=split(/\:/,$c);
+            delete($output{$c});
+            my $keystr='';
+            if ($tlevel==3) {
+               $keystr=$taxo3{$tsub};
+            } else {
+               $keystr=$taxo2{$tsub};
+            }
+            foreach my $key (split/\,/,$keystr) {
+               if ($key) { $output{$key}++; }
+            }
+            $again=1;
+         }
+         if ($c=~/\-/) {
+            my ($start,$end)=split(/\-/,$c);
+            delete($output{$c});
+            if ($end>$start) {
+               for (my $i=$start;$i<=$end;$i++) {
+                  $output{$i}++;
+               }
+            }
+         }
+      }
+   }
+# If dependencies had been resolved and we were on the lowest level,
+# we might have to resolve second-level dependencies
+   if (($again) && ($minlevel<2)) {
+      foreach my $c (keys(%output)) {
+         if ($c=~/\:/) {
+# something to resolve
+            my ($tlevel,$tsub)=split(/\:/,$c);
+            delete($output{$c});
+# has to be level 3 now
+            if ($tlevel==2) { print "\n=== Wow!!! === $c\n" };
+            my $keystr=$taxo3{$tsub};
+            foreach my $key (split/\,/,$keystr) {
+               if ($key) { $output{$key}++; }
+            }
+         }
+      }
+   }
+# Untie the levels we opened
+   untie(%taxo3);
+   if ($minlevel<3) {
+      untie(%taxo2);
+   }   
+   if ($minlevel<2) {
+      untie(%taxo1);
+   }
+   return %output;
+}
 
 my $parm=shift;
 print "Parameter: $parm\n";
-my %stuff=&associated(split(/\s*\,\s*/,$parm));
-foreach my $key (keys(%stuff)) {
-    print $key." ".$stuff{$key}."\n";
+#my %stuff=&associated(split(/\s*\,\s*/,$parm));
+my %stuff=&taxonomy_members(split(/\s*\,\s*/,$parm));
+#foreach my $key (keys(%stuff)) {
+#    print $key." ".$stuff{$key}."\n";
+#}
+my $sum=0;
+my @astuff=keys(%stuff);
+my $allkeys=$#astuff+1;
+foreach my $key (@astuff) {
+   $sum+=$stuff{$key}
 }
+print "Sum: $sum Unique: $allkeys\n";




More information about the LON-CAPA-cvs mailing list