[LON-CAPA-cvs] cvs: modules /gerd/harvesting recomprocs.pl
www
www at source.lon-capa.org
Sun Jun 24 19:07:39 EDT 2012
www Sun Jun 24 23:07:39 2012 EDT
Modified files:
/modules/gerd/harvesting recomprocs.pl
Log:
Determining resources with a certain taxonomy
Index: modules/gerd/harvesting/recomprocs.pl
diff -u modules/gerd/harvesting/recomprocs.pl:1.1 modules/gerd/harvesting/recomprocs.pl:1.2
--- modules/gerd/harvesting/recomprocs.pl:1.1 Sun Jun 24 18:13:54 2012
+++ modules/gerd/harvesting/recomprocs.pl Sun Jun 24 23:07:39 2012
@@ -34,10 +34,134 @@
}
# =================================================================================
+#
+# Return the resource keys that are catalogued with a set of taxonomies
+#
+
+sub taxonomy_members {
+ my (@which)=@_;
+ my %taxcut;
+ my %lookup=();
+ my %level=();
+ my $minlevel=3;
+ my %output=();
+#
+# Find out the taxonomy indices
+# taxo_categories has taxonomy names as keys and taxonomy indices as values
+# also has taxonomy indices as keys and frequency as values
+# minlevel is going to be smallest taxonomy level in the set, %lookup contains what to look up
+# minlevel=1 is the most work
+ my @checkon=();
+ tie(%taxcut, 'GDBM_File', $datapath.'dbfiles/taxo_categories.db',&GDBM_READER(),0640);
+ foreach my $twhich (@which) {
+ my $tcat=$taxcut{$twhich};
+ if ($taxcut{$twhich}) {
+ $lookup{$twhich}=$tcat;
+ my @subcats=split(/\:/,$twhich);
+ $level{$twhich}=$#subcats+1;
+ if ($#subcats==0) {
+ $minlevel=1;
+ } elsif (($#subcats==1) && ($minlevel==3)) {
+ $minlevel=2;
+ }
+ }
+ }
+ untie(%taxcut);
+ my %taxo1;
+ my %taxo2;
+ my %taxo3;
+# Tie only the levels we need
+ tie(%taxo3, 'GDBM_File', $datapath.'dbfiles/taxo_level3.db',&GDBM_READER(),0640);
+ if ($minlevel<3) {
+ tie(%taxo2, 'GDBM_File', $datapath.'dbfiles/taxo_level2.db',&GDBM_READER(),0640);
+ }
+ if ($minlevel<2) {
+ tie(%taxo1, 'GDBM_File', $datapath.'dbfiles/taxo_level1.db',&GDBM_READER(),0640);
+ }
+# Business logic, finding stuff for each key that is supposed to be looked up
+ foreach my $key (keys(%lookup)) {
+ my $keystr='';
+ if ($level{$key}==3) {
+ $keystr=$taxo3{$lookup{$key}};
+ } elsif ($level{$key}==2) {
+ $keystr=$taxo2{$lookup{$key}};
+ } elsif ($level{$key}==1) {
+ $keystr=$taxo1{$lookup{$key}}
+ }
+# Put into unique output
+ foreach my $c (split(/\s*\,\s*/,$keystr)) {
+ if ($c) { $output{$c}++; }
+ }
+ }
+# Now resolve dependencies, etc
+ my $again=0;
+ if ($minlevel<3) {
+ foreach my $c (keys(%output)) {
+ if ($c=~/\:/) {
+# something to resolve
+ my ($tlevel,$tsub)=split(/\:/,$c);
+ delete($output{$c});
+ my $keystr='';
+ if ($tlevel==3) {
+ $keystr=$taxo3{$tsub};
+ } else {
+ $keystr=$taxo2{$tsub};
+ }
+ foreach my $key (split/\,/,$keystr) {
+ if ($key) { $output{$key}++; }
+ }
+ $again=1;
+ }
+ if ($c=~/\-/) {
+ my ($start,$end)=split(/\-/,$c);
+ delete($output{$c});
+ if ($end>$start) {
+ for (my $i=$start;$i<=$end;$i++) {
+ $output{$i}++;
+ }
+ }
+ }
+ }
+ }
+# If dependencies had been resolved and we were on the lowest level,
+# we might have to resolve second-level dependencies
+ if (($again) && ($minlevel<2)) {
+ foreach my $c (keys(%output)) {
+ if ($c=~/\:/) {
+# something to resolve
+ my ($tlevel,$tsub)=split(/\:/,$c);
+ delete($output{$c});
+# has to be level 3 now
+ if ($tlevel==2) { print "\n=== Wow!!! === $c\n" };
+ my $keystr=$taxo3{$tsub};
+ foreach my $key (split/\,/,$keystr) {
+ if ($key) { $output{$key}++; }
+ }
+ }
+ }
+ }
+# Untie the levels we opened
+ untie(%taxo3);
+ if ($minlevel<3) {
+ untie(%taxo2);
+ }
+ if ($minlevel<2) {
+ untie(%taxo1);
+ }
+ return %output;
+}
my $parm=shift;
print "Parameter: $parm\n";
-my %stuff=&associated(split(/\s*\,\s*/,$parm));
-foreach my $key (keys(%stuff)) {
- print $key." ".$stuff{$key}."\n";
+#my %stuff=&associated(split(/\s*\,\s*/,$parm));
+my %stuff=&taxonomy_members(split(/\s*\,\s*/,$parm));
+#foreach my $key (keys(%stuff)) {
+# print $key." ".$stuff{$key}."\n";
+#}
+my $sum=0;
+my @astuff=keys(%stuff);
+my $allkeys=$#astuff+1;
+foreach my $key (@astuff) {
+ $sum+=$stuff{$key}
}
+print "Sum: $sum Unique: $allkeys\n";
More information about the LON-CAPA-cvs
mailing list