[LON-CAPA-cvs] cvs: modules /gerd/harvesting seq_keywords.pl

www www at source.lon-capa.org
Thu Sep 8 18:25:56 EDT 2011


www		Thu Sep  8 22:25:56 2011 EDT

  Added files:                 
    /modules/gerd/harvesting	seq_keywords.pl 
  Log:
  Collect all keywords associated with an assignment
  
  

Index: modules/gerd/harvesting/seq_keywords.pl
+++ modules/gerd/harvesting/seq_keywords.pl
use strict;

# Keyword table: $keywords[$id] holds a comma-separated list of the
# lower-cased keywords read for numeric id $id from keywords.dat.
# Each input line is "id<TAB>keyword"; consecutive lines with the same id
# form one group.  If an id shows up again later in the file, the later
# group replaces the earlier one.
my @keywords=();

{
   my $keyfile='keywords.dat';
   # A missing input file must not silently produce an empty table.
   open(my $keyfh,'<',$keyfile) or die "Cannot open $keyfile: $!";

   my $curid;        # id of the group currently being collected
   my @curkeys=();   # keywords collected so far for $curid
   while (my $line=<$keyfh>) {
      chomp($line);
      next if ($line eq '');   # ignore blank lines
      my ($id,$key)=split(/\t/,$line);
      # A change of id closes the previous group.
      if (defined($curid) && $id!=$curid) {
         $keywords[$curid]=join(',',@curkeys);
         @curkeys=();
      }
      $curid=$id;
      push(@curkeys,lc(defined($key)?$key:''));
   }
   # Store the final group as well; no id change follows it, so without
   # this step the keywords of the last id in the file would be lost.
   if (defined($curid)) {
      $keywords[$curid]=join(',',@curkeys);
   }
   close($keyfh);
}

# Subject table: $subjects[$rid] is the second tab-separated field read for
# numeric id $rid from res_topics_from_courses.dat (presumably the topic of
# the resource with that id -- confirm against the data file).
my @subjects=();
#my %allgensubs=();
{
   my $subfile='res_topics_from_courses.dat';
   # A missing input file must not silently produce an empty table.
   open(my $subfh,'<',$subfile) or die "Cannot open $subfile: $!";
   while (my $line=<$subfh>) {
      chomp($line);
      my ($rid,$sub)=split(/\t/,$line);
      # Blank lines and lines without a tab carry no usable pair; storing
      # them would put undef into slot 0.
      next unless (defined($rid) && defined($sub));
      $subjects[$rid]=$sub;
#      $allgensubs{$sub}=1;
   }
   close($subfh);
}


#foreach my $as (sort(keys(%allgensubs))) {
#   print $as."\n";
#}

#exit;

# my $i=0;

# Report loop.  probs_in_seq.dat holds one "seq<TAB>id,id,..." record per
# line.  For each record one line is written to STDOUT:
#
#    seq <TAB> subject:count,... <TAB> keyword:count,...
#
# Subjects are looked up in @subjects and keywords in @keywords, both
# indexed by the ids of the record.  Every subject is listed with the number
# of ids that carry it.  A keyword is listed only if its count exceeds one
# third of the count of the most frequent keyword in the same record.
my $seqfile='probs_in_seq.dat';
# A missing input file must not silently produce an empty report.
open(my $seqfh,'<',$seqfile) or die "Cannot open $seqfile: $!";
while (my $line=<$seqfh>) {
   # Strip the newline first: otherwise it stays attached to the last id,
   # or to $seq itself when the line has no tab.
   chomp($line);
   my ($seq,$probs)=split(/\t/,$line);
   next unless (defined($seq));          # blank line
   $probs='' unless (defined($probs));   # record without any ids

   my %subjcount=();   # subject => number of ids having that subject
   my %keycount=();    # keyword => number of occurrences in this record
   my $max=0;          # highest value in %keycount
   foreach my $pid (split(/\,/,$probs)) {
      my $subj=$subjects[$pid];
      # False values (undef, '', '0') are not counted, as before.
      if ($subj) { $subjcount{$subj}++; }

      my $keylist=$keywords[$pid];
      next unless (defined($keylist));
      foreach my $tkey (split(/\,/,$keylist)) {
         next unless ($tkey);   # drops empty entries (and a literal '0')
         $keycount{$tkey}++;
         if ($keycount{$tkey}>$max) { $max=$keycount{$tkey}; }
      }
   }

   my $fsubs=join(',',map { $_.':'.$subjcount{$_} } sort(keys(%subjcount)));

   # Keep only the keywords that are reasonably common in this record.
   my $conskeys=join(',',
                     map { $_.':'.$keycount{$_} }
                     grep { $keycount{$_}>$max/3 }
                     sort(keys(%keycount)));

   print $seq."\t".$fsubs."\t".$conskeys."\n";
#   $i++;
#   if ($i>30) { exit; }
}
close($seqfh);




More information about the LON-CAPA-cvs mailing list