[LON-CAPA-cvs] cvs: modules /gerd/harvesting seq_keywords.pl
www
www at source.lon-capa.org
Thu Sep 8 18:25:56 EDT 2011
www Thu Sep 8 22:25:56 2011 EDT
Added files:
/modules/gerd/harvesting seq_keywords.pl
Log:
Collect all keywords associated with an assignment
Index: modules/gerd/harvesting/seq_keywords.pl
+++ modules/gerd/harvesting/seq_keywords.pl
use strict;
# First pass: read keywords.dat (tab-separated: id, keyword) and build
# @keywords, using the id in column one as the array index. Each element
# ends up as a comma-separated string of the lower-cased keywords seen
# for that id.
#
# Keywords are appended to their slot line by line rather than being
# buffered until the id changes, so the final id in the file is stored
# too (buffering per run silently dropped the last group).
my @keywords=();
open(my $keywords_fh,'<','keywords.dat')
    or die "Cannot open keywords.dat: $!";
while (my $line=<$keywords_fh>) {
    chomp($line);
    my ($id,$key)=split(/\t/,$line);
    # Blank lines (or lines starting with a tab) carry no usable id.
    next unless (defined($id) && length($id));
    # A line without a second column contributes an empty keyword, which
    # is harmless: empty entries are skipped when the list is split again.
    my $lckey=(defined($key) ? lc($key) : '');
    if (defined($keywords[$id])) {
        $keywords[$id].=','.$lckey;
    }
    else {
        $keywords[$id]=$lckey;
    }
}
close($keywords_fh);
# Second pass: read res_topics_from_courses.dat (tab-separated: resource
# id, subject) and build @subjects, using the id in column one as the
# array index and the subject string as the value. If an id occurs more
# than once, the last line wins.
my @subjects=();
#my %allgensubs=();
open(my $subjects_fh,'<','res_topics_from_courses.dat')
    or die "Cannot open res_topics_from_courses.dat: $!";
while (my $line=<$subjects_fh>) {
    chomp($line);
    my ($rid,$sub)=split(/\t/,$line);
    # Blank lines have no id; do not let them clobber element 0.
    next unless (defined($rid) && length($rid));
    $subjects[$rid]=$sub;
# $allgensubs{$sub}=1;
}
close($subjects_fh);
#foreach my $as (sort(keys(%allgensubs))) {
# print $as."\n";
#}
#exit;
# my $i=0;
# Third pass: for every line of probs_in_seq.dat (tab-separated: sequence,
# comma-separated problem ids) print one tab-separated output line:
#
#   sequence <TAB> subject:count,... <TAB> keyword:count,...
#
# Subjects and keywords are looked up per problem id in @subjects and
# @keywords, tallied, and listed in sorted order. Only keywords that occur
# more than a third as often as the most frequent keyword of the sequence
# are listed.
open(my $seq_fh,'<','probs_in_seq.dat')
    or die "Cannot open probs_in_seq.dat: $!";
while (my $line=<$seq_fh>) {
    # Strip the newline so it does not end up glued to the last problem id.
    chomp($line);
    my ($seq,$probs)=split(/\t/,$line);
    next unless defined($seq);              # blank line
    $probs='' unless defined($probs);       # sequence without problems

    # Gather the keywords and tally the subjects of all problems.
    my %allsubj=();
    my @allkeys=();
    foreach my $pid (split(/\,/,$probs)) {
        my $keystr=$keywords[$pid];
        if (defined($keystr)) { push(@allkeys,split(/\,/,$keystr)); }
        my $subject=$subjects[$pid];
        if (defined($subject)) { $allsubj{$subject}++; }
    }

    # Subject column. Empty subjects are left out (truthiness test, so a
    # literal "0" is left out as well).
    my $fsubs=join(',',
                   map { $_.':'.$allsubj{$_} }
                   grep { $_ }
                   sort(keys(%allsubj)));
    print $seq."\t".$fsubs."\t";

    # Count each keyword and remember the highest count.
    my %skeys=();
    my $max=0;
    foreach my $tkey (@allkeys) {
        next unless ($tkey);
        $skeys{$tkey}++;
        if ($skeys{$tkey}>$max) { $max=$skeys{$tkey}; }
    }

    # Keyword column: keep only keywords above one third of the maximum.
    my $conskeys=join(',',
                      map { $_.':'.$skeys{$_} }
                      grep { $skeys{$_}>$max/3. }
                      sort(keys(%skeys)));
    print $conskeys."\n";
# $i++;
# if ($i>30) { exit; }
}
close($seq_fh);
More information about the LON-CAPA-cvs
mailing list