[LON-CAPA-cvs] cvs: modules /gerd/harvesting problem_taxonomy.pl
www
www at source.lon-capa.org
Fri Sep 9 20:45:44 EDT 2011
www Sat Sep 10 00:45:44 2011 EDT
Added files:
/modules/gerd/harvesting problem_taxonomy.pl
Log:
Final step: add taxonomies to problems
Index: modules/gerd/harvesting/problem_taxonomy.pl
+++ modules/gerd/harvesting/problem_taxonomy.pl
use strict;
# Load the sequence -> taxonomy table from 'seq_taxonomy.dat'.
# Each line is split on tabs into (sequence, taxonomy); if a sequence
# appears more than once, the last line read wins.
my %st=();
# Three-argument open on a lexical handle, and fail loudly: with an
# unreadable table every problem would silently get no taxonomy at all.
open(my $seq_fh,'<','seq_taxonomy.dat')
    or die "Cannot open seq_taxonomy.dat: $!\n";
while (my $line=<$seq_fh>) {
    chomp($line);
    my ($seq,$tax)=split(/\t/,$line);
    $st{$seq}=$tax;
}
close($seq_fh);
# Build, per problem number, a comma-separated list of the taxonomies of
# every sequence that contains the problem. Input lines in
# 'probs_in_seq.dat' are split on a tab into (sequence, problem list),
# and the problem list is split on commas.
my @taxo=();
# Checked three-argument open on a lexical handle: a missing input file
# must stop the run instead of producing an empty report.
open(my $probs_fh,'<','probs_in_seq.dat')
    or die "Cannot open probs_in_seq.dat: $!\n";
while (my $line=<$probs_fh>) {
    chomp($line);
    my ($seq,$probs)=split(/\t/,$line);
    foreach my $pn (split(/\,/,$probs)) {
        # Always append, even when the sequence has no taxonomy entry:
        # this keeps @taxo extended up to every listed problem number.
        # The resulting empty items are filtered when the list is read.
        $taxo[$pn].=','.$st{$seq};
    }
}
close($probs_fh);
# Collect the lower-cased keywords of each id from 'keywords.dat' into
# @keywords, indexed by id, as one comma-separated string per id.
# Input lines are split on a tab into (id, keyword). A group is stored
# when the id changes, so all lines for one id have to be adjacent; an
# id that shows up again later overwrites its earlier entry.
my @keywords=();
my $keystr='';
my ($id,$key)=(1,'');
# Checked three-argument open on a lexical handle so a missing file is
# reported instead of silently leaving @keywords empty.
open(my $key_fh,'<','keywords.dat')
    or die "Cannot open keywords.dat: $!\n";
while (my $line=<$key_fh>) {
    my $oid=$id;
    chomp($line);
    ($id,$key)=split(/\t/,$line);
    if ($id!=$oid) {
        # The id changed: store what was accumulated for the previous id.
        $keystr=~s/^\,//;
        $keywords[$oid]=$keystr;
        $keystr='';
    }
    $keystr.=','.lc($key);
}
close($key_fh);
# Store the final group as well. The loop only stores a group once the
# *next* id has been read, so without this step the keywords of the last
# id in the file would never reach @keywords.
if (defined($id) && $keystr ne '') {
    $keystr=~s/^\,//;
    $keywords[$id]=$keystr;
}
# Report: one line per index of @taxo, "<index>\t<taxonomy>:<count>,...",
# with the taxonomies in sorted order. Indices that carry more than one
# distinct taxonomy additionally get a "KEYS:" line with their keywords.
foreach my $i (0..$#taxo) {
    # Tally how often each taxonomy was attached to this index.
    my %tally=();
    $tally{$_}++ foreach (split(/\,/,$taxo[$i]));
    # Keep only entries containing a word character; this drops the
    # empty items produced by leading or doubled commas.
    my @named=grep { /\w/ } sort(keys(%tally));
    my $summary=join(',',map { $_.':'.$tally{$_} } @named);
    print $i."\t".$summary."\n";
    print "KEYS: $i $keywords[$i]\n" if (scalar(@named)>1);
}
More information about the LON-CAPA-cvs
mailing list