[LON-CAPA-cvs] cvs: modules /gerd/harvesting allcourses.pl counttypes.pl getdata.pl getstudents.pl harvestrun.pl harvestsort.pl semestercount.pl
www
www@source.lon-capa.org
Tue, 14 Jun 2011 15:29:56 -0000
www Tue Jun 14 15:29:56 2011 EDT
Added files:
/modules/gerd/harvesting allcourses.pl counttypes.pl getdata.pl
getstudents.pl harvestrun.pl
harvestsort.pl semestercount.pl
Log:
Some helper scripts for harvesting
Index: modules/gerd/harvesting/allcourses.pl
+++ modules/gerd/harvesting/allcourses.pl
use strict;

# allcourses.pl - list the distinct values of the second tab-separated
# column of a file.
#
# Usage: allcourses.pl FILE
#
# Every distinct second-column value is printed exactly once, in hash
# (i.e. unspecified) order.  Lines are deliberately not chomped: when the
# second column is the last field of a line its value still ends in the
# line's newline, which is what separates the entries in the output.

my $infile = shift;
defined($infile) or die "Usage: $0 FILE\n";

# Three-argument open on a lexical handle: the name coming from @ARGV can
# no longer be read as a mode or a pipe, and a failed open is reported
# instead of silently producing empty output.
open(my $in, '<', $infile) or die "Cannot open $infile: $!\n";

my %classes = ();
while (my $line = <$in>) {
    # The first column is not needed here, only the second one.
    my (undef, $class) = split(/\t/, $line);

    # A line without a tab has no second column.
    next unless defined($class);

    $classes{$class} = 1;
}
close($in);

foreach my $key (keys(%classes)) {
    print $key;
}
Index: modules/gerd/harvesting/counttypes.pl
+++ modules/gerd/harvesting/counttypes.pl
use strict;

# counttypes.pl - count entries per resource category, where the category
# is derived from the file extension of a URL.
#
# Usage: counttypes.pl FILE
#
# FILE is tab-separated; the URL is taken from the fifth column (index 4).
# The output is one "category:count" line per category that occurred,
# sorted by category name.  Anything whose extension is missing or not in
# the table below is counted as "other".

# Category => extensions belonging to it.  Matching is case-sensitive, so
# upper-case variants only count when they are listed explicitly.
my %extensions_for = (
    'problem'  => ['problem', 'exam', 'survey', 'task'],
    'library'  => ['library'],
    'image'    => ['jpg', 'jpeg', 'gif', 'GIF', 'png', 'ps', 'eps', 'JPG', 'JPEG'],
    'sequence' => ['page', 'sequence'],
    'html'     => ['html', 'htm', 'xml', 'HTM', 'HTML', 'shtml', 'pdf', 'tex'],
    'movie'    => ['mov', 'qt', 'mp2', 'mp3', 'mp4', 'wav', 'movie'],
    'ani'      => ['java', 'jar', 'swf', 'class'],
);

# Invert the table into the extension => category lookup used per line.
my %type = ();
foreach my $category (keys(%extensions_for)) {
    foreach my $sub (@{ $extensions_for{$category} }) {
        $type{$sub} = $category;
    }
}

my $infile = shift;
defined($infile) or die "Usage: $0 FILE\n";

# Three-argument open on a lexical handle, with the result checked, so an
# unreadable or oddly named file is reported rather than ignored.
open(my $in, '<', $infile) or die "Cannot open $infile: $!\n";

my %count = ();
while (my $line = <$in>) {
    my @entries = split(/\t/, $line);
    my $url = $entries[4];

    # The extension is the trailing run of word characters after the last
    # dot; "$" also matches before a final newline, so no chomp is needed.
    my $extension;
    if (defined($url)) {
        ($extension) = ($url =~ /\.(\w+)$/);
    }

    if (defined($extension) && $type{$extension}) {
        $count{ $type{$extension} }++;
    } else {
        # Short lines, URLs without an extension and unknown extensions.
        $count{'other'}++;
    }
}
close($in);

foreach my $key (sort(keys(%count))) {
    print $key.":".$count{$key}."\n";
}
Index: modules/gerd/harvesting/getdata.pl
+++ modules/gerd/harvesting/getdata.pl
use strict;
use lib '/home/httpd/lib/perl';
use Apache::lonnet;

# getdata.pl - dump the stored performance data of one student in one
# course and print, for every versioned resource, the tries and solved
# status of each part.
#
# The script currently works on a single hard-coded record.  An earlier
# variant read its records from matrix/allcourses.dat; to go back to batch
# processing, fill @records from a file instead.
#
# Record layout (colon-separated); only the first four fields are used:
#   course number : course domain : user name : user domain : ...

# Number of leading records to skip; each skipped record prints a ".".
my $offset=0;
my $count=0;

my @records=(
    '11617e003483fe4oucapa2:ohiou:mg287901:ohiou:1070328580:1062804127',
);

# The per-record work lives in a real loop again: the original had the
# enclosing "while" commented out but kept its "next", which Perl rejects
# at run time ("Can't "next" outside a loop block") as soon as $offset is
# raised enough for the skip branch to be taken.
foreach my $line (@records) {
    my ($cnum,$cdom,$uname,$udom)=split(/\:/,$line);
    $count++;
    if ($count<$offset) { print "."; next; }

    # The namespace passed to dump is "<course domain>_<course number>",
    # looked up for the given user domain and user name.
    my %data=&Apache::lonnet::dump($cdom.'_'.$cnum,$udom,$uname);
    print "\n==== $count = $udom = $uname =$cnum = $cdom ====\n";

    foreach my $record (keys(%data)) {
        print $record.' := '.$data{$record}."\n";

        # Only "version:<symb>" records lead to further detail; their
        # value is the version number used as prefix of the other keys.
        next unless ($record=~/^version\:(.+)$/);
        my $symb=$1;
        my $version=$data{$record};

        # "<version>:keys:<symb>" holds a colon-separated list of the
        # item names stored for that version.
        my $keys=$data{$version.':keys:'.$symb};
        print "=> Keys $keys\n";
        my @stored=split(/\:/,$keys);

        foreach my $item (@stored) {
            # Every part of the resource shows up as resource.<part>.tries
            next unless ($item=~/^resource\.(.+)\.tries$/);
            my $part=$1;
            print "---> Part: $part\n";
            my $tries=$data{"$version:$symb:resource.$part.tries"};
            my $solved=$data{"$version:$symb:resource.$part.solved"};
            print "---> Tries/Solved: $tries/$solved\n";
        }
    }
}
Index: modules/gerd/harvesting/getstudents.pl
+++ modules/gerd/harvesting/getstudents.pl
use strict;
use lib '/home/httpd/lib/perl';
use Apache::lonnet;

# getstudents.pl - write the class list of every course listed in
# matrix/allcourses.dat to matrix/allstudents.dat.
#
# Input:  one course per line, in the form "<domain>_<number>".
# Output: one line per class list entry:
#           <number>:<domain>:<classlist key>:<field 0>:<field 1>
#         where field 0 and field 1 are the first two colon-separated
#         fields of the class list value.
#         NOTE(review): semestercount.pl reads these two fields as the
#         end and start date of the enrollment - confirm against the
#         class list format.
#
# Progress is reported on STDOUT.

# Number of leading input lines to skip; each skipped line prints a ".".
my $offset=0;
my $count=0;

my $infile="matrix/allcourses.dat";
my $outfile="matrix/allstudents.dat";

# Both opens are checked, so the "Opened ..." messages are only printed
# when they are true and the script never loops over a dead handle.
open(my $in,'<',$infile) or die "Cannot open $infile for reading: $!\n";
print("Opened reading\n");
open(my $out,'>',$outfile) or die "Cannot open $outfile for writing: $!\n";
print("Opened writing\n");

while (my $line=<$in>) {
    $count++;
    if ($count<$offset) { print "."; next; }
    chomp($line);
    my ($cdom,$cnum)=(split(/\_/,$line));

    my %classlist=&Apache::lonnet::dump('classlist',$cdom,$cnum);
    print "\n==== $count = $cnum = $cdom ====\n";

    foreach my $student (keys(%classlist)) {
        my @entries=split(/\:/,$classlist{$student});
        print $out "$cnum:$cdom:$student:$entries[0]:$entries[1]\n";
    }
}

# Buffered write errors (e.g. a full disk) only surface at close.
close($out) or die "Cannot close $outfile: $!\n";
close($in);
Index: modules/gerd/harvesting/harvestrun.pl
+++ modules/gerd/harvesting/harvestrun.pl
#!/usr/bin/perl
# harvestrun.pl - run the complete metadata pipeline in one go:
# harvest, clean, sort/deduplicate, load.
use strict;
use warnings;
use lib '/home/httpd/lib/perl';
use Metadata::Harvest;
use Metadata::Clean;
use Metadata::Load;

# Output directory, handed to the clean and load steps as "out".
my $output_dir = '/home/korte/data';

# --- Harvest: yields the name of the file that the next step works on.
my $harvester = Metadata::Harvest->new(debug=>1);
print "Harvesting...\n";
my $datafile = $harvester->harvest();

# --- Clean: yields the name of the cleaned file.
my $cleaner = Metadata::Clean->new(
    file   => $datafile,
    out    => $output_dir,
    debug  => 10,
    remove => 1,
);
print "Cleaning...\n";
$datafile = $cleaner->clean();

# --- Sort in place and drop duplicate lines.  "sort -u" is used instead
# of "sort | uniq" because uniq shortens lines; for now this step has to
# be done by hand with an external command.
print "Sorting...\n";
my @sort_command = ('sort', '-u', '-o', $datafile, $datafile);
unless (system(@sort_command) == 0) {
    die $?;
}

# --- Load: intodb is switched off here.
my $loader = Metadata::Load->new(
    debug  => 10,
    intodb => 0,
    out    => $output_dir,
    file   => $datafile,
    remove => 1,
);
print "Loading...\n";
my $location = $loader->load();
print "Files can be found in:" . $location;
Index: modules/gerd/harvesting/harvestsort.pl
+++ modules/gerd/harvesting/harvestsort.pl
#!/usr/bin/perl
# harvestsort.pl - the tail end of the metadata pipeline only:
# sort/deduplicate an already cleaned file, then load it.
#
# The harvest and clean steps (Metadata::Harvest->new(debug=>1)->harvest()
# followed by Metadata::Clean->new(file, out, debug=>10, remove=>1)->clean())
# are intentionally not run here; a fixed, previously cleaned file is used
# instead.
use strict;
use warnings;
use lib '/home/httpd/lib/perl';
use Metadata::Harvest;
use Metadata::Clean;
use Metadata::Load;

# Output directory, handed to the load step as "out".
my $output_dir = '/home/korte/data';

# Previously cleaned data; note that it is sorted in place below.
my $datafile = '/home/httpd/perl/tmp/oldclean.dat';

# --- Sort in place and drop duplicate lines.  "sort -u" is used instead
# of "sort | uniq" because uniq shortens lines; for now this step has to
# be done by hand with an external command.
print "Sorting...\n";
my @sort_command = ('sort', '-u', '-o', $datafile, $datafile);
unless (system(@sort_command) == 0) {
    die $?;
}

# --- Load: intodb is switched off here.
my $loader = Metadata::Load->new(
    debug  => 10,
    intodb => 0,
    out    => $output_dir,
    file   => $datafile,
    remove => 1,
);
print "Loading...\n";
my $location = $loader->load();
print "Files can be found in:" . $location;
Index: modules/gerd/harvesting/semestercount.pl
+++ modules/gerd/harvesting/semestercount.pl
use strict;

# semestercount.pl - count enrollments that overlap a time window, and the
# number of distinct students behind them.
#
# Input: matrix/allstudents.dat, colon-separated.  Fields 2 and 3 together
# identify the student; field 4 is treated as the end and field 5 as the
# start of the enrollment, both compared numerically with the window below.
#
# Output: the window bounds as local time, then the number of matching
# lines followed by the number of distinct students.

# Window bounds in epoch seconds.  The commented-out pair is an
# alternative window kept for reuse.
#my $start=1275368400;
#my $end=1293775200;
my $start=1293861600;
my $end=1306904400;

my %students;
print "Locatime ".localtime($start).' '.localtime($end)."\n";

my $infile="matrix/allstudents.dat";
# Checked three-argument open: a missing input file is reported instead
# of being silently counted as "no enrollments".
open(my $in,'<',$infile) or die "Cannot open $infile: $!\n";

my $count=0;
while (my $line=<$in>) {
    chomp($line);
    my @entries=split(/\:/,$line);

    # Start: must be set and must not lie after the end of the window.
    unless ($entries[5]) { next; }
    if ($entries[5]>$end) { next; }

    # End: must be set and must not lie before the start of the window.
    unless ($entries[4]) { next; }
    if ($entries[4]<$start) { next; }

    $count++;
    $students{$entries[2].':'.$entries[3]}=1;
}
close($in);

# Number of distinct students.  The original printed $#stud, which is the
# last index of the key list and therefore one less than the real count
# (and -1 when nothing matched).
my $distinct=scalar(keys(%students));
print "\n Count: $count $distinct\n";