[LON-CAPA-cvs] cvs: modules /gerd/harvesting allcourses.pl counttypes.pl getdata.pl getstudents.pl harvestrun.pl harvestsort.pl semestercount.pl

Tue, 14 Jun 2011 15:29:56 -0000

www		Tue Jun 14 15:29:56 2011 EDT

  Added files:                 
    /modules/gerd/harvesting	allcourses.pl counttypes.pl getdata.pl 
                            	getstudents.pl harvestrun.pl 
                            	harvestsort.pl semestercount.pl 
  Log:
  Some helper scripts for harvesting

Index: modules/gerd/harvesting/allcourses.pl
+++ modules/gerd/harvesting/allcourses.pl
use strict;

# List every distinct class found in a tab-separated file.
#
# Usage: perl allcourses.pl <tab-separated-file>
#
# The second tab-separated column of each input line is taken as the class.
# Each distinct class is printed once, one per line, in sorted order.

my $infile=shift;
unless (defined($infile)) { die "Usage: $0 <tab-separated-file>\n"; }
open(my $in,'<',$infile) or die "Cannot open $infile for reading: $!\n";
my %classes=();
while (my $line=<$in>) {
# Strip the line ending first; otherwise a class sitting in the last column
# would be stored (and later printed) with the newline attached.
    chomp($line);
    my (undef,$class)=split(/\t/,$line);
# Lines without a second column carry no class.
    unless (defined($class) && $class ne '') { next; }
    $classes{$class}=1;
}
close($in);
# Sorted so that repeated runs over the same input give identical output.
foreach my $key (sort(keys(%classes))) {
    print $key."\n";
}

Index: modules/gerd/harvesting/counttypes.pl
+++ modules/gerd/harvesting/counttypes.pl
use strict;

# Tally resources by content category.
#
# Usage: perl counttypes.pl <tab-separated-file>
#
# The fifth tab-separated column of every input line is taken as the resource
# URL. Its file extension selects a category; anything that is not recognised
# (including lines without a URL or without an extension) is counted as
# 'other'. The totals are printed as "category:count", sorted by category.

# Category => file extensions belonging to it. Extensions are listed in lower
# case only, because the lookup below is case-insensitive.
my %extensions_for=(
   'problem'  => ['problem','exam','survey','task'],
   'library'  => ['library'],
   'image'    => ['jpg','jpeg','gif','png','ps','eps'],
   'sequence' => ['page','sequence'],
   'html'     => ['html','htm','xml','shtml','pdf','tex'],
   'movie'    => ['mov','qt','mp2','mp3','mp4','wav','movie'],
   'ani'      => ['java','jar','swf','class'],
);

# Inverted table: extension => category.
my %type=();
foreach my $category (keys(%extensions_for)) {
   foreach my $sub (@{$extensions_for{$category}}) {
      $type{$sub}=$category;
   }
}

my %count=();
my $infile=shift;
unless (defined($infile)) { die "Usage: $0 <tab-separated-file>\n"; }
open(my $in,'<',$infile) or die "Cannot open $infile for reading: $!\n";
while (my $line=<$in>) {
   chomp($line);
   my @entries=split(/\t/,$line);
   my $url=$entries[4];
   my $category='other';
   if (defined($url) && $url=~/\.(\w+)$/) {
# Fold case so that e.g. ".PNG" or ".Pdf" land in the same category as their
# lower-case spelling instead of being counted as 'other'.
      my $extension=lc($1);
      if ($type{$extension}) { $category=$type{$extension}; }
   }
   $count{$category}++;
}
close($in);
foreach my $key (sort(keys(%count))) {
   print $key.":".$count{$key}."\n";
}

Index: modules/gerd/harvesting/getdata.pl
+++ modules/gerd/harvesting/getdata.pl
use strict;

# Debugging helper: dump the stored course data of individual students and
# decode the per-part tries/solved values of every versioned resource.
#
# Each record is a colon-separated line whose first four fields are
# course number, course domain, user name and user domain; any further
# fields are ignored here.

# Records whose running number is below $offset are skipped (a dot is
# printed for each one instead).
my $offset=0;

use lib '/home/httpd/lib/perl';
use Apache::lonnet;
my $count=0;

# Records to inspect. Currently a single hard-coded student; to run over a
# whole file instead, fill this list from that file, e.g.
#open(IN,"matrix/allcourses.dat");
#print("Opened reading\n");
#my @lines=<IN>;
#close(IN);
my @lines=('11617e003483fe4oucapa2:ohiou:mg287901:ohiou:1070328580:1062804127');

# A real loop is required here: the "next" below is fatal outside of a loop
# block as soon as $offset is raised above 1.
foreach my $line (@lines) {
   my ($cnum,$cdom,$uname,$udom)=split(/\:/,$line);
   $count++;
   if ($count<$offset) { print "."; next;}
   my %data=&Apache::lonnet::dump($cdom.'_'.$cnum,$udom,$uname);
   print "\n==== $count = $udom = $uname =$cnum = $cdom ====\n";
   foreach my $record (keys(%data)) {

      print $record.' :=  '.$data{$record}."\n";

# A "version:<symb>" record holds the current version number of that
# resource; "<version>:keys:<symb>" then lists the colon-separated names of
# everything stored for that version.
      if ($record=~/^version\:(.+)$/) {
         my $symb=$1;
         my $version=$data{$record};
         my $keys=$data{$version.':keys:'.$symb};
         print "=> Keys $keys\n";
         my @stored=split(/\:/,$keys);
         foreach my $item (@stored) {
# Every "resource.<part>.tries" entry identifies one part of the resource.
            if ($item=~/^resource\.(.+)\.tries$/) {
               my $part=$1;
               print "---> Part: $part\n";
               my $tries=$data{"$version:$symb:resource.$part.tries"};
               my $solved=$data{"$version:$symb:resource.$part.solved"};
               print "---> Tries/Solved: $tries/$solved\n";
            }
         }
      }
   }
}

Index: modules/gerd/harvesting/getstudents.pl
+++ modules/gerd/harvesting/getstudents.pl
use strict;

# Build matrix/allstudents.dat from matrix/allcourses.dat.
#
# Input:  one course per line, written as "<domain>_<number>".
# Output: one line per classlist entry of each course, written as
#         "<number>:<domain>:<classlist key>:<field 0>:<field 1>", where the
#         two fields are the first two colon-separated parts of the classlist
#         value.

# Courses whose running number is below $offset are skipped (a dot is printed
# for each one instead), so an interrupted run can be resumed.
my $offset=0;

use lib '/home/httpd/lib/perl';
use Apache::lonnet;
my $count=0;
my $infile='matrix/allcourses.dat';
my $outfile='matrix/allstudents.dat';
# Fail loudly instead of announcing "Opened ..." for a file that could not
# actually be opened.
open(my $in,'<',$infile) or die "Cannot open $infile for reading: $!\n";
print("Opened reading\n");
open(my $out,'>',$outfile) or die "Cannot open $outfile for writing: $!\n";
print("Opened writing\n");
while (my $line=<$in>) {
   $count++;
   if ($count<$offset) { print "."; next;}
   chomp ($line);
   my ($cdom,$cnum)=(split(/\_/,$line));
# Blank or malformed lines would otherwise trigger a classlist dump with
# undefined arguments.
   unless (defined($cdom) && defined($cnum)) {
      print "\n==== $count = skipped malformed line ====\n";
      next;
   }
   my %classlist=&Apache::lonnet::dump('classlist',$cdom,$cnum);
   print "\n==== $count = $cnum = $cdom ====\n";
   foreach my $student (keys(%classlist)) {
      my @entries=split(/\:/,$classlist{$student});
      print $out "$cnum:$cdom:$student:$entries[0]:$entries[1]\n";
   }
}
# Buffered write errors (e.g. a full disk) only surface when closing.
close($out) or die "Cannot finish writing $outfile: $!\n";
close($in);

Index: modules/gerd/harvesting/harvestrun.pl
+++ modules/gerd/harvesting/harvestrun.pl
#!/usr/bin/perl 
use strict;
use warnings;
use lib '/home/httpd/lib/perl';

use Metadata::Harvest;
use Metadata::Clean;
use Metadata::Load;

# Full metadata pipeline: harvest, clean, sort/de-duplicate, load.
# Each stage hands the name of its result file on to the next stage.

# OUTPUT DIRECTORY
my $outdir = '/home/korte/data';

# Stage 1: harvest.
my $harvester = Metadata::Harvest->new(debug=>1);
print "Harvesting...\n";
my $datafile = $harvester->harvest();

# Stage 2: clean the harvested file.
my @clean_args = (
    file   => $datafile,
    out    => $outdir,
    debug  => 10,
    remove => 1,
);
my $cleaner = Metadata::Clean->new(@clean_args);
print "Cleaning...\n";
$datafile = $cleaner->clean();

# Stage 3: sort the file in place and drop duplicate lines.
# "sort -u" is used rather than "sort | uniq" because, per the original
# author, uniq shortens lines; for now this step has to be done by hand
# through the external sort program.
print "Sorting...\n";
my @sort_command = ("sort", "-u", "-o", $datafile, $datafile);
if (system(@sort_command) != 0) {
    die $?;
}

# Stage 4: load the sorted file.
my @load_args = (
    debug  => 10,
    intodb => 0,
    out    => $outdir,
    file   => $datafile,
    remove => 1,
);
my $loader = Metadata::Load->new(@load_args);

print "Loading...\n";
my $result_location = $loader->load();
print "Files can be found in:" . $result_location;

Index: modules/gerd/harvesting/harvestsort.pl
+++ modules/gerd/harvesting/harvestsort.pl
#!/usr/bin/perl 
use strict;
use warnings;
use lib '/home/httpd/lib/perl';

use Metadata::Harvest;
use Metadata::Clean;
use Metadata::Load;

# Partial metadata pipeline: sort/de-duplicate an already cleaned file and
# load it. The harvest and clean stages are disabled; their code is kept
# below for reference.

# OUTPUT DIRECTORY
my $outdir = '/home/korte/data';

#my $h = Metadata::Harvest->new(debug=>1);
#print "Harvesting...\n";
#$file = $h->harvest();
#my $c = Metadata::Clean->new(file  =>$file,
#                             out   =>$dir,
#                             debug =>10,
#                             remove=>1, );
#print "Cleaning...\n";
#$file = $c->clean();

# Previously cleaned data that stands in for the output of the clean stage.
my $datafile = '/home/httpd/perl/tmp/oldclean.dat';

# Sort the file in place and drop duplicate lines.
# "sort -u" is used rather than "sort | uniq" because, per the original
# author, uniq shortens lines; for now this step has to be done by hand
# through the external sort program.
print "Sorting...\n";
my @sort_command = ("sort", "-u", "-o", $datafile, $datafile);
if (system(@sort_command) != 0) {
    die $?;
}

# Load the sorted file.
my @load_args = (
    debug  => 10,
    intodb => 0,
    out    => $outdir,
    file   => $datafile,
    remove => 1,
);
my $loader = Metadata::Load->new(@load_args);

print "Loading...\n";
my $result_location = $loader->load();
print "Files can be found in:" . $result_location;

Index: modules/gerd/harvesting/semestercount.pl
+++ modules/gerd/harvesting/semestercount.pl
use strict;

# Count enrollments and distinct students that overlap a time window.
#
# Reads matrix/allstudents.dat, whose lines are colon-separated. Fields 2 and
# 3 together identify the student; field 5 is treated as the enrollment start
# and field 4 as the enrollment end (both compared numerically against the
# window below). Lines with a missing or zero start or end are ignored.
#
# Prints the window, then the number of matching enrollment lines followed
# by the number of distinct students among them.

# Window boundaries as epoch seconds; an alternative window is kept below.
#my $start=1275368400;
#my $end=1293775200;
my $start=1293861600;
my $end=1306904400;

my %students;
print "Locatime ".localtime($start).' '.localtime($end)."\n";
my $infile='matrix/allstudents.dat';
open(my $in,'<',$infile) or die "Cannot open $infile for reading: $!\n";
my $count=0;
while (my $line=<$in>) {
    chomp($line);
    my @entries=split(/\:/,$line);
# start: must be set and must not lie after the end of the window
    unless ($entries[5]) { next; }
    if ($entries[5]>$end) { next; }
# end: must be set and must not lie before the start of the window
    unless ($entries[4]) { next; }
    if ($entries[4]<$start) { next; }
#    print "From ".localtime($entries[5])." to ".localtime($entries[4])."\n";
    $count++;
    $students{$entries[2].':'.$entries[3]}=1;
}
close($in);
# Number of distinct students. Note that $#array would be the last index,
# i.e. one less than the number of elements (and -1 for an empty list).
my $distinct=scalar(keys(%students));
print "\n Count: $count $distinct\n";