[LON-CAPA-cvs] cvs: modules /raeburn FIE_usage.pl

raeburn lon-capa-cvs@mail.lon-capa.org
Fri, 26 May 2006 14:12:47 -0000


This is a MIME encoded message

--raeburn1148652767
Content-Type: text/plain

raeburn		Fri May 26 10:12:47 2006 EDT

  Modified files:              
    /modules/raeburn	FIE_usage.pl 
  Log:
  Add comments.  Default is to anonymize usernames, domains and names of course owners. Now requires config file - config.txt, including ability to de-anonymize usernames, domains and names of course owners.
  
  
--raeburn1148652767
Content-Type: text/plain
Content-Disposition: attachment; filename="raeburn-20060526101247.txt"

Index: modules/raeburn/FIE_usage.pl
diff -u modules/raeburn/FIE_usage.pl:1.6 modules/raeburn/FIE_usage.pl:1.7
--- modules/raeburn/FIE_usage.pl:1.6	Thu May 25 18:12:33 2006
+++ modules/raeburn/FIE_usage.pl	Fri May 26 10:12:46 2006
@@ -13,10 +13,95 @@
 use LONCAPA::Configuration;
 use GDBM_File;
 use HTML::LCParser;
-                                                                              
+
+# FIE_usage.pl
+#
+# Run as www on a library server
+#
+# This script which gathers usage data for LON-CAPA courses in a 
+# particular domain requires a configuration file - config.txt
+#
+# The config file contains key,value pairs separated by ':', one
+# pair per line.
+# If the value for a particular key needs to contain multiple
+# values, then they are comma separated.
+# The following keys are found in the config file:
+#
+# anonymous,domain,dc,filter,coursefilters,startenroll,courseids
+# where ...
+# anonymous is set to no, if user names and owner information should be stored.
+#    the default is for anonymous to be set to yes.
+#    in this case, the actual username is replaced with user01, user02 etc.
+#                  the actual owner username is replaced with owner001 etc.
+# domain is the LON-CAPA domain being investigated (e.g., msu)
+# dc is the username of a domain coordinator in the domain
+# filter is the type of filter to be used to identify which courses to
+#      include in the data collection
+#      allowed values are: instcode, enrollment or courseid
+#          - instcode to filter by institutional code 
+#          - enrollment to filter by default start and end enrollment dates
+#          - courseid to use a specified set of LON-CAPA courseIDs
+#   if filter = instcode, then a value should be supplied for coursefilters
+#
+# coursefilters is the choice of coursecodes (or parts of codes) 
+#      e.g., fs05,ss06
+#
+#   if filter = enrollment
+#        values need to be supplied for startopen and startclose
+# startopen - time in the format YYYY-MM-DD
+# startclose -   time in the format YYYY-MM-DD           
+#        courses with default start enrollment times falling between
+#         startopen and startclose will be included in data collection     
+#
+#   if filter = courseid, then a value should be supplied for courseids
+#
+# courseids is the set of LON-CAPA courseIDs to be included in data collection
+#  e.g., msu_402683446469c4368msul1,msu_40171337dbb41aemsul1 
+#
+# A stub config.txt file is included with this script in the LC_usage directory. 
+#
+# An example config file might be:
+#
+# anonymous:yes
+# domain:msu
+# dc:fietester
+# filter:instcode
+# coursefilters:fs05,ss06
+# startopen:
+# startclose:
+# courseids:
+#
+# The following files will be written to in the directory in which the 
+# FIE_usage.pl script is run.
+#
+# fie-data-summary.txt
+# fie-data-domain-$dom.txt  (where $dom is the domain, e.g., msu)
+# fie-data-$sem-$cid.txt  (where $sem is the semester, e.g., ss05, if 
+#   the institutional course code begins with two word characters
+#   followed by two digits, which are taken to be the semester code;
+#   otherwise $sem will be the institutional coursecode).
+# fie-data-$uname-$udom.dat (where $uname is the username of the course owner,
+#   and $udom is the domain of the course owner).
+#
+# The summary data file is a text file in CSV format, with a first row containing
+# column titles:
+# ID,CourseCode,Enrollment,Cloned,Items,Problems,Exams,Surveys,Allproblems,Essay,Match,Numerical,Option,Radiobutton,String,Formula,Rank,Images,NumResponses,gnuplot,multipart,Scripts,Functions,Duplicates,Hidden,Encrypted,Conditional,Randompicks,Embedded-res,Embedded-upload,Scantron,Imported,Homegrown,HomeDomain,Uploaded,WithDiscussion,NoDiscussion,TotalPosts,Broadcast,Critical,UserNotes,examupload,Calendar
+#
+# and then subsequent rows with data for each course.
+# 
+
+open(CONFIG,'<','config.txt') || die("Can't open configuration file - config.txt: $!");
+chomp(my @buffer = <CONFIG>);   # strip newlines so values compare cleanly (e.g. anonymous eq 'no')
+close(CONFIG);
+my %config = ();                # key => value, one entry per "key:value" line of config.txt
+foreach my $line (@buffer) {
+    my ($key,$value) = split(/:/,$line,2);   # split on the first ':' only, so values may contain ':'
+    $config{$key} = $value if (defined($key) && $key ne '');   # ignore blank or key-less lines
+}
+
 # Determine the library server's domain and hostID
 my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
-my $logfile = $$perlvarref{'lonDaemons'}.'/logs/usage.log';
+my $hostid = $$perlvarref{'lonHostID'};
 my @domains = &Apache::lonnet::current_machine_domains();
 my @hostids = &Apache::lonnet::current_machine_ids();
 my @allowners = ();
@@ -75,7 +160,7 @@
 my %numscantrons = ();
 my %studentphotos = ();
 my %savedsummary = ();
-@domains = ('msu');
+my $ownernum = 0;
 
 # Determine the present time;
 my $timenow = time();
@@ -84,12 +169,25 @@
 open (SUMM,">fie-data-summary.txt");
 print SUMM "ID,CourseCode,Enrollment,Cloned,Items,Problems,Exams,Surveys,Allproblems,Essay,Match,Numerical,Option,Radiobutton,String,Formula,Rank,Images,NumResponses,gnuplot,multipart,Scripts,Functions,Duplicates,Hidden,Encrypted,Conditional,Randompicks,Embedded-res,Embedded-upload,Scantron,Imported,Homegrown,HomeDomain,Uploaded,WithDiscussion,NoDiscussion,TotalPosts,Broadcast,Critical,UserNotes,examupload,Calendar\n";
 
-# For each domain ......
-foreach my $dom (@domains) {
+my $dom = $config{'domain'};
+if (grep/^$dom$/,@domains) {
     open(LOG,">fie-data-domain-$dom.txt");
-    my $dc = 'fietester';
-    my $lonidsdir='/home/httpd/lonIDs';
-    my $handle = 'fietester_1142872930_msu_msul1';
+    my $dc = $config{'dc'}; 
+    my $lonidsdir= $$perlvarref{'lonIDsDir'};
+    opendir(DIR,$lonidsdir) || die("Can't open session directory $lonidsdir: $!");   # opendir takes a plain directory name, no '<' mode prefix
+    my @lonids = readdir(DIR);
+    closedir(DIR);
+    my $handle;                     # first entry named <dc>_<digits>_<domain>_<hostid>
+    foreach my $file (@lonids) {
+        if ($file =~ /^\Q$dc\E_\d+_\Q$dom\E_\Q$hostid\E$/) {   # NOTE(review): end-anchored; if session files carry a suffix this never matches - confirm
+            $handle = $file;
+            last;
+        }
+    }
+    if (!$handle) {
+        print "Could not find a session token for $dc in $dom in $lonidsdir on $hostid server\n";
+        exit(1);                    # this code is at file scope, where 'return' is a run-time error
+    }
     $env{'user.name'} = $dc;
     my $authhost = &Apache::lonnet::homeserver($dc,$dom);
     $env{'user.home'} = $authhost;
@@ -98,15 +196,49 @@
     &Apache::lonnet::transfer_profile_to_env($lonidsdir,$handle);
     $env{'allowed.bre'} = 'F';
     $env{'request.role.adv'} = '1';
-    my $now=time;
-    my $then=$env{'user.login.time'};
-    my @instcodes = ('fs05');
     my %courses = ();
-    foreach my $code (@instcodes) {
-      %{$courses{$code}} = &Apache::lonnet::courseiddump($dom,'.',1,$code,'.','.',1,\@hostids);
-#    %courses = ('northwood5_1w268410c6d0744e2northwood5l1' => 'Some course'); 
-#      %{$courses{$code}} = ('msu_153926ccad542e6msul1' => 'phy');
-      foreach my $cid (sort keys %{$courses{$code}}) {
+    if ($config{'filter'} eq 'instcode') {   # one courseiddump call per comma-separated entry in coursefilters
+        my @instcodes = split(/,/,$config{'coursefilters'});
+        foreach my $code (@instcodes) {
+            my %coursehash = &Apache::lonnet::courseiddump($dom,'.',1,$code,'.','.',1,\@hostids);   # presumably the 4th argument is the institutional-code filter - confirm against lonnet
+            foreach my $key (sort(keys %coursehash)) {
+                $courses{$key} = $code;   # %courses maps courseID => the code that selected it
+            }
+        }
+    } elsif ($config{'filter'} eq 'enrollment') {   # select on each course's default_enrollment_start_date
+        my %coursehash = &Apache::lonnet::courseiddump($dom,'.',1,'.','.','.',1,\@hostids);
+        foreach my $key (sort(keys %coursehash)) {
+            my ($cdom,$cnum) = split(/_/,$key);   # courseID is <domain>_<course number>
+            my %courseenv = &Apache::lonnet::dump('environment',$cdom,$cnum);
+            my $startenroll = $courseenv{'default_enrollment_start_date'};
+            if (($config{'startenroll'} >= $startenroll) && ($startenroll)) {   # NOTE(review): reads key 'startenroll' with a numeric compare; the header comment documents startopen/startclose as YYYY-MM-DD - confirm intended keys and format
+                my $code = $courseenv{'coursecode'};
+                if (!defined($code)) {
+                    $code = 'none';   # placeholder when the course has no coursecode entry
+                } 
+                $courses{$key} = $code;
+            }
+        }
+    } elsif ($config{'filter'} eq 'courseid') {   # keep only the listed courseIDs that courseiddump also returns
+        my %coursehash = &Apache::lonnet::courseiddump($dom,'.',1,'.','.','.',1,\@hostids);
+        my @courses = split(/,/,$config{'courseids'});   # array @courses is distinct from the hash %courses filled below
+        foreach my $cid (@courses) {
+            if (defined($coursehash{$cid})) {
+                my ($cdom,$cnum) = split(/_/,$cid);
+                my %courseenv = &Apache::lonnet::dump('environment',$cdom,$cnum);
+                my $code = $courseenv{'coursecode'};
+                if (!defined($code)) {
+                    $code = 'none';   # placeholder when the course has no coursecode entry
+                }
+                $courses{$cid} = $code;
+            }
+        }
+    }
+    my @excluded_cc = ();   # usernames that must never be chosen as the fallback course owner
+    if (defined($config{'excluded_cc'})) {
+        @excluded_cc = split(/,/,$config{'excluded_cc'});   # must read the same key that is tested above
+    }
+    foreach my $cid (sort keys %courses) {
         my %summary = (); 
         print "course is $cid\n";
         $summary{id} = $cid;
@@ -135,10 +267,8 @@
             }
         }
         my %crsenv = &Apache::lonnet::dump('environment',$cdom,$crs);
-        my $owner;
-        my $ownerdom;
-        my $ownername;
-        my $ownermail;
+        my ($owner,$ownerdom,$ownername,$ownermail,$ownertext,$ownerdomtext,
+            $ownernametext,$ownermailtext);
 
         $owner = $crsenv{'internal.courseowner'};
         $cparms{$cid} = { 
@@ -169,7 +299,7 @@
                     my ($uname,$udom) = split/:/,$_;
                     if ($uname eq $owner) {
                         $ownerdom = $udom;
-                    } elsif ($uname ne 'felicia' && $uname ne 'raeburn' && $uname ne 'albertelli') {
+                    } elsif (!grep/^$uname$/,@excluded_cc) {
                         if (!$owner) {
                             $owner = $uname;
                             $ownerdom = $udom;
@@ -179,10 +309,22 @@
             }
         }
         if ($owner) {
+            $ownernum ++;
+            $ownertext = $ownernum;
+            while (length($ownertext) < 3) {
+                $ownertext = '0'.$ownertext;
+            }
+            $ownertext = 'owner'.$ownertext;  
             if (!$ownerdom) {
                 $ownerdom = $cdom;
             }
-            print LOG "$code - Owner is $owner, ownerdom is $ownerdom\n";
+            if ($config{'anonymous'} eq 'no') {   # only an explicit 'no' reveals real owner details
+                $ownertext = $owner;
+                $ownerdomtext = $ownerdom;
+            } else {
+                $ownerdomtext = 'hidden';   # mask only the display copy; $ownerdom is still used for lookups and hash keys below
+            }
+            print LOG "$courses{$cid} - Owner is $ownertext, ownerdom is $ownerdomtext\n";   # log the display values, never the raw username when anonymous
             push(@{$ownedcourses{$owner.':'.$ownerdom}},$cid);
             unless (grep/^$owner:$ownerdom/,@allowners) {
                 push @allowners, $owner.':'.$ownerdom;
@@ -191,7 +333,7 @@
                 $ownercourses{$owner.':'.$ownerdom} = 0;
             }
             my %ownerhash =  &Apache::lonnet::dump('environment',$ownerdom,$owner);
-            foreach my $key (sort keys %ownerhash) {
+            foreach my $key (sort(keys(%ownerhash))) {
                 if ($key eq 'firstname') {
                     $ownername = $ownerhash{$key};
                 } elsif ($key eq 'lastname') {
@@ -211,13 +353,19 @@
         }
         my $totalmaps = 0;
         my $numparsed = 0;
-
+        if ($config{'anonymous'} eq 'no') {
+            $ownermailtext = $ownermail;
+            $ownernametext = $ownername; 
+        } else {
+            $ownermailtext = 'hidden';
+            $ownernametext = 'hidden';
+        }
         if ($stucount > 0 && $lastvisit > 0) {
             $summary{enrollment} = $stucount;
             $crsitems{$cid} = 0;
             &Apache::lonroles::set_privileges($dom,$crs);
-            print LOG "$cid =".&Apache::lonnet::unescape($courses{$code}{$cid})."\n";
-            print LOG "lastvisit ".localtime($lastvisit)." count is $stucount owner is $ownername, e-mail is $ownermail\n";
+            print LOG "$cid =".&Apache::lonnet::unescape($courses{$cid})."\n";
+            print LOG "lastvisit ".localtime($lastvisit)." count is $stucount owner is $ownernametext, e-mail is $ownermailtext\n";
             $ownercourses{$owner.':'.$ownerdom} ++;
             unless (defined($external{$cid})) {
                 $external{$cid} = 0;
@@ -501,7 +649,6 @@
             $summary{randompicks} = 100 * $numrandompicks{$cid}/$totalmaps;
         }
         $savedsummary{$cid} = \%summary;
-      }
     }
     delete($env{'user.name'});
     delete($env{'user.home'});
@@ -511,9 +658,21 @@
 
 # For each course owner..
 my @summaryitems = ('id','coursecode','enrollment','cloned','items','problem','exam','survey','allproblems','essay','match','numerical','option','radiobutton','string','formula','rank','images','numresponses','gnuplot','multipart','script','functions','duplicates','hidden','encrypted','conditional','randompicks','embedded-res','embedded-upload','numscantrons','external','homegrown','homedomain','uploaded','withdiscussion','nodiscussion','totalposts','broadcast','critical','usernotes','examupload','calendar');
+my $ccnum = 0;
 foreach my $cc (@allowners) {
+    $ccnum ++;
+    my $ccname = $ccnum;
+    while (length($ccname) < 3) {
+        $ccname = '0'.$ccname;
+    }
+    $ccname = 'owner'.$ccname;
     foreach my $cid (@{$ownedcourses{$cc}}) {
-        my ($sem) = ($cparms{$cid}{'coursecode'} =~ /^(\w{2}\d{2})/);
+        my $sem;
+        if ($cparms{$cid}{'coursecode'} =~ /^(\w{2}\d{2})/) {
+            $sem = $1;
+        } else {
+            $sem = $cparms{$cid}{'coursecode'};
+        } 
         open(FILE,">fie-data-$sem-$cid.txt");
         print FILE "total items = $crsitems{$cid}\n";
         foreach my $key (sort(keys(%{$cparms{$cid}}))) {
@@ -677,24 +836,38 @@
         print SUMM "$output\n";
     }
     close(FILE);
+    my ($uname,$udom) = split/:/,$cc;   # split here: the branch below reads both (the later re-declaration repeats the same split)
+    my ($usertext,$unametext,$ownernametext,$udomtext,$udomnametext);   # display values used in the owner file name and its contents
+    if ($config{'anonymous'} eq 'no') {
+        $usertext = $cc;
+        $unametext = $uname;   # was never set in this branch, leaving the file name without a username
+        $ownernametext = $ownernames{$cc};
+        $udomtext = $udomnametext = $udom;
+    } else {
+        $usertext = $unametext = $ccname;   # anonymised ownerNNN label built at the top of the loop
+        $ownernametext = 'hidden';
+        $udomtext = 'another';
+        $udomnametext = 'hidden';   # was never set in this branch, leaving the file name with an empty domain part
+    }
+
     my ($uname,$udom) = split/:/,$cc;
-    open(OWNER,">fie-data-$uname-$udom.dat");
-    print OWNER "Total number of courses for $ownernames{$cc} ($cc) = $ownercourses{$cc}\n";    
+    open(OWNER,">fie-data-$unametext-$udomnametext.dat");
+    print OWNER "Total number of courses for $ownernametext ($usertext) = $ownercourses{$cc}\n";    
     %{$totals{$cc}} = ();
     %{$sizes{$cc}} = ();
     %{$publish{$cc}} = ();
     if (grep/^$udom/,@domains) {
-        print OWNER "Last activity ".localtime($lastactivity{$cc})." $uname uses this server as a homeserver\n";
+        print OWNER "Last activity ".localtime($lastactivity{$cc})." $unametext uses this server as a homeserver\n";
         if (-e "/home/$uname/public_html") {
 #            &readtree("/home/$uname/public_html",\%totals,\%sizes,\%publish,$cc,$uname,$udom);
         }
         foreach my $type (sort keys %{$totals{$cc}}) {
-            print OWNER "$cc -- $type - total files = $totals{$cc}{$type}\n";
-            print OWNER "$cc -- $type - total published = $publish{$cc}{$type}\n";
-            print OWNER "$cc -- $type - total bytes = $sizes{$cc}{$type}\n";
+            print OWNER "$usertext -- $type - total files = $totals{$cc}{$type}\n";
+            print OWNER "$usertext -- $type - total published = $publish{$cc}{$type}\n";
+            print OWNER "$usertext -- $type - total bytes = $sizes{$cc}{$type}\n";
         }
     } else {
-        print OWNER "$ownernames{$cc} ($cc) uses $udom server as a homeserver\n";
+        print OWNER "$ownernametext ($usertext) uses $udomtext server as a homeserver\n";
     }
     close(OWNER);
 }

--raeburn1148652767--