[LON-CAPA-cvs] cvs: loncom /interface loncoursedata.pm /interface/statistics lonproblemstatistics.pm

matthew lon-capa-cvs@mail.lon-capa.org
Thu, 01 Apr 2004 20:02:56 -0000


This is a MIME encoded message

--matthew1080849776
Content-Type: text/plain

matthew		Thu Apr  1 15:02:56 2004 EDT

  Modified files:              
    /loncom/interface	loncoursedata.pm 
    /loncom/interface/statistics	lonproblemstatistics.pm 
  Log:
  Bug 539: Report KR-21 reliability statistic
  
  loncoursedata.pm: 
    Added &limit_by_start_end_time to consolidate $starttime and $endtime code.  
    Added &score_stats and &count_stats to return summary data on the scores 
      and problem correct counts.
  
  lonproblemstatistics.pm: 
    Added description of @Fields data structures
    Added @SeqFields, &output_sequence_statistics, &sequence_html_header,
      &sequence_html_output, and &compute_sequence_statistics.
  
  Reporting statistics on sequences (instead of problems) is still being 
  fleshed out, interface-wise.  No Excel output is available for sequence 
  statistics at this time.
  
  
--matthew1080849776
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040401150256.txt"

Index: loncom/interface/loncoursedata.pm
diff -u loncom/interface/loncoursedata.pm:1.128 loncom/interface/loncoursedata.pm:1.129
--- loncom/interface/loncoursedata.pm:1.128	Fri Mar 26 17:01:30 2004
+++ loncom/interface/loncoursedata.pm	Thu Apr  1 15:02:55 2004
@@ -1,6 +1,6 @@
 # The LearningOnline Network with CAPA
 #
-# $Id: loncoursedata.pm,v 1.128 2004/03/26 22:01:30 matthew Exp $
+# $Id: loncoursedata.pm,v 1.129 2004/04/01 20:02:55 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -2160,6 +2160,38 @@
 
 =pod
 
+=item &limit_by_start_end_time
+
+Build SQL WHERE condition which limits the data collected by the start
+and end times provided
+
+Inputs: $starttime, $endtime, $table
+
+Returns: $time_limits (undef when neither $starttime nor $endtime is defined)
+
+=cut
+
+##########################################################
+##########################################################
+sub limit_by_start_end_time {
+    my ($starttime,$endtime,$table) = @_;
+    my $time_requirements = undef;
+    if (defined($starttime)) {
+        $time_requirements .= $table.".timestamp>='".$starttime."'";
+        if (defined($endtime)) {
+            $time_requirements .= " AND ".$table.".timestamp<='".$endtime."'";
+        }
+    } elsif (defined($endtime)) {
+        $time_requirements .= $table.".timestamp<='".$endtime."'";
+    }
+    return $time_requirements;
+}
+
+##########################################################
+##########################################################
+
+=pod
+
 =item &limit_by_section_and_status
 
 Build SQL WHERE condition which limits the data collected by section and
@@ -2300,6 +2332,147 @@
     return ($rows->[0],$rows->[1]);
 }
 
+########################################################
+########################################################
+
+=pod
+
+=item &score_stats
+
+Inputs: $Sections, $enrollment, $symbs, $starttime,
+        $endtime, $courseid
+
+$Sections, $enrollment, $starttime, $endtime, and $courseid are the same as 
+elsewhere in this module.  
+$symbs is an array ref of symbs
+
+Returns: minimum, maximum, mean, and s.d. of the student scores on the given
+  resources, the number of students, and the maximum possible score
+
+=cut
+
+########################################################
+########################################################
+sub score_stats {
+    my ($Sections,$enrollment,$symbs,$starttime,$endtime,$courseid)=@_;
+    if (! defined($courseid)) {
+        $courseid = $ENV{'request.course.id'};
+    }
+    #
+    &setup_table_names($courseid);
+    my $dbh = &Apache::lonmysql::get_dbh();
+    #
+    my ($section_limits,$enrollment_limits)=
+        &limit_by_section_and_status($Sections,$enrollment,'b');
+    my $time_limits = &limit_by_start_end_time($starttime,$endtime,'a');
+    my @Symbids = map { &get_symb_id($_); } @{$symbs};
+    #
+    my $stats_table = $courseid.'_problem_stats';
+    my $symb_restriction = join(' OR ',map {'a.symb_id='.$_;} @Symbids);
+    my $request = 'DROP TABLE '.$stats_table;
+    $dbh->do($request);
+    $request = 
+        'CREATE TEMPORARY TABLE '.$stats_table.' '.
+        'SELECT a.student_id,'.
+        'SUM(a.awarded*w.weight) AS score FROM '.
+        $performance_table.' AS a '.
+        'NATURAL LEFT JOIN '.$weight_table.' AS w '.
+        'LEFT JOIN '.$student_table.' AS b ON a.student_id=b.student_id '.
+        'WHERE ('.$symb_restriction.')';
+    if ($time_limits) {
+        $request .= ' AND '.$time_limits;
+    }
+    if ($section_limits) {
+        $request .= ' AND '.$section_limits;
+    }
+    if ($enrollment_limits) {
+        $request .= ' AND '.$enrollment_limits;
+    }
+    $request .= ' GROUP BY a.student_id';
+#    &Apache::lonnet::logthis('request = '.$/.$request);
+    my $sth = $dbh->prepare($request);
+    $sth->execute();
+    $request = 
+        'SELECT AVG(score),STD(score),MAX(score),MIN(score),COUNT(score) '.
+        'FROM '.$stats_table;
+    my ($ave,$std,$max,$min,$count) = &execute_SQL_request($dbh,$request);
+#    &Apache::lonnet::logthis('request = '.$/.$request);
+    
+    $request = 'SELECT SUM(weight) FROM '.$weight_table.
+        ' WHERE ('.$symb_restriction.')';
+    my ($max_possible) = &execute_SQL_request($dbh,$request);
+    # &Apache::lonnet::logthis('request = '.$/.$request);
+    return($min,$max,$ave,$std,$count,$max_possible);
+}
+
+
+########################################################
+########################################################
+
+=pod
+
+=item &count_stats
+
+Inputs: $Sections, $enrollment, $symbs, $starttime,
+        $endtime, $courseid
+
+$Sections, $enrollment, $starttime, $endtime, and $courseid are the same as 
+elsewhere in this module.  
+$symbs is an array ref of symbs
+
+Returns: minimum, maximum, mean, and s.d. of the number of items correct
+  on the given resources, and the number of students
+
+=cut
+
+########################################################
+########################################################
+sub count_stats {
+    my ($Sections,$enrollment,$symbs,$starttime,$endtime,$courseid)=@_;
+    if (! defined($courseid)) {
+        $courseid = $ENV{'request.course.id'};
+    }
+    #
+    &setup_table_names($courseid);
+    my $dbh = &Apache::lonmysql::get_dbh();
+    #
+    my ($section_limits,$enrollment_limits)=
+        &limit_by_section_and_status($Sections,$enrollment,'b');
+    my $time_limits = &limit_by_start_end_time($starttime,$endtime,'a');
+    my @Symbids = map { &get_symb_id($_); } @{$symbs};
+    #
+    my $stats_table = $courseid.'_problem_stats';
+    my $symb_restriction = join(' OR ',map {'a.symb_id='.$_;} @Symbids);
+    my $request = 'DROP TABLE '.$stats_table;
+    $dbh->do($request);
+    $request = 
+        'CREATE TEMPORARY TABLE '.$stats_table.' '.
+        'SELECT a.student_id,'.
+        'COUNT(a.award) AS count FROM '.
+        $performance_table.' AS a '.
+        'LEFT JOIN '.$student_table.' AS b ON a.student_id=b.student_id '.
+        'WHERE ('.$symb_restriction.')'.
+        " AND a.award!='INCORRECT_ATTEMPTED'";
+    if ($time_limits) {
+        $request .= ' AND '.$time_limits;
+    }
+    if ($section_limits) {
+        $request .= ' AND '.$section_limits;
+    }
+    if ($enrollment_limits) {
+        $request .= ' AND '.$enrollment_limits;
+    }
+    $request .= ' GROUP BY a.student_id';
+    &Apache::lonnet::logthis('request = '.$/.$request);
+    my $sth = $dbh->prepare($request);
+    $sth->execute();
+    $request = 
+        'SELECT AVG(count),STD(count),MAX(count),MIN(count),COUNT(count) '.
+        'FROM '.$stats_table;
+    my ($ave,$std,$max,$min,$count) = &execute_SQL_request($dbh,$request);
+    &Apache::lonnet::logthis('request = '.$/.$request);
+    return($min,$max,$ave,$std,$count);
+}
 
 ######################################################
 ######################################################
Index: loncom/interface/statistics/lonproblemstatistics.pm
diff -u loncom/interface/statistics/lonproblemstatistics.pm:1.78 loncom/interface/statistics/lonproblemstatistics.pm:1.79
--- loncom/interface/statistics/lonproblemstatistics.pm:1.78	Mon Mar 29 14:50:23 2004
+++ loncom/interface/statistics/lonproblemstatistics.pm	Thu Apr  1 15:02:56 2004
@@ -1,6 +1,6 @@
 # The LearningOnline Network with CAPA
 #
-# $Id: lonproblemstatistics.pm,v 1.78 2004/03/29 19:50:23 matthew Exp $
+# $Id: lonproblemstatistics.pm,v 1.79 2004/04/01 20:02:56 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -61,6 +61,7 @@
 use Time::HiRes;
 
 my @StatsArray;
+my %SeqStat;    # keys are symbs, values are hash refs
 
 ##
 ## Localization notes:
@@ -69,6 +70,31 @@
 ## header for plots created with Graph.pm, both of which more than likely do
 ## not support localization.
 ##
+#
+#
+##
+## Description of Field attributes
+##
+## Attribute     Required   Value       Meaning or Use
+##
+## name            yes      any scalar  Used to uniquely identify field
+## title           yes      any scalar  This is what the user sees to identify
+##                                      the field.  Passed through &mt().
+## long_title      yes      any scalar  Used as graph heading and in Excel
+##                                      output.  NOT translated
+## align           no    (left|right|center)  HTML cell contents alignment
+## color           yes      html color  HTML cell background color
+##                                      used to visually group statistics
+## special         no          (link)   Indicates a link, target is name.link
+##                                      Currently set in &get_statistics()
+## graphable       no      (yes|no)     Can a bar graph of the field be 
+##                                      produced?
+## sortable        no      (yes|no)     Should a sort link be put in the
+##                                      column header?
+## selectable      yes     (yes|no)     Can the column be removed from the
+##                                      statistics display?
+## selected        yes     (yes|no)     Is the column selected by default?
+##
 my @Fields = (
            { name => 'problem_num',
              title => 'P#',
@@ -236,6 +262,150 @@
            },
 );
 
+my @SeqFields = (
+           { name   => 'title',
+             title  => 'Sequence',
+             align  => 'left',
+             color  => '#FFFFE6',
+             special  => 'no',
+             sortable => 'no', 
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'items',
+             title  => '#Items',
+             align  => 'right',
+             color  => '#FFFFE6',
+             format => '%4d',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Number of Items in Sequence',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'scoremean',
+             title  => 'Score Mean',
+             align  => 'right',
+             color  => '#FFFFE6',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Mean Sequence Score',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'scorestd',
+             title  => 'Score STD',
+             align  => 'right',
+             color  => '#FFFFE6',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Standard Deviation of Sequence Scores',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'scoremax',
+             title  => 'Score Max',
+             align  => 'right',
+             color  => '#FFFFE6',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Maximum Sequence Score',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'scoremin',
+             title  => 'Score Min',
+             align  => 'right',
+             color  => '#FFFFE6',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Minumum Sequence Score',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'scorecount',
+             title  => 'Score N',
+             align  => 'right',
+             color  => '#FFFFE6',
+             format => '%4d',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Number of Students in score computations',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'countmean',
+             title  => 'Count Mean',
+             align  => 'right',
+             color  => '#FFFFFF',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Mean Sequence Score',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'countstd',
+             title  => 'Count STD',
+             align  => 'right',
+             color  => '#FFFFFF',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Standard Deviation of Sequence Scores',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'countmax',
+             title  => 'Count Max',
+             align  => 'right',
+             color  => '#FFFFFF',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Maximum Number of Correct Problems',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'countmin',
+             title  => 'Count Min',
+             align  => 'right',
+             color  => '#FFFFFF',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Minumum Number of Correct Problems',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'count',
+             title  => 'Count N',
+             align  => 'right',
+             color  => '#FFFFFF',
+             format => '%4d',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'Number of Students in score computations',
+             selectable => 'yes',
+             selected => 'no',
+           },
+           { name   => 'KR-21',
+             title  => 'KR-21',
+             align  => 'right',
+             color  => '#FFAAAA',
+             format => '%4.2f',
+             sortable  => 'no',
+             graphable => 'no',
+             long_title => 'KR-21 reliability statistic',
+             selectable => 'yes',
+             selected => 'no',
+           },           
+);
+
 my %SelectedFields;
 
 sub parse_field_selection {
@@ -380,6 +550,7 @@
     #
     # Clear the package variables
     undef(@StatsArray);
+    undef(%SeqStat);
     #
     # Finally let the user know we are here
     my $interface = &CreateInterface();
@@ -435,17 +606,40 @@
             undef($plot);
         }
         if ($sortby eq 'container' && ! defined($plot)) {
+            &output_sequence_statistics($r);
             &output_html_by_sequence($r);
         } else {
             if (defined($plot)) {
                 &make_plot($r,$plot);
             }
             &output_html_stats($r);
+            &output_sequence_statistics($r);
         }
     }
     return;
 }
 
+sub output_sequence_statistics {
+    my ($r) = @_;
+    my $c=$r->connection();
+    $r->print('<h2>'.&mt('Sequence Statistics').'</h2>');
+    $r->print('<table border="0"><tr><td bgcolor="#777777">'."\n".
+              '<table border="0" cellpadding="3">'."\n".
+              '<tr bgcolor="#FFFFE6">');
+    $r->print(&sequence_html_header());
+    foreach my $seq (&Apache::lonstatistics::Sequences_with_Assess()) {
+        last if ($c->aborted);
+        next if ($seq->{'num_assess'} < 1);
+        &compute_sequence_statistics($seq);
+        $r->print(&sequence_html_output($seq));
+    }
+    $r->print('</table>');
+    $r->print('</table>');
+    $r->rflush();
+    return;
+}
+
+
 ##########################################################
 ##########################################################
 ##
@@ -584,6 +778,46 @@
     return $header_row;
 }
 
+sub sequence_html_header {
+    my $Str .= '<tr>';
+    foreach my $field (@SeqFields) {
+#        next if ($field->{'selected'} ne 'yes');
+        $Str .= '<th bgcolor="'.$field->{'color'}.'"';
+        $Str .= '>'.$field->{'title'}.'</th>';
+    }
+    $Str .= '</tr>';
+    return $Str;
+}
+
+
+sub sequence_html_output {
+    my ($seq) = @_;
+    my $data = $SeqStat{$seq->{'symb'}};
+#    $SeqStat{$symb}->{'max'}
+#    $SeqStat{$symb}->{'min'}
+#    $SeqStat{$symb}->{'mean'}
+#    $SeqStat{$symb}->{'std'}
+#    $SeqStat{$symb}->{'count'}
+#    $SeqStat{$symb}->{'max_possible'}
+    my $row = '<tr>';
+    foreach my $field (@SeqFields) {
+#        next if ($field->{'selected'} ne 'yes');
+        $row .= '<td bgcolor="'.$field->{'color'}.'"';
+        if (exists($field->{'align'})) {
+            $row .= ' align="'.$field->{'align'}.'"';
+        }
+        $row .= '>';
+        if (exists($field->{'format'})) {
+            $row .= sprintf($field->{'format'},$data->{$field->{'name'}});
+        } else {
+            $row .= $data->{$field->{'name'}};
+        }
+        $row .= '</td>';
+    }
+    $row .= '</tr>'."\n";
+    return $row;
+}
+
 ####################################################
 ####################################################
 ##
@@ -1148,7 +1382,6 @@
     return $data;
 }
 
-
 ###############################################
 ###############################################
 
@@ -1205,6 +1438,70 @@
 
 ###############################################
 ###############################################
+##
+## Compute KR-21
+##
+## To compute KR-21, you need the following information:
+##
+## K=the number of items in your test
+## M=the mean score on the test
+## s=the standard deviation of the scores on your test 
+##
+## then:
+## 
+## KR-21 rk= [K/(K-1)] * [1 - (M*(K-M))/(K*s^2)]
+##
+###############################################
+###############################################
+sub compute_sequence_statistics {
+    my ($seq) = @_;
+    my $symb = $seq->{'symb'};
+    my @Resources;
+    foreach my $res (@{$seq->{'contents'}}) {
+        next if ($res->{'type'} ne 'assessment');
+        push (@Resources,$res->{'symb'});
+    }
+    my ($starttime,$endtime) = &Apache::lonstathelpers::get_time_limits();
+    #
+    # First compute statistics based on student scores
+    my ($smin,$smax,$sMean,$sSTD,$scount,$sMAX) = 
+        &Apache::loncoursedata::score_stats
+                    (\@Apache::lonstatistics::SelectedSections,
+                     $Apache::lonstatistics::enrollment_status,
+                     \@Resources,$starttime,$endtime,undef);
+    $SeqStat{$symb}->{'title'}  = $seq->{'title'};
+    $SeqStat{$symb}->{'scoremax'}  = $smax;
+    $SeqStat{$symb}->{'scoremin'}  = $smin;
+    $SeqStat{$symb}->{'scoremean'} = $sMean;
+    $SeqStat{$symb}->{'scorestd'}  = $sSTD;
+    $SeqStat{$symb}->{'scorecount'} = $scount;
+    $SeqStat{$symb}->{'max_possible'} = $sMAX;
+    #
+    # Compute statistics based on the number of correct problems
+    # 'correct' is taken to mean any award other than 'INCORRECT_ATTEMPTED'
+    my ($cmin,$cmax,$cMean,$cSTD,$ccount)=
+        &Apache::loncoursedata::count_stats
+        (\@Apache::lonstatistics::SelectedSections,
+         $Apache::lonstatistics::enrollment_status,
+         \@Resources,$starttime,$endtime,undef);
+    my $K = $seq->{'num_assess_parts'};
+    my $kr_21;
+    if ($K > 1 && $cSTD > 0) {
+        $kr_21 =  ($K/($K-1)) * (1 - $cMean*($K-$cMean)/($K*$cSTD**2));
+    } else {
+        $kr_21 = 'nan';
+    }
+    $SeqStat{$symb}->{'countmax'} = $cmax;
+    $SeqStat{$symb}->{'countmin'} = $cmin;
+    $SeqStat{$symb}->{'countstd'} = $cSTD;
+    $SeqStat{$symb}->{'count'} = $ccount;
+    $SeqStat{$symb}->{'items'} = $K;
+    $SeqStat{$symb}->{'KR-21'}=$kr_21;
+
+    return;
+}
+
+
 
 =pod 
 
@@ -1264,111 +1561,6 @@
 =back
 
 =cut
-
-
-############################################################
-############################################################
-##
-##  How this all works:
-##     Statistics are computed by calling &get_statistics with the sequence,
-##     resource, and part id to run statistics on.  At various places within
-##     the loops which compute the statistics, as well as before and after 
-##     the entire process, subroutines can be called.  The subroutines are
-##     registered to the following hooks:
-##
-##         hook          subroutine inputs
-##     ----------------------------------------------------------
-##         pre           $r,$count
-##         pre_seq       $r,$count,$seq
-##         pre_res       $r,$count,$seq,$res
-##         calc          $r,$count,$seq,$res,$data
-##         post_res      $r,$count,$seq,$res
-##         post_seq      $r,$count,$seq
-##         post          $r,$count
-##
-##         abort         $r
-##
-##     subroutines will be called in the order in which they are registered.
-##   
-############################################################
-############################################################
-{
-
-my %hooks;
-my $aborted = 0;
-
-sub abort_computation {
-    $aborted = 1;
-}
-
-sub clear_hooks {
-    $aborted = 0;
-    undef(%hooks);
-}
-
-sub register_hook {
-    my ($hookname,$subref)=@_;
-    if ($hookname !~ /^(pre|pre_seq|pre_res|post|post_seq|post_res|calc)$/){
-        return;
-    }
-    if (ref($subref) ne 'CODE') {
-        &Apache::lonnet::logthis('attempt to register hook to non-code: '.
-                                 $hookname,' = '.$subref);
-    } else {
-        if (exists($hooks{$hookname})) {
-            push(@{$hooks{$hookname}},$subref);
-        } else {
-            $hooks{$hookname} = [$subref];
-        }
-    }
-    return;
-}
-
-sub run_hooks {
-    my $context = shift();
-    foreach my $hook (@{$hooks{$context}}) { 
-        if ($aborted && $context ne 'abort') {
-            last;
-        }
-        my $retvalue = $hook->(@_);
-        if (defined($retvalue) && $retvalue eq '0') {
-            $aborted = 1 if (! $aborted);
-        }
-    }
-}
-
-sub run_statistics {
-    my ($r) = @_;
-    my $count = 0;
-    &run_hooks('pre',$r,$count);
-    foreach my $seq (&Apache::lonstatistics::Sequences_with_Assess()) {
-        last if ($aborted);
-        next if ($seq->{'num_assess'}<1);
-        &run_hooks('pre_seq',$r,$count,$seq);
-        foreach my $res (@{$seq->{'contents'}}) {
-            last if ($aborted);
-            next if ($res->{'type'} ne 'assessment');
-            &run_hooks('pre_res',$r,$count,$seq,$res);            
-            foreach my $part (@{$res->{'parts'}}) {
-                last if ($aborted);
-                #
-                # This is where all the work happens
-                my $data = &get_statistics($seq,$res,$part,++$count);
-                &run_hooks('calc',$r,$count,$seq,$res,$part,$data); 
-            }
-            &run_hooks('post_res',$r,$count,$seq,$res);
-        }
-        &run_hooks('post_seq',$r,$count,$seq);
-    }
-    if ($aborted) {
-        &run_hooks('abort',$r);
-    } else {
-        &run_hooks('post',$r,$count);
-    }
-    return;
-}
-
-} # End of %hooks scope
 
 ############################################################
 ############################################################

--matthew1080849776--