[LON-CAPA-cvs] cvs: loncom /interface/statistics lonproblemanalysis.pm

matthew lon-capa-cvs@mail.lon-capa.org
Tue, 22 Feb 2005 04:32:46 -0000


This is a MIME encoded message

--matthew1109046766
Content-Type: text/plain

matthew		Mon Feb 21 23:32:46 2005 EDT

  Modified files:              
    /loncom/interface/statistics	lonproblemanalysis.pm 
  Log:
  half way through rewrite of numerical response analysis.
  Added progress window to student answer computation.
  &numerical_plot_percent is mostly done.
  &numerical_classify_response gathers more statistics and passes them back
      in a seperate hash.
  Minor code cleanups elsewhere.
  
  
--matthew1109046766
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20050221233246.txt"

Index: loncom/interface/statistics/lonproblemanalysis.pm
diff -u loncom/interface/statistics/lonproblemanalysis.pm:1.111 loncom/interface/statistics/lonproblemanalysis.pm:1.112
--- loncom/interface/statistics/lonproblemanalysis.pm:1.111	Wed Feb 16 12:43:20 2005
+++ loncom/interface/statistics/lonproblemanalysis.pm	Mon Feb 21 23:32:46 2005
@@ -1,6 +1,6 @@
 # The LearningOnline Network with CAPA
 #
-# $Id: lonproblemanalysis.pm,v 1.111 2005/02/16 17:43:20 matthew Exp $
+# $Id: lonproblemanalysis.pm,v 1.112 2005/02/22 04:32:46 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -260,9 +260,10 @@
                                          $respid,$students);
         if ($c->aborted()) { return; };
         #
-        my $responses = &numerical_classify_responses($response_data,$correct,
-                                                      $restriction_function);
-        if ($responses->{'_count'} == 0) {
+        my ($responses,$stats) = 
+            &numerical_classify_responses($response_data,$correct,
+                                          $restriction_function);
+        if ($stats->{'submission_count'} == 0) {
             $analysis_html.= 
                 '<tr><td colspan="2"><font size="+1"><b>'.
                 &mt($no_data_message,$plot_num,@extra_data).
@@ -274,16 +275,16 @@
                 '</b></font></td></tr>'.
                 '<tr><td colspan="2" align="center">'.
                 &mt($stats_message,
-                    $responses->{'_count'},
-                    $responses->{'_correct'},
-                    $responses->{'_count'}-$responses->{'_correct'},
-                    $responses->{'_students'},
+                    $stats->{'submission_count'},
+                    $stats->{'correct_count'},
+                    $stats->{'incorrect_count'},
+                    $stats->{'students'},
                     @extra_data).
                     '</td></tr>'.
                     '<tr>'.'<td valign="top" align="center">'.
-                    &numerical_plot_percent($r,$responses).'</td>'.
+                    &numerical_plot_percent($r,$responses,$stats).'</td>'.
                     '<td align="center" valign="top">'.
-                    &numerical_plot_differences($r,$responses).'</td>'.
+                    &numerical_plot_differences($r,$responses,$stats).'</td>'.
                     '</tr>';
         }
         if ($post_message ne '') {
@@ -298,40 +299,22 @@
 }
 
 sub numerical_plot_percent {
-    my ($r,$responses) = @_;
+    my ($r,$responses,$stats) = @_;
     #
-    my $total = $responses->{'_count'};
+    my $total = $stats->{'submission_count'};
     return '' if ($total == 0);
-    my $minbin = 5;
-    while (my ($interval,$submissions) = each(%$responses)) {
-        next if ($interval =~ /^_/);
-        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
-        my $low_percent  = abs(100*($ans-$ans_low)/$ans);
-        my $high_percent = abs(100*($ans_high-$ans)/$ans);
-        if ($minbin > $high_percent) { $minbin = $high_percent; }
-        if ($minbin > $low_percent) { $minbin = $low_percent; }
-    }
-    #
+    my $min_bin_size = $stats->{'min_abs'};
+    my $low_bin  = $stats->{'lowest_ans'}-$stats->{'max_bin_size'};
+    my $high_bin = $stats->{'highest_ans'}+$stats->{'max_bin_size'};
     my @bins;
-    if ($minbin < 1) {
-        @bins = ('0.1','0.5','1.0','1.5','2.0','2.5','3.0','4.0','5.0',10,20,50,100);        
-    } elsif ($minbin < 2) {
-        @bins = ('0.5','1.0','1.5','2.0','2.5','3.0','4.0','5.0',10,20,50,100);
-    } elsif ($minbin < 5) {
-        @bins = (1,2,3,4,5,10,25,50,75,100,200);
-    } elsif ($minbin < 10) {
-        @bins = (2,4,6,8,10,12,15,20,25,30,50,75,100,200);
-    } else {
-        @bins = (5,10,15,20,25,30,50,75,100,200);
+    for (my $num = $low_bin;$num <= $high_bin;$num+=($min_bin_size/2)) {
+        push(@bins,$num);
     }
-    my @labels = (1..scalar(@bins));
     #
     my @correct;
     my @incorrect;
     my @count;
-    while (my ($interval,$submissions) = each(%$responses)) {
-        next if ($interval =~ /^_/);
-        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+    while (my ($ans,$submissions) = each(%$responses)) {
         while (my ($submission,$counts) = each(%$submissions)) {
             my ($correct_count,$incorrect_count) = @$counts;
             my $scaled_value = abs(($submission-$ans)/$ans);
@@ -345,13 +328,32 @@
         }
     }
     #
-    my @plot_correct;
-    my @plot_incorrect;
+    # Skip empty bins
+    my (@plot_correct,@plot_incorrect,@new_bins,@new_count);
+    my $min_skip = 2;
     for (my $i=0;$i<=$#bins;$i++) {
-        $plot_correct[$i] = $correct[$i]*100/$total;
-        $plot_incorrect[$i] = $incorrect[$i]*100/$total;
+        my $sum=0;
+        for (my $j=-$min_skip;$j<=$min_skip && $i+$j<=$#bins;$j++) {
+            $sum += $correct[$i+$j] + $incorrect[$i+$j];
+        }
+        if ($sum) {
+            push(@new_bins,$bins[$i]);
+            push(@plot_correct,$correct[$i]);
+            push(@plot_incorrect,$incorrect[$i]);
+            push(@new_count,$correct[$i]+$incorrect[$i]);
+        }
     }
+    @correct   = @plot_correct;
+    @incorrect = @plot_incorrect;
+    @count     = @new_count;
+    @bins      = @new_bins;
+    for (my $i=0;$i<=$#bins;$i++) {
+        $plot_correct[$i]   *= 100/$total;
+        $plot_incorrect[$i] *= 100/$total;
+    }
+    #
     my $title = &mt('Distribution by Percent');
+    my @labels = (1..scalar(@bins));
     my $graph = &Apache::loncommon::DrawBarGraph
         ($title,'Percent difference from correct','Number of answers',
          100,['#33FF00','#FF3300'],\@labels,\@plot_correct,\@plot_incorrect,
@@ -363,41 +365,24 @@
 }
 
 sub numerical_plot_differences {
-    my ($r,$responses) = @_;
+    my ($r,$responses,$stats) = @_;
     #
-    my $total = $responses->{'_count'};
+    my $total = $stats->{'submission_count'};
     return '' if ($total == 0);
-    my $minbin = undef;
-    my $maxbin = undef;
-    while (my ($interval,$submissions) = each(%$responses)) {
-        next if ($interval =~ /^_/);
-        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
-        my $low_diff  = abs($ans-$ans_low);
-        my $high_diff = abs($ans_high-$ans);
-        if (! defined($maxbin)) { $maxbin = $low_diff;}
-        if (! defined($minbin)) { $minbin = $low_diff;}
-        #
-        if ($minbin > $high_diff) { $minbin = $high_diff; }
-        if ($minbin > $low_diff ) { $minbin = $low_diff; }
-        #
-        if ($maxbin < $high_diff) { $maxbin = $high_diff; }
-        if ($maxbin < $low_diff ) { $maxbin = $low_diff; }
-    }    
     #
     my @bins;
     my @labels;
     # Hmmmm, should switch to absolute difference
     for (my $i=1;$i<=20;$i++) {
-        push(@bins,$i*$minbin/2);
+        push(@bins,$i/2);
         push(@labels,$i);
     }
     #
     my @correct;
     my @incorrect;
     my @count;
-    while (my ($interval,$submissions) = each(%$responses)) {
-        next if ($interval =~ /^_/);
-        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+    while (my ($ans,$submissions) = each(%$responses)) {
+        next if ($ans =~ /^_/);
         while (my ($submission,$counts) = each(%$submissions)) {
             my ($correct_count,$incorrect_count) = @$counts;
             my $value = abs($submission-$ans);
@@ -429,60 +414,95 @@
 }
 
 sub numerical_classify_responses {
-    &Apache::lonnet::logthis('--------------');
     my ($full_row_data,$correct,$function) = @_;
     my %submission_data;
     my %students;
+    my %stats;
     my $max=0;
     foreach my $row (@$full_row_data) {
-#        &Apache::lonnet::logthis(' row = '.join(',',@$row));
         my %subm = &hashify_attempt($row);
         if (ref($correct) eq 'HASH') {
-            $subm{'correct'} = $correct->{$subm{'student'}}->{'answer'};
-            $subm{'unit'} = $correct->{$subm{'student'}}->{'unit'};
+            my $s_correct = $correct->{$subm{'student'}};
+            $subm{'correct'} = $s_correct->{'answer'};
+            foreach my $item ('unit','ans_low','ans_high') {
+                $subm{$item} = $s_correct->{$item};
+            }
         } else { # This probably never happens....
             $subm{'correct'} = $correct->{'answer'};
             $subm{'unit'} = $correct->{'unit'};
         }
+        # 
+        my $abs_low =abs($subm{'correct'}-$subm{'ans_low'});
+        my $abs_high=abs($subm{'correct'}-$subm{'ans_high'});
+        if (! defined($stats{'min_abs'}) ||
+            $stats{'min_abs'} > $abs_low) {
+            $stats{'min_abs'} = $abs_low;
+        }
+        if ($stats{'min_abs'} > $abs_high) {
+            $stats{'min_abs'} = $abs_high;
+        }
+        if (! defined($stats{'max_abs'}) ||
+            $stats{'max_abs'} < $abs_low) {
+            $stats{'max_abs'} = $abs_low;
+        }
+        if ($stats{'max_abs'} < $abs_high) {
+            $stats{'max_abs'} = $abs_high;
+        }
+        my $low_percent  = 100 * abs($abs_low  / $subm{'correct'});
+        my $high_percent = 100 * abs($abs_high / $subm{'correct'});
+        if (! defined($stats{'min_percent'}) ||
+            $stats{'min_percent'} > $low_percent) {
+            $stats{'min_percent'} = $low_percent;
+        }
+        if ($stats{'min_percent'} > $high_percent) {
+            $stats{'min_percent'} = $high_percent;
+        }
+        if (! defined($stats{'max_percent'}) ||
+            $stats{'max_percent'} < $low_percent) {
+            $stats{'max_percent'} = $low_percent;
+        }
+        if ($stats{'max_percent'} < $high_percent) {
+            $stats{'max_percent'} = $high_percent;
+        }
+        if (! defined($stats{'lowest_ans'}) ||
+            $stats{'lowest_ans'} > $subm{'correct'}) {
+            $stats{'lowest_ans'} = $subm{'correct'};
+        }
+        if (! defined($stats{'highest_ans'}) ||
+            $stats{'highest_ans'} < $subm{'correct'}) {
+            $stats{'highest_ans'} = $subm{'correct'};
+        }
+        # 
         $subm{'submission'} =~ s/=\d+\s*$//;
         if (&$function(\%subm)) {
             my $scaled = '1';
             my ($sname,$sdom) = split(':',$subm{'student'});
-            # Note that $subm{'unit'} is modified by the following call
-            # We do not use it again but you should be aware just in case.
             my ($myunit,$mysub) = ($subm{'unit'},$subm{'submission'});
             my $result = 
                 &capa::caparesponse_get_real_response($myunit,
                                                       $mysub,
                                                       \$scaled);
+            &Apache::lonnet::logthis('scaled = '.$scaled.' result ='.$result);
             next if (! defined($scaled));
-            next if ($result ne '6');
+#            next if ($result ne '6');
             my $submission = $scaled;
             $students{$subm{'student'}}++;
+            $stats{'submission_count'}++;
             if (&numerical_submission_is_correct($subm{'award'})) { 
-                &Apache::lonnet::logthis('correct:'.$submission.':'.$subm{'correct'});
-                $submission_data{'_correct'}++;
-                $submission_data{'_count'}++;
+                $stats{'correct_count'}++;
                 $submission_data{$subm{'correct'}}->{$submission}->[0]++;
             } elsif (&numerical_submission_is_incorrect($subm{'award'})) { 
-                &Apache::lonnet::logthis('incorrect:'.$submission.':'.$subm{'correct'});
-                $submission_data{'_count'}++;
+                $stats{'incorrect_count'}++;
                 $submission_data{$subm{'correct'}}->{$submission}->[1]++;
             }
-            my $value = 
-                $submission_data{$subm{'correct'}}->{$submission}->[0]+ 
-                $submission_data{$subm{'correct'}}->{$submission}->[1];
-            if ($max < $value) { $max = $value; }
         }
     }
-    $submission_data{'_max'} = $max;
-    $submission_data{'_students'}=scalar(keys(%students));
-    return \%submission_data;
+    $stats{'students'}=scalar(keys(%students));
+    return (\%submission_data,\%stats);
 }
 
 sub numerical_submission_is_correct {
     my ($award) = @_;
-    &Apache::lonnet::logthis('award = "'.$award.'"');
     if ($award =~ /^(APPROX_ANS|EXACT_ANS)$/) {
         return 1;
     } else {
@@ -535,7 +555,10 @@
     my ($r,$resource,$partid,$respid,$students)=@_;
     my $c = $r->connection();
     #
-    # FIX ME: May need progress dialog updates
+    my %prog_state=&Apache::lonhtmlcommon::Create_PrgWin
+        ($r,'Student Answer Compilation Status',
+         'Student Answer Compilation Progress', scalar(@$students),
+         'inline',undef,'Statistics','stats_status');
     #
     # Read in the cache (if it exists) before we start timing things.
     &Apache::lonstathelpers::ensure_proper_cache($resource->{'symb'});
@@ -553,13 +576,16 @@
                                                                 $sdom);
         # make the key
         my $key = $partid.'.'.$respid;
-        $correct->{$sname.':'.$sdom}->{'answer'} = 
-            $analysis->{$key.'.answer'}->[0];
-        $correct->{$sname.':'.$sdom}->{'unit'} = 
-            $analysis->{$key.'.unit'}->[0];
+        foreach my $item ('answer','unit','ans_high','ans_low') {
+            $correct->{$sname.':'.$sdom}->{$item} = 
+                $analysis->{$key.'.'.$item}->[0];
+        }
         $answers{$analysis->{$key.'.answer'}->[0]}++;
+        &Apache::lonhtmlcommon::Increment_PrgWin($r,\%prog_state,
+                                                 &mt('last student'));
     }
     &Apache::lonstathelpers::write_analysis_cache();
+    &Apache::lonhtmlcommon::Close_PrgWin($r,\%prog_state);
     return ($correct,\%answers);
 }
 
@@ -888,7 +914,6 @@
 
 sub RR_concept_plot {
     my ($concepts,$foil_data,$title) = @_;
-    &Apache::lonnet::logthis('got to RR_concept_plot');
     #
     my %correct_by_concept;
     my %incorrect_by_concept;
@@ -936,7 +961,6 @@
 
 sub RR_create_percent_selected_plot {
     my ($concepts,$foils,$foil_data,$title) = @_;
-    &Apache::lonnet::logthis('got to RR_create_percent_selected_plot');
     #
     if ($foil_data->{'_count'} == 0) { return ''; };
     my %correct_selections;
@@ -1008,7 +1032,6 @@
 sub RR_create_stacked_selection_plot {
     my ($foils,$foil_data,$title,$true_foils)=@_;
     #
-    &Apache::lonnet::logthis('got to RR_create_stacked_selection_plot');
     my @dataset; # array of array refs - multicolor rows $datasets[row]->[col]
     my @labels;
     my $count;

--matthew1109046766--