[LON-CAPA-cvs] cvs: loncom /interface/statistics lonproblemanalysis.pm

matthew lon-capa-cvs@mail.lon-capa.org
Wed, 10 Nov 2004 16:56:40 -0000


This is a MIME encoded message

--matthew1100105800
Content-Type: text/plain

matthew		Wed Nov 10 11:56:40 2004 EDT

  Modified files:              
    /loncom/interface/statistics	lonproblemanalysis.pm 
  Log:
  Numerical response analysis works for attempts, but not time analysis.
  Shows histogram of correct answers each time, which is dumb.
  
  
--matthew1100105800
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20041110115640.txt"

Index: loncom/interface/statistics/lonproblemanalysis.pm
diff -u loncom/interface/statistics/lonproblemanalysis.pm:1.102 loncom/interface/statistics/lonproblemanalysis.pm:1.103
--- loncom/interface/statistics/lonproblemanalysis.pm:1.102	Tue Nov  2 15:45:41 2004
+++ loncom/interface/statistics/lonproblemanalysis.pm	Wed Nov 10 11:56:39 2004
@@ -1,6 +1,6 @@
 # The LearningOnline Network with CAPA
 #
-# $Id: lonproblemanalysis.pm,v 1.102 2004/11/02 20:45:41 matthew Exp $
+# $Id: lonproblemanalysis.pm,v 1.103 2004/11/10 16:56:39 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -152,17 +152,8 @@
                                          $problem_data,
                                          \@Students);
             } elsif ($current_problem->{'resptype'} eq 'numerical') {
-                ## 
-                ## analyze all responses of a problem at once
-                my $res = $current_problem->{'resource'};
-                foreach my $partid (@{$res->{'parts'}}) {
-                    $current_problem->{'part'} = $partid;
-                    foreach my $respid (@{$res->{'partdata'}->{$partid}->{'ResponseIds'}}) {
-                        $current_problem->{'respid'}=$respid;
-                        &NumericalResponseAnalysis($r,$current_problem,
-                                                   $problem_data,\@Students);
-                    }
-                }
+                &numerical_response_analysis($r,$current_problem,
+                                             $problem_data,\@Students);
             } else {
                 $r->print('<h2>Analysis of '.$current_problem->{'resptype'}.' is not supported</h2>');
             }
@@ -178,7 +169,6 @@
     }
 }
 
-
 #########################################################
 #########################################################
 ##
@@ -186,80 +176,273 @@
 ##
 #########################################################
 #########################################################
-sub NumericalResponseAnalysis {
-    my ($r,$problem,$problem_data,$Students) = @_;
+sub numerical_response_analysis {
+    my ($r,$problem,$problem_analysis,$students) = @_;
     my $c = $r->connection();
-    my ($resource,$partid,$respid) = ($problem->{'resource'},
-                                      $problem->{'part'},
-                                      $problem->{'respid'});
     #
-    if (scalar(@{$resource->{'parts'}})>1) {
-        if (@{$resource->{'partdata'}->{$partid}->{'ResponseIds'}}>1) {
-            $r->print('<h3>'.
-                      &mt('Part [_1], response [_2].',$partid,$respid).
-                      '</h3>');
-        } else { 
-            $r->print('<h3>'.
-                      &mt('Part [_1]',$partid,$respid).
-                      '</h3>');
-        }
-    } elsif (@{$resource->{'partdata'}->{$partid}->{'ResponseIds'}}>1) {
-        $r->print('<h3>'.&mt('Response [_1]',$respid).'</h3>');
+    if ($ENV{'form.AnalyzeOver'} !~ /^(tries|time)$/) {
+        $r->print('Bad request');
     }
     #
-    my $analysis_html;
-    my $PerformanceData = &Apache::loncoursedata::get_response_data
+    my ($resource,$partid,$respid) = ($problem->{'resource'},
+                                      $problem->{'part'},
+                                      $problem->{'respid'});
+    # Gather student data
+    my $response_data = &Apache::loncoursedata::get_response_data
         (\@Apache::lonstatistics::SelectedSections,
          $Apache::lonstatistics::enrollment_status,
          $resource->{'symb'},$respid);
-    if (! defined($PerformanceData) || 
-        ref($PerformanceData) ne 'ARRAY' ) {
-        $analysis_html = '<h2>'.
-            &mt('There is no submission data for this resource').
-            '</h2>';
+    #
+    for (my $plot_num = 1;$plot_num<=$ENV{'form.NumPlots'};$plot_num++) {
+        my $restriction_function;
+        $restriction_function = sub {($_[0]->{'tries'} == $plot_num?1:0)};
+        #
+        my ($correct,$intervals,$answers) = 
+            &numerical_response_determine_intervals($r,$resource,$partid,
+                                                    $respid,$students);
+        if ($c->aborted()) { return; };
+        #
+        my $responses = &classify_response_data($response_data,
+                                                $correct,
+                                                $restriction_function);
+        my $student_count = $responses->{'_students'};
+        my $correct_count = $responses->{'_correct'};
+        my $total_count   = $responses->{'_count'};
+        my $max           = $responses->{'_max'};
+        #
+        my $analysis_html = '<h2>'.&mt('Attempt [_1]',$plot_num).'</h2>';
+        $analysis_html.= &numerical_one_dimensional_plot($r,600,150,
+                                                         scalar(@$students),
+                                                         $answers);
+        $analysis_html.= '<table><tr>'.
+            '<td valign="top" align="center">'.
+            &numerical_plot_percent($r,$responses).'</td>'.
+            '<td align="center" valign="top">'.
+            &numerical_plot_differences($r,$responses).'</td>'.
+            '</tr>'.$/.'</table>';
         $r->print($analysis_html);
-        return;
     }
     #
-    # This next call causes all the waiting around that people complain about
-    &Apache::lonstathelpers::GetStudentAnswers($r,$problem,$Students,
-                                               'Statistics',
-                                               'stats_status');
-    return if ($c->aborted());
-    #
-    # Collate the data
-    my %Data;
-    foreach my $student (@$Students) {
-        my $answer = $student->{'answer'};
-        $Data{$answer}++;
-    }
-    my @Labels = sort {$a <=> $b } keys(%Data);
-    my @PlotData = @Data{@Labels};
-    #
-    my $width  = 500; 
-    my $height = 100;
-    my $plot = &one_dimensional_plot($r,500,100,scalar(@$Students),
-                                     \@Labels,\@PlotData);
-
-    $r->print($plot);
     return;
 }
 
-sub one_dimensional_plot {
-    my ($r,$width,$height,$N,$Labels,$Data)=@_;
+sub numerical_plot_percent {
+    my ($r,$responses) = @_;
     #
-    # Compute data -> image scaling factors
-    my $min = $Labels->[0];
-    my $max = $Labels->[-1];
-    if ($max == $min) {
-        $max =$min+1;
+    my $total = $responses->{'_count'};
+    return '' if ($total == 0);
+    my $minbin = 0.5;
+    while (my ($interval,$submissions) = each(%$responses)) {
+        next if ($interval =~ /^_/);
+        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+        my $low_percent  = abs(($ans-$ans_low)/$ans);
+        my $high_percent = abs(($ans_high-$ans)/$ans);
+        if ($minbin > $high_percent) { $minbin = $high_percent; }
+        if ($minbin > $low_percent) { $minbin = $low_percent; }
+    }    
+    #
+    my @bins;
+    if ($minbin < 1) {
+        @bins = (0.1, ".5",1,1.5,2,2.5,3,4,5,10,20,50,100);        
+    } elsif ($minbin < 2) {
+        @bins = (0.5,1,1.5,2,2.5,3,4,5,10,20,50,100);
+    } elsif ($minbin < 5) {
+        @bins = (1,2,3,4,5,10,25,50,75,100,200);
+    } elsif ($minbin < 10) {
+        @bins = (2,4,6,8,10,12,15,20,25,30,50,75,100,200);
+    } else {
+        @bins = (5,10,15,20,25,30,50,75,100,200);
     }
-    my $h_scale = ($width-10)/($max-$min);
+    my @labels = (1..scalar(@bins));
     #
+    my @correct;
+    my @incorrect;
+    while (my ($interval,$submissions) = each(%$responses)) {
+        next if ($interval =~ /^_/);
+        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+        while (my ($submission,$count) = each(%$submissions)) {
+            my $scaled_value = abs(($submission-$ans)/$ans);
+            my $bin=0;
+            for ($bin=0;$bin<$#bins;$bin++) { # not <= for a reason
+                last if ($bins[$bin]>$scaled_value);
+            }
+            if (($submission <= $ans_high) && ($submission >= $ans_low)) {
+                $correct[$bin]+=$count;
+            } else {
+                $incorrect[$bin]+=$count;
+            }
+        }
+    }
+    #
+    for (my $i=0;$i<=$#bins;$i++) {
+        if (! defined(  $correct[$i])) {   $correct[$i]=0;}
+        if (! defined($incorrect[$i])) { $incorrect[$i]=0;}
+        $correct[$i]*=100/$total;
+        $incorrect[$i]*=100/$total;
+    }
+    my $title = &mt('Distribution by Percent');
+    my $graph = &Apache::loncommon::DrawBarGraph
+        ($title,'Percent difference from correct','Number of answers',
+         100,['#33FF00','#FF3300'],\@labels,\@correct,\@incorrect,
+         {xskip=>1});
+    #
+    my $table = $graph.'<table><tr><th>'.&mt('Bar').'</th>'.
+        '<th colspan="3">'.&mt('Range').'</th>';
+    for (my $i=0;$i<=$#bins;$i++) {
+        my $lownum;
+        if ($i == 0) {
+            $lownum = 0;
+        } else {
+            $lownum = $bins[$i-1];
+        }
+        my $highnum = $bins[$i];
+        $table .= 
+            '<tr>'.
+            '<td>'.$labels[$i].'</td>'.
+            '<td align="right">'.$lownum.'</td>'.
+            '<td>&nbsp;-&nbsp;</td>'.
+            '<td align="right">'.$highnum.'</td>'.'</tr>'.$/;
+    }
+    $table.= '</table>'.$/;
+    return $table;
+}
+
+sub numerical_plot_differences {
+    my ($r,$responses) = @_;
+    #
+    my $total = $responses->{'_count'};
+    return '' if ($total == 0);
+    my $minbin = undef;
+    my $maxbin = undef;
+    while (my ($interval,$submissions) = each(%$responses)) {
+        next if ($interval =~ /^_/);
+        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+        my $low_diff  = abs($ans-$ans_low);
+        my $high_diff = abs($ans_high-$ans);
+        if (! defined($maxbin)) { $maxbin = $low_diff;}
+        if (! defined($minbin)) { $minbin = $low_diff;}
+        #
+        if ($minbin > $high_diff) { $minbin = $high_diff; }
+        if ($minbin > $low_diff ) { $minbin = $low_diff; }
+        #
+        if ($maxbin < $high_diff) { $maxbin = $high_diff; }
+        if ($maxbin < $low_diff ) { $maxbin = $low_diff; }
+    }    
+    #
+    my @bins;
+    my @labels;
+    # Hmmmm, should switch to absolute difference
+    for (my $i=1;$i<=20;$i++) {
+        push(@bins,$i*$minbin/2);
+        push(@labels,$i);
+    }
+    #
+    my @correct;
+    my @incorrect;
+    while (my ($interval,$submissions) = each(%$responses)) {
+        next if ($interval =~ /^_/);
+        my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+        while (my ($submission,$count) = each(%$submissions)) {
+            my $value = abs($submission-$ans);
+            my $bin=0;
+            for ($bin=0;$bin<$#bins;$bin++) { # not <= for a reason
+                last if ($bins[$bin]>$value);
+            }
+            if (($submission <= $ans_high) && ($submission >= $ans_low)) {
+                $correct[$bin]+=$count;
+            } else {
+                $incorrect[$bin]+=$count;
+            }
+        }
+    }
+    #
+    for (my $i=0;$i<=$#bins;$i++) {
+        if (! defined(  $correct[$i])) {   $correct[$i]=0;}
+        if (! defined($incorrect[$i])) { $incorrect[$i]=0;}
+        $correct[$i]*=100/$total;
+        $incorrect[$i]*=100/$total;
+    }
+    my $title = &mt('Distribution by Magnitude');
+    my $graph = &Apache::loncommon::DrawBarGraph
+        ($title,'magnitude difference from correct','Number of answers',
+         100,['#33FF00','#FF3300'],\@labels,\@correct,\@incorrect,{xskip=>1});
+    #
+    my $table = $graph.'<table><tr><th>'.&mt('Bar').'</th>'.
+        '<th colspan="3">'.&mt('Range').'</th>';
+    for (my $i=0;$i<=$#bins;$i++) {
+        my $lownum;
+        if ($i == 0) {
+            $lownum = 0;
+        } else {
+            $lownum = $bins[$i-1];
+        }
+        my $highnum = $bins[$i];
+        $table .= 
+            '<tr>'.
+            '<td>'.$labels[$i].'</td>'.
+            '<td align="right">'.$lownum.'</td>'.
+            '<td>&nbsp;-&nbsp;</td>'.
+            '<td align="right">'.$highnum.'</td>'.'</tr>'.$/;
+    }
+    $table.= '</table>'.$/;
+    return $table;
+}
+
+sub numerical_response_determine_intervals {
+    my ($r,$resource,$partid,$respid,$students)=@_;
+    my $c = $r->connection();
+    #
+    # FIX ME: Need progress dialog updates
+    #
+    # Read in the cache (if it exists) before we start timing things.
+    &Apache::lonstathelpers::ensure_proper_cache($resource->{'symb'});
+    #
+    my $correct;
+    # %intervals differs from %answers because it may be possible for two
+    # students to have the same correct answer but different intervals.
+    my %intervals; 
+    my %answers;
+    foreach my $student (@$students) {
+        last if ($c->aborted());
+        my $sname = $student->{'username'};
+        my $sdom = $student->{'domain'};
+        # analyze problem
+        my $analysis = 
+            &Apache::lonstathelpers::analyze_problem_as_student($resource,
+                                                                $sname,
+                                                                $sdom);
+        # make the key
+        my $key = $partid.'.'.$respid;
+        my $interval = join(' ',($analysis->{$key.'.answer'}->[0],
+                                 $analysis->{$key.'.ans_low'}->[0],
+                                 $analysis->{$key.'.ans_high'}->[0]));
+        $correct->{$sname.':'.$sdom} = $interval;
+        $intervals{$interval}++;
+        $answers{$analysis->{$key.'.answer'}->[0]}++;
+    }
+    &Apache::lonstathelpers::write_analysis_cache();
+    return ($correct,\%intervals,\%answers);
+}
+
+sub numerical_one_dimensional_plot {
+    my ($r,$width,$height,$n,$data)=@_;
+    #
+    # Compute data -> image scaling factors
     my $max_y = 0;
-    foreach (@$Data) {
-        $max_y = $_ if ($max_y < $_);
+    my $min_x = undef;
+    my $max_x = undef;
+    while (my ($answer,$count) = each(%$data)) {
+        $max_y = $count if ($max_y < $count);
+        if (! defined($min_x) || $answer < $min_x) {
+            $min_x = $answer;
+        }
+        if (! defined($max_x) || $answer > $max_x) {
+            $max_x = $answer;
+        }
     }
+    #
+    my $h_scale = ($width-10)/($max_x-$min_x);
+    #
     my $ticscale = 5;
     if ($max_y * $ticscale > $height/2) {
         $ticscale = int($height/2/$max_y);
@@ -269,17 +452,16 @@
     # Create the plot
     my $plot = 
         qq{<drawimage width="$width" height="$height" bgcolor="transparent" >};
-    for (my $idx=0;$idx<scalar(@$Labels);$idx++) {
-        my $xloc = 5+$h_scale*($Labels->[$idx] - $min);
-        my $top    = $height/2-$Data->[$idx]*$ticscale;
-        my $bottom = $height/2+$Data->[$idx]*$ticscale;
-        $plot .= 
-            &line($xloc,$top,$xloc,$bottom,'888888',1);
+    while (my ($answer,$count) = each(%$data)) {
+        my $xloc = 5+$h_scale*($answer - $min_x);
+        my $top    = $height/2-$count*$ticscale;
+        my $bottom = $height/2+$count*$ticscale;
+        $plot .= &line($xloc,$top,$xloc,$bottom,'888888',1);
     }
     #
     # Put the scale on last to ensure it is on top of the data.
-    if ($min < 0 && $max > 0) {
-        my $circle_x = 5+$h_scale*abs($min);  # '0' in data coordinates
+    if ($min_x < 0 && $max_x > 0) {
+        my $circle_x = 5+$h_scale*abs($min_x);  # '0' in data coordinates
         my $r = 4;
         $plot .= &line(5,$height/2,$circle_x-$r,$height/2,'000000',1);
         $plot .= &circle($circle_x,$height/2,$r+1,'000000');
@@ -290,20 +472,18 @@
     $plot .= '</drawimage>';
     my $plotresult =  &Apache::lonxml::xmlparse($r,'web',$plot);
     
-    my $title = 'Distribution of correct answers';
     my $result = '<table>'.
         '<tr><td colspan="3" align="center">'.
-        '<font size="+2">'.$title.' (N='.$N.')'.
-        '</font>'.
+        '<font size="+2">'.&mt('Distribution of correct answers').'</font>'.
+        '<br />'.&mt('[_1] students, [_2] distinct correct answers',
+                     $n,scalar(keys(%$data))).
+        '<br />'.&mt('Maximum number of coinciding values: [_1]',$max_y).
         '</td></tr>'.
         '<tr>'.
-        '<td valign="center">'.$min.'</td>'.
+        '<td valign="center">'.$min_x.'</td>'.
         '<td>'.$plotresult.'</td>'.
-        '<td valign="center">'.$max.'</td>'.
+        '<td valign="center">'.$max_x.'</td>'.
         '</tr>'.
-        '<tr><td colspan="3" align="center">'.
-        'Maximum Number of Coinciding Values: '.$max_y.
-        '</td></tr>'.
         '</table>';
     return $result;
 }
@@ -313,7 +493,7 @@
 ## These should probably go somewhere more suitable soon.
 sub line {
     my ($x1,$y1,$x2,$y2,$color,$thickness) = @_;
-    return qq{<line x1="$x1" y1="$y1" x2="$x2" y2="$y2" color="$color" thickness="$thickness" />$/};
+    return qq{<line x1="$x1" y1="$y1" x2="$x2" y2="$y2" color="$color" thickness="$thickness" />};
 }
 
 sub text {
@@ -486,8 +666,8 @@
             $no_data_text = 'No data for [_5] to [_6]';
         }
         my $foil_choice_data =
-            &RR_classify_response_data($response_data,$correct,
-                                       $restriction_function);
+            &classify_response_data($response_data,$correct,
+                                    $restriction_function);
         # &Apache::lonstathelpers::log_hash_ref($foil_choice_data);
         my $answers;
         if (ref($correct)) {
@@ -758,12 +938,22 @@
     return ($graph,\%count_per_foil);
 }
 
+
+#########################################################
+#########################################################
+##
+##       Misc routines
+##
+#########################################################
+#########################################################
+
 # if $correct is a hash ref, it is assumed to be indexed by student names.
 #    the values are assumed to be hash refs with a key of 'answer'.
-sub RR_classify_response_data {
+sub classify_response_data {
     my ($full_row_data,$correct,$function) = @_;
     my %submission_data;
     my %students;
+    my $max=0;
     foreach my $row (@$full_row_data) {
         my %subm = &hashify_attempt($row);
         if (ref($correct) eq 'HASH') {
@@ -775,12 +965,17 @@
         if (&$function(\%subm)) {
             $students{$subm{'student'}}++;
             $submission_data{'_count'}++;
+            
             if (&submission_is_correct($subm{'award'})) { 
                 $submission_data{'_correct'}++;
             }
-            $submission_data{$subm{'correct'}}->{$subm{'submission'}}++;
+            
+            if($max<++$submission_data{$subm{'correct'}}->{$subm{'submission'}}) {
+                $max=$submission_data{$subm{'correct'}}->{$subm{'submission'}};
+            }
         }
     }
+    $submission_data{'_max'} = $max;
     $submission_data{'_students'}=scalar(keys(%students));
     return \%submission_data;
 }

--matthew1100105800--