[LON-CAPA-cvs] cvs: loncom /interface/statistics lonproblemanalysis.pm
matthew
lon-capa-cvs@mail.lon-capa.org
Wed, 10 Nov 2004 16:56:40 -0000
This is a MIME encoded message
--matthew1100105800
Content-Type: text/plain
matthew Wed Nov 10 11:56:40 2004 EDT
Modified files:
/loncom/interface/statistics lonproblemanalysis.pm
Log:
Numerical response analysis works for attempts, but not time analysis.
Shows histogram of correct answers each time, which is dumb.
--matthew1100105800
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20041110115640.txt"
Index: loncom/interface/statistics/lonproblemanalysis.pm
diff -u loncom/interface/statistics/lonproblemanalysis.pm:1.102 loncom/interface/statistics/lonproblemanalysis.pm:1.103
--- loncom/interface/statistics/lonproblemanalysis.pm:1.102 Tue Nov 2 15:45:41 2004
+++ loncom/interface/statistics/lonproblemanalysis.pm Wed Nov 10 11:56:39 2004
@@ -1,6 +1,6 @@
# The LearningOnline Network with CAPA
#
-# $Id: lonproblemanalysis.pm,v 1.102 2004/11/02 20:45:41 matthew Exp $
+# $Id: lonproblemanalysis.pm,v 1.103 2004/11/10 16:56:39 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -152,17 +152,8 @@
$problem_data,
\@Students);
} elsif ($current_problem->{'resptype'} eq 'numerical') {
- ##
- ## analyze all responses of a problem at once
- my $res = $current_problem->{'resource'};
- foreach my $partid (@{$res->{'parts'}}) {
- $current_problem->{'part'} = $partid;
- foreach my $respid (@{$res->{'partdata'}->{$partid}->{'ResponseIds'}}) {
- $current_problem->{'respid'}=$respid;
- &NumericalResponseAnalysis($r,$current_problem,
- $problem_data,\@Students);
- }
- }
+ &numerical_response_analysis($r,$current_problem,
+ $problem_data,\@Students);
} else {
$r->print('<h2>Analysis of '.$current_problem->{'resptype'}.' is not supported</h2>');
}
@@ -178,7 +169,6 @@
}
}
-
#########################################################
#########################################################
##
@@ -186,80 +176,273 @@
##
#########################################################
#########################################################
-sub NumericalResponseAnalysis {
- my ($r,$problem,$problem_data,$Students) = @_;
+sub numerical_response_analysis {
+ my ($r,$problem,$problem_analysis,$students) = @_;
my $c = $r->connection();
- my ($resource,$partid,$respid) = ($problem->{'resource'},
- $problem->{'part'},
- $problem->{'respid'});
#
- if (scalar(@{$resource->{'parts'}})>1) {
- if (@{$resource->{'partdata'}->{$partid}->{'ResponseIds'}}>1) {
- $r->print('<h3>'.
- &mt('Part [_1], response [_2].',$partid,$respid).
- '</h3>');
- } else {
- $r->print('<h3>'.
- &mt('Part [_1]',$partid,$respid).
- '</h3>');
- }
- } elsif (@{$resource->{'partdata'}->{$partid}->{'ResponseIds'}}>1) {
- $r->print('<h3>'.&mt('Response [_1]',$respid).'</h3>');
+ if ($ENV{'form.AnalyzeOver'} !~ /^(tries|time)$/) {
+ $r->print('Bad request');
}
#
- my $analysis_html;
- my $PerformanceData = &Apache::loncoursedata::get_response_data
+ my ($resource,$partid,$respid) = ($problem->{'resource'},
+ $problem->{'part'},
+ $problem->{'respid'});
+ # Gather student data
+ my $response_data = &Apache::loncoursedata::get_response_data
(\@Apache::lonstatistics::SelectedSections,
$Apache::lonstatistics::enrollment_status,
$resource->{'symb'},$respid);
- if (! defined($PerformanceData) ||
- ref($PerformanceData) ne 'ARRAY' ) {
- $analysis_html = '<h2>'.
- &mt('There is no submission data for this resource').
- '</h2>';
+ #
+ for (my $plot_num = 1;$plot_num<=$ENV{'form.NumPlots'};$plot_num++) {
+ my $restriction_function;
+ $restriction_function = sub {($_[0]->{'tries'} == $plot_num?1:0)};
+ #
+ my ($correct,$intervals,$answers) =
+ &numerical_response_determine_intervals($r,$resource,$partid,
+ $respid,$students);
+ if ($c->aborted()) { return; };
+ #
+ my $responses = &classify_response_data($response_data,
+ $correct,
+ $restriction_function);
+ my $student_count = $responses->{'_students'};
+ my $correct_count = $responses->{'_correct'};
+ my $total_count = $responses->{'_count'};
+ my $max = $responses->{'_max'};
+ #
+ my $analysis_html = '<h2>'.&mt('Attempt [_1]',$plot_num).'</h2>';
+ $analysis_html.= &numerical_one_dimensional_plot($r,600,150,
+ scalar(@$students),
+ $answers);
+ $analysis_html.= '<table><tr>'.
+ '<td valign="top" align="center">'.
+ &numerical_plot_percent($r,$responses).'</td>'.
+ '<td align="center" valign="top">'.
+ &numerical_plot_differences($r,$responses).'</td>'.
+ '</tr>'.$/.'</table>';
$r->print($analysis_html);
- return;
}
#
- # This next call causes all the waiting around that people complain about
- &Apache::lonstathelpers::GetStudentAnswers($r,$problem,$Students,
- 'Statistics',
- 'stats_status');
- return if ($c->aborted());
- #
- # Collate the data
- my %Data;
- foreach my $student (@$Students) {
- my $answer = $student->{'answer'};
- $Data{$answer}++;
- }
- my @Labels = sort {$a <=> $b } keys(%Data);
- my @PlotData = @Data{@Labels};
- #
- my $width = 500;
- my $height = 100;
- my $plot = &one_dimensional_plot($r,500,100,scalar(@$Students),
- \@Labels,\@PlotData);
-
- $r->print($plot);
return;
}
-sub one_dimensional_plot {
- my ($r,$width,$height,$N,$Labels,$Data)=@_;
+sub numerical_plot_percent {
+ my ($r,$responses) = @_;
#
- # Compute data -> image scaling factors
- my $min = $Labels->[0];
- my $max = $Labels->[-1];
- if ($max == $min) {
- $max =$min+1;
+ my $total = $responses->{'_count'};
+ return '' if ($total == 0);
+ my $minbin = 0.5;
+ while (my ($interval,$submissions) = each(%$responses)) {
+ next if ($interval =~ /^_/);
+ my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+ my $low_percent = abs(($ans-$ans_low)/$ans);
+ my $high_percent = abs(($ans_high-$ans)/$ans);
+ if ($minbin > $high_percent) { $minbin = $high_percent; }
+ if ($minbin > $low_percent) { $minbin = $low_percent; }
+ }
+ #
+ my @bins;
+ if ($minbin < 1) {
+ @bins = (0.1, ".5",1,1.5,2,2.5,3,4,5,10,20,50,100);
+ } elsif ($minbin < 2) {
+ @bins = (0.5,1,1.5,2,2.5,3,4,5,10,20,50,100);
+ } elsif ($minbin < 5) {
+ @bins = (1,2,3,4,5,10,25,50,75,100,200);
+ } elsif ($minbin < 10) {
+ @bins = (2,4,6,8,10,12,15,20,25,30,50,75,100,200);
+ } else {
+ @bins = (5,10,15,20,25,30,50,75,100,200);
}
- my $h_scale = ($width-10)/($max-$min);
+ my @labels = (1..scalar(@bins));
#
+ my @correct;
+ my @incorrect;
+ while (my ($interval,$submissions) = each(%$responses)) {
+ next if ($interval =~ /^_/);
+ my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+ while (my ($submission,$count) = each(%$submissions)) {
+ my $scaled_value = abs(($submission-$ans)/$ans);
+ my $bin=0;
+ for ($bin=0;$bin<$#bins;$bin++) { # not <= for a reason
+ last if ($bins[$bin]>$scaled_value);
+ }
+ if (($submission <= $ans_high) && ($submission >= $ans_low)) {
+ $correct[$bin]+=$count;
+ } else {
+ $incorrect[$bin]+=$count;
+ }
+ }
+ }
+ #
+ for (my $i=0;$i<=$#bins;$i++) {
+ if (! defined( $correct[$i])) { $correct[$i]=0;}
+ if (! defined($incorrect[$i])) { $incorrect[$i]=0;}
+ $correct[$i]*=100/$total;
+ $incorrect[$i]*=100/$total;
+ }
+ my $title = &mt('Distribution by Percent');
+ my $graph = &Apache::loncommon::DrawBarGraph
+ ($title,'Percent difference from correct','Number of answers',
+ 100,['#33FF00','#FF3300'],\@labels,\@correct,\@incorrect,
+ {xskip=>1});
+ #
+ my $table = $graph.'<table><tr><th>'.&mt('Bar').'</th>'.
+ '<th colspan="3">'.&mt('Range').'</th>';
+ for (my $i=0;$i<=$#bins;$i++) {
+ my $lownum;
+ if ($i == 0) {
+ $lownum = 0;
+ } else {
+ $lownum = $bins[$i-1];
+ }
+ my $highnum = $bins[$i];
+ $table .=
+ '<tr>'.
+ '<td>'.$labels[$i].'</td>'.
+ '<td align="right">'.$lownum.'</td>'.
+ '<td> - </td>'.
+ '<td align="right">'.$highnum.'</td>'.'</tr>'.$/;
+ }
+ $table.= '</table>'.$/;
+ return $table;
+}
+
+sub numerical_plot_differences {
+ my ($r,$responses) = @_;
+ #
+ my $total = $responses->{'_count'};
+ return '' if ($total == 0);
+ my $minbin = undef;
+ my $maxbin = undef;
+ while (my ($interval,$submissions) = each(%$responses)) {
+ next if ($interval =~ /^_/);
+ my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+ my $low_diff = abs($ans-$ans_low);
+ my $high_diff = abs($ans_high-$ans);
+ if (! defined($maxbin)) { $maxbin = $low_diff;}
+ if (! defined($minbin)) { $minbin = $low_diff;}
+ #
+ if ($minbin > $high_diff) { $minbin = $high_diff; }
+ if ($minbin > $low_diff ) { $minbin = $low_diff; }
+ #
+ if ($maxbin < $high_diff) { $maxbin = $high_diff; }
+ if ($maxbin < $low_diff ) { $maxbin = $low_diff; }
+ }
+ #
+ my @bins;
+ my @labels;
+ # Hmmmm, should switch to absolute difference
+ for (my $i=1;$i<=20;$i++) {
+ push(@bins,$i*$minbin/2);
+ push(@labels,$i);
+ }
+ #
+ my @correct;
+ my @incorrect;
+ while (my ($interval,$submissions) = each(%$responses)) {
+ next if ($interval =~ /^_/);
+ my ($ans,$ans_low,$ans_high) = split(" ",$interval);
+ while (my ($submission,$count) = each(%$submissions)) {
+ my $value = abs($submission-$ans);
+ my $bin=0;
+ for ($bin=0;$bin<$#bins;$bin++) { # not <= for a reason
+ last if ($bins[$bin]>$value);
+ }
+ if (($submission <= $ans_high) && ($submission >= $ans_low)) {
+ $correct[$bin]+=$count;
+ } else {
+ $incorrect[$bin]+=$count;
+ }
+ }
+ }
+ #
+ for (my $i=0;$i<=$#bins;$i++) {
+ if (! defined( $correct[$i])) { $correct[$i]=0;}
+ if (! defined($incorrect[$i])) { $incorrect[$i]=0;}
+ $correct[$i]*=100/$total;
+ $incorrect[$i]*=100/$total;
+ }
+ my $title = &mt('Distribution by Magnitude');
+ my $graph = &Apache::loncommon::DrawBarGraph
+ ($title,'magnitude difference from correct','Number of answers',
+ 100,['#33FF00','#FF3300'],\@labels,\@correct,\@incorrect,{xskip=>1});
+ #
+ my $table = $graph.'<table><tr><th>'.&mt('Bar').'</th>'.
+ '<th colspan="3">'.&mt('Range').'</th>';
+ for (my $i=0;$i<=$#bins;$i++) {
+ my $lownum;
+ if ($i == 0) {
+ $lownum = 0;
+ } else {
+ $lownum = $bins[$i-1];
+ }
+ my $highnum = $bins[$i];
+ $table .=
+ '<tr>'.
+ '<td>'.$labels[$i].'</td>'.
+ '<td align="right">'.$lownum.'</td>'.
+ '<td> - </td>'.
+ '<td align="right">'.$highnum.'</td>'.'</tr>'.$/;
+ }
+ $table.= '</table>'.$/;
+ return $table;
+}
+
+sub numerical_response_determine_intervals {
+ my ($r,$resource,$partid,$respid,$students)=@_;
+ my $c = $r->connection();
+ #
+ # FIX ME: Need progress dialog updates
+ #
+ # Read in the cache (if it exists) before we start timing things.
+ &Apache::lonstathelpers::ensure_proper_cache($resource->{'symb'});
+ #
+ my $correct;
+ # %intervals differs from %answers because it may be possible for two
+ # students to have the same correct answer but different intervals.
+ my %intervals;
+ my %answers;
+ foreach my $student (@$students) {
+ last if ($c->aborted());
+ my $sname = $student->{'username'};
+ my $sdom = $student->{'domain'};
+ # analyze problem
+ my $analysis =
+ &Apache::lonstathelpers::analyze_problem_as_student($resource,
+ $sname,
+ $sdom);
+ # make the key
+ my $key = $partid.'.'.$respid;
+ my $interval = join(' ',($analysis->{$key.'.answer'}->[0],
+ $analysis->{$key.'.ans_low'}->[0],
+ $analysis->{$key.'.ans_high'}->[0]));
+ $correct->{$sname.':'.$sdom} = $interval;
+ $intervals{$interval}++;
+ $answers{$analysis->{$key.'.answer'}->[0]}++;
+ }
+ &Apache::lonstathelpers::write_analysis_cache();
+ return ($correct,\%intervals,\%answers);
+}
+
+sub numerical_one_dimensional_plot {
+ my ($r,$width,$height,$n,$data)=@_;
+ #
+ # Compute data -> image scaling factors
my $max_y = 0;
- foreach (@$Data) {
- $max_y = $_ if ($max_y < $_);
+ my $min_x = undef;
+ my $max_x = undef;
+ while (my ($answer,$count) = each(%$data)) {
+ $max_y = $count if ($max_y < $count);
+ if (! defined($min_x) || $answer < $min_x) {
+ $min_x = $answer;
+ }
+ if (! defined($max_x) || $answer > $max_x) {
+ $max_x = $answer;
+ }
}
+ #
+ my $h_scale = ($width-10)/($max_x-$min_x);
+ #
my $ticscale = 5;
if ($max_y * $ticscale > $height/2) {
$ticscale = int($height/2/$max_y);
@@ -269,17 +452,16 @@
# Create the plot
my $plot =
qq{<drawimage width="$width" height="$height" bgcolor="transparent" >};
- for (my $idx=0;$idx<scalar(@$Labels);$idx++) {
- my $xloc = 5+$h_scale*($Labels->[$idx] - $min);
- my $top = $height/2-$Data->[$idx]*$ticscale;
- my $bottom = $height/2+$Data->[$idx]*$ticscale;
- $plot .=
- &line($xloc,$top,$xloc,$bottom,'888888',1);
+ while (my ($answer,$count) = each(%$data)) {
+ my $xloc = 5+$h_scale*($answer - $min_x);
+ my $top = $height/2-$count*$ticscale;
+ my $bottom = $height/2+$count*$ticscale;
+ $plot .= &line($xloc,$top,$xloc,$bottom,'888888',1);
}
#
# Put the scale on last to ensure it is on top of the data.
- if ($min < 0 && $max > 0) {
- my $circle_x = 5+$h_scale*abs($min); # '0' in data coordinates
+ if ($min_x < 0 && $max_x > 0) {
+ my $circle_x = 5+$h_scale*abs($min_x); # '0' in data coordinates
my $r = 4;
$plot .= &line(5,$height/2,$circle_x-$r,$height/2,'000000',1);
$plot .= &circle($circle_x,$height/2,$r+1,'000000');
@@ -290,20 +472,18 @@
$plot .= '</drawimage>';
my $plotresult = &Apache::lonxml::xmlparse($r,'web',$plot);
- my $title = 'Distribution of correct answers';
my $result = '<table>'.
'<tr><td colspan="3" align="center">'.
- '<font size="+2">'.$title.' (N='.$N.')'.
- '</font>'.
+ '<font size="+2">'.&mt('Distribution of correct answers').'</font>'.
+ '<br />'.&mt('[_1] students, [_2] distinct correct answers',
+ $n,scalar(keys(%$data))).
+ '<br />'.&mt('Maximum number of coinciding values: [_1]',$max_y).
'</td></tr>'.
'<tr>'.
- '<td valign="center">'.$min.'</td>'.
+ '<td valign="center">'.$min_x.'</td>'.
'<td>'.$plotresult.'</td>'.
- '<td valign="center">'.$max.'</td>'.
+ '<td valign="center">'.$max_x.'</td>'.
'</tr>'.
- '<tr><td colspan="3" align="center">'.
- 'Maximum Number of Coinciding Values: '.$max_y.
- '</td></tr>'.
'</table>';
return $result;
}
@@ -313,7 +493,7 @@
## These should probably go somewhere more suitable soon.
sub line {
my ($x1,$y1,$x2,$y2,$color,$thickness) = @_;
- return qq{<line x1="$x1" y1="$y1" x2="$x2" y2="$y2" color="$color" thickness="$thickness" />$/};
+ return qq{<line x1="$x1" y1="$y1" x2="$x2" y2="$y2" color="$color" thickness="$thickness" />};
}
sub text {
@@ -486,8 +666,8 @@
$no_data_text = 'No data for [_5] to [_6]';
}
my $foil_choice_data =
- &RR_classify_response_data($response_data,$correct,
- $restriction_function);
+ &classify_response_data($response_data,$correct,
+ $restriction_function);
# &Apache::lonstathelpers::log_hash_ref($foil_choice_data);
my $answers;
if (ref($correct)) {
@@ -758,12 +938,22 @@
return ($graph,\%count_per_foil);
}
+
+#########################################################
+#########################################################
+##
+## Misc routines
+##
+#########################################################
+#########################################################
+
# if $correct is a hash ref, it is assumed to be indexed by student names.
# the values are assumed to be hash refs with a key of 'answer'.
-sub RR_classify_response_data {
+sub classify_response_data {
my ($full_row_data,$correct,$function) = @_;
my %submission_data;
my %students;
+ my $max=0;
foreach my $row (@$full_row_data) {
my %subm = &hashify_attempt($row);
if (ref($correct) eq 'HASH') {
@@ -775,12 +965,17 @@
if (&$function(\%subm)) {
$students{$subm{'student'}}++;
$submission_data{'_count'}++;
+
if (&submission_is_correct($subm{'award'})) {
$submission_data{'_correct'}++;
}
- $submission_data{$subm{'correct'}}->{$subm{'submission'}}++;
+
+ if($max<++$submission_data{$subm{'correct'}}->{$subm{'submission'}}) {
+ $max=$submission_data{$subm{'correct'}}->{$subm{'submission'}};
+ }
}
}
+ $submission_data{'_max'} = $max;
$submission_data{'_students'}=scalar(keys(%students));
return \%submission_data;
}
--matthew1100105800--