[LON-CAPA-cvs] cvs: modules /gerd evaluate.pl newevaluate.pl
www
lon-capa-cvs@mail.lon-capa.org
Mon, 22 May 2006 02:33:30 -0000
This is a MIME encoded message
--www1148265210
Content-Type: text/plain
www Sun May 21 22:33:30 2006 EDT
Added files:
/modules/gerd newevaluate.pl
Modified files:
/modules/gerd evaluate.pl
Log:
Some more work on MPEX paper
--www1148265210
Content-Type: text/plain
Content-Disposition: attachment; filename="www-20060521223330.txt"
Index: modules/gerd/evaluate.pl
diff -u modules/gerd/evaluate.pl:1.9 modules/gerd/evaluate.pl:1.10
--- modules/gerd/evaluate.pl:1.9 Mon Apr 10 21:12:39 2006
+++ modules/gerd/evaluate.pl Sun May 21 22:33:27 2006
@@ -73,6 +73,8 @@
my %fcipost=();
my %mpexpre=();
my %mpexpost=();
+my %mpexprescore=();
+my %mpexpostscore=();
my %gender=();
my %fcigain=();
my %mpex=();
@@ -89,7 +91,10 @@
$fcipre{$user}=$entries[8]/30;
}
foreach my $mp (1..34) {
- if ($entries[38+$mp]=~/\d/) { $mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4; }
+ if ($entries[38+$mp]=~/\d/) {
+ $mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4;
+ $mpexprescore{$mp.'.'.$entries[38+$mp]}++;
+ }
}
if ($entries[73]=~/\d/) {
$fcipost{$user}=$entries[73]/30;
@@ -103,7 +108,10 @@
$fci{$user}=$fcipre{$user};
}
foreach my $mp (1..34) {
- if ($entries[103+$mp]=~/\d/) { $mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4; }
+ if ($entries[103+$mp]=~/\d/) {
+ $mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4;
+ $mpexpostscore{$mp.'.'.$entries[103+$mp]}++;
+ }
}
foreach my $mp (1..34) {
if ((defined($mpexpost{$user.':'.$mp})) && (defined($mpexpre{$user.':'.$mp}))) {
@@ -271,6 +279,24 @@
&hashcorrelate(\%conceptual,'',\%mpexpost,$mpexidx,\@userkeys,'ConcPost');
}
print "\n";
+
+foreach my $mp (1..34) {
+ print
+ "\nPre $mp: ".$mpexprescore{"$mp.1"}."-".$mpexprescore{"$mp.2"}."-".$mpexprescore{"$mp.3"}."-".$mpexprescore{"$mp.4"}."-".$mpexprescore{"$mp.5"};
+ my $sum=$mpexprescore{"$mp.1"}+$mpexprescore{"$mp.2"}+$mpexprescore{"$mp.3"}+$mpexprescore{"$mp.4"}+$mpexprescore{"$mp.5"};
+ print " ($sum) ";
+ my $disagree=($mpexprescore{"$mp.1"}+$mpexprescore{"$mp.2"})/$sum;
+ my $agree=($mpexprescore{"$mp.4"}+$mpexprescore{"$mp.5"})/$sum;
+ print "D: $disagree A: $agree\n";
+ print "Post $mp: ". $mpexpostscore{"$mp.1"}."-".$mpexpostscore{"$mp.2"}."-".$mpexpostscore{"$mp.3"}."-".$mpexpostscore{"$mp.4"}."-".$mpexpostscore{"$mp.5"};
+ my $sum=$mpexpostscore{"$mp.1"}+$mpexpostscore{"$mp.2"}+$mpexpostscore{"$mp.3"}+$mpexpostscore{"$mp.4"}+$mpexpostscore{"$mp.5"};
+ print " ($sum) ";
+ $disagree=($mpexpostscore{"$mp.1"}+$mpexpostscore{"$mp.2"})/$sum;
+ $agree=($mpexpostscore{"$mp.4"}+$mpexpostscore{"$mp.5"})/$sum;
+ print "D: $disagree A: $agree\n";
+
+}
+
exit;
#
# Calculate correlation between two hashes
Index: modules/gerd/newevaluate.pl
+++ modules/gerd/newevaluate.pl
use strict;
# Read the discussions
#
# Every usable line of classified.csv has the shape
#     <user>,<anything>,<length>,<classes>
# where <classes> is a run of two-character codes (a digit followed by a
# word character, e.g. "3b5c").  The length of a contribution is divided
# evenly over all of its codes and accumulated per user and code in %evals;
# %totals accumulates the total length per user.
#
# The file is opened with three-argument open on a lexical handle, and a
# failure to open it is fatal instead of silently producing empty data.
open(my $classified_fh,'<','classified.csv')
    or die "Cannot open classified.csv: $!";
my %evals=();
my %totals=();
# Declared at file level on purpose: the compiled.csv reader further down
# reuses this variable.
my $line;
while ($line=<$classified_fh>) {
    chomp($line);
    my ($user,$length,$classes)=($line=~/^(\w+)\,.+\,(\d+)\,(\w+)$/);
    # Skip lines that do not match the pattern; a false user ("0") or a
    # length of zero is skipped as well.
    unless ($user) { next; }
    unless ($length) { next; }
    my @entries=($classes=~/(\d\w)/g);
    foreach (@entries) {
        # $#entries+1 is the number of codes attached to this contribution.
        $evals{$user.':'.$_}+=$length/($#entries+1);
    }
    $totals{$user}+=$length;
}
close($classified_fh);
# Per-user share of the discussion length for every classification cell
# (rows 1..8, columns a..d), followed by roll-ups of rows 3..8:
#   by column:   b -> %solution, c -> %math, d -> %physics
#   by row pair: 3,4 -> %surface, 5,6 -> %procedural, 7,8 -> %conceptual
my %discussion;
my %solution;
my %math;
my %physics;
my %surface;
my %procedural;
my %conceptual;
# Target hash followed by the cells it sums; the cells are added in exactly
# the listed order.
my @rollups=(
    [\%solution,   qw(3b 4b 5b 6b 7b 8b)],
    [\%math,       qw(3c 4c 5c 6c 7c 8c)],
    [\%physics,    qw(3d 4d 5d 6d 7d 8d)],
    [\%surface,    qw(3b 4b 3c 4c 3d 4d)],
    [\%procedural, qw(5b 6b 5c 6c 5d 6d)],
    [\%conceptual, qw(7b 8b 7c 8c 7d 8d)],
);
foreach my $user (keys %totals) {
    foreach my $row (1..8) {
        foreach my $column ('a'..'d') {
            my $cell=$user.':'.$row.$column;
            # Cells a user never contributed to come out as 0.
            $discussion{$cell}=$evals{$cell}/$totals{$user};
        }
    }
    foreach my $rollup (@rollups) {
        my ($target,@cells)=@$rollup;
        my $share=0;
        foreach my $cell (@cells) {
            $share+=$discussion{$user.':'.$cell};
        }
        $$target{$user}=$share;
    }
}
#
# MPEX Keys
#
# @expert is indexed by question number 1..34 (slot 0 is a placeholder) and
# holds the expert response for each question: 'A' or 'D'.
my @expert=('',
    qw(D D A D A A A),
    qw(D D D A D D D),
    qw(D D D A D D D),
    qw(D D D A A D D),
    qw(D A A A D A));
# Question numbers belonging to each cluster; the label in the trailing
# comment is the one the cluster is scored under when compiled.csv is read.
my @indcluster=(1,8,13,14,17,27);   # independence
my @cohcluster=(12,15,16,21,29);    # coherence
my @concluster=(4,19,26,27,32);     # concepts
my @reacluster=(10,18,22,25);       # realitylink
my @matcluster=(2,6,8,16,20);       # mathlink
my @effcluster=(3,6,7,24,31);       # effort
# Read the other info
#
# Columns of compiled.csv used here (0-based, after splitting on commas):
#     1         stored in %gender
#     4         user name (rows without a word character here are skipped)
#     7         grade, divided by 4
#     8         FCI pre-test score, divided by 30
#     39..72    MPEX pre-test answers to questions 1..34
#     73        FCI post-test score, divided by 30
#     104..137  MPEX post-test answers to questions 1..34
# MPEX answers are mapped with (answer-1)/4 (presumably 1..5 onto 0..1).
# After each row the six MPEX cluster scores of that user are computed.
#
# The file is opened with three-argument open on a lexical handle, and a
# failure to open it is fatal instead of silently producing empty data.
my %grade=();
my %fcipre=();
my %fcipost=();
my %mpexpre=();
my %mpexpost=();
my %gender=();
my %fcigain=();
my %fci=();
my %clusterscore=();
open(my $compiled_fh,'<','compiled.csv')
    or die "Cannot open compiled.csv: $!";
my $header=<$compiled_fh>; # header
while (my $record=<$compiled_fh>) {
    chomp($record);
    my @entries=split(/\,/,$record);
    my $user=$entries[4];
    unless ($user=~/\w/) { next; }
    $gender{$user}=$entries[1];
    $grade{$user}=$entries[7]/4;
    if ($entries[8]=~/\d/) {
        $fcipre{$user}=$entries[8]/30;
    }
    foreach my $mp (1..34) {
        if ($entries[38+$mp]=~/\d/) {
            $mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4;
        }
    }
    if ($entries[73]=~/\d/) {
        $fcipost{$user}=$entries[73]/30;
    }
    # NOTE(review): these are truth tests, so an FCI score of 0 is treated
    # like a missing score - confirm that this is intended.
    if (($fcipost{$user}) && ($fcipre{$user})) {
        # The difference post-pre lies in -1..1; halving and adding 0.5
        # moves the gain into 0..1.
        $fcigain{$user}=($fcipost{$user}-$fcipre{$user})/2+0.5;
        $fci{$user}=($fcipost{$user}+$fcipre{$user})/2;
    } elsif ($fcipost{$user}) {
        $fci{$user}=$fcipost{$user};
    } elsif ($fcipre{$user}) {
        $fci{$user}=$fcipre{$user};
    }
    foreach my $mp (1..34) {
        if ($entries[103+$mp]=~/\d/) {
            $mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4;
        }
    }
    calcscore($user,'independence',\@indcluster,\%mpexpre,\%mpexpost,\@expert,\%clusterscore);
    calcscore($user,'coherence',\@cohcluster,\%mpexpre,\%mpexpost,\@expert,\%clusterscore);
    calcscore($user,'concepts',\@concluster,\%mpexpre,\%mpexpost,\@expert,\%clusterscore);
    calcscore($user,'realitylink',\@reacluster,\%mpexpre,\%mpexpost,\@expert,\%clusterscore);
    calcscore($user,'mathlink',\@matcluster,\%mpexpre,\%mpexpost,\@expert,\%clusterscore);
    calcscore($user,'effort',\@effcluster,\%mpexpre,\%mpexpost,\@expert,\%clusterscore);
}
close($compiled_fh);
#
# All information read
# Variables between 0 and 1 for
# discussion
# fcipre
# fcipost
# mpexpre
# mpexpost
# grade
# fcigain
# conceptual
# procedural
# surface
# physics
# math
# solution
# (%fci and %clusterscore have been filled in above as well.)
# Sample is everybody who got a grade in the course
# NOTE(review): %grade receives an entry for every row of compiled.csv whose
# user column contains a word character, even when the grade column is
# empty - confirm that this is the intended sample.
my @userkeys=sort keys %grade;
# The script stops here: @userkeys is built but not used yet, and no
# correlation is computed or printed before this exit.
exit;
#
# Calculate correlation between two hashes
#
# Arguments:
#   $array1, $arrayidx1 - reference to the first hash and an optional key
#                         suffix (keys are "user" or "user:suffix")
#   $array2, $arrayidx2 - the same for the second hash
#   $userkeys           - reference to the list of users to include
#   $label              - text put in front of the result
#   $threshold          - smallest absolute correlation worth reporting
#                         (0.25 when false or omitted)
#   $round              - the result is rounded to multiples of 1/$round
#                         (10 when false or omitted)
# Returns " label: c (n)" where c is the rounded correlation and n the
# number of users with a value in both hashes; returns ('','') when the
# absolute correlation does not exceed the threshold.
#
sub hashcorrelate {
    my ($array1,$arrayidx1,$array2,$arrayidx2,$userkeys,$label,$threshold,$round)=@_;
    $round||=10;
    $threshold||=0.25;
    my ($xvalues,$yvalues)=buildarrays($array1,$arrayidx1,$array2,$arrayidx2,$userkeys);
    my ($c,$n)=correlation($xvalues,$yvalues);
    return ('','') unless abs($c)>$threshold;
    # int() truncates towards zero, so round half away from zero by
    # offsetting in the direction of the sign.
    my $offset=($c>0) ? 0.5 : -0.5;
    $c=int($c*$round+$offset)/$round;
    return " $label: $c ($n)";
}
#
# Build arrays x and y
#
# Pairs up the values of two hashes for every user that has a defined value
# in both.  Keys are the user name, with ":$arrayidx" appended when the
# respective index argument is true.
# Returns references to two arrays that are filled starting at index 1
# (index 0 stays unset); both arrays are empty when no user qualifies.
#
sub buildarrays {
    my ($array1,$arrayidx1,$array2,$arrayidx2,$userkeys)=@_;
    my $suffix1=$arrayidx1 ? ':'.$arrayidx1 : '';
    my $suffix2=$arrayidx2 ? ':'.$arrayidx2 : '';
    my (@x,@y);
    my $pairs=0;
    foreach my $user (@$userkeys) {
        my $key1=$user.$suffix1;
        my $key2=$user.$suffix2;
        next unless defined($$array1{$key1}) && defined($$array2{$key2});
        $pairs++;
        $x[$pairs]=$$array1{$key1};
        $y[$pairs]=$$array2{$key2};
    }
    return (\@x,\@y);
}
#
# Calculate score within a cluster
#
# Arguments:
#   $user         - user name
#   $idx          - cluster label, used in the result keys
#   $cluster      - reference to the list of question numbers in the cluster
#   $mpexpre      - reference to the hash of pre-test answers, keyed
#                   "user:question"
#   $mpexpost     - reference to the hash of post-test answers, same keys
#   $expert       - reference to the expert key ('A' or 'D'), indexed by
#                   question number
#   $clusterscore - reference to the hash that receives the results
# An answer counts as favorable when it is above 0.5 for an 'A' question or
# below 0.5 for a 'D' question; an answer of exactly 0.5 never counts.
# Results stored under "user:pre_<idx>" and "user:post_<idx>" are the
# fraction of answered questions that were favorable.  "user:gain_<idx>" is
# only stored when both exist and is (post-pre)/2+0.5.
#
# The answers are read through the $mpexpre/$mpexpost references that were
# passed in, not from file-level hashes of the same name.
#
sub calcscore {
    my ($user,$idx,$cluster,$mpexpre,$mpexpost,$expert,$clusterscore)=@_;
    my $presum=0;
    my $prenum=0;
    my $postsum=0;
    my $postnum=0;
    foreach my $question (@$cluster) {
        my $key=$user.':'.$question;
        if (defined($$mpexpre{$key})) {
            $prenum++;
            if (($$expert[$question] eq 'A') && ($$mpexpre{$key}>0.5)) { $presum++; }
            if (($$expert[$question] eq 'D') && ($$mpexpre{$key}<0.5)) { $presum++; }
        }
        if (defined($$mpexpost{$key})) {
            $postnum++;
            if (($$expert[$question] eq 'A') && ($$mpexpost{$key}>0.5)) { $postsum++; }
            if (($$expert[$question] eq 'D') && ($$mpexpost{$key}<0.5)) { $postsum++; }
        }
    }
    if ($prenum>0) {
        $$clusterscore{$user.':pre_'.$idx}=$presum/$prenum;
    }
    if ($postnum>0) {
        $$clusterscore{$user.':post_'.$idx}=$postsum/$postnum;
    }
    if (($postnum>0) && ($prenum>0)) {
        # The difference lies in -1..1; halving and adding 0.5 moves the
        # gain into 0..1.
        $$clusterscore{$user.':gain_'.$idx}=
            ($$clusterscore{$user.':post_'.$idx}-$$clusterscore{$user.':pre_'.$idx})/2+0.5;
    }
}
#
# Correlation between arrays x and y - from Wikipedia
#
# Single-pass computation of the correlation coefficient.
# Arguments: references to two arrays whose data start at index 1 (index 0
# is ignored), as produced by buildarrays.
# Returns (correlation, number of pairs).  The correlation is 0 when either
# array has no spread, which includes fewer than two pairs.  For empty input
# the number of pairs is reported as 0.
#
sub correlation {
    my ($x,$y)=@_;
    # With 1-based data the last index is the number of pairs; an empty
    # array has last index -1, which must not leak out as a count.
    my $n=$#$x;
    if ($n<0) { $n=0; }
    my $sum_sq_x = 0;
    my $sum_sq_y = 0;
    my $sum_coproduct = 0;
    my $mean_x = $$x[1];
    my $mean_y = $$y[1];
    foreach my $i (2 .. $n) {
        my $sweep = ($i - 1.0) / $i;
        my $delta_x = $$x[$i] - $mean_x;
        my $delta_y = $$y[$i] - $mean_y;
        $sum_sq_x += $delta_x * $delta_x * $sweep;
        $sum_sq_y += $delta_y * $delta_y * $sweep;
        $sum_coproduct += $delta_x * $delta_y * $sweep;
        # Running means over the first $i pairs.
        $mean_x += $delta_x / $i;
        $mean_y += $delta_y / $i;
    }
    # Avoid dividing by a zero standard deviation.
    if (($sum_sq_x==0) || ($sum_sq_y==0)) { return (0,$n); }
    my $pop_sd_x = sqrt( $sum_sq_x / $n );
    my $pop_sd_y = sqrt( $sum_sq_y / $n );
    my $cov_x_y=$sum_coproduct / $n;
    return ($cov_x_y / ($pop_sd_x * $pop_sd_y),$n);
}
--www1148265210--