[LON-CAPA-cvs] cvs: modules /gerd evaluate.pl newevaluate.pl

www lon-capa-cvs@mail.lon-capa.org
Mon, 22 May 2006 02:33:30 -0000


This is a MIME encoded message

--www1148265210
Content-Type: text/plain

www		Sun May 21 22:33:30 2006 EDT

  Added files:                 
    /modules/gerd	newevaluate.pl 

  Modified files:              
    /modules/gerd	evaluate.pl 
  Log:
  Some more work on MPEX paper
  
  
--www1148265210
Content-Type: text/plain
Content-Disposition: attachment; filename="www-20060521223330.txt"

Index: modules/gerd/evaluate.pl
diff -u modules/gerd/evaluate.pl:1.9 modules/gerd/evaluate.pl:1.10
--- modules/gerd/evaluate.pl:1.9	Mon Apr 10 21:12:39 2006
+++ modules/gerd/evaluate.pl	Sun May 21 22:33:27 2006
@@ -73,6 +73,8 @@
 my %fcipost=();
 my %mpexpre=();
 my %mpexpost=();
+my %mpexprescore=();
+my %mpexpostscore=();
 my %gender=();
 my %fcigain=();
 my %mpex=();
@@ -89,7 +91,10 @@
        $fcipre{$user}=$entries[8]/30;
     }
     foreach my $mp (1..34) {
-        if ($entries[38+$mp]=~/\d/) { $mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4; }
+        if ($entries[38+$mp]=~/\d/) { 
+	    $mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4;
+	    $mpexprescore{$mp.'.'.$entries[38+$mp]}++; 
+	}
     }
     if ($entries[73]=~/\d/) {
        $fcipost{$user}=$entries[73]/30;
@@ -103,7 +108,10 @@
 	$fci{$user}=$fcipre{$user};
     }
     foreach my $mp (1..34) {
-	if ($entries[103+$mp]=~/\d/) { $mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4; }
+	if ($entries[103+$mp]=~/\d/) { 
+	    $mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4;
+	    $mpexpostscore{$mp.'.'.$entries[103+$mp]}++; 
+	}
     }
     foreach my $mp (1..34) {
 	if ((defined($mpexpost{$user.':'.$mp})) && (defined($mpexpre{$user.':'.$mp}))) {
@@ -271,6 +279,24 @@
        &hashcorrelate(\%conceptual,'',\%mpexpost,$mpexidx,\@userkeys,'ConcPost');
 }
 print "\n";
+
+foreach my $mp (1..34) {
+    print 
+  "\nPre $mp: ".$mpexprescore{"$mp.1"}."-".$mpexprescore{"$mp.2"}."-".$mpexprescore{"$mp.3"}."-".$mpexprescore{"$mp.4"}."-".$mpexprescore{"$mp.5"};
+    my $sum=$mpexprescore{"$mp.1"}+$mpexprescore{"$mp.2"}+$mpexprescore{"$mp.3"}+$mpexprescore{"$mp.4"}+$mpexprescore{"$mp.5"};
+    print " ($sum) ";
+    my $disagree=($mpexprescore{"$mp.1"}+$mpexprescore{"$mp.2"})/$sum;
+    my $agree=($mpexprescore{"$mp.4"}+$mpexprescore{"$mp.5"})/$sum;
+    print "D: $disagree A: $agree\n";
+    print "Post $mp: ". $mpexpostscore{"$mp.1"}."-".$mpexpostscore{"$mp.2"}."-".$mpexpostscore{"$mp.3"}."-".$mpexpostscore{"$mp.4"}."-".$mpexpostscore{"$mp.5"};
+    my $sum=$mpexpostscore{"$mp.1"}+$mpexpostscore{"$mp.2"}+$mpexpostscore{"$mp.3"}+$mpexpostscore{"$mp.4"}+$mpexpostscore{"$mp.5"};
+    print " ($sum) ";
+    $disagree=($mpexpostscore{"$mp.1"}+$mpexpostscore{"$mp.2"})/$sum;
+    $agree=($mpexpostscore{"$mp.4"}+$mpexpostscore{"$mp.5"})/$sum;
+    print "D: $disagree A: $agree\n";
+
+}
+
 exit;
 #
 # Calculate correlation between two hashes

Index: modules/gerd/newevaluate.pl
+++ modules/gerd/newevaluate.pl
use strict;
# Read the discussions
#
# Each usable line of classified.csv must match
#     user , <anything> , length , classes
# where "classes" is scanned for digit+word-character codes (e.g. "3b").
# The line's length is split evenly over all codes found on that line and
# accumulated per "user:code" in %evals; the full length is accumulated per
# user in %totals.  Lines without a user or with a zero/empty length are
# skipped.
my %evals=();
my %totals=();
my $line;   # file-scoped: the compiled.csv reader further down reuses it
# A missing input file would otherwise silently yield empty results.
open(my $classified,'<','classified.csv')
    or die "Cannot open classified.csv: $!";
while ($line=<$classified>) {
    chomp($line);
    my ($user,$length,$classes)=($line=~/^(\w+)\,.+\,(\d+)\,(\w+)$/);
    unless ($user) { next; }
    unless ($length) { next; }
    my @entries=($classes=~/(\d\w)/g);
    foreach (@entries) {
	# $#entries+1 is the number of codes on this line
	$evals{$user.':'.$_}+=$length/($#entries+1);
    }
    $totals{$user}+=$length;
}
close($classified);
# %discussion: "user:<number><letter>" => share of that user's total length
# attributed to the code, for numbers 1..8 and letters a..d.
my %discussion;
# Per-user aggregates, each the sum of six %discussion entries.
my %solution;
my %math;
my %physics;
my %surface;
my %procedural;
my %conceptual;
# Target hash followed by the codes it sums, in summation order.
my @aggregates=(
    [\%solution,   qw(3b 4b 5b 6b 7b 8b)],
    [\%math,       qw(3c 4c 5c 6c 7c 8c)],
    [\%physics,    qw(3d 4d 5d 6d 7d 8d)],
    [\%surface,    qw(3b 4b 3c 4c 3d 4d)],
    [\%procedural, qw(5b 6b 5c 6c 5d 6d)],
    [\%conceptual, qw(7b 8b 7c 8c 7d 8d)],
);
foreach my $user (keys %totals) {
    my $total=$totals{$user};
    # Normalise every code's accumulated length by the user's total length.
    foreach my $number (1..8) {
        foreach my $char ('a'..'d') {
            my $code=$user.':'.$number.$char;
            $discussion{$code}=$evals{$code}/$total;
        }
    }
    # Fill each aggregate hash for this user.
    foreach my $aggregate (@aggregates) {
        my ($target,@codes)=@$aggregate;
        my $sum=0;
        foreach my $code (@codes) {
            $sum+=$discussion{$user.':'.$code};
        }
        $$target{$user}=$sum;
    }
}
#
# MPEX Keys
#
# @expert is indexed by question number 1..34 (index 0 is an empty
# placeholder).  calcscore() counts an answer as matching the key when the
# key is 'A' and the scaled answer is above 0.5, or the key is 'D' and the
# scaled answer is below 0.5.
# NOTE(review): 'A'/'D' presumably stand for agree/disagree - confirm.
#
my @expert=('',
            qw(D D A D A A A),
            qw(D D D A D D D),
            qw(D D D A D D D),
            qw(D D D A A D D),
            qw(D A A A D A));
# Question numbers belonging to each cluster; the reader loop passes them to
# calcscore() under the labels given on the right.
my @indcluster = (1, 8, 13, 14, 17, 27);    # 'independence'
my @cohcluster = (12, 15, 16, 21, 29);      # 'coherence'
my @concluster = (4, 19, 26, 27, 32);       # 'concepts'
my @reacluster = (10, 18, 22, 25);          # 'realitylink'
my @matcluster = (2, 6, 8, 16, 20);         # 'mathlink'
my @effcluster = (3, 6, 7, 24, 31);         # 'effort'
# Read the other info
#
# compiled.csv has one header line followed by comma-separated rows.
# Columns used (0-based): 1 gender, 4 user, 7 grade (stored divided by 4),
# 8 FCI pre score and 73 FCI post score (stored divided by 30),
# 39..72 MPEX pre answers and 104..137 MPEX post answers (stored as
# (answer-1)/4).  Rows whose user column contains no word character are
# skipped.
# NOTE(review): the divisors suggest grade 0..4, FCI 0..30 and MPEX answers
# 1..5 - confirm against the data.
# A missing input file would otherwise silently yield empty results.
open(my $compiled,'<','compiled.csv')
    or die "Cannot open compiled.csv: $!";
my %grade=();
my %fcipre=();
my %fcipost=();
my %mpexpre=();
my %mpexpost=();
my %gender=();
my %fcigain=();
my %fci=();
my %clusterscore=();

# Cluster label => question list, scored for every user via calcscore().
my @clusters=(
    ['independence',\@indcluster],
    ['coherence',   \@cohcluster],
    ['concepts',    \@concluster],
    ['realitylink', \@reacluster],
    ['mathlink',    \@matcluster],
    ['effort',      \@effcluster],
);

$line=<$compiled>; # header
while ($line=<$compiled>) {
    chomp($line);
    my @entries=split(/\,/,$line);
    my $user=$entries[4];
    unless ($user=~/\w/) { next; }
    $gender{$user}=$entries[1];
    $grade{$user}=$entries[7]/4;
    if ($entries[8]=~/\d/) {
        $fcipre{$user}=$entries[8]/30;
    }
    foreach my $mp (1..34) {
        if ($entries[38+$mp]=~/\d/) {
            $mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4;
        }
    }
    if ($entries[73]=~/\d/) {
        $fcipost{$user}=$entries[73]/30;
    }
    # A score of 0 is a real score, so presence is tested with defined()
    # rather than by truth value.
    my $havepre=defined($fcipre{$user});
    my $havepost=defined($fcipost{$user});
    if ($havepost && $havepre) {
        # Gain is mapped from [-1,1] onto [0,1]; fci is the pre/post mean.
        $fcigain{$user}=($fcipost{$user}-$fcipre{$user})/2+0.5;
        $fci{$user}=($fcipost{$user}+$fcipre{$user})/2;
    } elsif ($havepost) {
        $fci{$user}=$fcipost{$user};
    } elsif ($havepre) {
        $fci{$user}=$fcipre{$user};
    }
    foreach my $mp (1..34) {
        if ($entries[103+$mp]=~/\d/) {
            $mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4;
        }
    }
    foreach my $cluster (@clusters) {
        my ($label,$questions)=@$cluster;
        &calcscore($user,$label,$questions,\%mpexpre,\%mpexpost,\@expert,\%clusterscore);
    }
}
close($compiled);
#
# All information read
# Variables between 0 and 1 for
# discussion
# fcipre
# fcipost
# mpexpre
# mpexpost
# grade
# fcigain
# conceptual
# procedural
# surface
# physics
# math
# solution

# Sample is everybody who got a grade in the course
# @userkeys is the sorted list of the keys of %grade.
# NOTE(review): %grade receives an entry for every row with a user name,
# even when the grade column is empty - confirm that this is the intended
# sample.
my @userkeys=sort keys %grade;

# Stop here; everything below this point is subroutine definitions.
exit;
#
# Calculate correlation between two hashes
#
# Arguments:
#   $array1, $arrayidx1 - hash reference and optional key suffix (x values)
#   $array2, $arrayidx2 - hash reference and optional key suffix (y values)
#   $userkeys           - reference to the list of users to consider
#   $label              - text placed in front of the result
#   $threshold          - minimum absolute correlation to report
#                         (0.25 when false or omitted)
#   $round              - result is rounded to multiples of 1/$round
#                         (10 when false or omitted)
# Returns " $label: <rounded correlation> (<sample count>)", or a list of
# two empty strings when the absolute correlation does not exceed the
# threshold.
#
sub hashcorrelate {
   my ($array1,$arrayidx1,$array2,$arrayidx2,$userkeys,$label,$threshold,$round)=@_;
   $round=10 unless $round;
   $threshold=0.25 unless $threshold;
   my @pairs=&buildarrays($array1,$arrayidx1,$array2,$arrayidx2,$userkeys);
   my ($coefficient,$count)=&correlation(@pairs);
   return ('','') unless abs($coefficient)>$threshold;
   # Round half away from zero.
   my $nudge=($coefficient>0) ? 0.5 : -0.5;
   my $rounded=int($coefficient*$round+$nudge)/$round;
   return " $label: $rounded ($count)";
}

#
# Build arrays x and y
#
# For every user in @$userkeys the key "user" (or "user:<idx>" when the
# matching index argument is true) is looked up in both hashes.  Users with
# a defined value in both contribute one pair.  The pairs are stored from
# index 1 upwards; index 0 is never assigned.
# Returns (\@x,\@y).
#
sub buildarrays {
   my ($array1,$arrayidx1,$array2,$arrayidx2,$userkeys)=@_;
   my @x=();
   my @y=();
   my $count=0;
   foreach my $user (@$userkeys) {
      my $key1=$arrayidx1 ? $user.':'.$arrayidx1 : $user;
      my $key2=$arrayidx2 ? $user.':'.$arrayidx2 : $user;
      next unless defined($array1->{$key1});
      next unless defined($array2->{$key2});
      $count++;
      $x[$count]=$array1->{$key1};
      $y[$count]=$array2->{$key2};
   }
   return (\@x,\@y);
}

#
# Calculate score within a cluster
#
# Arguments:
#   $user         - user name (prefix of the answer keys)
#   $idx          - cluster label used in the result keys
#   $cluster      - reference to the list of question numbers
#   $mpexpre      - reference to hash "user:question" => scaled pre answer
#   $mpexpost     - reference to hash "user:question" => scaled post answer
#   $expert       - reference to the key array ('A' or 'D' per question)
#   $clusterscore - reference to the hash receiving the results
# For pre and post separately, the score is the fraction of answered
# questions in the cluster where the answer matches the key ('A' with an
# answer above 0.5, 'D' with an answer below 0.5).  Results are stored as
# "user:pre_<idx>" and "user:post_<idx>"; when both exist,
# "user:gain_<idx>" is (post-pre)/2+0.5.  Nothing is stored for a phase
# without answered questions.
#
sub calcscore {
    my ($user,$idx,$cluster,$mpexpre,$mpexpost,$expert,$clusterscore)=@_;
    my %fraction=();
    foreach my $phase (['pre',$mpexpre],['post',$mpexpost]) {
        my ($name,$answers)=@$phase;
        my $matching=0;
        my $answered=0;
        foreach my $question (@$cluster) {
            # Read through the passed reference, not through a file-level
            # hash that merely happens to share the parameter's name.
            my $answer=$$answers{$user.':'.$question};
            unless (defined($answer)) { next; }
            $answered++;
            my $key=$$expert[$question];
            if (($key eq 'A') && ($answer>0.5)) { $matching++; }
            if (($key eq 'D') && ($answer<0.5)) { $matching++; }
        }
        if ($answered>0) {
            $fraction{$name}=$matching/$answered;
            $$clusterscore{$user.':'.$name.'_'.$idx}=$fraction{$name};
        }
    }
    if ((defined($fraction{'post'})) && (defined($fraction{'pre'}))) {
        $$clusterscore{$user.':gain_'.$idx}=
            ($fraction{'post'}-$fraction{'pre'})/2+0.5;
    }
    return;
}

#
# Correlation between arrays x and y - from Wikipedia
#
# Arguments: references to two arrays whose data start at index 1 (element
# 0 is ignored), as produced by buildarrays().  The last index of @$x is
# used as the sample count.
# Returns (correlation, sample count).  The correlation is 0 when either
# sum of squared deviations is zero (including fewer than two samples).
# An empty input reports a sample count of 0.
#
sub correlation {
    my ($x,$y)=@_;
    my $n = $#$x;
    if ($n<0) { $n=0; }   # empty array: $#$x is -1, but there are 0 samples
    my $sum_sq_x = 0;
    my $sum_sq_y = 0;
    my $sum_coproduct = 0;
    my $mean_x = $$x[1];
    my $mean_y = $$y[1];
    # Single pass: update the running means and the sums of squared
    # deviations / co-deviations together.
    foreach my $i (2 .. $n) {
       my $sweep = ($i - 1.0) / $i;
       my $delta_x = $$x[$i] - $mean_x;
       my $delta_y = $$y[$i] - $mean_y;
       $sum_sq_x += $delta_x * $delta_x * $sweep;
       $sum_sq_y += $delta_y * $delta_y * $sweep;
       $sum_coproduct += $delta_x * $delta_y * $sweep;
       $mean_x += $delta_x / $i;
       $mean_y += $delta_y / $i;
    }
    if (($sum_sq_x==0) || ($sum_sq_y==0)) { return (0,$n); }
    my $pop_sd_x = sqrt( $sum_sq_x / $n );
    my $pop_sd_y = sqrt( $sum_sq_y / $n );
    my $cov_x_y=$sum_coproduct / $n;
    return ($cov_x_y / ($pop_sd_x * $pop_sd_y),$n);
}

--www1148265210--