[LON-CAPA-cvs] cvs: modules /gerd evaluate.pl

www lon-capa-cvs@mail.lon-capa.org
Thu, 30 Mar 2006 14:24:47 -0000


www		Thu Mar 30 09:24:47 2006 EDT

  Modified files:              
    /modules/gerd	evaluate.pl 
  Log:
  Medium correlation between FCI and course grade??? At least it's positive.
  
  
Index: modules/gerd/evaluate.pl
diff -u modules/gerd/evaluate.pl:1.1 modules/gerd/evaluate.pl:1.2
--- modules/gerd/evaluate.pl:1.1	Thu Mar 30 08:29:53 2006
+++ modules/gerd/evaluate.pl	Thu Mar 30 09:24:45 2006
@@ -43,11 +43,15 @@
     unless ($user=~/\w/) { next; }
     $gender{$user}=$entries[1];
     $grade{$user}=$entries[7]/4;
-    $fcipre{$user}=$entries[8]/30;
+    if ($entries[8]=~/\d/) {
+       $fcipre{$user}=$entries[8]/30;
+    }
     foreach my $mp (1..34) {
-	$mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4;
+        if ($entries[38+$mp]=~/\d/) { $mpexpre{$user.':'.$mp}=($entries[38+$mp]-1)/4; }
+    }
+    if ($entries[73]=~/\d/) {
+       $fcipost{$user}=$entries[73]/30;
     }
-    $fcipost{$user}=$entries[73]/30;
     if (($fcipost{$user}) && ($fcipre{$user})) {
 	$fcigain{$user}=($fcipost{$user}-$fcipre{$user})/2+0.5;
 	$fci{$user}=($fcipost{$user}+$fcipre{$user})/2;
@@ -57,7 +61,7 @@
 	$fci{$user}=$fcipre{$user};
     }
     foreach my $mp (1..34) {
-	$mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4;
+	if ($entries[103+$mp]=~/\d/) { $mpexpost{$user.':'.$mp}=($entries[103+$mp]-1)/4; }
     }
     foreach my $mp (1..34) {
 	if (($mpexpost{$user.':'.$mp}) && ($mpexpre{$user.':'.$mp})) {
@@ -80,4 +84,67 @@
 # mpexpost
 # grade
 # fcigain
+
+# The sample consists of every user that has an entry in %grade
+my @userkeys=sort(keys(%grade));
+# Correlate %grade with %fci over that sample and print the returned values
+my @result=hashcorrelate(\%grade,'',\%fci,'',\@userkeys);
+print join(',',@result);
+exit;
+# Correlate two hashes over the given user keys: the paired values are
+# collected by buildarrays() and handed on to correlation().
+sub hashcorrelate {
+   my ($hash1,$subkey1,$hash2,$subkey2,$sample)=@_;
+   my @pairs=buildarrays($hash1,$subkey1,$hash2,$subkey2,$sample);
+   return correlation(@pairs);
+}
+
 #
+# Build the 1-based arrays x and y (slot 0 stays unused) from two hashes, keeping
+# only users whose value is true in both hashes; returns (\@x,\@y).
+sub buildarrays {
+   my ($array1,$arrayidx1,$array2,$arrayidx2,$userkeys)=@_;
+   my $index=0;   # pre-incremented below, so the first pair lands in slot 1
+   my @x=();
+   my @y=();
+   foreach my $user (@$userkeys) {
+      my $firstindex=$user;    # hash key is "user", or "user:idx" when idx is given
+      if ($arrayidx1) { $firstindex.=':'.$arrayidx1; }
+      my $secondindex=$user;
+      if ($arrayidx2) { $secondindex.=':'.$arrayidx2; }
+      if (($$array1{$firstindex}) && ($$array2{$secondindex})) {   # skips missing and zero values
+         $index++;
+         $x[$index]=$$array1{$firstindex};
+         $y[$index]=$$array2{$secondindex};
+      }
+   }
+   return (\@x,\@y);
+}
+
+# Pearson correlation of the 1-based arrays @$x and @$y (slot 0 is ignored), using
+# the single-pass algorithm from Wikipedia. Returns (r, n); r is undef when there
+# are no data points or when either series has zero variance (r is undefined then).
+sub correlation {
+    my ($x,$y)=@_;
+    my $n = $#$x;                   # last index = number of points in a 1-based array
+    return (undef, 0) if $n < 1;    # no data: the divisions by $n below would die
+    my ($sum_sq_x,$sum_sq_y,$sum_coproduct) = (0,0,0);
+    my $mean_x = $$x[1];
+    my $mean_y = $$y[1];
+    foreach my $i (2 .. $n) {
+       my $sweep = ($i - 1.0) / $i;
+       my $delta_x = $$x[$i] - $mean_x;
+       my $delta_y = $$y[$i] - $mean_y;
+       $sum_sq_x += $delta_x * $delta_x * $sweep;
+       $sum_sq_y += $delta_y * $delta_y * $sweep;
+       $sum_coproduct += $delta_x * $delta_y * $sweep;
+       $mean_x += $delta_x / $i;    # running means, updated after the sums
+       $mean_y += $delta_y / $i;
+    }
+    my $pop_sd_x = sqrt( $sum_sq_x / $n );
+    my $pop_sd_y = sqrt( $sum_sq_y / $n );
+    my $cov_x_y=$sum_coproduct / $n;
+    my $sd_product = $pop_sd_x * $pop_sd_y;
+    return (undef, $n) if $sd_product == 0;   # constant series: avoid dividing by zero
+    return ($cov_x_y / $sd_product, $n);
+}