[LON-CAPA-cvs] cvs: modules /gerd/R unserialize.pl
www
www@source.lon-capa.org
Fri, 19 Jun 2009 13:13:05 -0000
This is a MIME encoded message
--www1245417185
Content-Type: text/plain
www Fri Jun 19 13:13:05 2009 EDT
Added files:
/modules/gerd/R unserialize.pl
Log:
Code by Paul Rubin to unserialize R output
--www1245417185
Content-Type: text/plain
Content-Disposition: attachment; filename="www-20090619131305.txt"
Index: modules/gerd/R/unserialize.pl
+++ modules/gerd/R/unserialize.pl
use Data::Dumper; # used to output hash contents
use Tie::IxHash::Easy; # autoties all subhashes to keep index order
#
# Rpeel takes a string containing serialized values from R,
# peels off the first syntactically complete unit (number, string or array),
# and returns a list (first unit, remainder).
#
sub Rpeel($) {
my $x = $_[0]; # the string containing the serialized R object(s)
if ($x =~ /^((?:i|d):(.+?);)(.*)$/) {
return ($1, $+); # x starts with a number
}
elsif ($x =~ /^s:(\d+):/) {
my $n = $1; # x starts with a string of length n
if ($x =~ /^(s:\d+:\"(.{$n})\";)(.*)$/) {
return ($1, $+); # x starts with a valid string
}
else {
die '!invalid string detected!';
}
}
elsif ($x =~ /^a:/) {
# x starts with an array -- need to find the closing brace
my $i = index $x, '{', 0; # position of first opening brace
if ($i < 0) {
die '!array with no opening brace';
}
my $open = 1; # counts open braces
my $j = index $x, '}', $i; # position of first closing brace
$i = index $x, '{', $i + 1; # position of next opening brace (if any)
my $pos = -1; # position of final closing brace
do {
if (($i < $j) && ($i > 0)) {
# encounter another opening brace before next closing brace
$open++;
$i = index $x, '{', $i + 1; # find the next opening brace
}
elsif ($j > 0) {
# next brace encountered is a closing brace
$open--;
$pos = $j;
$j = index $x, '}', $j + 1;
}
else {
die '!unmatched left brace!';
}
} until ($open eq 0);
# array runs from start to $pos
my $a = substr $x, 0, $pos + 1; # array
my $b = substr $x, $pos + 1; # remainder
return ($a, $b);
}
else {
die '!unrecognized R value!';
}
}
# --- end Rpeel ---
#
# Rreturn accepts a string containing a serialized R object
# and returns either the object's value (if it is scalar) or a reference
# to a hash containing the contents of the object. Any null keys in the hash
# are replaced by 'capaNNN' where NNN is the index of the entry in the original
# R array.
#
sub Rreturn($) {
my $x = $_[0]; # the string containing the serialized R object(s)
if ($x =~ /^(?:i|d):(.+?);$/) {
return $1; # return the value of the number
}
elsif ($x =~ /^s:(\d+):\"(.*)\";$/) {
# string -- verify the length
if (length($2) eq $1) {
return $2; # return the string
}
else {
die '!mismatch in string length';
}
}
elsif ($x =~ /^a:(\d+):\{(.*)\}$/) {
# array
my $dim = $1; # array size
$x = $2; # array contents
tie my %h, Tie::IxHash::Easy; # start a hash
keys(%h) = $dim; # allocate space for the hash
my $key;
my $y;
for (my $i = 0; $i < $dim; $i++) {
($y, $x) = &Rpeel($x); # strip off the entry for the key
if ($y eq '') {
die '!ran out of keys!';
}
$key = &Rreturn($y);
if ($key eq '') {
$key = "capa$i"; # correct null key
}
($y, $x) = &Rpeel($x); # strip off the value
if ($y eq '') {
die '!ran out of values!';
}
if ($y =~ /^a:/) {
$h{$key} = \&Rreturn($y); # array value: store as reference
}
else {
$h{$key} = &Rreturn($y); # scalar value: store the entry in the hash
}
}
return \%h; # return a reference to the hash
}
}
# --- end Rreturn ---
#
# Rentry takes a list of indices and gets the entry in a hash generated by Rreturn.
# Call: Rentry(Rvalue, index1, index2, ...) where Rvalue is a hash returned by Rreturn.
# Rentry will return the first scalar value it encounters (ignoring excess indices).
# If an invalid key is given, Rentry returns undef.
#
sub Rentry {
my $hash = shift; # pointer to hash
my $x;
my $i;
if (ref($hash) ne 'HASH') {
die '!argument to Rentry is not a hash!';
}
while ($i = shift) {
if (exists $hash->{$i}) {
$hash = $hash->{$i};
}
else {
return undef;
}
if (ref($hash) eq 'REF') {
$hash = $$hash; # dereference one layer
}
elsif (ref($hash) ne 'HASH') {
return $hash; # drilled down to a scalar
}
}
}
# --- end Rentry ---
# R output samples from MGT 914 hw5.R script
# sample scalar output: cor(..., ...) [correlation]
print "\n=== Scalar ===\n";
my $rout = "d:0.988603;";
print $rout, "\n";
print Rreturn($rout), "\n";
# numeric vector: residuals(...) [model residuals]
print "\n=== Vector ===\n";
my $rout = "a:8:{s:1:\"6\";d:-159.6375;s:1:\"7\";d:-2357.487;s:2:\"20\";d:2880.787;s:2:\"39\";d:1258.487;s:2:\"51\";d:-2291.788;s:2:\"71\";d:-2764.788;s:2:\"72\";d:-3285.062;s:2:\"76\";d:-663.7879;}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing individual values (including a nonexistent one)
print "50 => ", &Rentry($result, 50), "\n"; # nonexistent
print "51 => ", &Rentry($result, 51), "\n";
# two dimensional numeric matrix: cbind(..., ...) [stack columns side by side]
print "\n=== Matrix ===\n";
my $rout = "a:8:{i:1;a:2:{i:1;d:-159.6375;i:2;d:-159.6375;}i:2;a:2:{i:1;d:-2357.487;i:2;d:-2357.487;}i:3;a:2:{i:1;d:2880.787;i:2;d:2880.787;}i:4;a:2:{i:1;d:1258.487;i:2;d:1258.487;}i:5;a:2:{i:1;d:-2291.788;i:2;d:-2291.788;}i:6;a:2:{i:1;d:-2764.788;i:2;d:-2764.788;}i:7;a:2:{i:1;d:-3285.062;i:2;d:-3285.062;}i:8;a:2:{i:1;d:-663.7879;i:2;d:-663.7879;}}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing an individual row
print "row 5 = ", &Rentry($result, 5, 1), ", ", &Rentry($result, 5, 2), "\n";
# test object: t.test(...) [t-test result]
print "\n=== Test Result ===\n";
my $rout = "a:9:{s:9:\"statistic\";a:1:{s:1:\"t\";d:-0.3550185;}s:9:\"parameter\";a:1:{s:2:\"df\";d:82;}s:7:\"p.value\";d:0.7234873;s:8:\"conf.int\";a:2:{i:1;d:-1156.995;i:2;d:806.5719;}s:8:\"estimate\";a:2:{s:9:\"mean of x\";d:1751.872;s:9:\"mean of y\";d:1927.083;}s:10:\"null.value\";a:1:{s:19:\"difference in means\";d:0;}s:11:\"alternative\";s:9:\"two.sided\";s:6:\"method\";s:18:\" Two Sample t-test\";s:9:\"data.name\";s:9:\"d1 and d2\";}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing specific pieces
print "p-value = ", &Rentry($result, 'p.value'), "\n";
print "mean of y = ", &Rentry($result, 'estimate', 'mean of y'), "\n";
# linear regression model object: lm(...)
print "\n=== Linear Model ===\n";
my $rout = "a:12:{s:12:\"coefficients\";a:2:{s:11:\"(Intercept)\";d:20853.15;s:11:\"pct.hs.grad\";d:-159.9302;}s:9:\"residuals\";a:5:{s:1:\"1\";d:-531.3116;s:1:\"2\";d:440.1302;s:1:\"3\";d:463.2;s:1:\"4\";d:321.2;s:1:\"5\";d:-693.2186;}s:7:\"effects\";a:5:{s:11:\"(Intercept)\";d:-17662.25;s:11:\"pct.hs.grad\";d:1483.132;s:0:\"\";d:522.276;s:0:\"\";d:380.276;s:0:\"\";d:-925.689;}s:4:\"rank\";i:2;s:13:\"fitted.values\";a:5:{s:1:\"1\";d:9018.312;s:1:\"2\";d:7738.87;s:1:\"3\";d:7898.8;s:1:\"4\";d:7898.8;s:1:\"5\";d:6939.219;}s:6:\"assign\";a:2:{i:1;i:0;i:2;i:1;}s:2:\"qr\";a:5:{s:2:\"qr\";a:5:{i:1;a:2:{i:1;d:-2.236068;i:2;d:-181.1215;}i:2;a:2:{i:1;d:0.4472136;i:2;d:-9.273618;}i:3;a:2:{i:1;d:0.4472136;i:2;d:0.2332551;}i:4;a:2:{i:1;d:0.4472136;i:2;d:0.2332551;}i:5;a:2:{i:1;d:0.4472136;i:2;d:0.8802518;}}s:5:\"qraux\";a:2:{i:1;d:1.447214;i:2;d:1.341088;}s:5:\"pivot\";a:2:{i:1;i:1;i:2;i:2;}s:3:\"tol\";d:.0000001;s:4:\"rank\";i:2;}s:11:\"df.residual\";i:3;s:7:\"xlevels\";a:0:{}s:4:\"ca!
ll\";a:2:{i:1;s:2:\"lm\";i:2;s:29:\"crimes.per.100K ~ pct.hs.grad\";}s:5:\"terms\";a:3:{i:1;s:1:\"~\";i:2;s:15:\"crimes.per.100K\";i:3;s:11:\"pct.hs.grad\";}s:5:\"model\";a:2:{s:15:\"crimes.per.100K\";a:5:{i:1;i:8487;i:2;i:8179;i:3;i:8362;i:4;i:8220;i:5;i:6246;}s:11:\"pct.hs.grad\";a:5:{i:1;i:74;i:2;i:82;i:3;i:81;i:4;i:81;i:5;i:87;}}}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing an entry several levels deep
print "Second pivot entry = ", &Rentry($result, 'qr', 'pivot', 2), "\n";
# data frame
print "\n=== Data Frame ===\n";
my $rout = "a:2:{s:15:\"crimes.per.100K\";a:5:{i:1;i:8487;i:2;i:8179;i:3;i:8362;i:4;i:8220;i:5;i:6246;}s:11:\"pct.hs.grad\";a:5:{i:1;i:74;i:2;i:82;i:3;i:81;i:4;i:81;i:5;i:87;}}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing a particular observation
print "Third observation = ", &Rentry($result, 'crimes.per.100K', 3), ", ", &Rentry($result, 'pct.hs.grad', 3), "\n";
--www1245417185--