[LON-CAPA-cvs] cvs: modules /gerd/R unserialize.pl

www www@source.lon-capa.org
Fri, 19 Jun 2009 13:13:05 -0000


This is a MIME encoded message

--www1245417185
Content-Type: text/plain

www		Fri Jun 19 13:13:05 2009 EDT

  Added files:                 
    /modules/gerd/R	unserialize.pl 
  Log:
  Code by Paul Rubin to unserialize R output
  
  
--www1245417185
Content-Type: text/plain
Content-Disposition: attachment; filename="www-20090619131305.txt"


Index: modules/gerd/R/unserialize.pl
+++ modules/gerd/R/unserialize.pl
use Data::Dumper;  # used to output hash contents
use Tie::IxHash::Easy; # autoties all subhashes to keep index order

#
# Rpeel takes a string containing serialized values from R, 
# peels off the first syntactically complete unit (number, string or array),
# and returns a list (first unit, remainder).
#
sub Rpeel($) {
	my $x = $_[0];  # the string containing the serialized R object(s)
	if ($x =~ /^((?:i|d):(.+?);)(.*)$/) {
		return ($1, $+);  # x starts with a number
	}
	elsif ($x =~ /^s:(\d+):/) {
		my $n = $1;  # x starts with a string of length n
		if ($x =~ /^(s:\d+:\"(.{$n})\";)(.*)$/) {
			return ($1, $+);  # x starts with a valid string
		}
		else {
			die '!invalid string detected!';
		}
	}
	elsif ($x =~ /^a:/) {
		# x starts with an array -- need to find the closing brace
		my $i = index $x, '{', 0;  # position of first opening brace
		if ($i < 0) {
			die '!array with no opening brace';
		}
		my $open = 1;  # counts open braces
		my $j = index $x, '}', $i; # position of first closing brace
		$i = index $x, '{', $i + 1; # position of next opening brace (if any)
		my $pos = -1;  # position of final closing brace
		do {
			if (($i < $j) && ($i > 0)) {
				# encounter another opening brace before next closing brace
				$open++;
				$i = index $x, '{', $i + 1;  # find the next opening brace
			}
			elsif ($j > 0) {
				# next brace encountered is a closing brace
				$open--;
				$pos = $j;
				$j = index $x, '}', $j + 1;
			}
			else {
				die '!unmatched left brace!';			
			}			
		} until ($open eq 0);
		# array runs from start to $pos
		my $a = substr $x, 0, $pos + 1;  # array
		my $b = substr $x, $pos + 1;     # remainder
		return ($a, $b);
	}
	else {
		die '!unrecognized R value!';
	}
}
# --- end Rpeel ---

#
# Rreturn accepts a string containing a serialized R object
# and returns either the object's value (if it is scalar) or a reference
# to a hash containing the contents of the object.  Any null keys in the hash
# are replaced by 'capaNNN' where NNN is the index of the entry in the original
# R array.
#
sub Rreturn($) {
	my $x = $_[0];  # the string containing the serialized R object(s)
	if ($x =~ /^(?:i|d):(.+?);$/) {
		return $1;  # return the value of the number		
	}
	elsif ($x =~ /^s:(\d+):\"(.*)\";$/) {
		# string -- verify the length
		if (length($2) eq $1) {
			return $2;  # return the string
		}
		else {
			die '!mismatch in string length';
		}
	}
	elsif ($x =~ /^a:(\d+):\{(.*)\}$/) {
		# array
		my $dim = $1;  # array size
		$x = $2;  # array contents
		tie my %h, Tie::IxHash::Easy; # start a hash
		keys(%h) = $dim; # allocate space for the hash
		my $key;
		my $y;
		for (my $i = 0; $i < $dim; $i++) {
			($y, $x) = &Rpeel($x);  # strip off the entry for the key
			if ($y eq '') {
				die '!ran out of keys!';
			}
			$key = &Rreturn($y);
			if ($key eq '') {
				$key = "capa$i";  # correct null key
			}
			($y, $x) = &Rpeel($x);  # strip off the value
			if ($y eq '') {
				die '!ran out of values!';
			}
			if ($y =~ /^a:/) {
				$h{$key} = \&Rreturn($y);  # array value: store as reference
			}
			else {
			$h{$key} = &Rreturn($y);  # scalar value: store the entry in the hash				
			}
		}
		return \%h;  # return a reference to the hash
	}
}
# --- end Rreturn ---

#
# Rentry takes a list of indices and gets the entry in a hash generated by Rreturn.
# Call: Rentry(Rvalue, index1, index2, ...) where Rvalue is a hash returned by Rreturn.
# Rentry will return the first scalar value it encounters (ignoring excess indices).
# If an invalid key is given, Rentry returns undef.
#
sub Rentry {
	my $hash = shift;  # pointer to hash
	my $x;
	my $i;
	if (ref($hash) ne 'HASH') {
		die '!argument to Rentry is not a hash!';
	}
	while ($i = shift) {
		if (exists $hash->{$i}) {
			$hash = $hash->{$i};
		}
		else {
			return undef;
		}
		if (ref($hash) eq 'REF') {
			$hash = $$hash;  # dereference one layer
		}
		elsif (ref($hash) ne 'HASH') {
			return $hash;  # drilled down to a scalar
		}
	}
}
# --- end Rentry ---


# R output samples from MGT 914 hw5.R script

# sample scalar output: cor(..., ...) [correlation]
print "\n=== Scalar ===\n";
my $rout = "d:0.988603;";
print $rout, "\n";
print Rreturn($rout), "\n";

# numeric vector: residuals(...) [model residuals]
print "\n=== Vector ===\n";
my $rout = "a:8:{s:1:\"6\";d:-159.6375;s:1:\"7\";d:-2357.487;s:2:\"20\";d:2880.787;s:2:\"39\";d:1258.487;s:2:\"51\";d:-2291.788;s:2:\"71\";d:-2764.788;s:2:\"72\";d:-3285.062;s:2:\"76\";d:-663.7879;}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing individual values (including a nonexistent one)
print "50 => ", &Rentry($result, 50), "\n";  # nonexistent
print "51 => ", &Rentry($result, 51), "\n";

# two dimensional numeric matrix: cbind(..., ...) [stack columns side by side]
print "\n=== Matrix ===\n";
my $rout = "a:8:{i:1;a:2:{i:1;d:-159.6375;i:2;d:-159.6375;}i:2;a:2:{i:1;d:-2357.487;i:2;d:-2357.487;}i:3;a:2:{i:1;d:2880.787;i:2;d:2880.787;}i:4;a:2:{i:1;d:1258.487;i:2;d:1258.487;}i:5;a:2:{i:1;d:-2291.788;i:2;d:-2291.788;}i:6;a:2:{i:1;d:-2764.788;i:2;d:-2764.788;}i:7;a:2:{i:1;d:-3285.062;i:2;d:-3285.062;}i:8;a:2:{i:1;d:-663.7879;i:2;d:-663.7879;}}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing an individual row
print "row 5 = ", &Rentry($result, 5, 1), ", ", &Rentry($result, 5, 2), "\n";

# test object: t.test(...) [t-test result]
print "\n=== Test Result ===\n";
my $rout = "a:9:{s:9:\"statistic\";a:1:{s:1:\"t\";d:-0.3550185;}s:9:\"parameter\";a:1:{s:2:\"df\";d:82;}s:7:\"p.value\";d:0.7234873;s:8:\"conf.int\";a:2:{i:1;d:-1156.995;i:2;d:806.5719;}s:8:\"estimate\";a:2:{s:9:\"mean of x\";d:1751.872;s:9:\"mean of y\";d:1927.083;}s:10:\"null.value\";a:1:{s:19:\"difference in means\";d:0;}s:11:\"alternative\";s:9:\"two.sided\";s:6:\"method\";s:18:\" Two Sample t-test\";s:9:\"data.name\";s:9:\"d1 and d2\";}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing specific pieces
print "p-value = ", &Rentry($result, 'p.value'), "\n";
print "mean of y = ", &Rentry($result, 'estimate', 'mean of y'), "\n";

# linear regression model object: lm(...)
print "\n=== Linear Model ===\n";
my $rout = "a:12:{s:12:\"coefficients\";a:2:{s:11:\"(Intercept)\";d:20853.15;s:11:\"pct.hs.grad\";d:-159.9302;}s:9:\"residuals\";a:5:{s:1:\"1\";d:-531.3116;s:1:\"2\";d:440.1302;s:1:\"3\";d:463.2;s:1:\"4\";d:321.2;s:1:\"5\";d:-693.2186;}s:7:\"effects\";a:5:{s:11:\"(Intercept)\";d:-17662.25;s:11:\"pct.hs.grad\";d:1483.132;s:0:\"\";d:522.276;s:0:\"\";d:380.276;s:0:\"\";d:-925.689;}s:4:\"rank\";i:2;s:13:\"fitted.values\";a:5:{s:1:\"1\";d:9018.312;s:1:\"2\";d:7738.87;s:1:\"3\";d:7898.8;s:1:\"4\";d:7898.8;s:1:\"5\";d:6939.219;}s:6:\"assign\";a:2:{i:1;i:0;i:2;i:1;}s:2:\"qr\";a:5:{s:2:\"qr\";a:5:{i:1;a:2:{i:1;d:-2.236068;i:2;d:-181.1215;}i:2;a:2:{i:1;d:0.4472136;i:2;d:-9.273618;}i:3;a:2:{i:1;d:0.4472136;i:2;d:0.2332551;}i:4;a:2:{i:1;d:0.4472136;i:2;d:0.2332551;}i:5;a:2:{i:1;d:0.4472136;i:2;d:0.8802518;}}s:5:\"qraux\";a:2:{i:1;d:1.447214;i:2;d:1.341088;}s:5:\"pivot\";a:2:{i:1;i:1;i:2;i:2;}s:3:\"tol\";d:.0000001;s:4:\"rank\";i:2;}s:11:\"df.residual\";i:3;s:7:\"xlevels\";a:0:{}s:4:\"ca!
 ll\";a:2:{i:1;s:2:\"lm\";i:2;s:29:\"crimes.per.100K ~ pct.hs.grad\";}s:5:\"terms\";a:3:{i:1;s:1:\"~\";i:2;s:15:\"crimes.per.100K\";i:3;s:11:\"pct.hs.grad\";}s:5:\"model\";a:2:{s:15:\"crimes.per.100K\";a:5:{i:1;i:8487;i:2;i:8179;i:3;i:8362;i:4;i:8220;i:5;i:6246;}s:11:\"pct.hs.grad\";a:5:{i:1;i:74;i:2;i:82;i:3;i:81;i:4;i:81;i:5;i:87;}}}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing an entry several levels deep
print "Second pivot entry = ", &Rentry($result, 'qr', 'pivot', 2), "\n";

# data frame
print "\n=== Data Frame ===\n";
my $rout = "a:2:{s:15:\"crimes.per.100K\";a:5:{i:1;i:8487;i:2;i:8179;i:3;i:8362;i:4;i:8220;i:5;i:6246;}s:11:\"pct.hs.grad\";a:5:{i:1;i:74;i:2;i:82;i:3;i:81;i:4;i:81;i:5;i:87;}}";
print $rout, "\n";
my $result = &Rreturn($rout);
print Dumper($result), "\n";
# accessing a particular observation
print "Third observation = ", &Rentry($result, 'crimes.per.100K', 3), ", ", &Rentry($result, 'pct.hs.grad', 3), "\n";

--www1245417185--