[LON-CAPA-cvs] cvs: loncom /homework lonr.pm

www www@source.lon-capa.org
Fri, 19 Jun 2009 14:03:19 -0000


www		Fri Jun 19 14:03:19 2009 EDT

  Modified files:              
    /loncom/homework	lonr.pm 
  Log:
  Paul Rubin's code to unserialize R objects
  - use tie::ixhash::easy commented out for now. Not sure if we actually want
  that => code will be defunct
  - needs better error handling, original code had 'die'
  
  
Index: loncom/homework/lonr.pm
diff -u loncom/homework/lonr.pm:1.4 loncom/homework/lonr.pm:1.5
--- loncom/homework/lonr.pm:1.4	Sat Apr 18 23:43:50 2009
+++ loncom/homework/lonr.pm	Fri Jun 19 14:03:19 2009
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # Interface routines to R CAS
 #
-# $Id: lonr.pm,v 1.4 2009/04/18 23:43:50 www Exp $
+# $Id: lonr.pm,v 1.5 2009/06/19 14:03:19 www Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -33,6 +33,150 @@
 use Apache::lonnet;
 use Apache::response();
 use LONCAPA;
+### Commented out for now: use Tie::IxHash::Easy; # autoties all subhashes to keep index order
+
+my $errormsg='';
+
+#
+# Rcroak: for use with R-error messages
+#
+sub Rcroak {
+   $errormsg=$_[0];
+}
+
+#
+#
+# Rpeel takes a string containing serialized values from R, 
+# peels off the first syntactically complete unit (number, string or array),
+# and returns a list (first unit, remainder).
+#
+sub Rpeel {
+        my $x = $_[0];  # the string containing the serialized R object(s)
+        if ($x =~ /^((?:i|d):(.+?);)(.*)$/) {
+                return ($1, $+);  # x starts with a number
+        }
+        elsif ($x =~ /^s:(\d+):/) {
+                my $n = $1;  # x starts with a string of length n
+                if ($x =~ /^(s:\d+:\"(.{$n})\";)(.*)$/) {
+                        return ($1, $+);  # x starts with a valid string
+                } else {
+                        &Rcroak('invalid string detected');
+                }
+        }
+        elsif ($x =~ /^a:/) {
+                # x starts with an array -- need to find the closing brace
+                my $i = index $x, '{', 0;  # position of first opening brace
+                if ($i < 0) {
+                        &Rcroak('array with no opening brace');
+                }
+                my $open = 1;  # counts open braces
+                my $j = index $x, '}', $i; # position of first closing brace
+                $i = index $x, '{', $i + 1; # position of next opening brace (if any)
+                my $pos = -1;  # position of final closing brace
+                do {
+                        if (($i < $j) && ($i > 0)) {
+                                # encounter another opening brace before next closing brace
+                                $open++;
+                                $i = index $x, '{', $i + 1;  # find the next opening brace
+                        } elsif ($j > 0) {
+                                # next brace encountered is a closing brace
+                                $open--;
+                                $pos = $j;
+                                $j = index $x, '}', $j + 1;
+                        } else {
+                                &Rcroak('unmatched left brace');
+                        }
+                } until ($open eq 0);
+                # array runs from start to $pos
+                my $a = substr $x, 0, $pos + 1;  # array
+                my $b = substr $x, $pos + 1;     # remainder
+                return ($a, $b);
+        } else {
+                &Rcroak('unrecognized R value');
+        }
+}
+# --- end Rpeel ---
+
+#
+# Rreturn accepts a string containing a serialized R object
+# and returns either the object's value (if it is scalar) or a reference
+# to a hash containing the contents of the object.  Any null keys in the hash
+# are replaced by 'capaNNN' where NNN is the index of the entry in the original
+# R array.
+#
+sub Rreturn {
+        my $x = $_[0];  # the string containing the serialized R object(s)
+        $errormsg='';
+        if ($x =~ /^(?:i|d):(.+?);$/) {
+                return $1;  # return the value of the number
+        } elsif ($x =~ /^s:(\d+):\"(.*)\";$/) {
+                # string -- verify the length
+                if (length($2) eq $1) {
+                        return $2;  # return the string
+                } else {
+                        return 'mismatch in string length';
+                }
+        } elsif ($x =~ /^a:(\d+):\{(.*)\}$/) {
+                # array
+                my $dim = $1;  # array size
+                $x = $2;  # array contents
+                tie(my %h,'Tie::IxHash::Easy'); # start a hash
+                keys(%h) = $dim; # allocate space for the hash
+                my $key;
+                my $y;
+                for (my $i = 0; $i < $dim; $i++) {
+                        ($y, $x) = &Rpeel($x);  # strip off the entry for the key
+                        if ($y eq '') {
+                                &Rcroak('ran out of keys');
+                        }
+                        $key = &Rreturn($y);
+                        if ($key eq '') {
+                                $key = "capa$i";  # correct null key
+                        }
+                        ($y, $x) = &Rpeel($x);  # strip off the value
+                        if ($y eq '') {
+                                &Rcroak('ran out of values');
+                        }
+                        if ($y =~ /^a:/) {
+                                $h{$key} = \&Rreturn($y);  # array value: store as reference
+                        } else {
+                        $h{$key} = &Rreturn($y);  # scalar value: store the entry in the hash
+                        }
+                }
+                if ($errormsg) { return $errormsg; }
+                return \%h;  # return a reference to the hash
+        }
+}
+# --- end Rreturn ---
+
+#
+# Rentry takes a list of indices and gets the entry in a hash generated by Rreturn.
+# Call: Rentry(Rvalue, index1, index2, ...) where Rvalue is a hash returned by Rreturn.
+# Rentry will return the first scalar value it encounters (ignoring excess indices).
+# If an invalid key is given, Rentry returns undef.
+#
+sub Rentry {
+        my $hash = shift;  # pointer to hash
+        my $x;
+        my $i;
+        if (ref($hash) ne 'HASH') {
+                &Rcroak('argument to Rentry is not a hash');
+        }
+        while ($i = shift) {
+                if (exists $hash->{$i}) {
+                   $hash = $hash->{$i};
+                } else {
+                   return undef;
+                }
+                if (ref($hash) eq 'REF') {
+                   $hash = $$hash;  # dereference one layer
+                } elsif (ref($hash) ne 'HASH') {
+                   return $hash;  # drilled down to a scalar
+                }
+        }
+}
+# --- end Rentry ---
+
 
 sub connect {
    return IO::Socket::UNIX->new(Peer    => $Apache::lonnet::perlvar{'lonSockDir'}.'/rsock',