[LON-CAPA-cvs] cvs: loncom /interface loncommon.pm

Sat, 11 Aug 2007 15:18:24 -0000

www		Sat Aug 11 11:18:24 2007 EDT

  Modified files:              
    /loncom/interface	loncommon.pm 
  Log:
  The previous version destroyed the space in
  "foo, bar"
  while, before that change,
  "foo", "bar"
  would not work.
  
  Formatting of CSV files is not under our control, and we cannot just ignore
  bad formatting.
  
  Looks like one needs to step through character by character. This version was
  tested with
  
  "Entry A", Entry B,  'Entry C, this is', 'Entry "D"'
  Susie,Fred",    Junk "here"  , Test
  "Test , ,, ",  'Test ,,,",','Test'
  
  and appears to deal gracefully with the bad syntax.
  
  
Index: loncom/interface/loncommon.pm
diff -u loncom/interface/loncommon.pm:1.560 loncom/interface/loncommon.pm:1.561

--- loncom/interface/loncommon.pm:1.560	Sat Aug 11 10:22:14 2007
+++ loncom/interface/loncommon.pm	Sat Aug 11 11:18:24 2007
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # a pile of common routines
 #
-# $Id: loncommon.pm,v 1.560 2007/08/11 14:22:14 www Exp $
+# $Id: loncommon.pm,v 1.561 2007/08/11 15:18:24 www Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -5994,28 +5994,50 @@
             $i++;
         }
     } else {
-        my @allfields;
+        my $separator=',';
         if ($env{'form.upfiletype'} eq 'semisv') {
-            @allfields=split(/\s*;\s*/,$record,-1);
-        } else {
-            @allfields=split(/\s*\,\s*/,$record,-1);
+            $separator=';';
         }
         my $i=0;
-        my $j;
-        for ($j=0;$j<=$#allfields;$j++) {
-            my $field=$allfields[$j];
-            if ($field=~/^\s*(\"|\')/) {
-		my $delimiter=$1;
-                while (($field!~/$delimiter$/) && ($j<$#allfields)) {
-		    $j++;
-		    $field.=','.$allfields[$j];
-		}
-                $field=~s/^\s*$delimiter//;
-                $field=~s/$delimiter\s*$//;
-            }
-            $components{&takeleft($i)}=$field;
-	    $i++;
+# the character we are looking for to indicate the end of a quote or a field
+        my $looking_for=$separator;
+# do not add the characters to the fields
+        my $ignore=0;
+# we just encountered a separator (or the beginning of the record)
+        my $just_found_separator=1;
+# store the field we are working on here
+        my $field='';
+# work our way through all characters in record
+        foreach my $character ($record=~/(.)/g) {
+            if ($character eq $looking_for) {
+               if ($character ne $separator) {
+# Found the end of a quote, again looking for separator
+                  $looking_for=$separator;
+                  $ignore=1;
+               } else {
+# Found a separator, store away what we got
+                  $components{&takeleft($i)}=$field;
+	          $i++;
+                  $just_found_separator=1;
+                  $ignore=0;
+                  $field='';
+               }
+               next;
+            }
+# single or double quotation marks after a separator indicate beginning of a quote
+# we are now looking for the end of the quote and need to ignore separators
+            if ((($character eq '"') || ($character eq "'")) && ($just_found_separator))  {
+               $looking_for=$character;
+               next;
+            }
+# ignore would be true after we reached the end of a quote
+            if ($ignore) { next; }
+            if (($just_found_separator) && ($character=~/\s/)) { next; }
+            $field.=$character;
+            $just_found_separator=0; 
         }
+# catch the very last entry, since we never encountered the separator
+        $components{&takeleft($i)}=$field;
     }
     return %components;
 }