[LON-CAPA-cvs] cvs: loncom /homework/cleanxml post_xml.pm

damieng damieng at source.lon-capa.org
Thu Nov 10 14:48:22 EST 2016


damieng		Thu Nov 10 19:48:22 2016 EDT

  Modified files:              
    /loncom/homework/cleanxml	post_xml.pm 
  Log:
  added removal of consecutive empty paragraphs
  
Index: loncom/homework/cleanxml/post_xml.pm
diff -u loncom/homework/cleanxml/post_xml.pm:1.9 loncom/homework/cleanxml/post_xml.pm:1.10
--- loncom/homework/cleanxml/post_xml.pm:1.9	Thu Jan 21 22:09:38 2016
+++ loncom/homework/cleanxml/post_xml.pm	Thu Nov 10 19:48:22 2016
@@ -1,7 +1,7 @@
 # The LearningOnline Network
 # Third step to clean a file.
 #
-# $Id: post_xml.pm,v 1.9 2016/01/21 22:09:38 damieng Exp $
+# $Id: post_xml.pm,v 1.10 2016/11/10 19:48:22 damieng Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -136,6 +136,8 @@
   
   fix_empty_lc_elements($root);
   
+  reduce_empty_p($root);
+  
   lowercase_attribute_values($root);
   
   replace_numericalresponse_unit_attribute($root);
@@ -2269,6 +2271,33 @@
   }
 }
 
+# remove consecutive empty paragraphs (they will not show anyway): when two <p> siblings separated only by whitespace are both empty, the first is removed; recurses into all child elements
+sub reduce_empty_p {
+  my ($node) = @_;  # node whose direct children are scanned (the caller passes the document root)
+  my $next;  # where the scan resumes; computed before $child can be detached
+  for (my $child=$node->firstChild; defined $child; $child=$next) {
+    $next = $child->nextSibling;
+    while (defined $next && $next->nodeType == XML_TEXT_NODE && $next->nodeValue =~ /^[ \t\f\n\r]*$/) {  # skip whitespace-only text nodes (they are left in place)
+      $next = $next->nextSibling;
+    }
+    if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p' && defined $next &&  # $child is a <p> ...
+        $next->nodeType == XML_ELEMENT_NODE && $next->nodeName eq 'p') {  # ... and so is the next non-whitespace sibling
+      my $first = $child->firstChild;
+      if (!defined $first || (!defined $first->nextSibling &&  # $child counts as empty: no children at all ...
+          $first->nodeType == XML_TEXT_NODE && $first->nodeValue =~ /^[ \t\f\n\r]*$/)) {  # ... or exactly one whitespace-only text node
+        $first = $next->firstChild;  # reuse $first to apply the same emptiness test to the following <p>
+        if (!defined $first || (!defined $first->nextSibling &&
+            $first->nodeType == XML_TEXT_NODE && $first->nodeValue =~ /^[ \t\f\n\r]*$/)) {
+          $node->removeChild($child);  # drop the earlier <p> of the pair; the later one becomes $child on the next iteration
+        }
+      }
+    }
+    if ($child->nodeType == XML_ELEMENT_NODE) {  # also true for a <p> removed above, which has no element children to visit
+      reduce_empty_p($child);
+    }
+  }
+}
+
 # turn some attribute values into lowercase when they should be
 sub lowercase_attribute_values {
   my ($root) = @_;




More information about the LON-CAPA-cvs mailing list