[LON-CAPA-cvs] cvs: loncom /homework/cleanxml post_xml.pm
damieng
damieng at source.lon-capa.org
Thu Nov 10 14:48:22 EST 2016
damieng Thu Nov 10 19:48:22 2016 EDT
Modified files:
/loncom/homework/cleanxml post_xml.pm
Log:
added removal of consecutive empty paragraphs
Index: loncom/homework/cleanxml/post_xml.pm
diff -u loncom/homework/cleanxml/post_xml.pm:1.9 loncom/homework/cleanxml/post_xml.pm:1.10
--- loncom/homework/cleanxml/post_xml.pm:1.9 Thu Jan 21 22:09:38 2016
+++ loncom/homework/cleanxml/post_xml.pm Thu Nov 10 19:48:22 2016
@@ -1,7 +1,7 @@
# The LearningOnline Network
# Third step to clean a file.
#
-# $Id: post_xml.pm,v 1.9 2016/01/21 22:09:38 damieng Exp $
+# $Id: post_xml.pm,v 1.10 2016/11/10 19:48:22 damieng Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -136,6 +136,8 @@
fix_empty_lc_elements($root);
+ reduce_empty_p($root);
+
lowercase_attribute_values($root);
replace_numericalresponse_unit_attribute($root);
@@ -2269,6 +2271,33 @@
}
}
+# Collapse runs of empty paragraphs (extra ones would not be displayed anyway).
+# Whenever an empty <p> is followed - ignoring whitespace-only text nodes - by
+# another empty <p>, the earlier one is removed, so only the last paragraph of
+# each run is kept. A <p> is considered empty when it has no child at all, or
+# a single text child made only of whitespace.
+# The function walks the children of $node and then recurses into every
+# element child, so the whole subtree is processed.
+sub reduce_empty_p {
+  my ($node) = @_;
+  my $blank_re = qr/^[ \t\f\n\r]*$/;
+  # true for a text node that contains nothing but whitespace
+  my $is_blank_text = sub {
+    my ($n) = @_;
+    return $n->nodeType == XML_TEXT_NODE && $n->nodeValue =~ $blank_re;
+  };
+  # true for a <p> element without content (a lone whitespace text is allowed)
+  my $is_empty_p = sub {
+    my ($n) = @_;
+    return 0 if $n->nodeType != XML_ELEMENT_NODE || $n->nodeName ne 'p';
+    my $only = $n->firstChild;
+    return 1 if !defined $only;
+    return !defined $only->nextSibling && $is_blank_text->($only);
+  };
+  my $child = $node->firstChild;
+  while (defined $child) {
+    # look up the following sibling before any removal, skipping blank text
+    my $following = $child->nextSibling;
+    while (defined $following && $is_blank_text->($following)) {
+      $following = $following->nextSibling;
+    }
+    if (defined $following && $is_empty_p->($child) && $is_empty_p->($following)) {
+      $node->removeChild($child);
+    }
+    # descend into elements; a child removed above is an empty <p>, so
+    # visiting it has no effect
+    reduce_empty_p($child) if $child->nodeType == XML_ELEMENT_NODE;
+    $child = $following;
+  }
+}
+
# turn some attribute values into lowercase when they should be
sub lowercase_attribute_values {
my ($root) = @_;
More information about the LON-CAPA-cvs
mailing list