[LON-CAPA-cvs] cvs: loncom /interface lonfeedback.pm

faziophi faziophi@source.lon-capa.org
Tue, 22 Dec 2009 06:14:35 -0000


faziophi		Tue Dec 22 06:14:35 2009 EDT

  Modified files:              
    /loncom/interface	lonfeedback.pm 
  Log:
  -- Adds <table>, <tbody>, etc. to list of acceptable tags
  -- Adds HTML Tidy code to clean invalid HTML where needed
     **Commented out until the new HTML::Tidy::libXML dependency is added**
  
  
Index: loncom/interface/lonfeedback.pm
diff -u loncom/interface/lonfeedback.pm:1.282 loncom/interface/lonfeedback.pm:1.283
--- loncom/interface/lonfeedback.pm:1.282	Tue Dec  8 13:33:12 2009
+++ loncom/interface/lonfeedback.pm	Tue Dec 22 06:14:35 2009
@@ -1,7 +1,7 @@
 # The LearningOnline Network
 # Feedback
 #
-# $Id: lonfeedback.pm,v 1.282 2009/12/08 13:33:12 wenzelju Exp $
+# $Id: lonfeedback.pm,v 1.283 2009/12/22 06:14:35 faziophi Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -41,6 +41,7 @@
 use Apache::lonenc();
 use Apache::lonrss();
 use HTML::LCParser();
+#use HTML::Tidy::libXML;
 use Apache::lonspeller();
 use Apache::longroup;
 use Cwd;
@@ -2678,11 +2679,12 @@
       ($override)) {
       # allows <B> <I> <P> <A> <LI> <OL> <UL> <EM> <BR> <TT> <STRONG> 
       # <BLOCKQUOTE> <DIV .*> <DIV> <IMG> <M> <SPAN> <H1> <H2> <H3> <H4> <SUB>
-      # <SUP>
+      # <SUP> <TABLE> <TR> <TD> <TH> <TBODY> <ALGEBRA> <H5> <H6>
       %html=(B=>1, I=>1, P=>1, A=>1, LI=>1, OL=>1, UL=>1, EM=>1,
 	     BR=>1, TT=>1, STRONG=>1, BLOCKQUOTE=>1, DIV=>1, IMG=>1,
 	     M=>1, ALGEBRA=>1, SUB=>1, SUP=>1, SPAN=>1, 
-	     H1=>1, H2=>1, H3=>1, H4=>1, H5=>1);
+	     H1=>1, H2=>1, H3=>1, H4=>1, H5=>1, H6=>1, 
+	     TABLE=>1, TR=>1, TD=>1, TH=>1, TBODY=>1);
   }
 # Do the substitution of everything that is not explicitly allowed
   $message =~ s/\<(\/?\s*(\w+)[^\>\<]*)/
@@ -2958,9 +2960,21 @@
 		(
 		  [\s]*[\/]>|
 		  >.*<\/\1[\s]*>
-		)/x );
+		)/xs );
 }
-	
+
+sub tidy_html {
+	my ($message)=@_;
+# 	Disabled until HTML::Tidy::libXML is a dependency; $message is returned unchanged for now.
+# 	my $tidy = HTML::Tidy::libXML->new();
+# 	my $xhtml = $tidy->clean($message, 'utf-8', 1);
+# 	my ($clean) = $xhtml =~ m/<body>(.*)<\/body>/is;
+# 	return $message unless (defined($clean));	# no <body>...</body> found: keep the input
+# 	$clean =~ s/<(table|p|div|tbody|blockquote|m|pre|algebra|center|ol|ul|span|h1|h2|h3|h4|h5|h6)\s*\/>//gi;	# drop every empty block-level tag (needs /g)
+# 	$message=$clean;
+	return $message;
+}
+
 sub newline_to_br {
     my ($message)=@_;
     my $newmessage;
@@ -4145,6 +4159,8 @@
 
 =item newline_to_br()
 
+=item tidy_html()
+
 =item generate_preview_button()
 
 =item modify_attachments()