[LON-CAPA-cvs] cvs: loncom / loncapa_apache.conf /imspackages imsprocessor.pm /publisher loncleanup.pm /xml lonxml.pm doc/loncapafiles loncapafiles.lpml

www lon-capa-cvs@mail.lon-capa.org
Sat, 28 May 2005 01:32:35 -0000


This is a MIME encoded message

--www1117243955
Content-Type: text/plain

www		Fri May 27 21:32:35 2005 EDT

  Added files:                 
    /loncom/publisher	loncleanup.pm 

  Modified files:              
    /doc/loncapafiles	loncapafiles.lpml 
    /loncom	loncapa_apache.conf 
    /loncom/imspackages	imsprocessor.pm 
    /loncom/xml	lonxml.pm 
  Log:
  Put all XML/HTML cleanup into separate handler
  
  
--www1117243955
Content-Type: text/plain
Content-Disposition: attachment; filename="www-20050527213235.txt"

Index: doc/loncapafiles/loncapafiles.lpml
diff -u doc/loncapafiles/loncapafiles.lpml:1.420 doc/loncapafiles/loncapafiles.lpml:1.421
--- doc/loncapafiles/loncapafiles.lpml:1.420	Thu Apr  7 02:56:20 2005
+++ doc/loncapafiles/loncapafiles.lpml	Fri May 27 21:32:31 2005
@@ -2,7 +2,7 @@
  "http://lpml.sourceforge.net/DTD/lpml.dtd">
 <!-- loncapafiles.lpml -->
 
-<!-- $Id: loncapafiles.lpml,v 1.420 2005/04/07 06:56:20 albertel Exp $ -->
+<!-- $Id: loncapafiles.lpml,v 1.421 2005/05/28 01:32:31 www Exp $ -->
 
 <!--
 
@@ -3050,6 +3050,15 @@
 <status>works/unverified</status>
 </file>
 <file>
+<source>loncom/publisher/loncleanup.pm</source>
+<target dist='default'>home/httpd/lib/perl/Apache/loncleanup.pm</target>
+<categoryname>handler</categoryname>
+<description>
+Handler to clean up HTML files.
+</description>
+<status>works/unverified</status>
+</file>
+<file>
 <source>doc/man/Apache__lonretrieve.3</source>
 <target dist='default'>usr/man/man3/Apache::lonretrieve.3</target>
 <categoryname>doc</categoryname>
Index: loncom/loncapa_apache.conf
diff -u loncom/loncapa_apache.conf:1.108 loncom/loncapa_apache.conf:1.109
--- loncom/loncapa_apache.conf:1.108	Fri Apr 22 17:03:52 2005
+++ loncom/loncapa_apache.conf	Fri May 27 21:32:32 2005
@@ -1,7 +1,7 @@
 ##
 ## loncapa_apache.conf -- Apache HTTP LON-CAPA configuration file
 ##
-## $Id: loncapa_apache.conf,v 1.108 2005/04/22 21:03:52 albertel Exp $
+## $Id: loncapa_apache.conf,v 1.109 2005/05/28 01:32:32 www Exp $
 ##
 
 #
@@ -559,6 +559,16 @@
 ErrorDocument	  500 /adm/errorhandler
 </Location>
 
+<Location /adm/cleanup>
+PerlAccessHandler       Apache::lonacc
+SetHandler perl-script
+PerlHandler Apache::loncleanup
+ErrorDocument     403 /adm/login
+ErrorDocument     404 /adm/notfound.html
+ErrorDocument     406 /adm/unauthorized
+ErrorDocument	  500 /adm/errorhandler
+</Location>
+
 <Location /adm/cfile>
 PerlAccessHandler       Apache::lonacc
 SetHandler perl-script
Index: loncom/imspackages/imsprocessor.pm
diff -u loncom/imspackages/imsprocessor.pm:1.20 loncom/imspackages/imsprocessor.pm:1.21
--- loncom/imspackages/imsprocessor.pm:1.20	Tue May  3 14:38:37 2005
+++ loncom/imspackages/imsprocessor.pm	Fri May 27 21:32:32 2005
@@ -24,6 +24,7 @@
 package Apache::imsprocessor;
 
 use Apache::lonnet;
+use Apache::loncleanup;
 use LWP::UserAgent;
 use HTTP::Request::Common;
 use LONCAPA::Configuration;
@@ -2864,7 +2865,7 @@
         }
         if ($$settings{$id}{texttype} eq 'text/html') {
             $$settings{$id}{text} = &HTML::Entities::decode($$settings{$id}{text});
-            $$settings{$id}{text} = &Apache::lonxml::htmlclean($$settings{$id}{text});
+            $$settings{$id}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{text});
             $$settings{$id}{text} =~ s#(<img src=["']?)([^>]+)(/?>)#$1../../resfiles/$2 />#gi;
             $$settings{$id}{text} =~ s#<([bh])r>#<$1r />#g;
 #            $$settings{$id}{text} =~ s#<p>#</p><p>#g;
@@ -2954,7 +2955,7 @@
                             }
                             if ($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{texttype} eq 'text/html') {
                                 $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &HTML::Entities::decode($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
-                                $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::lonxml::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
+                                $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
                                 $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~  s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
                                 $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~  s#</?p>##g;
 
@@ -2996,7 +2997,7 @@
                             }
                             if ($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{texttype} eq 'text/html') {
                                 $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &HTML::Entities::decode($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
-                                $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::lonxml::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
+                                $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
 
                                 $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~  s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
                                 $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~  s#</?p>##g;
@@ -3028,7 +3029,7 @@
                              
                             $$settings{$id}{$grp}{$answer_id}{text} = &HTML::Entities::decode($$settings{$id}{$grp}{$answer_id}{text});
                             $test_for_html = &test_for_html($$settings{$id}{$grp}{$answer_id}{text});
-                            $$settings{$id}{$grp}{$answer_id}{text} = &Apache::lonxml::chtmlclean($$settings{$id}{$grp}{$answer_id}{text});
+                            $$settings{$id}{$grp}{$answer_id}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$grp}{$answer_id}{text});
                             $$settings{$id}{$grp}{$answer_id}{text} =~  s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
                             $$settings{$id}{$grp}{$answer_id}{text} =~  s#</?p>##g;
                         }
@@ -3091,7 +3092,7 @@
                 for (my $k=0; $k<@{$$allchoices{$id}}; $k++) {
                     if ($$settings{$id}{$$allchoices{$id}[$k]}{texttype} eq 'text/html') {
                         $$settings{$id}{$$allchoices{$id}[$k]}{text} = &HTML::Entities::decode($$settings{$id}{$$allchoices{$id}[$k]}{text});
-                        $$settings{$id}{$$allchoices{$id}[$k]}{text} = &Apache::lonxml::htmlclean($$settings{$id}{$$allchoices{$id}[$k]}{text});
+                        $$settings{$id}{$$allchoices{$id}[$k]}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$$allchoices{$id}[$k]}{text});
                         $$settings{$id}{$$allchoices{$id}[$k]}{text} =~  s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
                         $$settings{$id}{$$allchoices{$id}[$k]}{text} =~  s#</?p>##g;
                     }
Index: loncom/xml/lonxml.pm
diff -u loncom/xml/lonxml.pm:1.373 loncom/xml/lonxml.pm:1.374
--- loncom/xml/lonxml.pm:1.373	Tue May  3 02:44:56 2005
+++ loncom/xml/lonxml.pm	Fri May 27 21:32:33 2005
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # XML Parser Module 
 #
-# $Id: lonxml.pm,v 1.373 2005/05/03 06:44:56 albertel Exp $
+# $Id: lonxml.pm,v 1.374 2005/05/28 01:32:33 www Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -382,33 +382,6 @@
  return $finaloutput;
 }
 
-sub htmlclean {
-    my ($raw,$full)=@_;
-# Take care of CRLF etc
-
-    $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
-    $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
-    $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
-    $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
-
-# Generate empty tags, remove wrong end tags
-    $raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis;
-    $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
-    unless ($full) {
-       $raw=~s/\<[\/]*(body|head|html)\>//gis;
-    }
-# Make standard tags lowercase
-    foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
-             'table','tr','td','th','p','br','hr','img','embed','font',
-             'a','strong','center','title','basefont','li','ol','ul',
-             'input','select','form','option','script','pre') {
-	$raw=~s/\<$_\s*\>/\<$_\>/gis;
-        $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
-        $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
-    }
-    return $raw;
-}
-
 sub latex_special_symbols {
     my ($string,$where)=@_;
     if ($where eq 'header') {
@@ -1238,10 +1211,7 @@
 	  $xml_help=&Apache::loncommon::helpLatexCheatsheet();
       }
       my $cleanbut = '';
-      if ($filetype eq 'html') {
-	  $cleanbut='<input type="submit" name="attemptclean" value="'.
-	      &mt('Save and then attempt to clean HTML').'" />';
-      }
+
       my $titledisplay=&display_title();
       my %lt=&Apache::lonlocal::texthash('st' => 'Save this',
 					 'vi' => 'View',
@@ -1329,7 +1299,7 @@
 # Edit action? Save file.
 #
     unless ($env{'request.state'} eq 'published') {
-	if (($env{'form.savethisfile'}) || ($env{'form.attemptclean'})) {
+	if ($env{'form.savethisfile'}) {
 	    if (&storefile($file,$env{'form.filecont'})) {
 		&Apache::lonxml::info("<font COLOR=\"#0000FF\">".
 				      &mt('Updated').": ".
@@ -1368,10 +1338,6 @@
 	    if ($filecontents=~/BEGIN LON-CAPA Internal/) {
 		&Apache::lonxml::error(&mt('This file appears to be a rendering of a Lon-CAPA resource. If this is correct, this resource will act very oddly and incorrectly.'));
 	    }
-
-	    if ($env{'form.attemptclean'}) {
-		$filecontents=&htmlclean($filecontents,1);
-	    }
 #
 # we are in construction space, see if edit mode forced
             &Apache::loncommon::get_unprocessed_cgi

Index: loncom/publisher/loncleanup.pm
+++ loncom/publisher/loncleanup.pm
# The LearningOnline Network with CAPA
# Handler to clean up XML/HTML files
#
# $Id: loncleanup.pm,v 1.1 2005/05/28 01:32:33 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#
###

package Apache::loncleanup;

use strict;
use Apache::File;
use File::Copy;
use Apache::Constants qw(:common :http :methods);
use Apache::loncacc;
use Apache::loncommon();
use Apache::lonlocal;
use Apache::lonnet;

# Translate text that was typeset in the "Symbol" font into LaTeX markup.
#
# Parameter: $symbolfont - the raw text found inside a symbol-font element;
#            it may contain plain characters and numeric entities (&#NNN;).
# Returns:   the text with every character/entity that has an entry in the
#            lookup table replaced by its LaTeX equivalent; anything without
#            an entry is passed through unchanged.
sub latextrans {
    my $symbolfont=shift;
    my %latexsymb=(
        '&#177;' => '\pm',
        '&#180;' => '\times',
        '&#184;' => '\div',
        '&#210;' => '(R)',
        '&#211;' => '\copy',
        '&#216;' => '\neg',
        '&#226;' => '(R)',
        '&#227;' => '\copy',
        '&#166;' => 'f',
        'A' => '\Alpha',
        'B' => '\Beta',
        'G' => '\Gamma',
        'D' => '\Delta',
        'E' => '\Epsilon',
        'Z' => '\Zeta',
        'H' => '\Eta',
        'Q' => '\Theta',
        'I' => '\Iota',
        'K' => '\Kappa',
        'L' => '\Lambda',
        'M' => '\Mu',
        'N' => '\Nu',
        'X' => '\Xi',
        'O' => '\Omicron',
        'P' => '\Pi',
        'R' => '\Rho',
        'S' => '\Sigma',
        'T' => '\Tau',
        'U' => 'Y',
        'F' => '\Phi',
        'C' => '\Chi',
        'Y' => '\Psi',
        'W' => '\Omega',
        'a' => '\alpha',
        'b' => '\beta',
        'g' => '\gamma',
        'd' => '\delta',
        'e' => '\epsilon',
        'z' => '\zeta',
        'h' => '\eta',
        'q' => '\theta',
        'i' => '\iota',
        'k' => '\kappa',
        'l' => '\lambda',
        'm' => '\mu',
        'n' => '\nu',
        'x' => '\xi',
        'o' => '\omicron',
        'p' => '\pi',
        'r' => '\rho',
        'V' => '\sigmaf',
        's' => '\sigma',
        't' => '\tau',
        'u' => '\upsilon',
        'f' => '\phi',
        'c' => '\chi',
        'y' => '\psi',
        'w' => '\omega',
        'J' => '\vartheta',
        'j' => '\varphi',
        'v' => '\varpi',
        '&#161;' => '\Upsilon',
        '&#162;' => "'",
        '&#164;' => '/',
        '&#178;' => '"',
        '&#188;' => '\ldots',
        '&#192;' => '\aleph',
        '&#193;' => '\Im',
        '&#194;' => '\Re',
        '&#195;' => '\wp',
        '&#212;' => '^{TM}',
        '&#228;' => '^{TM}',
        '&#240;' => 'EUR',
        '&#171;' => '\leftrightarrow',
        '&#172;' => '\leftarrow',
        '&#173;' => '\uparrow',
        '&#174;' => '\rightarrow',
        '&#175;' => '\downarrow',
        '&#191;' => '\hookleftarrow',
        '&#219;' => '\Leftrightarrow',
        '&#220;' => '\Leftarrow',
        '&#221;' => '\Uparrow',
        '&#222;' => '\Rightarrow',
        '&#223;' => '\Downarrow',
        '&#34;' => '\forall',
        '&#36;' => '\exists',
        '&#39;' => '\ni',
        '&#42;' => '\ast',
        '&#45;' => '-',
        '&#64;' => '\cong',
        '&#92;' => '\therefore',
        '&#94;' => '\perp',
        '&#126;' => '\sim',
        '&#163;' => '\leq',
        '&#165;' => '\infty',
        '&#179;' => '\geq',
        '&#181;' => '\propto',
        '&#182;' => '\partial',
        '&#183;' => '\cdot',
        '&#185;' => '\not=',
        '&#186;' => '\equiv',
        '&#187;' => '\approx',
        '&#196;' => '\otimes',
        '&#197;' => '\oplus',
        '&#198;' => '\emptyset',
        '&#199;' => '\cap',
        '&#200;' => '\cup',
        '&#201;' => '\supset',
        '&#202;' => '\supseteq',
        '&#203;' => '\not\subset',
        '&#204;' => '\subset',
        '&#205;' => '\subseteq',
        '&#206;' => '\in',
        '&#207;' => '\not\in',
        '&#208;' => '\angle',
        '&#209;' => '\nabla',
        '&#213;' => '\prod',
        '&#214;' => '\surd',
        '&#215;' => '\cdot',
        '&#217;' => '\wedge',
        '&#218;' => '\vee',
        '&#229;' => '\sum',
        '&#242;' => '\int',
        '&#225;' => '\langle',
        '&#241;' => '\rangle',
        '&#224;' => '\diamondsuit',
        '&#167;' => '\clubsuit',
        '&#168;' => '\diamondsuit',
        '&#169;' => '\heartsuit',
        '&#170;' => '\spadesuit'
    );
    my $output='';
    my $char='';       # token being assembled: one character or one entity
    my $entitymode=0;  # true while between '&' and the terminating ';'
    for (my $i=0; $i<length($symbolfont); $i++) {
        my $newchar=substr($symbolfont,$i,1);
        $char.=$newchar;
        if ($newchar eq '&') { $entitymode=1; }
        # Keep collecting until the entity is closed by ';'
        if (($entitymode) && ($newchar ne ';')) { next; }
        if (exists($latexsymb{$char})) {
            $output.=$latexsymb{$char};
        } else {
            $output.=$char;
        }
        $char='';
        $entitymode=0;
    }
    # An '&' that was never closed by ';' leaves text in the buffer; emit it
    # verbatim rather than silently dropping the tail of the input.
    $output.=$char;
    return $output;
}

# Build the replacement for one symbol-font element.
#
# Parameters (positional): the attribute text that preceded the face
# attribute, the attribute text that followed it, and the element content.
# Returns: a <font> start tag carrying only the remaining attributes,
# followed by the content translated by latextrans and wrapped in <m>$...$</m>.
sub insidetrans {
    my ($before,$after,$content)=@_;
    my $math=&latextrans($content);
    return "<font${before}${after}><m>\$${math}\$</m>";
}

# Replace the content of every <font face="symbol"> element in $text by its
# LaTeX translation (see insidetrans).
#
# Parameter: $text - HTML text to process.
# Returns:   the processed text; closing font tags are written back as
#            lowercase '</font>'.
sub symbolfontreplace {
    my $text=shift;
    # Cut the text at each closing font tag so that every fragment holds at
    # most one innermost open <font ...> element reaching to its end.
    # The negative limit keeps trailing empty fields; without it a text that
    # ends in '</font>' would lose that closing tag when re-joined below.
    my @fragments=split(/\<\/font\>/si,$text,-1);
    foreach my $fragment (@fragments) {
        $fragment=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
    }
    return join('</font>',@fragments);
}

# Tidy up raw HTML so that it is closer to well-formed XML.
#
# Parameters: $raw  - the HTML text
#             $full - if true, <html>, <head> and <body> tags are kept;
#                     otherwise the attribute-less forms are stripped.
# Returns:    the cleaned text.
sub htmlclean {
    my ($raw,$full)=@_;

    # Turn every line-break flavour (including the numeric entities for LF
    # and CR) into "\n". Two-character sequences are handled before the
    # single characters; the order is significant.
    foreach my $linebreak ("\r\f","\f\r","\r\n","\n\r","\f","\r",
                           '&#10;','&#13;') {
        $raw=~s/\Q$linebreak\E/\n/gs;
    }

    # Write empty elements in self-closing form and drop their stray end tags
    $raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis;
    $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;

    # Document-level wrappers are only wanted for a full document
    if (!$full) {
       $raw=~s/\<[\/]*(body|head|html)\>//gis;
    }

    # Lowercase the names of the standard tags and trim whitespace inside
    # attribute-less start and end tags
    my @standardtags=qw(html body head meta h1 h2 h3 h4 b i m
                        table tr td th p br hr img embed font
                        a strong center title basefont li ol ul
                        input select form option script pre);
    foreach my $tag (@standardtags) {
        $raw=~s/\<$tag\s*\>/\<$tag\>/gis;
        $raw=~s/\<\/$tag\s*\>/<\/$tag\>/gis;
        $raw=~s/\<$tag\s([^\>]*)\>/<$tag $1\>/gis;
    }
    return $raw;
}

# First phase of the cleanup dialog. The handler calls this with
# ($r,$fn,$uname,$udom) whenever form.phase is not 'two'.
# Currently an empty stub: it ignores its arguments and produces no output.
sub phaseone {
}

# Second phase of the cleanup dialog. The handler calls this with
# ($r,$fn,$uname,$udom) when form.phase equals 'two'.
# Currently an empty stub: it ignores its arguments and produces no output.
sub phasetwo {
}

# ---------------------------------------------------------------- Main Handler
# Request handler for the cleanup location.
#
# Reads the 'filename' parameter from the query string, checks that the
# user has construction access to that file, prints the page frame and
# dispatches to phaseone or phasetwo depending on form.phase.
#
# Parameter: $r - the Apache request object.
# Returns:   OK after the page has been sent,
#            HTTP_NOT_FOUND if no usable filename was supplied,
#            HTTP_NOT_ACCEPTABLE if construction access is denied.
sub handler {

  my $r=shift;

# The file to clean up. Must be declared: this package runs under
# "use strict", and an undeclared $fn is a compile-time error.
  my $fn;

# Get query string for limited number of parameters

  &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
					  ['filename']);

  if ($env{'form.filename'}) {
      $fn=$env{'form.filename'};
# Strip scheme and host so that only the path remains
      $fn=~s/^http\:\/\/[^\/]+//;
  } else {
     $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
         ' unspecified filename for cleanup', $r->filename); 
     return HTTP_NOT_FOUND;
  }

# Nothing is left if the parameter consisted only of scheme and host
  unless ($fn) { 
     $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
         ' trying to cleanup non-existing file', $r->filename); 
     return HTTP_NOT_FOUND;
  } 

# ----------------------------------------------------------- Start page output
  my $uname;
  my $udom;

  ($uname,$udom)=
    &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
  unless (($uname) && ($udom)) {
     $r->log_reason($uname.' at '.$udom.
         ' trying to cleanup file '.$env{'form.filename'}.
         ' ('.$fn.') - not authorized', 
         $r->filename); 
     return HTTP_NOT_ACCEPTABLE;
  }

# Remove the /~username component from the path
  $fn=~s/\/\~(\w+)//;

  &Apache::loncommon::content_type($r,'text/html');
  $r->send_http_header;

  $r->print('<html><head><title>LON-CAPA Construction Space</title></head>');

  $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));

  if ($env{'form.phase'} eq 'two') {
      &phasetwo($r,$fn,$uname,$udom);
  } else {
      &phaseone($r,$fn,$uname,$udom);
  }

  $r->print('</body></html>');
  return OK;  
}

1;
__END__

--www1117243955--