[LON-CAPA-cvs] cvs: loncom / loncapa_apache.conf /imspackages imsprocessor.pm /publisher loncleanup.pm /xml lonxml.pm doc/loncapafiles loncapafiles.lpml
www
lon-capa-cvs@mail.lon-capa.org
Sat, 28 May 2005 01:32:35 -0000
This is a MIME encoded message
--www1117243955
Content-Type: text/plain
www Fri May 27 21:32:35 2005 EDT
Added files:
/loncom/publisher loncleanup.pm
Modified files:
/doc/loncapafiles loncapafiles.lpml
/loncom loncapa_apache.conf
/loncom/imspackages imsprocessor.pm
/loncom/xml lonxml.pm
Log:
Put all XML/HTML cleanup into separate handler
--www1117243955
Content-Type: text/plain
Content-Disposition: attachment; filename="www-20050527213235.txt"
Index: doc/loncapafiles/loncapafiles.lpml
diff -u doc/loncapafiles/loncapafiles.lpml:1.420 doc/loncapafiles/loncapafiles.lpml:1.421
--- doc/loncapafiles/loncapafiles.lpml:1.420 Thu Apr 7 02:56:20 2005
+++ doc/loncapafiles/loncapafiles.lpml Fri May 27 21:32:31 2005
@@ -2,7 +2,7 @@
"http://lpml.sourceforge.net/DTD/lpml.dtd">
<!-- loncapafiles.lpml -->
-<!-- $Id: loncapafiles.lpml,v 1.420 2005/04/07 06:56:20 albertel Exp $ -->
+<!-- $Id: loncapafiles.lpml,v 1.421 2005/05/28 01:32:31 www Exp $ -->
<!--
@@ -3050,6 +3050,15 @@
<status>works/unverified</status>
</file>
<file>
+<source>loncom/publisher/loncleanup.pm</source>
+<target dist='default'>home/httpd/lib/perl/Apache/loncleanup.pm</target>
+<categoryname>handler</categoryname>
+<description>
+Handler to clean up HTML files.
+</description>
+<status>works/unverified</status>
+</file>
+<file>
<source>doc/man/Apache__lonretrieve.3</source>
<target dist='default'>usr/man/man3/Apache::lonretrieve.3</target>
<categoryname>doc</categoryname>
Index: loncom/loncapa_apache.conf
diff -u loncom/loncapa_apache.conf:1.108 loncom/loncapa_apache.conf:1.109
--- loncom/loncapa_apache.conf:1.108 Fri Apr 22 17:03:52 2005
+++ loncom/loncapa_apache.conf Fri May 27 21:32:32 2005
@@ -1,7 +1,7 @@
##
## loncapa_apache.conf -- Apache HTTP LON-CAPA configuration file
##
-## $Id: loncapa_apache.conf,v 1.108 2005/04/22 21:03:52 albertel Exp $
+## $Id: loncapa_apache.conf,v 1.109 2005/05/28 01:32:32 www Exp $
##
#
@@ -559,6 +559,16 @@
ErrorDocument 500 /adm/errorhandler
</Location>
+<Location /adm/cleanup>
+PerlAccessHandler Apache::lonacc
+SetHandler perl-script
+PerlHandler Apache::loncleanup
+ErrorDocument 403 /adm/login
+ErrorDocument 404 /adm/notfound.html
+ErrorDocument 406 /adm/unauthorized
+ErrorDocument 500 /adm/errorhandler
+</Location>
+
<Location /adm/cfile>
PerlAccessHandler Apache::lonacc
SetHandler perl-script
Index: loncom/imspackages/imsprocessor.pm
diff -u loncom/imspackages/imsprocessor.pm:1.20 loncom/imspackages/imsprocessor.pm:1.21
--- loncom/imspackages/imsprocessor.pm:1.20 Tue May 3 14:38:37 2005
+++ loncom/imspackages/imsprocessor.pm Fri May 27 21:32:32 2005
@@ -24,6 +24,7 @@
package Apache::imsprocessor;
use Apache::lonnet;
+use Apache::loncleanup;
use LWP::UserAgent;
use HTTP::Request::Common;
use LONCAPA::Configuration;
@@ -2864,7 +2865,7 @@
}
if ($$settings{$id}{texttype} eq 'text/html') {
$$settings{$id}{text} = &HTML::Entities::decode($$settings{$id}{text});
- $$settings{$id}{text} = &Apache::lonxml::htmlclean($$settings{$id}{text});
+ $$settings{$id}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{text});
$$settings{$id}{text} =~ s#(<img src=["']?)([^>]+)(/?>)#$1../../resfiles/$2 />#gi;
$$settings{$id}{text} =~ s#<([bh])r>#<$1r />#g;
# $$settings{$id}{text} =~ s#<p>#</p><p>#g;
@@ -2954,7 +2955,7 @@
}
if ($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{texttype} eq 'text/html') {
$$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &HTML::Entities::decode($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
- $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::lonxml::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
+ $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
$$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~ s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
$$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~ s#</?p>##g;
@@ -2996,7 +2997,7 @@
}
if ($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{texttype} eq 'text/html') {
$$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &HTML::Entities::decode($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
- $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::lonxml::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
+ $$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text});
$$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~ s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
$$settings{$id}{$list}{$$allanswers{$id}{$list}[$k]}{text} =~ s#</?p>##g;
@@ -3028,7 +3029,7 @@
$$settings{$id}{$grp}{$answer_id}{text} = &HTML::Entities::decode($$settings{$id}{$grp}{$answer_id}{text});
$test_for_html = &test_for_html($$settings{$id}{$grp}{$answer_id}{text});
- $$settings{$id}{$grp}{$answer_id}{text} = &Apache::lonxml::chtmlclean($$settings{$id}{$grp}{$answer_id}{text});
+ $$settings{$id}{$grp}{$answer_id}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$grp}{$answer_id}{text});
$$settings{$id}{$grp}{$answer_id}{text} =~ s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
$$settings{$id}{$grp}{$answer_id}{text} =~ s#</?p>##g;
}
@@ -3091,7 +3092,7 @@
for (my $k=0; $k<@{$$allchoices{$id}}; $k++) {
if ($$settings{$id}{$$allchoices{$id}[$k]}{texttype} eq 'text/html') {
$$settings{$id}{$$allchoices{$id}[$k]}{text} = &HTML::Entities::decode($$settings{$id}{$$allchoices{$id}[$k]}{text});
- $$settings{$id}{$$allchoices{$id}[$k]}{text} = &Apache::lonxml::htmlclean($$settings{$id}{$$allchoices{$id}[$k]}{text});
+ $$settings{$id}{$$allchoices{$id}[$k]}{text} = &Apache::loncleanup::htmlclean($$settings{$id}{$$allchoices{$id}[$k]}{text});
$$settings{$id}{$$allchoices{$id}[$k]}{text} =~ s#(<img src=")([^>]+)>#$1../../resfiles/$2 />#gi;
$$settings{$id}{$$allchoices{$id}[$k]}{text} =~ s#</?p>##g;
}
Index: loncom/xml/lonxml.pm
diff -u loncom/xml/lonxml.pm:1.373 loncom/xml/lonxml.pm:1.374
--- loncom/xml/lonxml.pm:1.373 Tue May 3 02:44:56 2005
+++ loncom/xml/lonxml.pm Fri May 27 21:32:33 2005
@@ -1,7 +1,7 @@
# The LearningOnline Network with CAPA
# XML Parser Module
#
-# $Id: lonxml.pm,v 1.373 2005/05/03 06:44:56 albertel Exp $
+# $Id: lonxml.pm,v 1.374 2005/05/28 01:32:33 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -382,33 +382,6 @@
return $finaloutput;
}
-sub htmlclean {
- my ($raw,$full)=@_;
-# Take care of CRLF etc
-
- $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
- $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
- $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
- $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
-
-# Generate empty tags, remove wrong end tags
- $raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis;
- $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
- unless ($full) {
- $raw=~s/\<[\/]*(body|head|html)\>//gis;
- }
-# Make standard tags lowercase
- foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
- 'table','tr','td','th','p','br','hr','img','embed','font',
- 'a','strong','center','title','basefont','li','ol','ul',
- 'input','select','form','option','script','pre') {
- $raw=~s/\<$_\s*\>/\<$_\>/gis;
- $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
- $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
- }
- return $raw;
-}
-
sub latex_special_symbols {
my ($string,$where)=@_;
if ($where eq 'header') {
@@ -1238,10 +1211,7 @@
$xml_help=&Apache::loncommon::helpLatexCheatsheet();
}
my $cleanbut = '';
- if ($filetype eq 'html') {
- $cleanbut='<input type="submit" name="attemptclean" value="'.
- &mt('Save and then attempt to clean HTML').'" />';
- }
+
my $titledisplay=&display_title();
my %lt=&Apache::lonlocal::texthash('st' => 'Save this',
'vi' => 'View',
@@ -1329,7 +1299,7 @@
# Edit action? Save file.
#
unless ($env{'request.state'} eq 'published') {
- if (($env{'form.savethisfile'}) || ($env{'form.attemptclean'})) {
+ if ($env{'form.savethisfile'}) {
if (&storefile($file,$env{'form.filecont'})) {
&Apache::lonxml::info("<font COLOR=\"#0000FF\">".
&mt('Updated').": ".
@@ -1368,10 +1338,6 @@
if ($filecontents=~/BEGIN LON-CAPA Internal/) {
&Apache::lonxml::error(&mt('This file appears to be a rendering of a Lon-CAPA resource. If this is correct, this resource will act very oddly and incorrectly.'));
}
-
- if ($env{'form.attemptclean'}) {
- $filecontents=&htmlclean($filecontents,1);
- }
#
# we are in construction space, see if edit mode forced
&Apache::loncommon::get_unprocessed_cgi
Index: loncom/publisher/loncleanup.pm
+++ loncom/publisher/loncleanup.pm
# The LearningOnline Network with CAPA
# Handler to clean up XML files
#
# $Id: loncleanup.pm,v 1.1 2005/05/28 01:32:33 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#
###
package Apache::loncleanup;
use strict;
use Apache::File;
use File::Copy;
use Apache::Constants qw(:common :http :methods);
use Apache::loncacc;
use Apache::loncommon();
use Apache::lonlocal;
use Apache::lonnet;
# Translate text that was typed in the "Symbol" font into LaTeX.
#
# Parameter: $symbolfont - the raw text found inside a symbol-font element.
# Returns:   the text with every character (or complete "&...;" entity)
#            that has an entry in the table below replaced by its LaTeX
#            equivalent; everything else is passed through unchanged.
#
# The input is scanned one character at a time.  An ampersand starts
# "entity mode", in which characters are accumulated until the closing
# semicolon so that the whole entity is looked up as one unit.
sub latextrans {
    my $symbolfont=shift;
    $symbolfont='' unless (defined($symbolfont));
    my %latexsymb=(
        '±' => '\pm',
        '´' => '\times',
        '¸' => '\div',
        'Ò' => '(R)',
        'Ó' => '\copy',
        'Ø' => '\neg',
        'â' => '(R)',
        'ã' => '\copy',
        '¦' => 'f',
        'A' => '\Alpha',
        'B' => '\Beta',
        'G' => '\Gamma',
        'D' => '\Delta',
        'E' => '\Epsilon',
        'Z' => '\Zeta',
        'H' => '\Eta',
        'Q' => '\Theta',
        'I' => '\Iota',
        'K' => '\Kappa',
        'L' => '\Lambda',
        'M' => '\Mu',
        'N' => '\Nu',
        'X' => '\Xi',
        'O' => '\Omicron',
        'P' => '\Pi',
        'R' => '\Rho',
        'S' => '\Sigma',
        'T' => '\Tau',
        'U' => 'Y',
        'F' => '\Phi',
        'C' => '\Chi',
        'Y' => '\Psi',
        'W' => '\Omega',
        'a' => '\alpha',
        'b' => '\beta',
        'g' => '\gamma',
        'd' => '\delta',
        'e' => '\epsilon',
        'z' => '\zeta',
        'h' => '\eta',
        'q' => '\theta',
        'i' => '\iota',
        'k' => '\kappa',
        'l' => '\lambda',
        'm' => '\mu',
        'n' => '\nu',
        'x' => '\xi',
        'o' => '\omicron',
        'p' => '\pi',
        'r' => '\rho',
        'V' => '\sigmaf',
        's' => '\sigma',
        't' => '\tau',
        'u' => '\upsilon',
        'f' => '\phi',
        'c' => '\chi',
        'y' => '\psi',
        'w' => '\omega',
        'J' => '\vartheta',
        'j' => '\varphi',
        'v' => '\varpi',
        '¡' => '\Upsilon',
        '¢' => "'",
        '¤' => '/',
        '²' => '"',
        '¼' => '\ldots',
        'À' => '\aleph',
        'Á' => '\Im',
        'Â' => '\Re',
        'Ã' => '\wp',
        'Ô' => '^{TM}',
        'ä' => '^{TM}',
        'ð' => 'EUR',
        '«' => '\leftrightarrow',
        '¬' => '\leftarrow',
        '­' => '\uparrow',
        '®' => '\rightarrow',
        '¯' => '\downarrow',
        '¿' => '\hookleftarrow',
        'Û' => '\Leftrightarrow',
        'Ü' => '\Leftarrow',
        'Ý' => '\Uparrow',
        'Þ' => '\Rightarrow',
        'ß' => '\Downarrow',
        '"' => '\forall',
        '$' => '\exists',
        # The key is a single apostrophe; double quotes avoid the escape.
        "'" => '\ni',
        '*' => '\ast',
        '-' => '-',
        '@' => '\cong',
        # The key is a single backslash.
        '\\' => '\therefore',
        '^' => '\perp',
        '~' => '\sim',
        '£' => '\leq',
        '¥' => '\infty',
        '³' => '\geq',
        'µ' => '\propto',
        '¶' => '\partial',
        '·' => '\cdot',
        '¹' => '\not=',
        'º' => '\equiv',
        '»' => '\approx',
        'Ä' => '\otimes',
        'Å' => '\oplus',
        'Æ' => '\emptyset',
        'Ç' => '\cap',
        'È' => '\cup',
        'É' => '\supset',
        'Ê' => '\supseteq',
        'Ë' => '\not\subset',
        'Ì' => '\subset',
        'Í' => '\subseteq',
        'Î' => '\in',
        'Ï' => '\not\in',
        'Ð' => '\angle',
        'Ñ' => '\nabla',
        'Õ' => '\prod',
        'Ö' => '\surd',
        '×' => '\cdot',
        'Ù' => '\wedge',
        'Ú' => '\vee',
        'å' => '\sum',
        'ò' => '\int',
        'á' => '\langle',
        'ñ' => '\rangle',
        'à' => '\diamondsuit',
        '§' => '\clubsuit',
        '¨' => '\diamondsuit',
        '©' => '\heartsuit',
        'ª' => '\spadesuit'
    );
    my $output='';
    my $char='';
    my $entitymode=0;
    for (my $i=0; $i<length($symbolfont); $i++) {
        my $newchar=substr($symbolfont,$i,1);
        $char.=$newchar;
        if ($newchar eq '&') { $entitymode=1; }
        # Inside an entity: keep collecting until the terminating ';'.
        if (($entitymode) && ($newchar ne ';')) { next; }
        my $latex=$latexsymb{$char};
        if ($latex) {
            $output.=$latex;
        } else {
            $output.=$char;
        }
        $char='';
        $entitymode=0;
    }
    # An entity that was opened but never closed is still pending here;
    # emit it verbatim instead of silently dropping the rest of the text.
    $output.=$char;
    return $output;
}
# Build the replacement for one symbol-font fragment.
#
# Arguments: the attribute text found before the face= attribute, the
# attribute text found after it, and the content that followed the tag.
# Returns the reopened <font> tag (without the face attribute) followed by
# the content translated by latextrans() and wrapped in <m>$...$</m>.
sub insidetrans {
    my ($before,$after,$symboltext)=@_;
    my $math=&latextrans($symboltext);
    return join('','<font',$before,$after,'><m>$',$math,'$</m>');
}
# Replace the content of every <font face="symbol"> element in $text by
# its LaTeX translation (see insidetrans).
#
# The text is cut at each closing </font> tag (matched case-insensitively),
# each piece is rewritten independently, and the pieces are glued back
# together with a lowercase '</font>'.
#
# Parameter: $text - markup to process.
# Returns:   the processed markup.
sub symbolfontreplace {
    my $text=shift;
    $text='' unless (defined($text));
    # A negative limit keeps trailing empty fields.  Without it a closing
    # </font> at the very end of the text would vanish after the join.
    my @fragments=split(/\<\/font\>/si,$text,-1);
    foreach my $fragment (@fragments) {
        $fragment=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
    }
    return join('</font>',@fragments);
}
# Light-weight HTML normalisation.
#
# Parameters:
#   $raw  - the markup to clean
#   $full - when true, <html>, <head> and <body> tags are kept; otherwise
#           bare (attribute-less) occurrences of them are removed
# Returns the cleaned markup.
#
# Steps, in order: normalise line endings, turn known empty elements into
# self-closing form and drop their stray end tags, optionally strip the
# document wrapper tags, then lowercase a fixed list of standard tag names.
sub htmlclean {
    my ($raw,$full)=@_;

    # Take care of CRLF etc.  The order matters: two-character sequences
    # have to be collapsed before the single characters are converted.
    foreach my $eol (qr/\r\f/,qr/\f\r/,qr/\r\n/,qr/\n\r/,
                     qr/\f/,qr/\r/,qr/\&\#10\;/,qr/\&\#13\;/) {
        $raw=~s/$eol/\n/g;
    }

    # Generate empty tags, remove wrong end tags.  The lookahead makes sure
    # the tag name really ends here, so that e.g. <brand> is not mistaken
    # for <br> and rewritten to <brand />.
    $raw=~s/\<(br|hr|img|meta|allow|basefont)(?=[\s\>])([^\>\/]*?)\>/\<$1$2 \/\>/gis;
    $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
    unless ($full) {
        $raw=~s/\<[\/]*(body|head|html)\>//gis;
    }

    # Make standard tags lowercase
    foreach my $tag ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
                     'table','tr','td','th','p','br','hr','img','embed','font',
                     'a','strong','center','title','basefont','li','ol','ul',
                     'input','select','form','option','script','pre') {
        $raw=~s/\<$tag\s*\>/\<$tag\>/gis;
        $raw=~s/\<\/$tag\s*\>/<\/$tag\>/gis;
        $raw=~s/\<$tag\s([^\>]*)\>/<$tag $1\>/gis;
    }
    return $raw;
}
# First phase of the cleanup dialog.  Still an empty placeholder: handler()
# calls it with ($r,$fn,$uname,$udom) whenever form.phase is not 'two',
# but nothing is output or changed yet.
sub phaseone {
}
# Second phase of the cleanup dialog.  Still an empty placeholder: handler()
# calls it with ($r,$fn,$uname,$udom) when form.phase equals 'two',
# but nothing is output or changed yet.
sub phasetwo {
}
# ---------------------------------------------------------------- Main Handler
# Request handler for the cleanup page.
#
# Parameter: $r - the Apache request object.
# Returns:
#   HTTP_NOT_FOUND      - no usable filename was passed in the query string
#   HTTP_NOT_ACCEPTABLE - constructaccess() did not return a user and domain
#                         for the file
#   OK                  - the page was written
#
# The 'filename' form parameter may be a full URL; the scheme and host part
# are stripped before the access check.
sub handler {
    my $r=shift;
    # Must be declared here: the module runs under "use strict".
    my $fn;

# Get query string for limited number of parameters

    &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
                                            ['filename']);

    if ($env{'form.filename'}) {
        $fn=$env{'form.filename'};
        $fn=~s/^http\:\/\/[^\/]+//;
    } else {
        $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                       ' unspecified filename for cleanup', $r->filename);
        return HTTP_NOT_FOUND;
    }

    # Nothing left after removing the host part.
    unless ($fn) {
        $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                       ' trying to cleanup non-existing file', $r->filename);
        return HTTP_NOT_FOUND;
    }

# ----------------------------------------------------------- Start page output
    my ($uname,$udom)=
        &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
    unless (($uname) && ($udom)) {
        # $uname/$udom are empty in this branch, so report the requesting
        # user instead, as the log entries above do.
        $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                       ' trying to cleanup file '.$env{'form.filename'}.
                       ' ('.$fn.') - not authorized',
                       $r->filename);
        return HTTP_NOT_ACCEPTABLE;
    }

    # Drop the leading /~user component from the path.
    $fn=~s/\/\~(\w+)//;

    &Apache::loncommon::content_type($r,'text/html');
    $r->send_http_header;

    $r->print('<html><head><title>LON-CAPA Construction Space</title></head>');
    $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));

    if ($env{'form.phase'} eq 'two') {
        &phasetwo($r,$fn,$uname,$udom);
    } else {
        &phaseone($r,$fn,$uname,$udom);
    }

    $r->print('</body></html>');
    return OK;
}
1;
__END__
--www1117243955--