[LON-CAPA-cvs] cvs: doc /otherfiles perl_modules.txt
albertel
lon-capa-cvs@mail.lon-capa.org
Wed, 06 Mar 2002 22:47:46 -0000
albertel Wed Mar 6 17:47:46 2002 EDT
Modified files:
/doc/otherfiles perl_modules.txt
Log:
- adding documentation of patches that need to be applied to HTML::Parser in order for LON-CAPA to handle documents better
Index: doc/otherfiles/perl_modules.txt
diff -u doc/otherfiles/perl_modules.txt:1.11 doc/otherfiles/perl_modules.txt:1.12
--- doc/otherfiles/perl_modules.txt:1.11 Sat Mar 2 00:17:00 2002
+++ doc/otherfiles/perl_modules.txt Wed Mar 6 17:47:45 2002
@@ -210,6 +210,147 @@
HTML/Parser.pm 1
HTML/TokeParser.pm 1
+Need these patches applied:
+
+diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.1/hparser.c
+--- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001
++++ HTML-Parser-3.25.1/hparser.c Wed Feb 20 13:23:34 2002
+@@ -1094,14 +1094,21 @@
+ hctype_t tag_name_first, tag_name_char;
+ hctype_t attr_name_first, attr_name_char;
+
+- if (p_state->strict_names || p_state->xml_mode) {
++ if (p_state->strict_names) {
+ tag_name_first = attr_name_first = HCTYPE_NAME_FIRST;
+ tag_name_char = attr_name_char = HCTYPE_NAME_CHAR;
+ }
+ else {
+- tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT;
+- attr_name_first = HCTYPE_NOT_SPACE_GT;
+- attr_name_char = HCTYPE_NOT_SPACE_EQ_GT;
++ if (p_state->xml_mode) {
++ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT;
++ attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT;
++ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT;
++ }
++ else {
++ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT;
++ attr_name_first = HCTYPE_NOT_SPACE_GT;
++ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT;
++ }
+ }
+
+ s += 2;
+@@ -1158,8 +1165,11 @@
+ else {
+ char *word_start = s;
+ while (s < end && isHNOT_SPACE_GT(*s)) {
+- if (p_state->xml_mode && *s == '/')
+- break;
++ if (p_state->xml_mode && *s == '/') {
++ /* look ahead to see if the tag ends */
++ if ((s+1)==end || *(s+1)=='>')
++ break;
++ }
+ s++;
+ }
+ if (s == end)
+
+diff -urN HTML-Parser-3.25/Parser.pm HTML-Parser-3.25.2/Parser.pm
+--- HTML-Parser-3.25/Parser.pm Fri May 11 13:24:09 2001
++++ HTML-Parser-3.25.2/Parser.pm Wed Mar 6 16:47:46 2002
+@@ -427,6 +427,11 @@
+ There are currently no events associated with the marked section
+ markup, but the text can be returned as C<skipped_text>.
+
++=item $p->encoded_entities( [$bool] )
++
++By default, attr and @attr decode general entities for attribute values.
++This turns off that behavior.
++
+ =back
+
+ As markup and text is recognized, handlers are invoked. The following
+diff -urN HTML-Parser-3.25/Parser.xs HTML-Parser-3.25.2/Parser.xs
+--- HTML-Parser-3.25/Parser.xs Thu May 10 15:27:28 2001
++++ HTML-Parser-3.25.2/Parser.xs Wed Mar 6 16:48:56 2002
+@@ -297,6 +297,7 @@
+ HTML::Parser::xml_mode = 3
+ HTML::Parser::unbroken_text = 4
+ HTML::Parser::marked_sections = 5
++ HTML::Parser::encoded_entities = 6
+ PREINIT:
+ bool *attr;
+ CODE:
+@@ -311,6 +312,7 @@
+ #else
+ croak("marked sections not supported"); break;
+ #endif
++ case 6: attr = &pstate->encoded_entities; break;
+ default:
+ croak("Unknown boolean attribute (%d)", ix);
+ }
+diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.2/hparser.c
+--- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001
++++ HTML-Parser-3.25.2/hparser.c Wed Mar 6 16:44:47 2002
+@@ -398,7 +398,8 @@
+ beg++; len -= 2;
+ }
+ attrval = newSVpvn(beg, len);
+- decode_entities(aTHX_ attrval, p_state->entity2char);
++ if (!p_state->encoded_entities)
++ decode_entities(aTHX_ attrval, p_state->entity2char);
+ }
+ else { /* boolean */
+ if (p_state->bool_attr_val)
+diff -urN HTML-Parser-3.25/hparser.h HTML-Parser-3.25.2/hparser.h
+--- HTML-Parser-3.25/hparser.h Tue May 8 13:03:27 2001
++++ HTML-Parser-3.25.2/hparser.h Wed Mar 6 16:48:18 2002
+@@ -99,6 +99,7 @@
+ bool strict_names;
+ bool xml_mode;
+ bool unbroken_text;
++ bool encoded_entities;
+
+ /* other configuration stuff */
+ SV* bool_attr_val;
+diff -urN HTML-Parser-3.25/t/encoded-entities.t HTML-Parser-3.25.2/t/encoded-entities.t
+--- HTML-Parser-3.25/t/encoded-entities.t Wed Dec 31 19:00:00 1969
++++ HTML-Parser-3.25.2/t/encoded-entities.t Wed Mar 6 17:13:53 2002
+@@ -0,0 +1,32 @@
++use strict;
++print "1..2\n";
++
++use HTML::Parser ();
++my $p = HTML::Parser->new();
++$p->encoded_entities(1);
++
++my $text = "";
++$p->handler(start =>
++ sub {
++ my($tag, $attr) = @_;
++ $text .= "S[$tag";
++ for my $k (sort keys %$attr) {
++ my $v = $attr->{$k};
++ $text .= " $k=$v";
++ }
++ $text .= "]";
++ }, "tagname,attr");
++
++my $html = <<'EOT';
++<tag arg="&amp;&lt;&gt;">
++EOT
++
++$p->parse($html)->eof;
++
++print "not " unless $text eq 'S[tag arg=&amp;&lt;&gt;]'; print "ok 1\n";
++
++$text = "";
++$p->encoded_entities(0);
++$p->parse($html)->eof;
++
++print "not " unless $text eq 'S[tag arg=&<>]'; print "ok 2\n";
+
---------------------------------------------- IO-stringy
http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz
(needed by MIME-tools)