[LON-CAPA-cvs] cvs: doc /otherfiles perl_modules.txt

albertel lon-capa-cvs@mail.lon-capa.org
Wed, 06 Mar 2002 22:47:46 -0000


albertel		Wed Mar  6 17:47:46 2002 EDT

  Modified files:              
    /doc/otherfiles	perl_modules.txt 
  Log:
  - adding documentation of patches that need to be applied to HTML::Parser in order for LON-CAPA to handle documents better
  
  
  
Index: doc/otherfiles/perl_modules.txt
diff -u doc/otherfiles/perl_modules.txt:1.11 doc/otherfiles/perl_modules.txt:1.12
--- doc/otherfiles/perl_modules.txt:1.11	Sat Mar  2 00:17:00 2002
+++ doc/otherfiles/perl_modules.txt	Wed Mar  6 17:47:45 2002
@@ -210,6 +210,147 @@
 		    HTML/Parser.pm 1
 		    HTML/TokeParser.pm 1
 
+Need these patches applied:
+
+diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.1/hparser.c
+--- HTML-Parser-3.25/hparser.c	Thu May 10 15:27:28 2001
++++ HTML-Parser-3.25.1/hparser.c	Wed Feb 20 13:23:34 2002
+@@ -1094,14 +1094,21 @@
+     hctype_t tag_name_first, tag_name_char;
+     hctype_t attr_name_first, attr_name_char;
+ 
+-    if (p_state->strict_names || p_state->xml_mode) {
++    if (p_state->strict_names) {
+ 	tag_name_first = attr_name_first = HCTYPE_NAME_FIRST;
+ 	tag_name_char  = attr_name_char  = HCTYPE_NAME_CHAR;
+     }
+     else {
+-	tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT;
+-	attr_name_first = HCTYPE_NOT_SPACE_GT;
+-	attr_name_char  = HCTYPE_NOT_SPACE_EQ_GT;
++	if (p_state->xml_mode) {
++	    tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT;
++	    attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT;
++	    attr_name_char  = HCTYPE_NOT_SPACE_EQ_GT;
++	}
++	else {
++	    tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT;
++	    attr_name_first = HCTYPE_NOT_SPACE_GT;
++	    attr_name_char  = HCTYPE_NOT_SPACE_EQ_GT;
++	}
+     }
+ 
+     s += 2;
+@@ -1158,8 +1165,11 @@
+ 	    else {
+ 		char *word_start = s;
+ 		while (s < end && isHNOT_SPACE_GT(*s)) {
+-		    if (p_state->xml_mode && *s == '/')
+-			break;
++		    if (p_state->xml_mode && *s == '/') {
++			/* look ahead to see if the tag ends */
++			if ((s+1)==end || *(s+1)=='>')
++			    break;
++		    }
+ 		    s++;
+ 		}
+ 		if (s == end)
+
+diff -urN HTML-Parser-3.25/Parser.pm HTML-Parser-3.25.2/Parser.pm
+--- HTML-Parser-3.25/Parser.pm	Fri May 11 13:24:09 2001
++++ HTML-Parser-3.25.2/Parser.pm	Wed Mar  6 16:47:46 2002
+@@ -427,6 +427,11 @@
+ There are currently no events associated with the marked section
+ markup, but the text can be returned as C<skipped_text>.
+ 
++=item $p->encoded_entities( [$bool] )
++
++By default, attr and @attr decode general entities for attribute values.
++This turns off that behavior.
++
+ =back
+ 
+ As markup and text is recognized, handlers are invoked.  The following
+diff -urN HTML-Parser-3.25/Parser.xs HTML-Parser-3.25.2/Parser.xs
+--- HTML-Parser-3.25/Parser.xs	Thu May 10 15:27:28 2001
++++ HTML-Parser-3.25.2/Parser.xs	Wed Mar  6 16:48:56 2002
+@@ -297,6 +297,7 @@
+         HTML::Parser::xml_mode = 3
+ 	HTML::Parser::unbroken_text = 4
+         HTML::Parser::marked_sections = 5
++        HTML::Parser::encoded_entities = 6
+     PREINIT:
+ 	bool *attr;
+     CODE:
+@@ -311,6 +312,7 @@
+ #else
+ 	         croak("marked sections not supported"); break;
+ #endif
++	case  6: attr = &pstate->encoded_entities;     break;
+ 	default:
+ 	    croak("Unknown boolean attribute (%d)", ix);
+         }
+diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.2/hparser.c
+--- HTML-Parser-3.25/hparser.c	Thu May 10 15:27:28 2001
++++ HTML-Parser-3.25.2/hparser.c	Wed Mar  6 16:44:47 2002
+@@ -398,7 +398,8 @@
+ 			    beg++; len -= 2;
+ 			}
+ 			attrval = newSVpvn(beg, len);
+-			decode_entities(aTHX_ attrval, p_state->entity2char);
++			if (!p_state->encoded_entities)
++			    decode_entities(aTHX_ attrval, p_state->entity2char);
+ 		    }
+ 		    else { /* boolean */
+ 			if (p_state->bool_attr_val)
+diff -urN HTML-Parser-3.25/hparser.h HTML-Parser-3.25.2/hparser.h
+--- HTML-Parser-3.25/hparser.h	Tue May  8 13:03:27 2001
++++ HTML-Parser-3.25.2/hparser.h	Wed Mar  6 16:48:18 2002
+@@ -99,6 +99,7 @@
+     bool strict_names;
+     bool xml_mode;
+     bool unbroken_text;
++    bool encoded_entities;
+ 
+     /* other configuration stuff */
+     SV* bool_attr_val;
+diff -urN HTML-Parser-3.25/t/encoded-entities.t HTML-Parser-3.25.2/t/encoded-entities.t
+--- HTML-Parser-3.25/t/encoded-entities.t	Wed Dec 31 19:00:00 1969
++++ HTML-Parser-3.25.2/t/encoded-entities.t	Wed Mar  6 17:13:53 2002
+@@ -0,0 +1,32 @@
++use strict;
++print "1..2\n";
++
++use HTML::Parser ();
++my $p = HTML::Parser->new();
++$p->encoded_entities(1);
++
++my $text = "";
++$p->handler(start =>
++	    sub {
++		 my($tag, $attr) = @_;
++		 $text .= "S[$tag";
++		 for my $k (sort keys %$attr) {
++		     my $v =  $attr->{$k};
++		     $text .= " $k=$v";
++		 }
++		 $text .= "]";
++	     }, "tagname,attr");
++
++my $html = <<'EOT';
++<tag arg="&amp;&lt;&gt">
++EOT
++
++$p->parse($html)->eof;
++
++print "not " unless $text eq 'S[tag arg=&amp;&lt;&gt]';  print "ok 1\n";
++
++$text = "";
++$p->encoded_entities(0);
++$p->parse($html)->eof;
++
++print "not " unless $text eq 'S[tag arg=&<>]';  print "ok 2\n";
+
 ---------------------------------------------- IO-stringy
 http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz
 (needed by MIME-tools)