[LON-CAPA-cvs] cvs: loncom /build xfml_parse.pl

harris41 lon-capa-cvs@mail.lon-capa.org
Wed, 20 Feb 2002 00:21:42 -0000


This is a MIME encoded message

--harris411014164502
Content-Type: text/plain

harris41		Tue Feb 19 19:21:42 2002 EDT

  Modified files:              
    /loncom/build	xfml_parse.pl 
  Log:
  a cleaner leaner script (major rewrite)
  
  
--harris411014164502
Content-Type: text/plain
Content-Disposition: attachment; filename="harris41-20020219192142.txt"

Index: loncom/build/xfml_parse.pl
diff -u loncom/build/xfml_parse.pl:1.2 loncom/build/xfml_parse.pl:1.3
--- loncom/build/xfml_parse.pl:1.2	Fri Feb  1 05:56:41 2002
+++ loncom/build/xfml_parse.pl	Tue Feb 19 19:21:42 2002
@@ -12,22 +12,15 @@
 ##                                                                           ##
 ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
 ## 1. Notes                                                                  ##
-## 2. Get command line arguments                                             ##
-## 3. First pass through (grab distribution-specific information)            ##
-## 4. Second pass through (parse out what is not necessary)                  ##
-## 5. Third pass through (translate markup according to specified mode)      ##
-## 6. Functions (most all just format contents of different markup tags)     ##
-## 7. POD (plain old documentation, CPAN style)                              ##
+## 2. Read in filter file                                                    ##
+## 3. Initialize and clear conditions                                        ##
+## 4. Run through and apply clauses                                          ##
 ##                                                                           ##
 ###############################################################################
 
 # ----------------------------------------------------------------------- Notes
 #
-# I am using a multiple pass-through approach to parsing
-# the xfml file.  This saves memory and makes sure the server
-# will never be overloaded.
-#
-# This is meant to parse files meeting the piml document type.
+# This is meant to parse files meeting the xfml document type.
 # See xfml.dtd.  XFML=XML Filtering Markup Language.
 
 use HTML::TokeParser;
@@ -43,26 +36,26 @@
 }
 
 my %eh;
-my %ih;
+
+# ---------------------------------------------- Read in filter file from @ARGV
 my $tofilter=shift @ARGV;
-open IN,"<$tofilter";
-my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
-close IN;
+open IN,"<$tofilter"; my @lines=<IN>;
+my $parsestring=join('',@lines); undef @lines; close IN;
 my $parser = HTML::TokeParser->new(\$parsestring) or
     die('can\'t create TokeParser object');
 $parser->xml_mode('1');
 
-# Define handling methods for mode-dependent text rendering
-
+# --------------------------------------------- initialize and clear conditions
 my %conditions; &cc;
 
+# Define handling methods for mode-dependent text rendering
 $parser->{textify}={
-    xfml => \&format_xfml,
+    'xfml' => \&format_xfml,
     'when:name' => \&format_when_name,
     'when:attribute' => \&format_when_attribute,
     'when:cdata' => \&format_when_cdata,
-    'choice:include' => \&format_choice_include,
     'choice:exclude' => \&format_choice_exclude,
+    'clause' => \&format_clause,
     };
 
 my $text;
@@ -70,158 +63,156 @@
 my $wloc=0;
 my %eha;
 
-while (my $token = $parser->get_tag('xfml')) {
-    &format_xfml(@{$token});
-    $text = $parser->get_text('/xfml');
-    $token = $parser->get_tag('/xfml');
-}
-
-#open IN,"<$tofilter";
-my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
-$parser = HTML::TokeParser->new(\$parsestring2) or
+# ----------------------------------------------- Run through and apply clauses
+my @lines2=<>; my $output=join('',@lines2); undef @lines2;
+my $lparser = HTML::TokeParser->new(\$output) or
     die('can\'t create TokeParser object');
-$parser->xml_mode('1');
-
-my $token;
-my $hloc=0;
-my %ts;
-my $tr;
-my $echild=0;
-my $exclude=0;
-my $excluden=0;
-my $excludea=0;
-my $et=0;
-my $cdata='';
-my $excludenold=0;
-my $ign=0;
-
-while ($token = $parser->get_token()) {
-    if ($token->[0] eq 'D') {
-	print $token->[1];
-    }
-    elsif ($token->[0] eq 'C') {
-	print $token->[1];
-    }
-    elsif ($token->[0] eq 'S') {
-	$cdata='';
-	$hloc++;
-# if token can be excluded, then pretend it is until all conditions are
-# run (eha); then output during end tag processing
-# else, output
-
-# a token can be excluded when it is an eh key, or a child node of
-# an eh key
-
-	if ($eh{$token->[1]}) {
-	    $echild=$token->[1];
+$lparser->xml_mode('1');
+my $parsestring2;
+while (my $token = $parser->get_tag('clause')) {
+    $parsestring2=$output;
+    $lparser = HTML::TokeParser->new(\$parsestring2);
+    $lparser->xml_mode('1');
+    $output='';
+    &format_clause(@{$token});
+    $text = $parser->get_text('/clause');
+    $token = $parser->get_tag('/clause');
+
+    my $token='';
+    my $ttype='';
+    my $excludeflag=0;
+    my $outcache='';
+    while ($token = $lparser->get_token()) {
+	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
+	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
+	elsif ($token->[0] eq 'T') {
+	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
+		or $ttype eq 'E') {
+		$output.=$token->[1];
+	    }
+	    else {
+		$outcache.=$token->[1];
+	    }
 	}
-	if ($echild) {
-	    # run through names for echild
-	    # then attributes and/or values and/or cdata
-	    my $name=$token->[1];
-	    my @attributes=@{$token->[3]};
-	    my %atthash=%{$token->[2]};
-	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
-		foreach my $namematch (@{$namemlist}) {
-		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
-		    if ($name=~/$nm/) {
-			$excludenold=$excluden;
-			$excluden++;
-			foreach my $attributemlist
-			    (@{$eha{$echild}->{'attribute'}}) {
-				foreach my $attributematch 
-				    (@{$attributemlist}) {
-					my ($an,$am)=
-					    split(/\=/,$attributematch,2);
-					$am=~s/^.//;
-					$am=~s/.$//;
-					if ($atthash{$an}) {
-					    if ($atthash{$an}=~/$am/) {
-						$excludea++;
-					    }
-					}
-				    }
-			    }
-		    }
-		}
+	elsif ($token->[0] eq 'S') {
+	    if ($eh{$token->[1]} or $excludeflag==1) {
+		$ttype='';
+		$excludeflag=1;
+		$outcache.=$token->[4];
+	    }
+	    else {
+		$ttype='S';
+		$output.=$token->[4];
+	    }
+	    if ($excludeflag==1) {
+		
 	    }
-	    $tr.=$token->[4];
 	}
-	else {
-	    print $token->[4];
+	elsif ($token->[0] eq 'E') {
+	    if ($eh{$token->[1]} and $excludeflag==1) {
+		$ttype='E';
+		$excludeflag=0;
+		$outcache.=$token->[2];
+		my $retval=&evalconditions($outcache);
+		if (&evalconditions($outcache)) {
+		    $output.=$outcache;
+		}
+		else {
+		    $output.='<!-- FILTERED OUT -->';
+		}
+		$outcache='';
+	    }
+	    elsif ($excludeflag==1) {
+		$ttype='';
+		$outcache.=$token->[2];
+	    }
+	    else {
+		$output.=$token->[2];
+		$ttype='E';
+	    }
 	}
     }
-    elsif ($token->[0] eq 'E') {
-	if ($echild) {
-	    $tr.=$token->[2];
-	    if ($excluden) {
-		my $i=0;
-		CDATALOOP:
-		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
-		    $i++;
-		    my $j;
-		    foreach my $cdatamatch (@{$cdatamlist}) {
-			$j++;
-#				print "CDATA: $cdatamatch, $cdata\n";
-			my $cm=$cdatamatch;
-			my $not=0;
-			if ($cm=~/\!/) {
-			    $not=1;
-			    $cm=~s/^.//;
+    &cc;
+}
+print $output;
+
+# -------------------------------------------------------------- evalconditions
+sub evalconditions {
+    my ($parsetext)=@_;
+    my $eparser = HTML::TokeParser->new(\$parsetext);
+    unless (@{$conditions{'name'}} or
+	    @{$conditions{'attribute'}}) {
+	return 0;
+    }
+    my $nameflag=0;
+    my $cdataflag=0;
+    my $matchflag=0;
+    my $Ttoken='';
+    while (my $token = $eparser->get_token()) {
+	if ($token->[0] eq 'S') {
+	    foreach my $name (@{$conditions{'name'}}) {
+		my $flag=0;
+		my $match=$name;
+		if ($match=~/^\!/) {
+		    $match=~s/^\!//g;
+		    $flag=1;
+		}
+		$match=~s/^\///g;
+		$match=~s/\/$//g;
+		if ((!$flag and $token->[1]=~/$match/) or
+		    ($flag and $token->[1]!~/$match/)) {
+		    $nameflag=1;
+		}
+	    }
+	    $Ttoken='';
+	}
+	elsif ($token->[0] eq 'E') {
+	    foreach my $name (@{$conditions{'name'}}) {
+		my $flag=0;
+		my $match=$name;
+		if ($match=~/^\!/) {
+		    $match=~s/^\!//g;
+		    $flag=1;
+		}
+		$match=~s/^\///g;
+		$match=~s/\/$//g;
+		if ((!$flag and $token->[1]=~/$match/) or
+		    ($flag and $token->[1]!~/$match/)) {
+		    foreach my $cdata (@{$conditions{'cdata'}}) {
+			my $flag=0;
+			my $match=$cdata;
+			if ($match=~/^\!/) {
+			    $match=~s/^\!//g;
+			    $flag=1;
 			}
-			$cm=~s/^.//; $cm=~s/.$//;
-			if ($not and $cdata=~/$cm/) {
-			    $ign=1; $exclude=0;
+			$match=~s/^\///g;
+			$match=~s/\/$//g;
+			if ((!$flag and $Ttoken=~/$match/) or
+			    ($flag and $Ttoken!~/$match/)) {
+			    $cdataflag=1;
 			}
-			if ((!$not and $cdata!~/$cm/)
-			    or ($not and $cdata=~/$cm/)) {
-#				nothing happens
-#			    $exclude=0;
+		    }
+		    if (@{$conditions{'cdata'}}) {
+			if ($cdataflag) {
+			    return 0;
 			}
-			elsif (($not and $cdata!~/$cm/)
-			       or (!$not and $cdata=~/$cm/)) {
-			    $exclude++ unless $ign;
+		    }
+		    else {
+			if ($nameflag) {
+			    return 0;
 			}
 		    }
+		    $nameflag=0;
 		}
 	    }
 	}
-	if ($eh{$token->[1]}) {
-	    $ign=0;
-	    $echild=0;
-	    if (!$exclude and !$excludea) {
-		print $tr;
-#		print $token->[2];
-		$tr='';
-	    }
-	    elsif ($exclude>0 or $excludea>0) {
-#		print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
-		$exclude=0; $excluden=0; $excludea=0;
-		$tr='';
-	    }
-	    $exclude=0; $excluden=0; $excludea=0;
-	}
-	else {
-	    if ($echild) {
-#		$tr.=$token->[2];
+	elsif ($token->[0] eq 'T') {
+	    if ($nameflag) {
+		$Ttoken.=$token->[1];
 	    }
-	    else {
-		print $token->[2];
-		$tr='';
-	    }
-	}
-	$hloc--;
-    }
-    elsif ($token->[0] eq 'T') {
-	if ($echild) {
-	    $tr.=$token->[1];
-	    $cdata=$token->[1];
-	}
-	else {
-	    print $token->[1];
-	    $tr='';
 	}
     }
+    return 1;
 }
 
 # ------------------------------------------------------------ clear conditions
@@ -230,6 +221,7 @@
     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
+    %eh=(1,1); delete $eh{1};
 }
 
 # --------------------------------------- remove starting and ending whitespace
@@ -239,35 +231,29 @@
 
 
 
+
 # --------------------------------------------------------- Format xfml section
 sub format_xfml {
     my (@tokeninfo)=@_;
     return '';
 }
 
+# ------------------------------------------------------- Format clause section
+sub format_clause {
+    my (@tokeninfo)=@_;
+    return '';
+}
+
 # ---------------------------------------------------- Format when:name section
 sub format_when_name {
     my (@tokeninfo)=@_;
-    $wloc++;
+#    $wloc++;
     my $att_match=$tokeninfo[2]->{'match'};
     push @{$conditions{'name'}},$att_match;
     my $text=&trim($parser->get_text('/when:name'));
     $parser->get_tag('/when:name');
-    $wloc--;
-    &cc unless $wloc;
-    return '';
-}
-
-# ----------------------------------------------- Format when:attribute section
-sub format_when_attribute {
-    my (@tokeninfo)=@_;
-    $wloc++;
-    my $att_match=$tokeninfo[2]->{'match'};
-    push @{$conditions{'attribute'}},$att_match;
-    my $text=&trim($parser->get_text('/when:attribute'));
-    $parser->get_tag('/when:attribute');
-    $wloc--;
-    &cc unless $wloc;
+#    $wloc--;
+#    &cc unless $wloc;
     return '';
 }
 
@@ -280,16 +266,7 @@
     my $text=&trim($parser->get_text('/when:cdata'));
     $parser->get_tag('/when:cdata');
     $wloc--;
-    &cc unless $wloc;
-    return '';
-}
-
-# ----------------------------------------------- Format choice:include section
-sub format_choice_include {
-    my (@tokeninfo)=@_;
-    my $text=&trim($parser->get_text('/choice:include'));
-    $parser->get_tag('/choice:include');
-    $ih{$tokeninfo[2]->{'match'}}++;
+#    &cc unless $wloc;
     return '';
 }
 

--harris411014164502--