[LON-CAPA-cvs] cvs: loncom /build xfml_parse.pl
harris41
lon-capa-cvs@mail.lon-capa.org
Wed, 20 Feb 2002 00:21:42 -0000
This is a MIME encoded message
--harris411014164502
Content-Type: text/plain
harris41 Tue Feb 19 19:21:42 2002 EDT
Modified files:
/loncom/build xfml_parse.pl
Log:
a cleaner leaner script (major rewrite)
--harris411014164502
Content-Type: text/plain
Content-Disposition: attachment; filename="harris41-20020219192142.txt"
Index: loncom/build/xfml_parse.pl
diff -u loncom/build/xfml_parse.pl:1.2 loncom/build/xfml_parse.pl:1.3
--- loncom/build/xfml_parse.pl:1.2 Fri Feb 1 05:56:41 2002
+++ loncom/build/xfml_parse.pl Tue Feb 19 19:21:42 2002
@@ -12,22 +12,15 @@
## ##
## ORGANIZATION OF THIS PERL SCRIPT ##
## 1. Notes ##
-## 2. Get command line arguments ##
-## 3. First pass through (grab distribution-specific information) ##
-## 4. Second pass through (parse out what is not necessary) ##
-## 5. Third pass through (translate markup according to specified mode) ##
-## 6. Functions (most all just format contents of different markup tags) ##
-## 7. POD (plain old documentation, CPAN style) ##
+## 2. Read in filter file ##
+## 3. Initialize and clear conditions ##
+## 4. Run through and apply clauses ##
## ##
###############################################################################
# ----------------------------------------------------------------------- Notes
#
-# I am using a multiple pass-through approach to parsing
-# the xfml file. This saves memory and makes sure the server
-# will never be overloaded.
-#
-# This is meant to parse files meeting the piml document type.
+# This is meant to parse files meeting the xfml document type.
# See xfml.dtd. XFML=XML Filtering Markup Language.
use HTML::TokeParser;
@@ -43,26 +36,26 @@
}
my %eh;
-my %ih;
+
+# ---------------------------------------------- Read in filter file from @ARGV
my $tofilter=shift @ARGV;
-open IN,"<$tofilter";
-my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
-close IN;
+open IN,"<$tofilter"; my @lines=<IN>;
+my $parsestring=join('',@lines); undef @lines; close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
-# Define handling methods for mode-dependent text rendering
-
+# --------------------------------------------- initialize and clear conditions
my %conditions; &cc;
+# Define handling methods for mode-dependent text rendering
$parser->{textify}={
- xfml => \&format_xfml,
+ 'xfml' => \&format_xfml,
'when:name' => \&format_when_name,
'when:attribute' => \&format_when_attribute,
'when:cdata' => \&format_when_cdata,
- 'choice:include' => \&format_choice_include,
'choice:exclude' => \&format_choice_exclude,
+ 'clause' => \&format_clause,
};
my $text;
@@ -70,158 +63,156 @@
my $wloc=0;
my %eha;
-while (my $token = $parser->get_tag('xfml')) {
- &format_xfml(@{$token});
- $text = $parser->get_text('/xfml');
- $token = $parser->get_tag('/xfml');
-}
-
-#open IN,"<$tofilter";
-my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
-$parser = HTML::TokeParser->new(\$parsestring2) or
+# ----------------------------------------------- Run through and apply clauses
+my @lines2=<>; my $output=join('',@lines2); undef @lines2;
+my $lparser = HTML::TokeParser->new(\$output) or
die('can\'t create TokeParser object');
-$parser->xml_mode('1');
-
-my $token;
-my $hloc=0;
-my %ts;
-my $tr;
-my $echild=0;
-my $exclude=0;
-my $excluden=0;
-my $excludea=0;
-my $et=0;
-my $cdata='';
-my $excludenold=0;
-my $ign=0;
-
-while ($token = $parser->get_token()) {
- if ($token->[0] eq 'D') {
- print $token->[1];
- }
- elsif ($token->[0] eq 'C') {
- print $token->[1];
- }
- elsif ($token->[0] eq 'S') {
- $cdata='';
- $hloc++;
-# if token can be excluded, then pretend it is until all conditions are
-# run (eha); then output during end tag processing
-# else, output
-
-# a token can be excluded when it is an eh key, or a child node of
-# an eh key
-
- if ($eh{$token->[1]}) {
- $echild=$token->[1];
+$lparser->xml_mode('1');
+my $parsestring2;
+while (my $token = $parser->get_tag('clause')) {
+ $parsestring2=$output;
+ $lparser = HTML::TokeParser->new(\$parsestring2);
+ $lparser->xml_mode('1');
+ $output='';
+ &format_clause(@{$token});
+ $text = $parser->get_text('/clause');
+ $token = $parser->get_tag('/clause');
+
+ my $token='';
+ my $ttype='';
+ my $excludeflag=0;
+ my $outcache='';
+ while ($token = $lparser->get_token()) {
+ if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
+ elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
+ elsif ($token->[0] eq 'T') {
+ if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
+ or $ttype eq 'E') {
+ $output.=$token->[1];
+ }
+ else {
+ $outcache.=$token->[1];
+ }
}
- if ($echild) {
- # run through names for echild
- # then attributes and/or values and/or cdata
- my $name=$token->[1];
- my @attributes=@{$token->[3]};
- my %atthash=%{$token->[2]};
- foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
- foreach my $namematch (@{$namemlist}) {
- my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
- if ($name=~/$nm/) {
- $excludenold=$excluden;
- $excluden++;
- foreach my $attributemlist
- (@{$eha{$echild}->{'attribute'}}) {
- foreach my $attributematch
- (@{$attributemlist}) {
- my ($an,$am)=
- split(/\=/,$attributematch,2);
- $am=~s/^.//;
- $am=~s/.$//;
- if ($atthash{$an}) {
- if ($atthash{$an}=~/$am/) {
- $excludea++;
- }
- }
- }
- }
- }
- }
+ elsif ($token->[0] eq 'S') {
+ if ($eh{$token->[1]} or $excludeflag==1) {
+ $ttype='';
+ $excludeflag=1;
+ $outcache.=$token->[4];
+ }
+ else {
+ $ttype='S';
+ $output.=$token->[4];
+ }
+ if ($excludeflag==1) {
+
}
- $tr.=$token->[4];
}
- else {
- print $token->[4];
+ elsif ($token->[0] eq 'E') {
+ if ($eh{$token->[1]} and $excludeflag==1) {
+ $ttype='E';
+ $excludeflag=0;
+ $outcache.=$token->[2];
+ my $retval=&evalconditions($outcache);
+ if (&evalconditions($outcache)) {
+ $output.=$outcache;
+ }
+ else {
+ $output.='<!-- FILTERED OUT -->';
+ }
+ $outcache='';
+ }
+ elsif ($excludeflag==1) {
+ $ttype='';
+ $outcache.=$token->[2];
+ }
+ else {
+ $output.=$token->[2];
+ $ttype='E';
+ }
}
}
- elsif ($token->[0] eq 'E') {
- if ($echild) {
- $tr.=$token->[2];
- if ($excluden) {
- my $i=0;
- CDATALOOP:
- foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
- $i++;
- my $j;
- foreach my $cdatamatch (@{$cdatamlist}) {
- $j++;
-# print "CDATA: $cdatamatch, $cdata\n";
- my $cm=$cdatamatch;
- my $not=0;
- if ($cm=~/\!/) {
- $not=1;
- $cm=~s/^.//;
+ &cc;
+}
+print $output;
+
+# -------------------------------------------------------------- evalconditions
+sub evalconditions {
+ my ($parsetext)=@_;
+ my $eparser = HTML::TokeParser->new(\$parsetext);
+ unless (@{$conditions{'name'}} or
+ @{$conditions{'attribute'}}) {
+ return 0;
+ }
+ my $nameflag=0;
+ my $cdataflag=0;
+ my $matchflag=0;
+ my $Ttoken='';
+ while (my $token = $eparser->get_token()) {
+ if ($token->[0] eq 'S') {
+ foreach my $name (@{$conditions{'name'}}) {
+ my $flag=0;
+ my $match=$name;
+ if ($match=~/^\!/) {
+ $match=~s/^\!//g;
+ $flag=1;
+ }
+ $match=~s/^\///g;
+ $match=~s/\/$//g;
+ if ((!$flag and $token->[1]=~/$match/) or
+ ($flag and $token->[1]!~/$match/)) {
+ $nameflag=1;
+ }
+ }
+ $Ttoken='';
+ }
+ elsif ($token->[0] eq 'E') {
+ foreach my $name (@{$conditions{'name'}}) {
+ my $flag=0;
+ my $match=$name;
+ if ($match=~/^\!/) {
+ $match=~s/^\!//g;
+ $flag=1;
+ }
+ $match=~s/^\///g;
+ $match=~s/\/$//g;
+ if ((!$flag and $token->[1]=~/$match/) or
+ ($flag and $token->[1]!~/$match/)) {
+ foreach my $cdata (@{$conditions{'cdata'}}) {
+ my $flag=0;
+ my $match=$cdata;
+ if ($match=~/^\!/) {
+ $match=~s/^\!//g;
+ $flag=1;
}
- $cm=~s/^.//; $cm=~s/.$//;
- if ($not and $cdata=~/$cm/) {
- $ign=1; $exclude=0;
+ $match=~s/^\///g;
+ $match=~s/\/$//g;
+ if ((!$flag and $Ttoken=~/$match/) or
+ ($flag and $Ttoken!~/$match/)) {
+ $cdataflag=1;
}
- if ((!$not and $cdata!~/$cm/)
- or ($not and $cdata=~/$cm/)) {
-# nothing happens
-# $exclude=0;
+ }
+ if (@{$conditions{'cdata'}}) {
+ if ($cdataflag) {
+ return 0;
}
- elsif (($not and $cdata!~/$cm/)
- or (!$not and $cdata=~/$cm/)) {
- $exclude++ unless $ign;
+ }
+ else {
+ if ($nameflag) {
+ return 0;
}
}
+ $nameflag=0;
}
}
}
- if ($eh{$token->[1]}) {
- $ign=0;
- $echild=0;
- if (!$exclude and !$excludea) {
- print $tr;
-# print $token->[2];
- $tr='';
- }
- elsif ($exclude>0 or $excludea>0) {
-# print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
- $exclude=0; $excluden=0; $excludea=0;
- $tr='';
- }
- $exclude=0; $excluden=0; $excludea=0;
- }
- else {
- if ($echild) {
-# $tr.=$token->[2];
+ elsif ($token->[0] eq 'T') {
+ if ($nameflag) {
+ $Ttoken.=$token->[1];
}
- else {
- print $token->[2];
- $tr='';
- }
- }
- $hloc--;
- }
- elsif ($token->[0] eq 'T') {
- if ($echild) {
- $tr.=$token->[1];
- $cdata=$token->[1];
- }
- else {
- print $token->[1];
- $tr='';
}
}
+ return 1;
}
# ------------------------------------------------------------ clear conditions
@@ -230,6 +221,7 @@
@{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
@{$conditions{'value'}}=(); pop @{$conditions{'value'}};
@{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
+ %eh=(1,1); delete $eh{1};
}
# --------------------------------------- remove starting and ending whitespace
@@ -239,35 +231,29 @@
+
# --------------------------------------------------------- Format xfml section
sub format_xfml {
my (@tokeninfo)=@_;
return '';
}
+# ------------------------------------------------------- Format clause section
+sub format_clause {
+ my (@tokeninfo)=@_;
+ return '';
+}
+
# ---------------------------------------------------- Format when:name section
sub format_when_name {
my (@tokeninfo)=@_;
- $wloc++;
+# $wloc++;
my $att_match=$tokeninfo[2]->{'match'};
push @{$conditions{'name'}},$att_match;
my $text=&trim($parser->get_text('/when:name'));
$parser->get_tag('/when:name');
- $wloc--;
- &cc unless $wloc;
- return '';
-}
-
-# ----------------------------------------------- Format when:attribute section
-sub format_when_attribute {
- my (@tokeninfo)=@_;
- $wloc++;
- my $att_match=$tokeninfo[2]->{'match'};
- push @{$conditions{'attribute'}},$att_match;
- my $text=&trim($parser->get_text('/when:attribute'));
- $parser->get_tag('/when:attribute');
- $wloc--;
- &cc unless $wloc;
+# $wloc--;
+# &cc unless $wloc;
return '';
}
@@ -280,16 +266,7 @@
my $text=&trim($parser->get_text('/when:cdata'));
$parser->get_tag('/when:cdata');
$wloc--;
- &cc unless $wloc;
- return '';
-}
-
-# ----------------------------------------------- Format choice:include section
-sub format_choice_include {
- my (@tokeninfo)=@_;
- my $text=&trim($parser->get_text('/choice:include'));
- $parser->get_tag('/choice:include');
- $ih{$tokeninfo[2]->{'match'}}++;
+# &cc unless $wloc;
return '';
}
--harris411014164502--