[LON-CAPA-cvs] cvs: modules /gerd/Wiki convert.pl getall.pl
www
www at source.lon-capa.org
Wed Dec 14 22:18:04 EST 2011
www Thu Dec 15 03:18:04 2011 EDT
Added files:
/modules/gerd/Wiki getall.pl
Modified files:
/modules/gerd/Wiki convert.pl
Log:
Wiki translator starting to do reasonable things.
Index: modules/gerd/Wiki/convert.pl
diff -u modules/gerd/Wiki/convert.pl:1.8 modules/gerd/Wiki/convert.pl:1.9
--- modules/gerd/Wiki/convert.pl:1.8 Wed Dec 14 22:52:49 2011
+++ modules/gerd/Wiki/convert.pl Thu Dec 15 03:18:04 2011
@@ -104,7 +104,7 @@
my $redir=&unformat($1);
unless ($residx{$redir}) {
# How did that happen? The redirect points nowhere
- # print "*** WARNING: $redir not defined!\n";
+ print "*** WARNING: $redir not defined!\n";
} else {
my $oldref=$idxres{$idx};
# Changing $oldref to $redir
@@ -130,16 +130,25 @@
$targetdir.='/'.$dir2;
mkdir($targetdir);
}
- open(IN,'dump/'.$idx.'.wikitext');
+ open(IN,'dump/'.$idx.'.raw') || print "WARNING: could not read $idx ===================\n";
print "====== Processing $idx: $idxres{$idx}\n";
+ my $outflag=0;
+ my $output='';
while (my $line=<IN>) {
chomp($line);
- if ($line=~/\[\[([^\]\|\#]+)/) {
- my $ref=$1;
- &rellink($targetdir,$ref);
- }
+ if ($line=~/^\<\!--\s*$/) { last; }
+ if ($line=~/\<\!-- start content --\>/) { $outflag=1; next; }
+ unless ($outflag) { next; }
+# ----------------------------------------------------- Substitutions
+ $line=~s/\<img class=\"tex inlinemath\" alt=\"([^\"]+)\"[^\>]+\>/<m>\$$1\$<\/m>/gs;
+ $line=~s/\<img class=\"tex\" alt=\"([^\"]+)\"[^\>]+\>/<m>\\\[$1\\\]<\/m>/gs;
+ $line=~s/\<a href\=\"[^\"]+\" title=\"([^\"]+)\"\>/&alink($targetdir,$1)/gse;
+ $line=~s/\<a[^\>]+title\=([^\"]+)\"[^\>]+\>\s*\<img[^\>]+\>\s*\<\/a\>/&imglink($targetdir,$1)/gse;
+# ------------------------------------------------- End Substitutions
+ $output.=$line."\n";
}
close(IN);
+ print $output;
}
sub unformat {
@@ -152,10 +161,16 @@
return(lc($ref));
}
+sub alink {
+ return '<a href="'.&rellink(@_).'">';
+}
+
+sub imglink {
+ return '<img src="'.&rellink(@_).'" />';
+}
sub rellink {
my ($srcdir,$ref)=@_;
- my ($dir0,$dir1,$dir2)=split(/\//,$srcdir);
$ref=~s/\_/ /g;
$ref=~s/^[^\w\?\(]+//gs;
$ref=~s/[^\w\?\)]+$//gs;
Index: modules/gerd/Wiki/getall.pl
+++ modules/gerd/Wiki/getall.pl
# Download the rendered HTML page of every wiki article listed in
# dump/articles.dat.
#
# Each input line has the form "<index>\t<title>". The page for <title> is
# fetched with wget and stored as htmldump/<index>.raw.
use strict;
use warnings;

my $listfile = 'dump/articles.dat';
open(my $in, '<', $listfile) or die "Could not read $listfile: $!\n";
while (my $line = <$in>) {
    chomp($line);
    my ($idx, $res) = split(/\t/, $line);
    # Skip blank or malformed lines rather than fetching an empty title.
    next unless (defined($idx) && length($idx));
    next unless (defined($res) && length($res));
    # Page titles are addressed with underscores in place of spaces.
    $res =~ s/ /\_/gs;
    # List-form system() bypasses the shell, so quotes, backticks or "$"
    # inside a title can neither break nor inject into the command line.
    my $status = system('wget', '-O', 'htmldump/'.$idx.'.raw',
        'http://scripts.mit.edu/~srayyan/PERwiki/index.php?title='.$res);
    # Report failed downloads but keep going with the remaining articles.
    if ($status != 0) {
        print "WARNING: could not fetch $idx ($res)\n";
    }
}
close($in);
More information about the LON-CAPA-cvs
mailing list