[LON-CAPA-cvs] cvs: modules /gerd/Wiki convert.pl getall.pl

www www at source.lon-capa.org
Wed Dec 14 22:18:04 EST 2011


www		Thu Dec 15 03:18:04 2011 EDT

  Added files:                 
    /modules/gerd/Wiki	getall.pl 

  Modified files:              
    /modules/gerd/Wiki	convert.pl 
  Log:
  Wiki translator starting to do reasonable things.
  
  
Index: modules/gerd/Wiki/convert.pl
diff -u modules/gerd/Wiki/convert.pl:1.8 modules/gerd/Wiki/convert.pl:1.9
--- modules/gerd/Wiki/convert.pl:1.8	Wed Dec 14 22:52:49 2011
+++ modules/gerd/Wiki/convert.pl	Thu Dec 15 03:18:04 2011
@@ -104,7 +104,7 @@
       my $redir=&unformat($1);
       unless ($residx{$redir}) {
 # How did that happen? The redirect points nowhere
-    #     print "*** WARNING: $redir not defined!\n";
+         print "*** WARNING: $redir not defined!\n";
       } else {
          my $oldref=$idxres{$idx};
 # Changing $oldref to $redir
@@ -130,16 +130,25 @@
       $targetdir.='/'.$dir2;
       mkdir($targetdir);
     }
-    open(IN,'dump/'.$idx.'.wikitext');
+    open(IN,'dump/'.$idx.'.raw') || print "WARNING: could not read $idx ===================\n";
     print "====== Processing $idx: $idxres{$idx}\n";
+    my $outflag=0;
+    my $output='';
     while (my $line=<IN>) {
        chomp($line);
-       if ($line=~/\[\[([^\]\|\#]+)/) {
-          my $ref=$1;
-          &rellink($targetdir,$ref);
-       }
+       if ($line=~/^\<\!--\s*$/) { last; }
+       if ($line=~/\<\!-- start content --\>/) { $outflag=1; next; }
+       unless ($outflag) { next; }
+# ----------------------------------------------------- Substitutions
+       $line=~s/\<img class=\"tex inlinemath\" alt=\"([^\"]+)\"[^\>]+\>/<m>\$$1\$<\/m>/gs;
+       $line=~s/\<img class=\"tex\" alt=\"([^\"]+)\"[^\>]+\>/<m>\\\[$1\\\]<\/m>/gs;
+       $line=~s/\<a href\=\"[^\"]+\" title=\"([^\"]+)\"\>/&alink($targetdir,$1)/gse;
+       $line=~s/\<a[^\>]+title\=([^\"]+)\"[^\>]+\>\s*\<img[^\>]+\>\s*\<\/a\>/&imglink($targetdir,$1)/gse;
+# ------------------------------------------------- End Substitutions
+       $output.=$line."\n";
     }
     close(IN);
+    print $output;
 }
 
 sub unformat {
@@ -152,10 +161,16 @@
    return(lc($ref));
 }
 
+sub alink {
+   return '<a href="'.&rellink(@_).'">';
+}
+
+sub imglink {
+   return '<img src="'.&rellink(@_).'" />';
+}
 
 sub rellink {
    my ($srcdir,$ref)=@_;
-   my ($dir0,$dir1,$dir2)=split(/\//,$srcdir);
    $ref=~s/\_/ /g;
    $ref=~s/^[^\w\?\(]+//gs;
    $ref=~s/[^\w\?\)]+$//gs;

Index: modules/gerd/Wiki/getall.pl
+++ modules/gerd/Wiki/getall.pl
# getall.pl - fetch the rendered HTML of every wiki article listed in
# dump/articles.dat and store it as htmldump/<idx>.raw.
#
# Each line of dump/articles.dat is expected to be "<idx>\t<title>".
# NOTE(review): convert.pl rev 1.9 opens 'dump/<idx>.raw' while this script
# writes to 'htmldump/' - confirm which directory is intended.
use strict;
use warnings;

my $listfile='dump/articles.dat';
my $baseurl='http://scripts.mit.edu/~srayyan/PERwiki/index.php?title=';

# Fail loudly if the article list is missing; otherwise the loop below
# would silently do nothing.
open(my $in,'<',$listfile) or die "Could not read $listfile: $!\n";
while (my $line=<$in>) {
   chomp($line);
   my ($idx,$res)=split(/\t/,$line);
# Skip blank or malformed lines instead of fetching an empty title
   unless (defined($idx) && length($idx) && defined($res) && length($res)) { next; }
# Page titles are requested with underscores in place of spaces
   $res=~tr/ /_/;
# List form of system() bypasses the shell, so quotes, '$' or backticks in
# a title cannot be interpreted as shell syntax.
# NOTE(review): the title is not percent-encoded; a title containing '&'
# or '#' would still truncate the query string - confirm whether
# articles.dat can contain such titles.
   my $status=system('wget','-O','htmldump/'.$idx.'.raw',$baseurl.$res);
   if ($status!=0) {
      print "WARNING: could not fetch $idx ($res)\n";
   }
}
close($in);




More information about the LON-CAPA-cvs mailing list