[LON-CAPA-cvs] cvs: modules /gerd/Wiki convert.pl

www www at source.lon-capa.org
Tue Dec 13 13:09:52 EST 2011


www		Tue Dec 13 18:09:52 2011 EDT

  Modified files:              
    /modules/gerd/Wiki	convert.pl 
  Log:
  Taking care of redirects
  
  
Index: modules/gerd/Wiki/convert.pl
diff -u modules/gerd/Wiki/convert.pl:1.1 modules/gerd/Wiki/convert.pl:1.2
--- modules/gerd/Wiki/convert.pl:1.1	Tue Dec 13 03:05:59 2011
+++ modules/gerd/Wiki/convert.pl	Tue Dec 13 18:09:52 2011
@@ -1,17 +1,23 @@
 use strict;
 # Read the article index file
 my %residx=();
+my %idxres=();
 my %resfile=();
+my %fileres=();
+my %titleres=();
 my %duplicatecnt=();
 open(IN,'dump/articles.dat');
 while (my $line=<IN>) {
    chomp($line);
    my ($idx,$res)=split(/\t/,$line);
-   $residx{$line}=$idx;
+   $residx{$res}=$idx;
+   $idxres{$idx}=$res;
    my $file;
+   my $title;
    if ($res=~/\s[\-\–]+\s/) {
       my ($dir,$remainder)=split(/\s+[\-\–]+\s+/,$res);
       $dir=~s/\W//g;
+      $title=$remainder;
       $remainder=~s/\,/\_/g;
       $remainder=~s/\s/\_/g;
       $remainder=~s/\W//g;
@@ -19,6 +25,7 @@
       $file=$dir.'/'.$remainder;
    } else {
       $file=$res;
+      $title=$res;
       $file=~s/\,/\_/g;
       $file=~s/\s/\_/g;
       $file=~s/\W//g;
@@ -30,6 +37,37 @@
       $file.='_'.$duplicatecnt{$file};
    }
    $resfile{$file}=$idx;
-   print $file."\n";
+   $fileres{$idx}=$file;
+   $title=~s/[^\w \,\-\']//g;
+   $titleres{$idx}=$title;
 }
 close(IN);
+# So far:
+# residx: Wiki-reference -> index
+# idxres: index -> Wiki-reference
+# resfile: filename -> index
+# fileres: index -> filename
+# titleres: index -> clean title
+#
+# Now deal with redirects
+#
+foreach my $idx (keys(%idxres)) {   # resolve redirect articles; keys() is evaluated once, so the delete below is safe
+   my $dumpfile='dump/'.$idx.'.wikitext';
+   open(my $in,'<',$dumpfile) or do { print "*** WARNING: cannot read $dumpfile: $!\n"; next; };
+   my $line=<$in>;   # the first line is enough to recognize a redirect
+   close($in);
+   next unless (defined($line));   # empty dump, nothing to inspect
+   chomp($line);
+   if ($line=~/\#REDIRECT\s*\[\[([^\]]+)\]\]$/) {
+      my $redir=$1;
+      if (exists($residx{$redir})) {   # exists(): a target whose index is 0 is still a valid target
+# Make the old reference resolve to the index of the redirect target
+         $residx{$idxres{$idx}}=$residx{$redir};
+      } else {
+         print "*** WARNING: $redir not defined!\n";   # the redirect points nowhere
+      }
+# No need to visit this file again
+      delete $idxres{$idx};
+   }
+}
+# We are now only left with real files that need translation




More information about the LON-CAPA-cvs mailing list