[LON-CAPA-cvs] cvs: modules /gerd/Wiki convert.pl

www www at source.lon-capa.org
Wed Dec 14 13:40:26 EST 2011


www		Wed Dec 14 18:40:26 2011 EDT

  Modified files:              
    /modules/gerd/Wiki	convert.pl 
  Log:
  Formatting leniency in Wiki
  
  
Index: modules/gerd/Wiki/convert.pl
diff -u modules/gerd/Wiki/convert.pl:1.6 modules/gerd/Wiki/convert.pl:1.7
--- modules/gerd/Wiki/convert.pl:1.6	Wed Dec 14 16:44:56 2011
+++ modules/gerd/Wiki/convert.pl	Wed Dec 14 18:40:26 2011
@@ -6,11 +6,25 @@
 my %idxres=();
 my %resfile=();
 my %fileres=();
+my %filereference=();
 my %titleres=();
 my %duplicatecnt=();
 my %filelink=();
 my %filedir=();
 
+# Get the files
+
+my @files=<dump/*>;
+foreach my $file (@files) {
+   $file=~s/^dump\///;
+   $filereference{&unformat($file)}=$file;
+}
+
+#foreach my $key (keys(%filereference)) {
+#   print $key." -> ".$filereference{$key}."\n";
+#}
+#exit;
+
 # Read the copy/paste of the treemenu source code
 
 open(IN,'dump/treemenu.dat');
@@ -18,7 +32,6 @@
 while (my $line=<IN>) {
    chomp($line);
    my ($res)=($line=~/title=\\\"([^\"]+)\"/);
-   $res=~s/\\//g;
    if ($res=~/\s[\-\–]+\s/) {
       my ($dir,$remainder)=split(/\s+[\-\–]+\s+/,$res);
       $dir=~s/\W//g;
@@ -32,7 +45,7 @@
       } else {
          $thisdir=$currentdir.'/'.$dir;
       }
-      $resdir{$res}=$thisdir;
+      $resdir{&unformat($res)}=$thisdir;
    }
 }
 close(IN);
@@ -43,8 +56,6 @@
 while (my $line=<IN>) {
    chomp($line);
    my ($idx,$res)=split(/\t/,$line);
-   $residx{$res}=$idx;
-   $idxres{$idx}=$res;
    my $file;
    my $title;
    if ($res=~/\s[\-\–]+\s/) {
@@ -55,6 +66,9 @@
       $file=$res;
       $title=$res;
    }
+   $res=&unformat($res);
+   $residx{$res}=$idx;
+   $idxres{$idx}=$res;
    $file=~s/\,/\_/g;
    $file=~s/\s/\_/g;
    $file=~s/\W//g;
@@ -77,7 +91,7 @@
 # resfile: filename -> index
 # fileres: index -> filename
 # titleres: index -> clean title
-#
+# filereference: Wiki-reference -> filename in dump 
 # Now deal with rewrites
 #
 foreach my $idx (keys(%idxres)) {
@@ -86,7 +100,7 @@
    close(IN);
    chomp ($line);
    if ($line=~/\#REDIRECT\s*\[\[([^\]]+)\]\]$/) {
-      my $redir=$1;
+      my $redir=&unformat($1);
       unless ($residx{$redir}) {
 # How did that happen? The redirect points nowhere
          print "*** WARNING: $redir not defined!\n";
@@ -127,6 +141,17 @@
     close(IN);
 }
 
+sub unformat {   # Normalize a Wiki reference string into a canonical lower-case lookup key
+   my ($ref)=@_;   # $ref: the raw reference text, taken from the first argument
+   $ref=~s/\\//g;   # drop every backslash
+   $ref=~s/\s/\_/gs;   # each whitespace character becomes an underscore
+   $ref=~s/^[^\w\?\(]+//gs;   # trim leading characters that are not word characters, a question mark or an opening parenthesis
+   $ref=~s/[^\w\?\)]+$//gs;   # trim trailing characters that are not word characters, a question mark or a closing parenthesis
+   $ref=~s/\%27/\'/g;   # turn the URL-escaped sequence %27 back into an apostrophe
+   return(lc($ref));   # returns the normalized string in lower case, so hash lookups on it ignore case
+}
+
+
 sub rellink {
    my ($srcdir,$ref)=@_;
    my ($dir0,$dir1,$dir2)=split(/\//,$srcdir);
@@ -137,26 +162,25 @@
    my $filename;
    my $filedir;
    if ($ref=~/^file\:\s*(.+)$/i) {
-      my $link=$1;
+      my $link=&unformat($1);
+      my $fn=$filereference{$link};
+      unless ($fn) {
+         print "WARNING: [$ref] does not exist\n";
+      }
       my $returnref;
       my $returndir;
       if ($filelink{$link}) {
          $returnref=$filelink{$link};
          $returndir=$filedir{$link};
       } else {
-         unless (-e 'dump/'.$link) {
-            print "WARNING: [$ref] ($link) does not exist\n";
-            return '';
-         }
-         my $filename=$link;
-         $filename=~s/\s+/\_/g;
-         my ($fileb,$ext)=($filename=~/^(.+)\.(\w+)$/);
+         my ($fileb,$ext)=($fn=~/^(.+)\.(\w+)$/);
          $ext=lc($ext);
-         $fileb=~s/\W//;
+         $fileb=~s/\s/\_/gs;
+         $fileb=~s/\W//gs;
          $filename=$fileb.'.'.$ext;
          $filedir=$srcdir.'/files';
          mkdir($filedir);
-         copy('dump/'.$link,$filedir.'/'.$filename);
+         copy('dump/'.$fn,$filedir.'/'.$filename);
      }
    } elsif ($ref=~/^wikipedia\:(.+)$/i) {
       $ref=~s/wikipedia\:/http\:en.wikipedia.org\//;
@@ -164,7 +188,7 @@
    } elsif ($ref=~/^http\:(.+)$/i) {
       return $ref;
    } else {
-       my $idx=$residx{$ref};
+       my $idx=$residx{&unformat($ref)};
        unless ($idx) {
           print "WARNING: Unknown index for [$ref]\n";
           return '';




More information about the LON-CAPA-cvs mailing list