[LON-CAPA-cvs] cvs: modules /gerd/Wiki convert.pl
www
www at source.lon-capa.org
Wed Dec 14 13:40:26 EST 2011
www Wed Dec 14 18:40:26 2011 EDT
Modified files:
/modules/gerd/Wiki convert.pl
Log:
Formatting leniency in Wiki
Index: modules/gerd/Wiki/convert.pl
diff -u modules/gerd/Wiki/convert.pl:1.6 modules/gerd/Wiki/convert.pl:1.7
--- modules/gerd/Wiki/convert.pl:1.6 Wed Dec 14 16:44:56 2011
+++ modules/gerd/Wiki/convert.pl Wed Dec 14 18:40:26 2011
@@ -6,11 +6,25 @@
my %idxres=();
my %resfile=();
my %fileres=();
+my %filereference=();
my %titleres=();
my %duplicatecnt=();
my %filelink=();
my %filedir=();
+# Get the files
+
+my @files=<dump/*>;
+foreach my $file (@files) {
+ $file=~s/^dump\///;
+ $filereference{&unformat($file)}=$file;
+}
+
+#foreach my $key (keys(%filereference)) {
+# print $key." -> ".$filereference{$key}."\n";
+#}
+#exit;
+
# Read the copy/paste of the treemenu source code
open(IN,'dump/treemenu.dat');
@@ -18,7 +32,6 @@
while (my $line=<IN>) {
chomp($line);
my ($res)=($line=~/title=\\\"([^\"]+)\"/);
- $res=~s/\\//g;
if ($res=~/\s[\-\â]+\s/) {
my ($dir,$remainder)=split(/\s+[\-\â]+\s+/,$res);
$dir=~s/\W//g;
@@ -32,7 +45,7 @@
} else {
$thisdir=$currentdir.'/'.$dir;
}
- $resdir{$res}=$thisdir;
+ $resdir{&unformat($res)}=$thisdir;
}
}
close(IN);
@@ -43,8 +56,6 @@
while (my $line=<IN>) {
chomp($line);
my ($idx,$res)=split(/\t/,$line);
- $residx{$res}=$idx;
- $idxres{$idx}=$res;
my $file;
my $title;
if ($res=~/\s[\-\â]+\s/) {
@@ -55,6 +66,9 @@
$file=$res;
$title=$res;
}
+ $res=&unformat($res);
+ $residx{$res}=$idx;
+ $idxres{$idx}=$res;
$file=~s/\,/\_/g;
$file=~s/\s/\_/g;
$file=~s/\W//g;
@@ -77,7 +91,7 @@
# resfile: filename -> index
# fileres: index -> filename
# titleres: index -> clean title
-#
+# filereference: Wiki-reference -> filename in dump
# Now deal with rewrites
#
foreach my $idx (keys(%idxres)) {
@@ -86,7 +100,7 @@
close(IN);
chomp ($line);
if ($line=~/\#REDIRECT\s*\[\[([^\]]+)\]\]$/) {
- my $redir=$1;
+ my $redir=&unformat($1);
unless ($residx{$redir}) {
# How did that happen? The redirect points nowhere
print "*** WARNING: $redir not defined!\n";
@@ -127,6 +141,17 @@
close(IN);
}
+sub unformat { # Reduce a Wiki reference or dump filename to a normalised, lower-case lookup key
+ my ($key)=@_;
+ $key=~tr/\\//d; # delete every backslash
+ $key=~s/\s/_/g; # each single whitespace character becomes one underscore
+ $key=~s/^[^\w?(]+//; # trim leading characters other than word characters, "?" and "("
+ $key=~s/[^\w?)]+$//; # trim trailing characters other than word characters, "?" and ")"
+ $key=~s/%27/'/g; # the only percent-escape decoded here is the apostrophe
+ return lc($key); # result is always lower case
+}
+
+
sub rellink {
my ($srcdir,$ref)=@_;
my ($dir0,$dir1,$dir2)=split(/\//,$srcdir);
@@ -137,26 +162,25 @@
my $filename;
my $filedir;
if ($ref=~/^file\:\s*(.+)$/i) {
- my $link=$1;
+ my $link=&unformat($1);
+ my $fn=$filereference{$link};
+ unless ($fn) {
+ print "WARNING: [$ref] does not exist\n";
+ }
my $returnref;
my $returndir;
if ($filelink{$link}) {
$returnref=$filelink{$link};
$returndir=$filedir{$link};
} else {
- unless (-e 'dump/'.$link) {
- print "WARNING: [$ref] ($link) does not exist\n";
- return '';
- }
- my $filename=$link;
- $filename=~s/\s+/\_/g;
- my ($fileb,$ext)=($filename=~/^(.+)\.(\w+)$/);
+ my ($fileb,$ext)=($fn=~/^(.+)\.(\w+)$/);
$ext=lc($ext);
- $fileb=~s/\W//;
+ $fileb=~s/\s/\_/gs;
+ $fileb=~s/\W//gs;
$filename=$fileb.'.'.$ext;
$filedir=$srcdir.'/files';
mkdir($filedir);
- copy('dump/'.$link,$filedir.'/'.$filename);
+ copy('dump/'.$fn,$filedir.'/'.$filename);
}
} elsif ($ref=~/^wikipedia\:(.+)$/i) {
$ref=~s/wikipedia\:/http\:en.wikipedia.org\//;
@@ -164,7 +188,7 @@
} elsif ($ref=~/^http\:(.+)$/i) {
return $ref;
} else {
- my $idx=$residx{$ref};
+ my $idx=$residx{&unformat($ref)};
unless ($idx) {
print "WARNING: Unknown index for [$ref]\n";
return '';
More information about the LON-CAPA-cvs
mailing list