[LON-CAPA-cvs] cvs: loncom /localize/localize synch.pl

bisitz bisitz@source.lon-capa.org
Thu, 15 Jan 2009 11:28:12 -0000


bisitz		Thu Jan 15 11:28:12 2009 EDT

  Modified files:              
    /loncom/localize/localize	synch.pl 
  Log:
  - Bugfix:
    Reading of Lexicon hashes works again:
    Added an explicit variable declaration for the Lexicon hash, which has been necessary since strict mode was introduced in revision 1.9.
  
  - Check hash creation for errors and valid content.
  - Stop script execution if files couldn't be opened (read/write).
  - Use a list of ignored files to skip files without actual translation data.
    This also avoids other non-strict-mode issues.
  - Added more comments to explain what's going on, plus some corrections and optimizations
  - Added statistical output data (number of hash values, additions, ...)
  - Optimized debug output (new lines, ...)
  
  Warning:
  Synching with certain combinations of translation files (not yet identified) corrupts the character encoding. Further tests will follow. Other combinations work fine, e.g. synching only de.pm and ar.pm.
  
  
  
Index: loncom/localize/localize/synch.pl
diff -u loncom/localize/localize/synch.pl:1.10 loncom/localize/localize/synch.pl:1.11
--- loncom/localize/localize/synch.pl:1.10	Tue Jan 13 16:46:35 2009
+++ loncom/localize/localize/synch.pl	Thu Jan 15 11:28:12 2009
@@ -6,50 +6,83 @@
 # ----------------------------------------------------------------
 # Configuration
 
-#  Add a ascending number after each new translation
+#  Add an ascending number after each new translation
 # 1: add, 0: don't add
 my $numbered=0;
 
-# Add a comment after each new translation.
-# This comment contains a combination of translations which are build by using already existing translations.
+# Add a translation help comment after each new translation.
+# This comment contains a combination of translations which are built by using already existing translations.
 # 1: add, 0: don't add
 my $helper=0; 
 
 # Debug Mode
 # Displays additional output for debugging purposes
+# WARNING: Creates a huge amount of output. Recommended to be used only for small test files.
+# 1: display, 0: don't display
 my $debug=0;
 
+# List of files to be ignored when synching
+# These files don't contain actual translation phrases
+my @ignorefiles=(
+       "x_chef.pm"
+      ,"en.pm"
+   );
 
 # ----------------------------------------------------------------
 # ----- Sub Routines -----
 
 sub readlexicon {
+    # Read translation file into memory
     my $fn=shift;
-    open(IN,$fn);
+    open(IN,$fn) or die;
     my %lexicon=();
     my $contents=join('',<IN>);
     close(IN);
+    # Tidy up: remove header data
     $contents=~s/package Apache\:[^\;]+//;
     $contents=~s/use base[^\;]+//;
+    # Build hash with hash from file
+    my %Lexicon=();
     eval($contents.'; %lexicon=%Lexicon;');
+    if ($@ ne "") {
+        print "\nAn error occurred during the attempt to retrieve the translation hash for the file '$fn'.\n"
+             ."Error: ".$@."\n";
+        die;
+    }
+    # Remove entries which are not needed for synch
     delete $lexicon{'_AUTO'};
     delete $lexicon{'char_encoding'};
     delete $lexicon{'language_code'};
+    # Hash is expected not to be empty
+    print scalar(keys(%lexicon))." found... ";
+    if (!scalar(keys(%lexicon))) {
+        print "\nWarning: No translation phrases from '$fn'.\n";
+    }
     return %lexicon;
 }
 
 sub readnew {
-    open(IN,'newphrases.txt');
+    print "\n" if $debug;
+    open(IN,'newphrases.txt') or die;
     my %lexicon=();
     while (my $line=<IN>) {
 	chomp($line);
 	$lexicon{$line}=$line;
-        print "    New entry: $line\n" if $debug;
+        print "    New entry: '$line'\n" if $debug;
     }
     close(IN);
     return %lexicon;
 }
 
+sub ignorefile {
+    my $file = shift;
+    foreach my $ignfile (@ignorefiles) {
+        if ($ignfile eq $file) { return 1 }
+    }
+    return 0;
+}
+  
+
 
 # ----------------------------------------------------------------
 # ----- Main Program -----
@@ -63,14 +96,16 @@
 # Create master hash for the entire set of all translations
 print "Building master hash:\n";
 
-# Initialy fill master hash with phrases which are additionally needed/wanted.
+# Initially fill master hash with phrases which are additionally needed/wanted.
 print "  Adding new phrases... ";
 my %master=&readnew();
+print scalar(keys(%master))." added... ";
 print "ok.\n";
-  
+
 # Add all the different phrases of all translation files to master hash
 foreach (<*.pm>) {
-    print "  Reading ".$_." ... ";
+    if (&ignorefile($_)) { next }
+    print "  Reading '".$_."'... ";
     %master=(%master,&readlexicon($_));
    print "ok.\n";
 }
@@ -80,27 +115,30 @@
 # But the phrases will not be added to any translation file even if they were missing in it.
 # Remove these obsolete phrases from master hash
 print "  Removing obsolete phrases... ";
-open(IN,'removephrases.txt');
+open(IN,'removephrases.txt') or die;
+my $rm=0;
 while (my $line=<IN>) {
     chomp($line);
     delete $master{$line};
+    $rm++;
 }
 close(IN);
-print "ok.\n";
+print "$rm removed... ok.\n";
 
 
 print "Synchronization:\n";
 foreach my $fn (<*.pm>) {
-    print "  Synching ".$fn." ... ";
+    if (&ignorefile($fn)) { next }
+    print "  Synching '".$fn."'... ";
     # Build hash with all translations of current translation file
     my %lang=&readlexicon($fn);
     # Copy current translation file so that the old file could be overwritten with the new content
     # while the copy is used to read from.
     system ("cp $fn $fn.original");
-    open(IN,$fn.'.original');
+    open(IN,$fn.'.original') or die;
     # Rebuild current translation file
     # by writing all exisiting entries until SYNCMARKER
-    open(OUT,'>'.$fn);
+    open(OUT,'>'.$fn) or die;
     my $found=0;
     while (<IN>) {
 	if ($_=~/\#\s*SYNCMARKER/) { $found=1; last; } 
@@ -108,14 +146,15 @@
     }
     # Append missing phrases to new version of current translation file
     # by synching old version of current translation file with master hash
-    if ($found) {
+    if ($found) { # Only change files where SYNCMARKER was found
 	$i=0;
 	print OUT "\n\#SYNC ".localtime()."\n";
         # Sync master with current translation file:
 	foreach my $key (sort keys %master) {
+	    print "\n    Checking key: '$key'" if $debug;
 	    unless ($key) { next; }
 	    unless ($lang{$key}) {
-                print "    Found to be added: $key\n" if $debug;
+                # Translation helper?
                 if ($helper) {
 		    $comment='';
 		    my $copytrans=$key;
@@ -127,34 +166,32 @@
 		        $comment='# '.$copytrans;
                     }
                 }
+                # Numbered?
+		$i++;
 		if ($numbered) {
-		    $i++;
 		    $num=' ('.$i.')';
 		} else {
 		    $num='';
 		}
+                # Find delimiter for key and value
 		if ($key=~/\'/) {
 		    $dlm='"';
 		} else {
 		    $dlm="'";
 		}
-		if ($helper) {
-		    print OUT (<<ENDNEW);
-   $dlm$key$dlm
-=> $dlm$key$num$dlm,
-$comment
-
-ENDNEW
-		} else {
-		    print OUT (<<ENDNEW);
+                # Write new entry to translation file
+                print OUT (<<ENDNEW);
    $dlm$key$dlm
 => $dlm$key$num$dlm,
-
 ENDNEW
-		}
-	    }
-	}
-
+                if ($helper) {
+                    print OUT $comment
+                }
+                print OUT "\n";
+		print " > added" if $debug;
+            }
+        }
+        # Add SYNCMARKER at end of file
 	print OUT "\n\#SYNCMARKER\n";
 	foreach (<IN>) {
 	    print OUT $_;
@@ -162,7 +199,8 @@
     }
     close (IN);
     close (OUT);
-    print"ok.\n";
+    print "\n" if $debug;
+    print"$i added... ok.\n";
 }
 print "Synchronization completed.\n";