[LON-CAPA-cvs] cvs: loncom /localize/localize checkduplicates.pl
bisitz
bisitz@source.lon-capa.org
Tue, 07 Apr 2009 10:51:53 -0000
bisitz Tue Apr 7 10:51:53 2009 EDT
Added files:
/loncom/localize/localize checkduplicates.pl
Log:
New script to check for duplicate keys in translation files
Index: loncom/localize/localize/checkduplicates.pl
+++ loncom/localize/localize/checkduplicates.pl
#!/usr/bin/perl
# The LearningOnline Network with CAPA
# $Id: checkduplicates.pl,v 1.1 2009/04/07 10:51:53 bisitz Exp $
# 07.04.2009 Stefan Bisitz
use strict;
use warnings;
my $man = "
checkduplicates - Checks if hash keys in translation files occur more than one time. If so, a warning is displayed.
The found keys and corresponding values need to be changed. Otherwise, there is no gurantee which value is taken. This is dangerous, if same keys but different values are used or if one value is changed but the screen still shows the old value which actually comes from the other occurence.
SYNOPSIS:\tcheckduplicates -h
\t\tcheckduplicates FILE
OPTIONS:
-h\t\tDisplay this help and exit.
";
my $filename;
die "Use option -h for help.\n" unless exists $ARGV[0];
#analyze options
if ( $ARGV[0] =~ m/^\s*-h/ ) {
print $man;
exit();
}else{
$filename = ($ARGV[0]);
die "$filename is not a file.\n" unless -f $ARGV[0];
}
# ----------------------------------------------------------------
# Start Analysis
print "checkduplicates is searching for duplicates in $filename...\n";
# Manually read all stored keys from translation file (inlcuding probable duplicates)
my @all_keys;
my $line;
open( FH, "<", $filename ) or die "$filename cannot be opened\n";
while ( !eof(FH) ) {
$line = readline(FH);
next if $line=~/^\s*#/;
#$exprNP=~s/^["'](.*)["']$/$1/; # Remove " and ' at beginning and end
if ($line =~ m/ "(.*)"/) { # Find and save "..." key
push(@all_keys, $1);
} elsif ($line =~ m/ '(.*)'/) { # Find and save '...' key
push(@all_keys, $1);
}
}
close(FH);
# Read lexicon hash from translation file into hash
my %lexicon = &readlexicon($filename);
# Synch lexicon hash and Array of keys to find all doublettes
# Check for each key in the lexicon hash if this key occures more than one time in the hash file
# If found, print warning and count
my $dupl = 0; # total counter to count when a key occurred more than one time
my %found; # Hash to save keys which have already been found
foreach my $lex_key (keys %lexicon) {
my $counter = 0;
foreach my $all_key (@all_keys) {
if ($all_key eq $lex_key) {
$counter++;
if ( ($counter > 1) && (!$found{$all_key}) ) {
$dupl++ if ($counter == 2);
$found{$all_key} = 1;
print 'Found duplicate key: '.$lex_key."\n";
}
}
}
}
if ($dupl == 0) {
print "Be happy - No duplicates found.\n";
} else {
print "--- Found $dupl duplicate(s) in $filename which need to be corrected!\n";
}
# ----------------------------------------------------------------
# Code taken from sync.pl
# in : $filename
# out: %lexicon
sub readlexicon {
# Read translation file into memory
my $fn=shift;
open(IN,$fn) or die;
my %lexicon=();
my $contents=join('',<IN>);
close(IN);
# Tidy up: remove header data
$contents=~s/package Apache\:[^\;]+//;
$contents=~s/use base[^\;]+//;
# Build hash with hash from file
my %Lexicon=();
eval($contents.'; %lexicon=%Lexicon;');
if ($@ ne "") {
print "\nAn error occurred during the attempt to retrieve the translation hash for the file '$fn'.\n"
."Error: ".$@."\n";
die;
}
# Remove entries which are not needed for synch
delete $lexicon{'_AUTO'};
delete $lexicon{'char_encoding'};
delete $lexicon{'language_code'};
# Hash is expected not to be empty
if (!scalar(keys(%lexicon))) {
print "\nWarning: No translation phrases found in '$fn'.\n";
}
return %lexicon;
}
# ----------------------------------------------------------------