[LON-CAPA-cvs] cvs: loncom /localize/localize checkduplicates.pl

bisitz bisitz@source.lon-capa.org
Tue, 07 Apr 2009 10:51:53 -0000

bisitz		Tue Apr  7 10:51:53 2009 EDT

  Added files:                 
    /loncom/localize/localize	checkduplicates.pl 
  New script to check for duplicate keys in translation files

Index: loncom/localize/localize/checkduplicates.pl
+++ loncom/localize/localize/checkduplicates.pl
# The LearningOnline Network with CAPA
# $Id: checkduplicates.pl,v 1.1 2009/04/07 10:51:53 bisitz Exp $

# 07.04.2009 Stefan Bisitz

use strict;
use warnings;

my $man = "
checkduplicates - Checks if hash keys in translation files occur more than one time. If so, a warning is displayed.

The found keys and corresponding values need to be changed. Otherwise, there is no gurantee which value is taken. This is dangerous, if same keys but different values are used or if one value is changed but the screen still shows the old value which actually comes from the other occurence.

SYNOPSIS:\tcheckduplicates -h 
\t\tcheckduplicates FILE

-h\t\tDisplay this help and exit.


my $filename; 
die "Use option -h for help.\n" unless exists $ARGV[0];
#analyze options
if ( $ARGV[0] =~ m/^\s*-h/ ) {
	print $man;
	$filename = ($ARGV[0]);
	die "$filename is not a file.\n" unless -f $ARGV[0];

# ----------------------------------------------------------------
# Start Analysis
print "checkduplicates is searching for duplicates in $filename...\n";

# Manually read all stored keys from translation file (inlcuding probable duplicates)
my @all_keys;
my $line;
open( FH, "<", $filename ) or die "$filename cannot be opened\n";
while ( !eof(FH) ) {
    $line = readline(FH);
    next if $line=~/^\s*#/;
    #$exprNP=~s/^["'](.*)["']$/$1/; # Remove " and ' at beginning and end
    if ($line =~ m/   "(.*)"/) { # Find and save "..." key
        push(@all_keys, $1);
    } elsif ($line =~ m/   '(.*)'/) { # Find and save '...' key
        push(@all_keys, $1);

# Read lexicon hash from translation file into hash
my %lexicon = &readlexicon($filename);

# Synch lexicon hash and Array of keys to find all doublettes
# Check for each key in the lexicon hash if this key occures more than one time in the hash file
# If found, print warning and count

my $dupl = 0; # total counter to count when a key occurred more than one time
my %found; # Hash to save keys which have already been found

foreach my $lex_key (keys %lexicon) {
    my $counter = 0;
    foreach my $all_key (@all_keys) {
        if ($all_key eq $lex_key) {
            if ( ($counter > 1) && (!$found{$all_key}) ) {
                $dupl++ if ($counter == 2);
                $found{$all_key} = 1;
                print 'Found duplicate key: '.$lex_key."\n";
if ($dupl == 0) {
    print "Be happy - No duplicates found.\n";
} else {
    print "--- Found $dupl duplicate(s) in $filename which need to be corrected!\n";

# ----------------------------------------------------------------
# Code taken from sync.pl
# in : $filename
# out: %lexicon

sub readlexicon {
    # Read translation file into memory
    my $fn=shift;
    open(IN,$fn) or die;
    my %lexicon=();
    my $contents=join('',<IN>);
    # Tidy up: remove header data
    $contents=~s/package Apache\:[^\;]+//;
    $contents=~s/use base[^\;]+//;
    # Build hash with hash from file
    my %Lexicon=();
    eval($contents.'; %lexicon=%Lexicon;');
    if ($@ ne "") {
        print "\nAn error occurred during the attempt to retrieve the translation hash for the file '$fn'.\n"
             ."Error: ".$@."\n";
    # Remove entries which are not needed for synch
    delete $lexicon{'_AUTO'};
    delete $lexicon{'char_encoding'};
    delete $lexicon{'language_code'};
    # Hash is expected not to be empty
    if (!scalar(keys(%lexicon))) {
        print "\nWarning: No translation phrases found in '$fn'.\n";
    return %lexicon;

# ----------------------------------------------------------------