[LON-CAPA-cvs] cvs: modules /raeburn phrasechecker.pl

raeburn raeburn@source.lon-capa.org
Tue, 13 Jan 2009 13:48:04 -0000


This is a MIME encoded message

--raeburn1231854484
Content-Type: text/plain

raeburn		Tue Jan 13 13:48:04 2009 EDT

  Added files:                 
    /modules/raeburn	phrasechecker.pl 
  Log:
  - Compare two tagged versions to find file which have different revisiosn numbers.
  - Compile lists of localized phrases - both &mt() and lonlocal::texthash() - which are absent in a language file (e.g., de.pm)
  tagged for the newer version.
  - Check if "missing" phrases exist in language file in CVS HEAD, and display notification if they do. 
  
  
--raeburn1231854484
Content-Type: text/plain
Content-Disposition: attachment; filename="raeburn-20090113134804.txt"


Index: modules/raeburn/phrasechecker.pl
+++ modules/raeburn/phrasechecker.pl
#!/usr/bin/perl

use strict;

my $path = '/home/raeburn/LClocalization/raeburn';
my $newnum = '2_8_0';
my $new = 'version_'.$newnum;
my $oldnum = '2_7_1';
my $old = 'version_'.$oldnum;

my $oldtopdir = '/home/loncapa_'.$oldnum.'/loncapa/';
my $newtopdir = '/home/loncapa_'.$newnum.'/loncapa/';
my $headdir = '/home/loncapa/';

my (%oldrevs,%newrevs,%changed);
my @scripts = qw(loncnew lond loncron loncontrol);

my %oldlang=&readlexicon($oldtopdir.'loncom/localize/localize/de.pm');
my %newlang=&readlexicon($newtopdir.'loncom/localize/localize/de.pm');
my %headlang=&readlexicon($headdir.'loncom/localize/localize/de.pm');

print "lexicon for $oldnum has ".scalar(keys(%oldlang))." strings\n";
print "lexicon for $newnum has ".scalar(keys(%newlang))." strings\n";
print "lexicon for CVS HEAD has ".scalar(keys(%headlang))." strings\n";

open (my $errorlog,">$path/errors.txt");
# Get revision changes;
my $fh;
&recurse_CVS_Entries($oldtopdir,'',\%oldrevs,$errorlog);
&recurse_CVS_Entries($newtopdir,'',\%newrevs,$errorlog);

foreach my $file (sort(keys(%newrevs))) {
    my ($fname) =~ ($file =~ m{/([^/]+)$/}); 
    next if ($file =~ /Attic/);
    next if (!(($file =~ /\.p[lm]$/) || (grep/^\Q$fname\E$/,@scripts)));
    if (-e '/home/loncapa_'.$newnum.'/loncapa/'.$file) { 
        if ($newrevs{$file} ne $oldrevs{$file}) {
            $changed{$file} = 1;
        }
    }
}

open(my $nochglog,'>'.$path.'/unchanged_'.$oldnum.'_'.$newnum.'.txt');
open(my $chglog,'>'.$path.'/changed_'.$oldnum.'_'.$newnum.'.txt');
foreach my $file (sort(keys(%changed))) {
    my (%oldmt,%newmt,%oldth,%newth,%oldsplitline,%newsplitline);
    next if ($file =~ m{^loncom/localize/localize/});
    if (open(my $fj, "<$oldtopdir/$file")) {
        my $inlt = 0;
        my ($openquote,$unterminated);
        while (<$fj>) {
            my $text = $_;
            if ($text =~ /\&?Apache::lonlocal::texthash/) {
                $inlt = 1;
            }
            if ($inlt) {
                $text =~ s/\s*[\r\f\n]*\s*$//;
                if ($text =~ /([^,\s])+\s*=>\s*(['"])([^\2]+?)\2\s*\.?\s*([ul]c\()?(\$?[^,]*)\)?\s*,?(\);)?/s) {
                    $oldth{$3.$4.$5} = 1;
                }
                if ($text =~ /\)\s*;/) {
                    $inlt = 0;
                }
            } elsif ($openquote =~ /^['"]$/) {
                if ($text =~ /^([^\Q$openquote\E]+?)\Q$openquote\E[,\)]/) {
                    my $remnant = $1;
                    $remnant =~ s/^\s+//;
                    chomp($unterminated);
                    $unterminated =~ s/\s+$//;
                    $oldmt{$unterminated.'<<cr>>'.$remnant} = 1;
                    $oldsplitline{$unterminated.'<<cr>>'.$remnant} = 1; 
                }
                $openquote = '';
                $unterminated = '';
            } elsif ($text =~ /(&?Apache::lonlocal::|\&)mt\(/) {
                ($openquote,$unterminated) = &find_mt_items($text,\%oldmt);
            }
        }
    }
    if (open(my $fj, "<$newtopdir/$file")) {
        my $inlt = 0;
        my ($openquote,$unterminated);
        while (<$fj>) {
            my $text = $_;
            if ($text =~ /\&?Apache::lonlocal::texthash/) {
                $inlt = 1;
            }
            if ($inlt) {
                $text =~ s/\s*[\r\f\n]*\s*$//;
                if ($text =~ /([^,\s])+\s*=>\s*(['"])([^\2]+?)\2\s*\.?\s*([ul]c\()?(\$?[^,]*)\)?\s*,?(\);)?/s) {
                    $newth{$3.$4.$5} = 1;
                }
                if ($text =~ /\)\s*;/) {
                    $inlt = 0;
                }
            } elsif ($openquote =~ /^['"]$/) {
                if ($text =~ /^([^\Q$openquote\E]+?)\Q$openquote\E[,\)]/) {
                    my $remnant = $1;
                    $remnant =~ s/^\s+//;
                    chomp($unterminated);
                    $unterminated =~ s/\s+$//;
                    $newmt{$unterminated."<<cr>>".$remnant} = 1;
                    $newsplitline{$unterminated.'<<cr>>'.$remnant} = 1;
                }
                $openquote = '';
                $unterminated = '';
            } elsif ($text =~ /(\&?Apache::lonlocal::|\&)mt\(/) {
                ($openquote,$unterminated) = &find_mt_items($text,\%newmt);
            }
        }
    }
    my @missing;
    foreach my $item (sort(keys(%newmt))) {
        if ((!defined($oldmt{$item})) && (!defined($oldth{$item}))) {
            if (!defined($newlang{$item})) {
                if (defined($headlang{$item})) {
                    print "$file - ||$item|| in CVS HEAD de.pm\n";
                } else {
                    push(@missing,$item);
                }
            }
        }
    }
    foreach my $item (sort(keys(%newth))) {
        if ((!defined($oldmt{$item})) && (!defined($oldth{$item})) && (!defined($newmt{$item}))) {
            if (!defined($newlang{$item})) {
                if (defined($headlang{$item})) {
                    print "$file - ||$item|| in CVS HEAD de.pm\n";
                } else {
                    push(@missing,$item);
                }
            }
        }
    }

    my $module = $file;
    $module =~ s/^\///;
    $module =~ s{/}{_}g;
    if (@missing) {
        if (open(my $fk,">$path/$module")) {
            foreach my $str (@missing) {
                print $fk "$str\n\n";
            }
            close($fk);
        }
        print $chglog "$file: $oldrevs{$file} -> $newrevs{$file} - ".scalar(@missing)." phrases to translate.\n";
    } else {
        print $nochglog "$file: $oldrevs{$file} -> $newrevs{$file} - no new untranslated phrases.\n";   
    }
    foreach my $key (keys(%oldsplitline)) {
        print "$file has an multi-line item - ||$key||\n";
    }
}
close($chglog);
close($nochglog);

sub recurse_CVS_Entries {
    my ($topdir,$path,$revshash) = @_;
    my $fh;
    if (open($fh,'<'.$topdir.'/'.$path.'/CVS/Entries')) {
        my (@dirs);
        while (<$fh>) {
            my $line = $_;
            chomp($line);
            next if ($line eq 'D');
            if ($line =~ m{^D/([^/]+)/}) {
                push(@dirs,$1);
            } elsif ($line =~ m{^/([^/]+)/([^/]+)}) {
                if (ref($revshash) eq 'HASH') {
                    $revshash->{$path.'/'.$1} = $2;
                }
            } else {
                print "$path -- $line is unexpected pattern\n";
            }
        }
        close($fh);
        foreach my $dir (@dirs) {
            &recurse_CVS_Entries($topdir,$path.'/'.$dir,$revshash);
        }
    } else {
        print "Couldn't open ".$topdir.'/'.$path.'/CVS/Entries'."\n";
    }
    return;
}

sub readlexicon {
    my $fn=shift;
    my %Lexicon;
    if (open(IN,$fn)) {
        my $contents=join('',<IN>);
        close(IN);
        $contents=~s/package Apache\:[^\;]+//;
        $contents=~s/use base[^\;]+//;
        $contents=~s/#SYNCMARKER//;
        $contents=~s/^1\;$//;         
        eval($contents);
        delete $Lexicon{'_AUTO'};
        delete $Lexicon{'char_encoding'};
        delete $Lexicon{'language_code'};
        return %Lexicon;
    } else {
        print "Couldn't open $fn\n";
    }
}

sub lt_partials {
    my ($string,$newlang,$releasechg,$reported,$changes,$hashval,$texthashvals) = @_;
    my ($output,$missing);
    if ($string =~ //) {
        my $val = $3;
        my $case = $4;
        my $rest = $5;
        if (!defined($newlang->{$val})) {
            my $partial;
            if ($case ne '' || $rest ne "") {
                if (grep(/\Q$val\E/,keys(%{$newlang}))) {
                    $partial = "\nPartial: $val\n";
                }
            }
            my $miss = $string;
            chomp($miss);
            if ($changes->{$miss}) {
                if (!$hashval->{$val.$case.$rest}) {
                    $hashval->{$val.$case.$rest} = 1;
                    $texthashvals .= "$partial.$val.$case.$rest\n";
                }
            } elsif (!$reported->{$val.$case.$rest}) {
                $reported->{$val.$case.$rest} = 1;
                $missing .= $partial.$val.$case.$rest."\n";
                if ($partial) {
                    $missing .= "\n";
                }
            }
        }
        if ($texthashvals) {
           $output .= $texthashvals."\n";
        }
    }
    return ($missing,$output);
}

sub find_mt_items {
    my ($incoming,$strings) = @_;
    my ($openquote,$unterminated);
    my (undef,@text) = split(/(\&?Apache::lonlocal::|\&)mt\(/,$incoming);
    foreach my $item (@text) {
        if ($item =~ /^(['"])([^\1]*?)\1[.,\)]/) {
            my $sep = $1;
            my $other = '"';
            if ($sep eq '"') {
                $other = "'";
            }
            my $phrase = $2;
            if ($phrase =~ /^([^\Q$sep\E]*)([ul]c\()?(\$?[^\Q$sep\E]*)$/) {
                if (ref($strings) eq 'HASH') {
                    $strings->{$phrase} = 1;
                }
            } elsif ($item =~ m{\\\Q$sep\E}) {
                my $chgitem = $item; 
                $chgitem =~ s/\\\Q$sep\E/<<<<<$other>>>>>/g; 
                if ($chgitem =~ /^(\Q$sep\E)([^\Q$sep\E]*?)\Q$sep\E[,\)]/) {
                    $phrase = $2;
                    $phrase =~ s/<<<<<\Q$other\E>>>>>/\\$sep/g;
                    if (ref($strings) eq 'HASH') {
                        $strings->{$phrase} = 1;
                    }
                }
            }
        } elsif ($item =~ /^(['"])([^\1]*?)[\r\n\f]$/) {
            $openquote = $1;
            $unterminated = $2;
        }
    }
    return ($openquote,$unterminated);
}

sub mt_partials {
    my ($phrase,$newlang,$releasechg,$reported) = @_;
    my $missingmt;
    if ($phrase =~ //) {
        my $val = $1;
        my $case = $2;
        my $rest = $3;

        if (!defined($newlang->{$val})) {
            my $partial;
            if ($case ne '' || $rest ne '') {
                if (grep(/\Q$val\E/,keys(%{$newlang}))) {
                    $partial = "Partial: $val\n";
                }
            }
            if (!$releasechg->{$phrase}) {
                if (!$reported->{$phrase}) {
                    $reported->{$phrase} = 1;
                    $missingmt .= $partial.$phrase."\n";
                    if ($partial) {
                        $missingmt .= "\n";
                    }
                }
            }
        }
    }
    return $missingmt;
}


--raeburn1231854484--