[LON-CAPA-cvs] cvs: modules /raeburn phrasechecker.pl
raeburn
raeburn@source.lon-capa.org
Tue, 13 Jan 2009 13:48:04 -0000
This is a MIME encoded message
--raeburn1231854484
Content-Type: text/plain
raeburn Tue Jan 13 13:48:04 2009 EDT
Added files:
/modules/raeburn phrasechecker.pl
Log:
- Compare two tagged versions to find file which have different revisiosn numbers.
- Compile lists of localized phrases - both &mt() and lonlocal::texthash() - which are absent in a language file (e.g., de.pm)
tagged for the newer version.
- Check if "missing" phrases exist in language file in CVS HEAD, and display notification if they do.
--raeburn1231854484
Content-Type: text/plain
Content-Disposition: attachment; filename="raeburn-20090113134804.txt"
Index: modules/raeburn/phrasechecker.pl
+++ modules/raeburn/phrasechecker.pl
#!/usr/bin/perl
use strict;
my $path = '/home/raeburn/LClocalization/raeburn';
my $newnum = '2_8_0';
my $new = 'version_'.$newnum;
my $oldnum = '2_7_1';
my $old = 'version_'.$oldnum;
my $oldtopdir = '/home/loncapa_'.$oldnum.'/loncapa/';
my $newtopdir = '/home/loncapa_'.$newnum.'/loncapa/';
my $headdir = '/home/loncapa/';
my (%oldrevs,%newrevs,%changed);
my @scripts = qw(loncnew lond loncron loncontrol);
my %oldlang=&readlexicon($oldtopdir.'loncom/localize/localize/de.pm');
my %newlang=&readlexicon($newtopdir.'loncom/localize/localize/de.pm');
my %headlang=&readlexicon($headdir.'loncom/localize/localize/de.pm');
print "lexicon for $oldnum has ".scalar(keys(%oldlang))." strings\n";
print "lexicon for $newnum has ".scalar(keys(%newlang))." strings\n";
print "lexicon for CVS HEAD has ".scalar(keys(%headlang))." strings\n";
open (my $errorlog,">$path/errors.txt");
# Get revision changes;
my $fh;
&recurse_CVS_Entries($oldtopdir,'',\%oldrevs,$errorlog);
&recurse_CVS_Entries($newtopdir,'',\%newrevs,$errorlog);
foreach my $file (sort(keys(%newrevs))) {
my ($fname) =~ ($file =~ m{/([^/]+)$/});
next if ($file =~ /Attic/);
next if (!(($file =~ /\.p[lm]$/) || (grep/^\Q$fname\E$/,@scripts)));
if (-e '/home/loncapa_'.$newnum.'/loncapa/'.$file) {
if ($newrevs{$file} ne $oldrevs{$file}) {
$changed{$file} = 1;
}
}
}
open(my $nochglog,'>'.$path.'/unchanged_'.$oldnum.'_'.$newnum.'.txt');
open(my $chglog,'>'.$path.'/changed_'.$oldnum.'_'.$newnum.'.txt');
foreach my $file (sort(keys(%changed))) {
my (%oldmt,%newmt,%oldth,%newth,%oldsplitline,%newsplitline);
next if ($file =~ m{^loncom/localize/localize/});
if (open(my $fj, "<$oldtopdir/$file")) {
my $inlt = 0;
my ($openquote,$unterminated);
while (<$fj>) {
my $text = $_;
if ($text =~ /\&?Apache::lonlocal::texthash/) {
$inlt = 1;
}
if ($inlt) {
$text =~ s/\s*[\r\f\n]*\s*$//;
if ($text =~ /([^,\s])+\s*=>\s*(['"])([^\2]+?)\2\s*\.?\s*([ul]c\()?(\$?[^,]*)\)?\s*,?(\);)?/s) {
$oldth{$3.$4.$5} = 1;
}
if ($text =~ /\)\s*;/) {
$inlt = 0;
}
} elsif ($openquote =~ /^['"]$/) {
if ($text =~ /^([^\Q$openquote\E]+?)\Q$openquote\E[,\)]/) {
my $remnant = $1;
$remnant =~ s/^\s+//;
chomp($unterminated);
$unterminated =~ s/\s+$//;
$oldmt{$unterminated.'<<cr>>'.$remnant} = 1;
$oldsplitline{$unterminated.'<<cr>>'.$remnant} = 1;
}
$openquote = '';
$unterminated = '';
} elsif ($text =~ /(&?Apache::lonlocal::|\&)mt\(/) {
($openquote,$unterminated) = &find_mt_items($text,\%oldmt);
}
}
}
if (open(my $fj, "<$newtopdir/$file")) {
my $inlt = 0;
my ($openquote,$unterminated);
while (<$fj>) {
my $text = $_;
if ($text =~ /\&?Apache::lonlocal::texthash/) {
$inlt = 1;
}
if ($inlt) {
$text =~ s/\s*[\r\f\n]*\s*$//;
if ($text =~ /([^,\s])+\s*=>\s*(['"])([^\2]+?)\2\s*\.?\s*([ul]c\()?(\$?[^,]*)\)?\s*,?(\);)?/s) {
$newth{$3.$4.$5} = 1;
}
if ($text =~ /\)\s*;/) {
$inlt = 0;
}
} elsif ($openquote =~ /^['"]$/) {
if ($text =~ /^([^\Q$openquote\E]+?)\Q$openquote\E[,\)]/) {
my $remnant = $1;
$remnant =~ s/^\s+//;
chomp($unterminated);
$unterminated =~ s/\s+$//;
$newmt{$unterminated."<<cr>>".$remnant} = 1;
$newsplitline{$unterminated.'<<cr>>'.$remnant} = 1;
}
$openquote = '';
$unterminated = '';
} elsif ($text =~ /(\&?Apache::lonlocal::|\&)mt\(/) {
($openquote,$unterminated) = &find_mt_items($text,\%newmt);
}
}
}
my @missing;
foreach my $item (sort(keys(%newmt))) {
if ((!defined($oldmt{$item})) && (!defined($oldth{$item}))) {
if (!defined($newlang{$item})) {
if (defined($headlang{$item})) {
print "$file - ||$item|| in CVS HEAD de.pm\n";
} else {
push(@missing,$item);
}
}
}
}
foreach my $item (sort(keys(%newth))) {
if ((!defined($oldmt{$item})) && (!defined($oldth{$item})) && (!defined($newmt{$item}))) {
if (!defined($newlang{$item})) {
if (defined($headlang{$item})) {
print "$file - ||$item|| in CVS HEAD de.pm\n";
} else {
push(@missing,$item);
}
}
}
}
my $module = $file;
$module =~ s/^\///;
$module =~ s{/}{_}g;
if (@missing) {
if (open(my $fk,">$path/$module")) {
foreach my $str (@missing) {
print $fk "$str\n\n";
}
close($fk);
}
print $chglog "$file: $oldrevs{$file} -> $newrevs{$file} - ".scalar(@missing)." phrases to translate.\n";
} else {
print $nochglog "$file: $oldrevs{$file} -> $newrevs{$file} - no new untranslated phrases.\n";
}
foreach my $key (keys(%oldsplitline)) {
print "$file has an multi-line item - ||$key||\n";
}
}
close($chglog);
close($nochglog);
sub recurse_CVS_Entries {
my ($topdir,$path,$revshash) = @_;
my $fh;
if (open($fh,'<'.$topdir.'/'.$path.'/CVS/Entries')) {
my (@dirs);
while (<$fh>) {
my $line = $_;
chomp($line);
next if ($line eq 'D');
if ($line =~ m{^D/([^/]+)/}) {
push(@dirs,$1);
} elsif ($line =~ m{^/([^/]+)/([^/]+)}) {
if (ref($revshash) eq 'HASH') {
$revshash->{$path.'/'.$1} = $2;
}
} else {
print "$path -- $line is unexpected pattern\n";
}
}
close($fh);
foreach my $dir (@dirs) {
&recurse_CVS_Entries($topdir,$path.'/'.$dir,$revshash);
}
} else {
print "Couldn't open ".$topdir.'/'.$path.'/CVS/Entries'."\n";
}
return;
}
sub readlexicon {
my $fn=shift;
my %Lexicon;
if (open(IN,$fn)) {
my $contents=join('',<IN>);
close(IN);
$contents=~s/package Apache\:[^\;]+//;
$contents=~s/use base[^\;]+//;
$contents=~s/#SYNCMARKER//;
$contents=~s/^1\;$//;
eval($contents);
delete $Lexicon{'_AUTO'};
delete $Lexicon{'char_encoding'};
delete $Lexicon{'language_code'};
return %Lexicon;
} else {
print "Couldn't open $fn\n";
}
}
sub lt_partials {
my ($string,$newlang,$releasechg,$reported,$changes,$hashval,$texthashvals) = @_;
my ($output,$missing);
if ($string =~ //) {
my $val = $3;
my $case = $4;
my $rest = $5;
if (!defined($newlang->{$val})) {
my $partial;
if ($case ne '' || $rest ne "") {
if (grep(/\Q$val\E/,keys(%{$newlang}))) {
$partial = "\nPartial: $val\n";
}
}
my $miss = $string;
chomp($miss);
if ($changes->{$miss}) {
if (!$hashval->{$val.$case.$rest}) {
$hashval->{$val.$case.$rest} = 1;
$texthashvals .= "$partial.$val.$case.$rest\n";
}
} elsif (!$reported->{$val.$case.$rest}) {
$reported->{$val.$case.$rest} = 1;
$missing .= $partial.$val.$case.$rest."\n";
if ($partial) {
$missing .= "\n";
}
}
}
if ($texthashvals) {
$output .= $texthashvals."\n";
}
}
return ($missing,$output);
}
sub find_mt_items {
my ($incoming,$strings) = @_;
my ($openquote,$unterminated);
my (undef,@text) = split(/(\&?Apache::lonlocal::|\&)mt\(/,$incoming);
foreach my $item (@text) {
if ($item =~ /^(['"])([^\1]*?)\1[.,\)]/) {
my $sep = $1;
my $other = '"';
if ($sep eq '"') {
$other = "'";
}
my $phrase = $2;
if ($phrase =~ /^([^\Q$sep\E]*)([ul]c\()?(\$?[^\Q$sep\E]*)$/) {
if (ref($strings) eq 'HASH') {
$strings->{$phrase} = 1;
}
} elsif ($item =~ m{\\\Q$sep\E}) {
my $chgitem = $item;
$chgitem =~ s/\\\Q$sep\E/<<<<<$other>>>>>/g;
if ($chgitem =~ /^(\Q$sep\E)([^\Q$sep\E]*?)\Q$sep\E[,\)]/) {
$phrase = $2;
$phrase =~ s/<<<<<\Q$other\E>>>>>/\\$sep/g;
if (ref($strings) eq 'HASH') {
$strings->{$phrase} = 1;
}
}
}
} elsif ($item =~ /^(['"])([^\1]*?)[\r\n\f]$/) {
$openquote = $1;
$unterminated = $2;
}
}
return ($openquote,$unterminated);
}
sub mt_partials {
my ($phrase,$newlang,$releasechg,$reported) = @_;
my $missingmt;
if ($phrase =~ //) {
my $val = $1;
my $case = $2;
my $rest = $3;
if (!defined($newlang->{$val})) {
my $partial;
if ($case ne '' || $rest ne '') {
if (grep(/\Q$val\E/,keys(%{$newlang}))) {
$partial = "Partial: $val\n";
}
}
if (!$releasechg->{$phrase}) {
if (!$reported->{$phrase}) {
$reported->{$phrase} = 1;
$missingmt .= $partial.$phrase."\n";
if ($partial) {
$missingmt .= "\n";
}
}
}
}
}
return $missingmt;
}
--raeburn1231854484--