[LON-CAPA-cvs] cvs: doc /loncapafiles loncapafiles.lpml loncom/localize transliterate.pm loncom/lonnet/perl lonnet.pm

raeburn raeburn at source.lon-capa.org
Tue Feb 26 09:42:32 EST 2019


raeburn		Tue Feb 26 14:42:32 2019 EDT

  Added files:                 
    /loncom/localize	transliterate.pm 

  Modified files:              
    /loncom/lonnet/perl	lonnet.pm 
    /doc/loncapafiles	loncapafiles.lpml 
  Log:
  - Bug 6792
    - Replace a non-ascii character in the filename of uploaded file with an
      appropriate ascii character (if available).
    - If lonnet::clean_filename() reduces filename to .extension, prepend
      timestamp_milliseconds.
  
  
Index: loncom/lonnet/perl/lonnet.pm
diff -u loncom/lonnet/perl/lonnet.pm:1.1405 loncom/lonnet/perl/lonnet.pm:1.1406
--- loncom/lonnet/perl/lonnet.pm:1.1405	Fri Feb 15 20:56:18 2019
+++ loncom/lonnet/perl/lonnet.pm	Tue Feb 26 14:42:27 2019
@@ -1,7 +1,7 @@
 # The LearningOnline Network
 # TCP networking package
 #
-# $Id: lonnet.pm,v 1.1405 2019/02/15 20:56:18 raeburn Exp $
+# $Id: lonnet.pm,v 1.1406 2019/02/26 14:42:27 raeburn Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -101,6 +101,7 @@
 use LONCAPA::lonmetadata;
 use LONCAPA::Lond;
 use LONCAPA::LWPReq;
+use LONCAPA::transliterate;
 
 use File::Copy;
 
@@ -3856,6 +3857,9 @@
     }
 # Replace spaces by underscores
     $fname=~s/\s+/\_/g;
+# Transliterate non-ascii text to ascii
+    my $lang = &Apache::lonlocal::current_language();
+    $fname = &LONCAPA::transliterate::fname_to_ascii($fname,$lang);
 # Replace all other weird characters by nothing
     $fname=~s{[^/\w\.\-]}{}g;
 # Replace all .\d. sequences with _\d. so they no longer look like version
@@ -3863,6 +3867,7 @@
     $fname=~s/\.(\d+)(?=\.)/_$1/g;
     return $fname;
 }
+
 # This Function checks if an Image's dimensions exceed either $resizewidth (width) 
 # or $resizeheight (height) - both pixels. If so, the image is scaled to produce an 
 # image with the same aspect ratio as the original, but with dimensions which do 
@@ -3937,6 +3942,14 @@
     $fname=&clean_filename($fname);
     # See if there is anything left
     unless ($fname) { return 'error: no uploaded file'; }
+    # If filename now begins with a . prepend unix timestamp _ milliseconds
+    if ($fname =~ /^\./) {
+        my ($s,$usec) = &gettimeofday();
+        while (length($usec) < 6) {
+            $usec = '0'.$usec;
+        }
+        $fname = $s.'_'.substr($usec,0,3).$fname;
+    }
     # Files uploaded to help request form, or uploaded to "create course" page are handled differently
     if ((($formname eq 'screenshot') && ($subdir eq 'helprequests')) ||
         (($formname eq 'coursecreatorxml') && ($subdir eq 'batchupload')) ||
Index: doc/loncapafiles/loncapafiles.lpml
diff -u doc/loncapafiles/loncapafiles.lpml:1.981 doc/loncapafiles/loncapafiles.lpml:1.982
--- doc/loncapafiles/loncapafiles.lpml:1.981	Thu Feb 21 20:57:45 2019
+++ doc/loncapafiles/loncapafiles.lpml	Tue Feb 26 14:42:31 2019
@@ -2,7 +2,7 @@
  "http://lpml.sourceforge.net/DTD/lpml.dtd">
 <!-- loncapafiles.lpml -->
 
-<!-- $Id: loncapafiles.lpml,v 1.981 2019/02/21 20:57:45 raeburn Exp $ -->
+<!-- $Id: loncapafiles.lpml,v 1.982 2019/02/26 14:42:31 raeburn Exp $ -->
 
 <!--
 
@@ -4672,6 +4672,14 @@
 </filenames>
 </fileglob>
 <file>
+<source>loncom/localize/transliterate.pm</source>
+<target dist='default'>home/httpd/lib/perl/LONCAPA/transliterate.pm</target>
+<categoryname>handler</categoryname>
+<description>
+Transliteration of non-ascii to ascii characters in filenames.
+</description>
+</file>
+<file>
 <source>loncom/interface/loncoursedata.pm</source>
 <target dist='default'>home/httpd/lib/perl/Apache/loncoursedata.pm</target>
 <categoryname>handler</categoryname>

Index: loncom/localize/transliterate.pm
+++ loncom/localize/transliterate.pm
# The LearningOnline Network with CAPA
# Transliteration to ascii
#
# $Id: transliterate.pm,v 1.1 2019/02/26 14:42:22 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
######################################################################
######################################################################

=pod

=head1 NAME

LONCAPA::transliterate - transliterate non-ascii characters
in filenames.

=head1 SYNOPSIS

When fed a filename it will replace instances of non-ascii
characters with transliterations.

=head1 OVERVIEW

Used to replace non-ascii character(s) with a transliteration
of the character(s) to ascii character(s).

If there are preferred replacements for a particular language
then those should be included in a separate subroutine which
is called before the transliteration of last resort (which is
done with Text::Unidecode).

=head1 SUBROUTINES

=cut

package LONCAPA::transliterate;

use strict;
use utf8;
use Text::Unidecode qw(unidecode);
use Encode qw(decode_utf8 encode_utf8);

=pod

=over

=item * fname_to_ascii()

Inputs: $fname (required), $language (optional)

Output: $fname

Replaces non-ascii characters with a transliteration
of the character to an ascii character (using Text::Unidecode).

If the language code is de, transliteration via 
german_to_ascii() is used first to handle umlauts and eszett,
before using Text::Unidecode.

If other routines are added to support preferred transliteration
of non-ascii characters for specific languages, they should be
added as new subroutines to this file, and then called if the
language code has an appropriate value.

=back

=cut

sub fname_to_ascii {
    # Transliterate any non-ascii characters in a filename to ascii.
    #
    # Inputs:  $fname    - filename; decoded with decode_utf8() when it
    #                      contains a character outside \x00-\x7f.
    #          $language - optional language code; 'de' selects the German
    #                      replacements in german_to_ascii() before the
    #                      generic Text::Unidecode pass.
    # Returns: the filename with non-ascii characters replaced; a name
    #          that is already pure ascii (or undefined) is returned as is.
    my ($fname,$language) = @_;

    # Nothing to do unless at least one character lies outside 7-bit ascii.
    unless (defined($fname) && ($fname =~ /[^\x{00}-\x{7f}]/)) {
        return $fname;
    }

    # Work on characters, not octets, so that the replacement tables match.
    $fname = &decode_utf8($fname);

    # $language is optional: check it is defined before comparing so that a
    # call without a language does not trigger an "uninitialized" warning.
    if (defined($language) && ($language eq 'de')) {
        $fname = &german_to_ascii($fname);
    }

    # Transliteration of last resort for whatever non-ascii text remains.
    $fname = unidecode($fname);

    # Hand an octet string back to the caller.
    return &encode_utf8($fname);
}

=pod 

=over

=item * german_to_ascii()

Input: $fname (required)

Output: $fname

Replaces each letter carrying an umlaut with the equivalent letter
without the umlaut, followed by the letter e. Case is preserved.

Replaces eszett with double s.

=back

=cut

sub german_to_ascii {
    # Apply the preferred German transliterations to a (character) string:
    # an umlauted vowel becomes the plain vowel followed by e, in the same
    # case as the original letter, and eszett becomes ss.  All other
    # characters are left unchanged.
    #
    # Input:   filename (first argument).
    # Returns: the filename with the replacements applied.
    my ($name) = @_;

    # Replacement for each character handled by this routine.
    my %ascii_for = (
        'Ä' => 'AE',    'ä' => 'ae',
        'Ö' => 'OE',    'ö' => 'oe',
        'Ü' => 'UE',    'ü' => 'ue',
        'ß' => 'ss',
    );

    # Swap every listed character for its ascii replacement.
    $name =~ s/([ÄäÖöÜüß])/$ascii_for{$1}/g;

    return $name;
}

1;




More information about the LON-CAPA-cvs mailing list