[LON-CAPA-cvs] cvs: doc /loncapafiles loncapafiles.lpml loncom/localize transliterate.pm loncom/lonnet/perl lonnet.pm
raeburn
raeburn at source.lon-capa.org
Tue Feb 26 09:42:32 EST 2019
raeburn Tue Feb 26 14:42:32 2019 EDT
Added files:
/loncom/localize transliterate.pm
Modified files:
/loncom/lonnet/perl lonnet.pm
/doc/loncapafiles loncapafiles.lpml
Log:
- Bug 6792
- Replace a non-ascii character in the filename of uploaded file with an
appropriate ascii character (if available).
- If lonnet::clean_filename() reduces filename to .extension, prepend
timestamp_milliseconds.
Index: loncom/lonnet/perl/lonnet.pm
diff -u loncom/lonnet/perl/lonnet.pm:1.1405 loncom/lonnet/perl/lonnet.pm:1.1406
--- loncom/lonnet/perl/lonnet.pm:1.1405 Fri Feb 15 20:56:18 2019
+++ loncom/lonnet/perl/lonnet.pm Tue Feb 26 14:42:27 2019
@@ -1,7 +1,7 @@
# The LearningOnline Network
# TCP networking package
#
-# $Id: lonnet.pm,v 1.1405 2019/02/15 20:56:18 raeburn Exp $
+# $Id: lonnet.pm,v 1.1406 2019/02/26 14:42:27 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -101,6 +101,7 @@
use LONCAPA::lonmetadata;
use LONCAPA::Lond;
use LONCAPA::LWPReq;
+use LONCAPA::transliterate;
use File::Copy;
@@ -3856,6 +3857,9 @@
}
# Replace spaces by underscores
$fname=~s/\s+/\_/g;
+# Transliterate non-ascii text to ascii
+ my $lang = &Apache::lonlocal::current_language();
+ $fname = &LONCAPA::transliterate::fname_to_ascii($fname,$lang);
# Replace all other weird characters by nothing
$fname=~s{[^/\w\.\-]}{}g;
# Replace all .\d. sequences with _\d. so they no longer look like version
@@ -3863,6 +3867,7 @@
$fname=~s/\.(\d+)(?=\.)/_$1/g;
return $fname;
}
+
# This Function checks if an Image's dimensions exceed either $resizewidth (width)
# or $resizeheight (height) - both pixels. If so, the image is scaled to produce an
# image with the same aspect ratio as the original, but with dimensions which do
@@ -3937,6 +3942,14 @@
$fname=&clean_filename($fname);
# See if there is anything left
unless ($fname) { return 'error: no uploaded file'; }
+ # If filename now begins with a . prepend unix timestamp _ milliseconds
+ if ($fname =~ /^\./) {
+ my ($s,$usec) = &gettimeofday();
+ while (length($usec) < 6) {
+ $usec = '0'.$usec;
+ }
+ $fname = $s.'_'.substr($usec,0,3).$fname;
+ }
# Files uploaded to help request form, or uploaded to "create course" page are handled differently
if ((($formname eq 'screenshot') && ($subdir eq 'helprequests')) ||
(($formname eq 'coursecreatorxml') && ($subdir eq 'batchupload')) ||
Index: doc/loncapafiles/loncapafiles.lpml
diff -u doc/loncapafiles/loncapafiles.lpml:1.981 doc/loncapafiles/loncapafiles.lpml:1.982
--- doc/loncapafiles/loncapafiles.lpml:1.981 Thu Feb 21 20:57:45 2019
+++ doc/loncapafiles/loncapafiles.lpml Tue Feb 26 14:42:31 2019
@@ -2,7 +2,7 @@
"http://lpml.sourceforge.net/DTD/lpml.dtd">
<!-- loncapafiles.lpml -->
-<!-- $Id: loncapafiles.lpml,v 1.981 2019/02/21 20:57:45 raeburn Exp $ -->
+<!-- $Id: loncapafiles.lpml,v 1.982 2019/02/26 14:42:31 raeburn Exp $ -->
<!--
@@ -4672,6 +4672,14 @@
</filenames>
</fileglob>
<file>
+<source>loncom/localize/transliterate.pm</source>
+<target dist='default'>home/httpd/lib/perl/LONCAPA/transliterate.pm</target>
+<categoryname>handler</categoryname>
+<description>
+Transliteration of non-ascii to ascii characters in filenames.
+</description>
+</file>
+<file>
<source>loncom/interface/loncoursedata.pm</source>
<target dist='default'>home/httpd/lib/perl/Apache/loncoursedata.pm</target>
<categoryname>handler</categoryname>
Index: loncom/localize/transliterate.pm
+++ loncom/localize/transliterate.pm
# The LearningOnline Network with CAPA
# Transliteration to ascii
#
# $Id: transliterate.pm,v 1.1 2019/02/26 14:42:22 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
######################################################################
######################################################################
=pod
=head1 NAME
LONCAPA::transliterate - transliterate non-ascii characters
in filenames.
=head1 SYNOPSIS
When fed a filename it will replace instances of non-ascii
characters with transliterations.
=head1 OVERVIEW
Used to replace non-ascii character(s) with a transliteration
of the character(s) to ascii character(s).
If there are preferred replacements for a particular language
then those should be included in a separate subroutine which
is called before the transliteration of last resort (which is
done with Text::Unidecode).
=head1 SUBROUTINES
=cut
package LONCAPA::transliterate;
use strict;
use utf8;
use Text::Unidecode qw(unidecode);
use Encode qw(decode_utf8 encode_utf8);
=pod
=over
=item * fname_to_ascii()
Inputs: $fname (required), $language (optional)
Output: $fname
Replaces non-ascii characters with a transliteration
of the character to an ascii character (using Text::Unidecode).
If the language code is de, transliteration via
german_to_ascii() is used first to handle umlauts and eszett,
before using Text::Unidecode.
If other routines are added to support preferred transliteration
of non-ascii characters for specific languages, they should be
added as new subroutines to this file, and then called if the
language code has an appropriate value.
=back
=cut
sub fname_to_ascii {
    # Transliterate any non-ascii characters in a filename to ascii.
    #
    # Inputs:  $fname    - filename (required)
    #          $language - language code (optional); 'de' selects the
    #                      German-specific replacements before the
    #                      generic Text::Unidecode pass.
    # Output:  the filename; returned untouched when it contains nothing
    #          outside the range \x00-\x7f.
    my ($fname,$language) = @_;
    if ($fname =~ /[^\x{00}-\x{7f}]/) {
        # Work on decoded characters so that multi-byte sequences are
        # handled as single characters by the replacement routines.
        $fname = &decode_utf8($fname);
        # $language is optional: guard against undef so that omitting it
        # does not raise an "uninitialized value" warning.
        if (defined($language) && ($language eq 'de')) {
            $fname = &german_to_ascii($fname);
        }
        # Transliteration of last resort for whatever is still non-ascii.
        $fname = unidecode($fname);
        $fname = &encode_utf8($fname);
    }
    return $fname;
}
=pod
=over
=item * german_to_ascii()
Input: $fname (required)
Output: $fname
Replaces each letter carrying an umlaut with the corresponding
base letter followed by the letter e. Case is preserved.
Replaces eszett with double s.
=back
=cut
sub german_to_ascii {
    # Replace German umlauted vowels and eszett with their conventional
    # ascii spellings (base vowel plus e, case preserved; eszett as ss).
    #
    # Input:  $fname - filename as a decoded character string (required)
    # Output: the filename with those seven characters replaced; all
    #         other characters are left as they were.
    #
    # The characters are written as code-point escapes rather than
    # literals so the table cannot be corrupted by a re-encoding of this
    # source file (which would collapse several keys into one).
    my ($fname) = @_;
    my %ascii_for = (
        "\x{C4}" => 'AE',    # A with umlaut
        "\x{D6}" => 'OE',    # O with umlaut
        "\x{DC}" => 'UE',    # U with umlaut
        "\x{E4}" => 'ae',    # a with umlaut
        "\x{F6}" => 'oe',    # o with umlaut
        "\x{FC}" => 'ue',    # u with umlaut
        "\x{DF}" => 'ss',    # eszett
    );
    $fname =~ s/([\x{C4}\x{D6}\x{DC}\x{E4}\x{F6}\x{FC}\x{DF}])/$ascii_for{$1}/g;
    return $fname;
}
1;
More information about the LON-CAPA-cvs
mailing list