[LON-CAPA-cvs] cvs: loncom /publisher lonpublisher.pm
onken
lon-capa-cvs-allow@mail.lon-capa.org
Thu, 14 Aug 2008 13:39:04 -0000
onken Thu Aug 14 09:39:04 2008 EDT
Modified files:
/loncom/publisher lonpublisher.pm
Log:
This is a work simplification for German authors for the present.
Index: loncom/publisher/lonpublisher.pm
diff -u loncom/publisher/lonpublisher.pm:1.244 loncom/publisher/lonpublisher.pm:1.245
--- loncom/publisher/lonpublisher.pm:1.244 Fri Aug 1 14:11:19 2008
+++ loncom/publisher/lonpublisher.pm Thu Aug 14 09:39:02 2008
@@ -1,7 +1,7 @@
# The LearningOnline Network with CAPA
# Publication Handler
#
-# $Id: lonpublisher.pm,v 1.244 2008/08/01 18:11:19 bisitz Exp $
+# $Id: lonpublisher.pm,v 1.245 2008/08/14 13:39:02 onken Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -118,6 +118,8 @@
use File::Copy;
use Apache::Constants qw(:common :http :methods);
use HTML::LCParser;
+use HTML::Entities;
+use Encode::Encoder;
use Apache::lonxml;
use Apache::loncacc;
use DBI;
@@ -1172,17 +1174,22 @@
$textonly=~s/\<script[^\<]+\<\/script\>//g;
$textonly=~s/\<m\>[^\<]+\<\/m\>//g;
$textonly=~s/\<[^\>]*\>//g;
- $textonly=~tr/A-Z/a-z/;
- $textonly=~s/[\$\&][a-z]\w*//g;
- $textonly=~s/[^a-z\s]//g;
-
- foreach ($textonly=~m/(\w+)/g) {
- unless ($nokey{$_}) {
- $keywords{$_}=1;
- }
- }
- }
+ #this is a work simplification for german authors for present
+ $textonly=HTML::Entities::decode($textonly); #decode HTML-character
+ $textonly=Encode::Encoder::encode('utf8', $textonly); #encode to perl internal unicode
+ $textonly=~tr/A-ZÜÄÖ/a-züäö/; #add lowercase rule for german "Umlaute"
+ $textonly=~s/[\$\&][a-z]\w*//g;
+ $textonly=~s/[^a-z^ü^ä^ö^ß\s]//g; #dont delete german "Umlaute"
+
+ foreach ($textonly=~m/[^\s]+/g) { #match all but whitespaces
+ unless ($nokey{$_}) {
+ $keywords{$_}=1;
+ }
+ }
+
+
+ }
foreach my $addkey (split(/[\"\'\,\;]/,$metadatafields{'keywords'})) {
$addkey=~s/\s+/ /g;