[LON-CAPA-cvs] cvs: loncom /publisher lonpublisher.pm

onken lon-capa-cvs-allow@mail.lon-capa.org
Thu, 14 Aug 2008 13:39:04 -0000


onken		Thu Aug 14 09:39:04 2008 EDT

  Modified files:              
    /loncom/publisher	lonpublisher.pm 
  Log:
  This is a work simplification for German authors for the time being.
  
  
Index: loncom/publisher/lonpublisher.pm
diff -u loncom/publisher/lonpublisher.pm:1.244 loncom/publisher/lonpublisher.pm:1.245
--- loncom/publisher/lonpublisher.pm:1.244	Fri Aug  1 14:11:19 2008
+++ loncom/publisher/lonpublisher.pm	Thu Aug 14 09:39:02 2008
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # Publication Handler
 #
-# $Id: lonpublisher.pm,v 1.244 2008/08/01 18:11:19 bisitz Exp $
+# $Id: lonpublisher.pm,v 1.245 2008/08/14 13:39:02 onken Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -118,6 +118,8 @@
 use File::Copy;
 use Apache::Constants qw(:common :http :methods);
 use HTML::LCParser;
+use HTML::Entities;
+use Encode::Encoder;
 use Apache::lonxml;
 use Apache::loncacc;
 use DBI;
@@ -1172,17 +1174,22 @@
 	$textonly=~s/\<script[^\<]+\<\/script\>//g;
 	$textonly=~s/\<m\>[^\<]+\<\/m\>//g;
 	$textonly=~s/\<[^\>]*\>//g;
-	$textonly=~tr/A-Z/a-z/;
-	$textonly=~s/[\$\&][a-z]\w*//g;
-	$textonly=~s/[^a-z\s]//g;
-	
-	foreach ($textonly=~m/(\w+)/g) {
-	    unless ($nokey{$_}) {
-		$keywords{$_}=1;
-	    } 
-	}
-    }
 
+        #this is a work simplification for german authors for present
+        $textonly=HTML::Entities::decode($textonly);           #decode HTML-character
+        $textonly=Encode::Encoder::encode('utf8', $textonly);  #encode to perl internal unicode
+        $textonly=~tr/A-ZÜÄÖ/a-züäö/;      #add lowercase rule for german "Umlaute"
+        $textonly=~s/[\$\&][a-z]\w*//g;
+        $textonly=~s/[^a-z^ü^ä^ö^ß\s]//g;  #dont delete german "Umlaute"
+
+        foreach ($textonly=~m/[^\s]+/g) {  #match all but whitespaces
+            unless ($nokey{$_}) {
+                $keywords{$_}=1;
+            }
+        }
+
+
+    }
             
     foreach my $addkey (split(/[\"\'\,\;]/,$metadatafields{'keywords'})) {
 	$addkey=~s/\s+/ /g;