[LON-CAPA-cvs] cvs: loncom /interface lonsearchcat.pm

matthew lon-capa-cvs@mail.lon-capa.org
Thu, 03 Jun 2004 19:23:09 -0000


This is a MIME encoded message

--matthew1086290589
Content-Type: text/plain

matthew		Thu Jun  3 15:23:09 2004 EDT

  Modified files:              
    /loncom/interface	lonsearchcat.pm 
  Log:
  Major changes to search phrase parsing.  Now use recursive descent parser
  to make sense of phrases.  Multi-word (quoted) phrases, - (meaning not),
  and 'or'd phrases are handled properly.
  Added &process_phrase_input, which creates the SQL query for phrase inputs.
  Removed &build_SQL_query and &recursive_SQL_query_build
  Added &output_unparsed_phrase_error.  
  Localized &output_blank_field_error.
  For basic search fixed bug where basic search phrase was not remembered 
  and filled in properly when the "revise search" button was hit.
  
  
--matthew1086290589
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040603152309.txt"

Index: loncom/interface/lonsearchcat.pm
diff -u loncom/interface/lonsearchcat.pm:1.227 loncom/interface/lonsearchcat.pm:1.228
--- loncom/interface/lonsearchcat.pm:1.227	Mon May 10 14:59:18 2004
+++ loncom/interface/lonsearchcat.pm	Thu Jun  3 15:23:08 2004
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # Search Catalog
 #
-# $Id: lonsearchcat.pm,v 1.227 2004/05/10 18:59:18 matthew Exp $
+# $Id: lonsearchcat.pm,v 1.228 2004/06/03 19:23:08 matthew Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -75,6 +75,7 @@
 use Apache::lonlocal;
 use LONCAPA::lonmetadata();
 use HTML::Entities();
+use Parse::RecDescent;
 
 ######################################################################
 ######################################################################
@@ -337,6 +338,29 @@
         &course_search($r);
     } elsif(($ENV{'form.phase'} eq 'basic_search') ||
             ($ENV{'form.phase'} eq 'adv_search')) {
+        #
+        # We are running a search, try to parse it
+        my ($query,$customquery,$customshow,$libraries) = 
+            (undef,undef,undef,undef);
+        my $pretty_string;
+        if ($ENV{'form.phase'} eq 'basic_search') {
+            ($query,$pretty_string,$libraries) = 
+                &parse_basic_search($r,$closebutton,$hidden_fields);
+            return OK if (! defined($query));
+            &make_persistent({ basicexp => $ENV{'form.basicexp'}},
+                             $persistent_db_file);
+        } else {                      # Advanced search
+            ($query,$customquery,$customshow,$libraries,$pretty_string) 
+                = &parse_advanced_search($r,$closebutton,$hidden_fields);
+            return OK if (! defined($query));
+        }
+        &make_persistent({ query => $query,
+                           customquery => $customquery,
+                           customshow => $customshow,
+                           libraries => $libraries,
+                           pretty_string => $pretty_string },
+                         $persistent_db_file);
+        #
         # Set up table
         if (! defined(&create_results_table())) {
 	    my $errorstring=&Apache::lonmysql::get_error();
@@ -364,29 +388,12 @@
 END
             return OK;
         }
-        #
-        # We are running a search
-        my ($query,$customquery,$customshow,$libraries) = 
-            (undef,undef,undef,undef);
-        my $pretty_string;
-        if ($ENV{'form.phase'} eq 'basic_search') {
-            ($query,$pretty_string,$libraries) = 
-                &parse_basic_search($r,$closebutton,$hidden_fields);
-        } else {                      # Advanced search
-            ($query,$customquery,$customshow,$libraries,$pretty_string) 
-                = &parse_advanced_search($r,$closebutton,$hidden_fields);
-            return OK if (! defined($query));
-        }
-        &make_persistent({ query => $query,
-                           customquery => $customquery,
-                           customshow => $customshow,
-                           libraries => $libraries,
-                           pretty_string => $pretty_string },
-                         $persistent_db_file);
         ##
         ## Print out the frames interface
         ##
-        &print_frames_interface($r);
+        if (defined($query)) {
+            &print_frames_interface($r);
+        }
     }
     return OK;
 } 
@@ -603,8 +610,11 @@
         }
         $scrout.='<table>'.
             '<tr><td align="center" valign="top">'.
-            &Apache::lonhtmlcommon::textbox('basicexp',
-                                            $ENV{'form.basicexp'},50).'<br />'.
+            &Apache::lonhtmlcommon::textbox
+            ('basicexp',
+             &HTML::Entities::encode($ENV{'form.basicexp'},'<>&"'),50
+             ).
+             '<br />'.
             '<font size="-1">'.&searchhelp().'</font>'.'</td>'.
             '<td><font size="-1">'.
             '<nobr>'.('&nbsp;'x3).$adv_search_link.'</nobr>'.'<br />'.
@@ -613,12 +623,6 @@
             '</font></td>'.
             '</tr>'.$/;
         #
-#        $scrout .= '<tr><td align="center">'.
-#            '<font size="-1">'.
-#            $userelatedwords.('&nbsp;'x3).
-#            $onlysearchdomain.('&nbsp;'x2).$adv_search_link.
-#            '</font>'.
-#            '</td></tr>'.$/;
         $scrout .= '<tr><td align="center" colspan="2">'.
             '<font size="-1">'.
             '<input type="submit" name="basicsubmit" '.
@@ -999,7 +1003,7 @@
 ######################################################################
 ######################################################################
 sub searchhelp {
-    return &mt('Enter terms or phrases separated by AND, OR, or NOT');
+    return &mt('Enter words and quoted phrases');
 }
 
 ######################################################################
@@ -1206,7 +1210,6 @@
 		   'lastrevisiondatestart_year','lastrevisiondateend_month',
 		   'lastrevisiondateend_day','lastrevisiondateend_year') {
 	$ENV{'form.'.$field}=~s/[^\w\/\s\(\)\=\-\"\']//g;
-        $ENV{'form.'.$field}=~s/(not\s*$|^\s*(and|or)|)//gi;
     }
     foreach ('mode','form','element') {
 	# is this required?  Hmmm.
@@ -1251,21 +1254,29 @@
     # Evaluate logical expression AND/OR/NOT phrase fields.
     foreach my $field (@BasicFields) {
 	next if (!defined($ENV{'form.'.$field}) || $ENV{'form.'.$field} eq '');
-        foreach my $searchphrase(&process_phrase_input($ENV{'form.'.$field})){
-            $pretty_search_string .= $font."$field</font> contains <b>".
-                $searchphrase."</b>";
+        my ($error,$SQLQuery) = 
+            &process_phrase_input($ENV{'form.'.$field},
+                                  $ENV{'form.'.$field.'_related'},$field);
+        if (defined($error)) {
+            &output_unparsed_phrase_error($r,$closebutton,'phase=disp_adv',
+                                         $hidden_fields,$field);
+            return;
+        } else {
+            $pretty_search_string .= 
+                $font.$field.'</font>: '.$ENV{'form.'.$field};
             if ($ENV{'form.'.$field.'_related'}) {
-                my @New_Words;
-                ($searchphrase,@New_Words) = &related_version($searchphrase);
-                if (@New_Words) {
-                    $pretty_search_string .= " with related words: ".
-                        "<b>@New_Words</b>.";
+                my @Words = 
+                    &Apache::loncommon::get_related_words
+                    ($ENV{'form.'.$field});
+                if (@Words) {
+                    $pretty_search_string.= ' with related words: '.
+                        join(', ',@Words[0..4]);
                 } else {
-                    $pretty_search_string .= " with no related words.";
+                    $pretty_search_string.= ' with related words.';
                 }
             }
-            $pretty_search_string .= "<br />\n";
-            push @queries,&build_SQL_query($field,$searchphrase);
+            $pretty_search_string .= '<br />';
+            push (@queries,$SQLQuery);
         }
     }
     #
@@ -1282,7 +1293,8 @@
         }
     }
     if (defined($searchphrase)) {
-        push @queries,&build_SQL_query('mime',$searchphrase);
+        my ($error,$SQLsearch) = &process_phrase_input($searchphrase,0,'mime');
+        push @queries,$SQLsearch;
         $pretty_search_string .=$font.'mime</font> contains <b>'.
             $searchphrase.'</b><br />';
     }
@@ -1402,11 +1414,11 @@
     $pretty_search_string .= $pretty_domains_string."<br />\n";
     #
     if (@queries) {
-	$query="SELECT * FROM metadata WHERE ".join(" AND ",@queries);
+	$query="SELECT * FROM metadata WHERE (".join(") AND (",@queries).')';
     } elsif ($customquery) {
         $query = '';
     }
-#    &Apache::lonnet::logthis('query = '.$/.$query);
+    # &Apache::lonnet::logthis('query = '.$/.$query);
     return ($query,$customquery,$customshow,$libraries_to_query,
             $pretty_search_string);
 }
@@ -1468,7 +1480,7 @@
     #
     # Clean up fields for safety
     for my $field ('basicexp') {
-	$ENV{"form.$field"}=~s/[^\w\s\(\)\-]//g;
+	$ENV{"form.$field"}=~s/[^\w\s\'\"\!\(\)\-]//g;
     }
     foreach ('mode','form','element') {
 	# is this required?  Hmmm.
@@ -1481,36 +1493,28 @@
     #
     # Check to see if enough of a query is filled in
     my $search_string = $ENV{'form.basicexp'};
-    $search_string =~ s/(not\s*$|^\s*(and|or)|)//gi;
     if (! &filled($search_string)) {
 	&output_blank_field_error($r,$closebutton,'phase=disp_basic');
 	return OK;
     }
-    my $pretty_search_string='';
+    my $pretty_search_string=$search_string;
     my @Queries;
-    my $concatarg=join(',',
-                       ('title', 'author', 'subject', 'notes', 'abstract',
-                        'keywords'));
-    foreach my $search (&process_phrase_input($search_string)){
-        if ($ENV{'form.related'}) {
-            $pretty_search_string .= ' and <br />' if ($pretty_search_string ne '');
-            $pretty_search_string .= '<b>'.$search.'</b>';
-            my @New_Words;
-            ($search,@New_Words) = &related_version($search);
-            next if (! $search);
-            if (@New_Words) {
-                $pretty_search_string .= 
-                    " with related words: <b>@New_Words</b>";
-            }
-        } else {
-            $pretty_search_string .= ' and ' if ($pretty_search_string ne '');
-            $pretty_search_string .= '<b>'.$search.'</b>';
-        }
-        #
-        # Build SQL query string based on form page
-        push(@Queries,
-             &build_SQL_query('concat_ws(" ",'.$concatarg.')',$search));
+    my $searchfield = 'concat_ws(" ",'.join(',',
+                                            ('title','author','subject',
+                                             'notes','abstract','keywords')
+                                            ).')';
+    my ($error,$SQLQuery) = &process_phrase_input($search_string,
+                                                    $ENV{'form.related'},
+                                                    $searchfield);
+    if ($error) {
+        &output_unparsed_phrase_error($r,$closebutton,'phase=disp_basic',
+                                      '','basicexp');
+        return;
     }
+    push(@Queries,$SQLQuery);
+    #foreach my $q (@Queries) {
+    #    &Apache::lonnet::logthis('    '.$q);
+    #}
     my $final_query = 'SELECT * FROM metadata WHERE '.join(" AND ",@Queries);
     #
     if (defined($pretty_domains_string) && $pretty_domains_string ne '') {
@@ -1518,40 +1522,189 @@
     }
     $pretty_search_string .= "<br />\n";
     $pretty_search_string =~ s:^<br /> and ::;
-#    &Apache::lonnet::logthis($final_query);
+    # &Apache::lonnet::logthis($final_query);
     return ($final_query,$pretty_search_string,
             $libraries_to_query);
 }
 
+
+###############################################################
+###############################################################
+
+my @Phrases;
+
+sub concat {
+    my ($item) = @_;
+    my $results = '';
+    foreach (@$item) {
+        if (ref($_) eq 'ARRAY') {
+            $results .= join(' ',@$_);
+        }
+    }
+    return $results;
+}
+
 sub process_phrase_input {
-    my ($phrase)=@_;
-    my @Phrases;
-    # &Apache::lonnet::logthis('phrase = :'.$phrase.':');
-    my $in_quotes = 0;
-    my @Words = split(/\s+/,$phrase);
-    foreach my $word (@Words) {
-        $word =~ s/(\w+)\"(\w+)/$1$2/g;
-        if ($in_quotes) {
-            if ($word =~ s/(\")$//) {
-                $in_quotes = 0;
+    my ($phrase,$related,$field)=@_;
+    #&Apache::lonnet::logthis('phrase = :'.$phrase.':');
+    my $grammar = <<'ENDGRAMMAR';
+    searchphrase:
+        expression /^\Z/ {
+            # &Apache::lonsearchcat::print_item(\@item,0);
+            [@item];
+        }
+    expression:
+        phrase(s)   {
+            [@item];
+        }
+    phrase:
+        orword {
+            [@item];
+        }
+      | andword {
+            [@item];
+        }
+      | minusword {
+            unshift(@::Phrases,$item[1]->[0]);
+            unshift(@::Phrases,$item[1]->[1]);
+            [@item];
+        }
+      | word {
+            unshift(@::Phrases,$item[1]);
+            [@item];
+        } 
+    #
+    orword:
+        word 'OR' phrase {
+            unshift(@::Phrases,'OR');
+            unshift(@::Phrases,$item[1]);
+            [@item];
+        }
+        | word 'or' phrase {
+            unshift(@::Phrases,'OR');
+            unshift(@::Phrases,$item[1]);
+            [@item];
+        }    
+        | minusword 'OR' phrase {
+            unshift(@::Phrases,'OR');
+            unshift(@::Phrases,$item[1]->[0]);
+            unshift(@::Phrases,$item[1]->[1]);
+            [@item];
+        }
+        | minusword 'or' phrase {
+            unshift(@::Phrases,'OR');
+            unshift(@::Phrases,$item[1]->[0]);
+            unshift(@::Phrases,$item[1]->[1]);
+            [@item];
+        }    
+    andword:
+        word phrase {
+            unshift(@::Phrases,'AND');
+            unshift(@::Phrases,$item[1]);
+            [@item];
+        }
+        | minusword phrase {
+            unshift(@::Phrases,'AND');
+            unshift(@::Phrases,$item[1]->[0]);
+            unshift(@::Phrases,$item[1]->[1]);
+            [@item];
+        }
+    #
+    minusword:
+        '-' word {
+            [$item[2],'NOT'];
+        }
+    word:
+        "'" term(s) "'" {
+          &Apache::lonsearchcat::concat(\@item);
+        }
+      | '"' term(s) '"' {
+          &Apache::lonsearchcat::concat(\@item);
+        }
+      | term {
+            $item[1];
+        }
+    term:
+        /[\w\Q:!@#$%^&*()+_=|{}<>,.;\\\/?\E]+/ {
+            $item[1];
+        }
+ENDGRAMMAR
+    #
+    # The end result of parsing the phrase with the grammar is an array
+    # @::Phrases.
+    # $phrase = "gene splicing" or cat -> "gene splicing","OR","cat"
+    # $phrase = "genetic engineering" -dna ->
+    #                      "genetic engineering","AND","NOT","dna"
+    # $phrase = cat or dog -poodle -> "cat","OR","dog","AND","NOT","poodle"
+    undef(@::Phrases);
+    my $p = new Parse::RecDescent($grammar);
+    if (! defined($p->searchphrase($phrase))) {
+        &Apache::lonnet::logthis('lonsearchcat:unable to process:'.$phrase);
+        return 'Unable to process phrase '.$phrase;
+    }
+    #
+    # Go through the phrases and make sense of them.  
+    # Apply modifiers NOT OR and AND to the phrases.
+    my @NewPhrases;
+    while(@::Phrases) {
+        my $phrase = shift(@::Phrases);
+        # &Apache::lonnet::logthis('phrase = '.$phrase);
+        my $phrasedata;
+        if ($phrase =~ /^(NOT|OR|AND)$/) {
+            if ($phrase eq 'OR') {
+                $phrasedata->{'or'}++;
+                if (! @::Phrases) { $phrasedata = undef; last; }
+                $phrase = shift(@::Phrases);
+            } elsif ($phrase eq 'AND') {
+                $phrasedata->{'and'}++;
+                if (! @::Phrases) { $phrasedata = undef; last; }
+                $phrase = shift(@::Phrases);
             }
-            if ($Phrases[-1] ne '') {
-                $Phrases[-1] .= ' ';
+            if ($phrase eq 'NOT') {
+                $phrasedata->{'negate'}++;
+                if (! @::Phrases) { $phrasedata = undef; last; }
+                $phrase = shift(@::Phrases);
             }
-            $Phrases[-1] .= $word;
+        }
+        $phrasedata->{'phrase'} = $phrase;
+        if ($related) {
+            my @NewWords;
+            (undef,@NewWords) = &related_version($phrasedata->{'phrase'});
+            $phrasedata->{'related_words'} = \@NewWords;
+        }
+        push(@NewPhrases,$phrasedata);
+    }
+    #
+    # Actually build the sql query from the phrases
+    my $SQLQuery;
+    foreach my $phrase (@NewPhrases) {
+        my $query;
+        if ($phrase->{'negate'}) {
+            $query .= $field.' NOT LIKE "%'.$phrase->{'phrase'}.'%"';
         } else {
-            if ($word =~ s/^(\")//) {
-                $in_quotes=1;
+            $query .= $field.' LIKE "%'.$phrase->{'phrase'}.'%"';
+        }
+        foreach my $related (@{$phrase->{'related_words'}}) {
+            if ($phrase->{'negate'}) {
+                $query .= ' AND '.$field.' NOT LIKE "%'.$related.'%"';
+            } else {
+                $query .= ' OR '.$field.' LIKE "%'.$related.'%"';
+            }
+        }
+        if ($SQLQuery) {
+            if ($phrase->{'or'}) {
+                $SQLQuery .= ' OR ('.$query.')';
+            } else {
+                $SQLQuery .= ' AND ('.$query.')';
             }
-            push(@Phrases,$word);
+        } else {
+            $SQLQuery = '('.$query.')';
         }
     }
     #
-    #foreach my $p (@Phrases) {
-    #    &Apache::lonnet::logthis('    subphrase = '.$p);
-    #}
+    # &Apache::lonnet::logthis("SQLQuery = $SQLQuery");
     #
-    return @Phrases;
+    return undef,$SQLQuery;
 }
 
 ######################################################################
@@ -1581,30 +1734,6 @@
     return $result,sort(@Words);
 }
 
-######################################################################
-######################################################################
-
-=pod 
-
-=item &build_SQL_query() 
-
-Builds a SQL query string from a logical expression with AND/OR keywords
-using Text::Query and &recursive_SQL_query_builder()
-
-=cut
-
-######################################################################
-######################################################################
-sub build_SQL_query {
-    my ($field_name,$logic_statement)=@_;
-    my $q=new Text::Query('abc',
-			  -parse => 'Text::Query::ParseAdvanced',
-			  -build => 'Text::Query::Build');
-    $q->prepare($logic_statement);
-    my $matchexp=${$q}{'matchexp'}; chomp $matchexp;
-    my $sql_query=&recursive_SQL_query_build($field_name,$matchexp);
-    return $sql_query;
-}
 
 ######################################################################
 ######################################################################
@@ -1639,47 +1768,6 @@
     return $matchexp;
 }
 
-######################################################################
-######################################################################
-
-=pod 
-
-=item &recursive_SQL_query_build() 
-
-Recursively constructs an SQL query.  Takes as input $dkey and $pattern.
-
-=cut
-
-######################################################################
-######################################################################
-sub recursive_SQL_query_build {
-    my ($dkey,$pattern)=@_;
-    my @matches=($pattern=~/(\[[^\]|\[]*\])/g);
-    return $pattern unless @matches;
-    foreach my $match (@matches) {
-        $match=~/\[ (\w+)\s(.*) \]/;
-        my ($key,$value)=($1,$2);
-        my $replacement='';
-        if ($key eq 'literal') {
-            $replacement="($dkey LIKE \"\%$value\%\")";
-        } elsif (lc($key) eq 'not') {
-            $value=~s/LIKE/NOT LIKE/;
-#          $replacement="($dkey not like $value)";
-            $replacement="$value";
-        } elsif ($key eq 'and') {
-            $value=~/(.*[\"|\)]) ([|\(|\^].*)/;
-            $replacement="($1 AND $2)";
-	} elsif ($key eq 'or') {
-            $value=~/(.*[\"|\)]) ([|\(|\^].*)/;
-            $replacement="($1 OR $2)";
-	}
-	substr($pattern,
-               index($pattern,$match),
-               length($match),
-               $replacement);
-    }
-    &recursive_SQL_query_build($dkey,$pattern);
-}
 
 ######################################################################
 ######################################################################
@@ -2254,9 +2342,10 @@
         #
         # Loop through the servers we have contacted but do not
         # have results from yet, looking for results.
-        while (my ($server,$status) = each(%Server_status)) {
+        foreach my $server (keys(%Server_status)) {
             last if ($connection->aborted());
             &update_seconds($r);
+            my $status = $Server_status{$server};
             if ($status eq 'con_lost') {
                 delete ($Server_status{$server});
                 next;
@@ -3210,6 +3299,51 @@
 
 =pod 
 
+=item &output_unparsed_phrase_error()
+
+=cut
+
+######################################################################
+######################################################################
+sub output_unparsed_phrase_error {
+    my ($r,$closebutton,$parms,$hidden_fields,$field)=@_;
+    my $errorstring;
+    if ($field eq 'basicexp') {
+        $errorstring = &mt('Unable to understand the search phrase <i>[_1]</i>.  Please modify your search.',$ENV{'form.basicexp'});
+    } else {
+        $errorstring = &mt('Unable to understand the search phrase <b>[_1]</b>:<i>[_2]</i>.',$field,$ENV{'form.'.$field});
+    }
+    my $bodytag = &Apache::loncommon::bodytag('Search');
+    my $heading = &mt('Unparsed Field');
+    my $revise  = &mt('Revise search request');
+    # make query information persistent to allow for subsequent revision
+    $r->print(<<ENDPAGE);
+<html>
+<head>
+<title>The LearningOnline Network with CAPA</title>
+</head>
+$bodytag
+<form method="post" action="/adm/searchcat">
+$hidden_fields
+$closebutton
+<hr />
+<h2>$heading</h2>
+<p>
+$errorstring
+</p>
+<p>
+<a href="/adm/searchcat?$parms&persistent_db_id=$ENV{'form.persistent_db_id'}">$revise</a>
+</p>
+</body>
+</html>
+ENDPAGE
+}
+
+######################################################################
+######################################################################
+
+=pod 
+
 =item &output_blank_field_error()
 
 Output a complete page that indicates the user has not filled in enough
@@ -3227,33 +3361,31 @@
 ######################################################################
 sub output_blank_field_error {
     my ($r,$closebutton,$parms,$hidden_fields)=@_;
-    my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1);
-    # make query information persistent to allow for subsequent revision
-    $r->print(<<BEGINNING);
+    my $bodytag=&Apache::loncommon::bodytag('Search');
+    my $errormsg = &mt('You did not fill in enough information for the search to be started.  You need to fill in relevant fields on the search page in order for a query to be processed.');
+    my $revise = &mt('Revise Search Request');
+    my $heading = &mt('Unactionable Search Queary');
+    $r->print(<<ENDPAGE);
 <html>
 <head>
 <title>The LearningOnline Network with CAPA</title>
-BEGINNING
-    $r->print(<<RESULTS);
 </head>
 $bodytag
-<img align='right' src='/adm/lonIcons/lonlogos.gif' />
-<h1>Search Catalog</h1>
 <form method="post" action="/adm/searchcat">
 $hidden_fields
-<a href="/adm/searchcat?$parms&persistent_db_id=$ENV{'form.persistent_db_id'}"
->Revise search request</a>&nbsp;
 $closebutton
 <hr />
-<h3>Unactionable search query.</h3>
+<h2>$heading</h2>
 <p>
-You did not fill in enough information for the search to be started.
-You need to fill in relevant fields on the search page in order 
-for a query to be processed.
+$errormsg
+</p>
+<p>
+<a href="/adm/searchcat?$parms&persistent_db_id=$ENV{'form.persistent_db_id'}">$revise</a>&nbsp;
 </p>
 </body>
 </html>
-RESULTS
+ENDPAGE
+    return;
 }
 
 ######################################################################

--matthew1086290589--