[LON-CAPA-cvs] cvs: loncom /interface lonsearchcat.pm
matthew
lon-capa-cvs@mail.lon-capa.org
Thu, 03 Jun 2004 19:23:09 -0000
This is a MIME encoded message
--matthew1086290589
Content-Type: text/plain
matthew Thu Jun 3 15:23:09 2004 EDT
Modified files:
/loncom/interface lonsearchcat.pm
Log:
Major changes to search phrase parsing. Now use recursive descent parser
to make sense of phrases. Multi-word (quoted) phrases, - (meaning not),
and 'or'd phrases are handled properly.
Added &process_phrase_input, which creates the SQL query for phrase inputs.
Removed &build_SQL_query and &recursive_SQL_query_build
Added &output_unparsed_phrase_error.
Localized &output_blank_field_error.
For basic search fixed bug where basic search phrase was not remembered
and filled in properly when the "revise search" button was hit.
--matthew1086290589
Content-Type: text/plain
Content-Disposition: attachment; filename="matthew-20040603152309.txt"
Index: loncom/interface/lonsearchcat.pm
diff -u loncom/interface/lonsearchcat.pm:1.227 loncom/interface/lonsearchcat.pm:1.228
--- loncom/interface/lonsearchcat.pm:1.227 Mon May 10 14:59:18 2004
+++ loncom/interface/lonsearchcat.pm Thu Jun 3 15:23:08 2004
@@ -1,7 +1,7 @@
# The LearningOnline Network with CAPA
# Search Catalog
#
-# $Id: lonsearchcat.pm,v 1.227 2004/05/10 18:59:18 matthew Exp $
+# $Id: lonsearchcat.pm,v 1.228 2004/06/03 19:23:08 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -75,6 +75,7 @@
use Apache::lonlocal;
use LONCAPA::lonmetadata();
use HTML::Entities();
+use Parse::RecDescent;
######################################################################
######################################################################
@@ -337,6 +338,29 @@
&course_search($r);
} elsif(($ENV{'form.phase'} eq 'basic_search') ||
($ENV{'form.phase'} eq 'adv_search')) {
+ #
+ # We are running a search, try to parse it
+ my ($query,$customquery,$customshow,$libraries) =
+ (undef,undef,undef,undef);
+ my $pretty_string;
+ if ($ENV{'form.phase'} eq 'basic_search') {
+ ($query,$pretty_string,$libraries) =
+ &parse_basic_search($r,$closebutton,$hidden_fields);
+ return OK if (! defined($query));
+ &make_persistent({ basicexp => $ENV{'form.basicexp'}},
+ $persistent_db_file);
+ } else { # Advanced search
+ ($query,$customquery,$customshow,$libraries,$pretty_string)
+ = &parse_advanced_search($r,$closebutton,$hidden_fields);
+ return OK if (! defined($query));
+ }
+ &make_persistent({ query => $query,
+ customquery => $customquery,
+ customshow => $customshow,
+ libraries => $libraries,
+ pretty_string => $pretty_string },
+ $persistent_db_file);
+ #
# Set up table
if (! defined(&create_results_table())) {
my $errorstring=&Apache::lonmysql::get_error();
@@ -364,29 +388,12 @@
END
return OK;
}
- #
- # We are running a search
- my ($query,$customquery,$customshow,$libraries) =
- (undef,undef,undef,undef);
- my $pretty_string;
- if ($ENV{'form.phase'} eq 'basic_search') {
- ($query,$pretty_string,$libraries) =
- &parse_basic_search($r,$closebutton,$hidden_fields);
- } else { # Advanced search
- ($query,$customquery,$customshow,$libraries,$pretty_string)
- = &parse_advanced_search($r,$closebutton,$hidden_fields);
- return OK if (! defined($query));
- }
- &make_persistent({ query => $query,
- customquery => $customquery,
- customshow => $customshow,
- libraries => $libraries,
- pretty_string => $pretty_string },
- $persistent_db_file);
##
## Print out the frames interface
##
- &print_frames_interface($r);
+ if (defined($query)) {
+ &print_frames_interface($r);
+ }
}
return OK;
}
@@ -603,8 +610,11 @@
}
$scrout.='<table>'.
'<tr><td align="center" valign="top">'.
- &Apache::lonhtmlcommon::textbox('basicexp',
- $ENV{'form.basicexp'},50).'<br />'.
+ &Apache::lonhtmlcommon::textbox
+ ('basicexp',
+ &HTML::Entities::encode($ENV{'form.basicexp'},'<>&"'),50
+ ).
+ '<br />'.
'<font size="-1">'.&searchhelp().'</font>'.'</td>'.
'<td><font size="-1">'.
'<nobr>'.(' 'x3).$adv_search_link.'</nobr>'.'<br />'.
@@ -613,12 +623,6 @@
'</font></td>'.
'</tr>'.$/;
#
-# $scrout .= '<tr><td align="center">'.
-# '<font size="-1">'.
-# $userelatedwords.(' 'x3).
-# $onlysearchdomain.(' 'x2).$adv_search_link.
-# '</font>'.
-# '</td></tr>'.$/;
$scrout .= '<tr><td align="center" colspan="2">'.
'<font size="-1">'.
'<input type="submit" name="basicsubmit" '.
@@ -999,7 +1003,7 @@
######################################################################
######################################################################
sub searchhelp {
- return &mt('Enter terms or phrases separated by AND, OR, or NOT');
+ return &mt('Enter words and quoted phrases');
}
######################################################################
@@ -1206,7 +1210,6 @@
'lastrevisiondatestart_year','lastrevisiondateend_month',
'lastrevisiondateend_day','lastrevisiondateend_year') {
$ENV{'form.'.$field}=~s/[^\w\/\s\(\)\=\-\"\']//g;
- $ENV{'form.'.$field}=~s/(not\s*$|^\s*(and|or)|)//gi;
}
foreach ('mode','form','element') {
# is this required? Hmmm.
@@ -1251,21 +1254,29 @@
# Evaluate logical expression AND/OR/NOT phrase fields.
foreach my $field (@BasicFields) {
next if (!defined($ENV{'form.'.$field}) || $ENV{'form.'.$field} eq '');
- foreach my $searchphrase(&process_phrase_input($ENV{'form.'.$field})){
- $pretty_search_string .= $font."$field</font> contains <b>".
- $searchphrase."</b>";
+ my ($error,$SQLQuery) =
+ &process_phrase_input($ENV{'form.'.$field},
+ $ENV{'form.'.$field.'_related'},$field);
+ if (defined($error)) {
+ &output_unparsed_phrase_error($r,$closebutton,'phase=disp_adv',
+ $hidden_fields,$field);
+ return;
+ } else {
+ $pretty_search_string .=
+ $font.$field.'</font>: '.$ENV{'form.'.$field};
if ($ENV{'form.'.$field.'_related'}) {
- my @New_Words;
- ($searchphrase,@New_Words) = &related_version($searchphrase);
- if (@New_Words) {
- $pretty_search_string .= " with related words: ".
- "<b>@New_Words</b>.";
+ my @Words =
+ &Apache::loncommon::get_related_words
+ ($ENV{'form.'.$field});
+ if (@Words) {
+ $pretty_search_string.= ' with related words: '.
+ join(', ',@Words[0..4]);
} else {
- $pretty_search_string .= " with no related words.";
+ $pretty_search_string.= ' with related words.';
}
}
- $pretty_search_string .= "<br />\n";
- push @queries,&build_SQL_query($field,$searchphrase);
+ $pretty_search_string .= '<br />';
+ push (@queries,$SQLQuery);
}
}
#
@@ -1282,7 +1293,8 @@
}
}
if (defined($searchphrase)) {
- push @queries,&build_SQL_query('mime',$searchphrase);
+ my ($error,$SQLsearch) = &process_phrase_input($searchphrase,0,'mime');
+ push @queries,$SQLsearch;
$pretty_search_string .=$font.'mime</font> contains <b>'.
$searchphrase.'</b><br />';
}
@@ -1402,11 +1414,11 @@
$pretty_search_string .= $pretty_domains_string."<br />\n";
#
if (@queries) {
- $query="SELECT * FROM metadata WHERE ".join(" AND ",@queries);
+ $query="SELECT * FROM metadata WHERE (".join(") AND (",@queries).')';
} elsif ($customquery) {
$query = '';
}
-# &Apache::lonnet::logthis('query = '.$/.$query);
+ # &Apache::lonnet::logthis('query = '.$/.$query);
return ($query,$customquery,$customshow,$libraries_to_query,
$pretty_search_string);
}
@@ -1468,7 +1480,7 @@
#
# Clean up fields for safety
for my $field ('basicexp') {
- $ENV{"form.$field"}=~s/[^\w\s\(\)\-]//g;
+ $ENV{"form.$field"}=~s/[^\w\s\'\"\!\(\)\-]//g;
}
foreach ('mode','form','element') {
# is this required? Hmmm.
@@ -1481,36 +1493,28 @@
#
# Check to see if enough of a query is filled in
my $search_string = $ENV{'form.basicexp'};
- $search_string =~ s/(not\s*$|^\s*(and|or)|)//gi;
if (! &filled($search_string)) {
&output_blank_field_error($r,$closebutton,'phase=disp_basic');
return OK;
}
- my $pretty_search_string='';
+ my $pretty_search_string=$search_string;
my @Queries;
- my $concatarg=join(',',
- ('title', 'author', 'subject', 'notes', 'abstract',
- 'keywords'));
- foreach my $search (&process_phrase_input($search_string)){
- if ($ENV{'form.related'}) {
- $pretty_search_string .= ' and <br />' if ($pretty_search_string ne '');
- $pretty_search_string .= '<b>'.$search.'</b>';
- my @New_Words;
- ($search,@New_Words) = &related_version($search);
- next if (! $search);
- if (@New_Words) {
- $pretty_search_string .=
- " with related words: <b>@New_Words</b>";
- }
- } else {
- $pretty_search_string .= ' and ' if ($pretty_search_string ne '');
- $pretty_search_string .= '<b>'.$search.'</b>';
- }
- #
- # Build SQL query string based on form page
- push(@Queries,
- &build_SQL_query('concat_ws(" ",'.$concatarg.')',$search));
+ my $searchfield = 'concat_ws(" ",'.join(',',
+ ('title','author','subject',
+ 'notes','abstract','keywords')
+ ).')';
+ my ($error,$SQLQuery) = &process_phrase_input($search_string,
+ $ENV{'form.related'},
+ $searchfield);
+ if ($error) {
+ &output_unparsed_phrase_error($r,$closebutton,'phase=disp_basic',
+ '','basicexp');
+ return;
}
+ push(@Queries,$SQLQuery);
+ #foreach my $q (@Queries) {
+ # &Apache::lonnet::logthis(' '.$q);
+ #}
my $final_query = 'SELECT * FROM metadata WHERE '.join(" AND ",@Queries);
#
if (defined($pretty_domains_string) && $pretty_domains_string ne '') {
@@ -1518,40 +1522,189 @@
}
$pretty_search_string .= "<br />\n";
$pretty_search_string =~ s:^<br /> and ::;
-# &Apache::lonnet::logthis($final_query);
+ # &Apache::lonnet::logthis($final_query);
return ($final_query,$pretty_search_string,
$libraries_to_query);
}
+
+###############################################################
+###############################################################
+
+my @Phrases;
+
+sub concat {
+ my ($item) = @_;
+ my $results = '';
+ foreach (@$item) {
+ if (ref($_) eq 'ARRAY') {
+ $results .= join(' ',@$_);
+ }
+ }
+ return $results;
+}
+
sub process_phrase_input {
- my ($phrase)=@_;
- my @Phrases;
- # &Apache::lonnet::logthis('phrase = :'.$phrase.':');
- my $in_quotes = 0;
- my @Words = split(/\s+/,$phrase);
- foreach my $word (@Words) {
- $word =~ s/(\w+)\"(\w+)/$1$2/g;
- if ($in_quotes) {
- if ($word =~ s/(\")$//) {
- $in_quotes = 0;
+ my ($phrase,$related,$field)=@_;
+ #&Apache::lonnet::logthis('phrase = :'.$phrase.':');
+ my $grammar = <<'ENDGRAMMAR';
+ searchphrase:
+ expression /^\Z/ {
+ # &Apache::lonsearchcat::print_item(\@item,0);
+ [@item];
+ }
+ expression:
+ phrase(s) {
+ [@item];
+ }
+ phrase:
+ orword {
+ [@item];
+ }
+ | andword {
+ [@item];
+ }
+ | minusword {
+ unshift(@::Phrases,$item[1]->[0]);
+ unshift(@::Phrases,$item[1]->[1]);
+ [@item];
+ }
+ | word {
+ unshift(@::Phrases,$item[1]);
+ [@item];
+ }
+ #
+ orword:
+ word 'OR' phrase {
+ unshift(@::Phrases,'OR');
+ unshift(@::Phrases,$item[1]);
+ [@item];
+ }
+ | word 'or' phrase {
+ unshift(@::Phrases,'OR');
+ unshift(@::Phrases,$item[1]);
+ [@item];
+ }
+ | minusword 'OR' phrase {
+ unshift(@::Phrases,'OR');
+ unshift(@::Phrases,$item[1]->[0]);
+ unshift(@::Phrases,$item[1]->[1]);
+ [@item];
+ }
+ | minusword 'or' phrase {
+ unshift(@::Phrases,'OR');
+ unshift(@::Phrases,$item[1]->[0]);
+ unshift(@::Phrases,$item[1]->[1]);
+ [@item];
+ }
+ andword:
+ word phrase {
+ unshift(@::Phrases,'AND');
+ unshift(@::Phrases,$item[1]);
+ [@item];
+ }
+ | minusword phrase {
+ unshift(@::Phrases,'AND');
+ unshift(@::Phrases,$item[1]->[0]);
+ unshift(@::Phrases,$item[1]->[1]);
+ [@item];
+ }
+ #
+ minusword:
+ '-' word {
+ [$item[2],'NOT'];
+ }
+ word:
+ "'" term(s) "'" {
+ &Apache::lonsearchcat::concat(\@item);
+ }
+ | '"' term(s) '"' {
+ &Apache::lonsearchcat::concat(\@item);
+ }
+ | term {
+ $item[1];
+ }
+ term:
+ /[\w\Q:!@#$%^&*()+_=|{}<>,.;\\\/?\E]+/ {
+ $item[1];
+ }
+ENDGRAMMAR
+ #
+ # The end result of parsing the phrase with the grammar is an array
+ # @::Phrases.
+ # $phrase = "gene splicing" or cat -> "gene splicing","OR","cat"
+ # $phrase = "genetic engineering" -dna ->
+ # "genetic engineering","AND","NOT","dna"
+ # $phrase = cat or dog -poodle -> "cat","OR","dog","AND","NOT","poodle"
+ undef(@::Phrases);
+ my $p = new Parse::RecDescent($grammar);
+ if (! defined($p->searchphrase($phrase))) {
+ &Apache::lonnet::logthis('lonsearchcat:unable to process:'.$phrase);
+ return 'Unable to process phrase '.$phrase;
+ }
+ #
+ # Go through the phrases and make sense of them.
+ # Apply modifiers NOT OR and AND to the phrases.
+ my @NewPhrases;
+ while(@::Phrases) {
+ my $phrase = shift(@::Phrases);
+ # &Apache::lonnet::logthis('phrase = '.$phrase);
+ my $phrasedata;
+ if ($phrase =~ /^(NOT|OR|AND)$/) {
+ if ($phrase eq 'OR') {
+ $phrasedata->{'or'}++;
+ if (! @::Phrases) { $phrasedata = undef; last; }
+ $phrase = shift(@::Phrases);
+ } elsif ($phrase eq 'AND') {
+ $phrasedata->{'and'}++;
+ if (! @::Phrases) { $phrasedata = undef; last; }
+ $phrase = shift(@::Phrases);
}
- if ($Phrases[-1] ne '') {
- $Phrases[-1] .= ' ';
+ if ($phrase eq 'NOT') {
+ $phrasedata->{'negate'}++;
+ if (! @::Phrases) { $phrasedata = undef; last; }
+ $phrase = shift(@::Phrases);
}
- $Phrases[-1] .= $word;
+ }
+ $phrasedata->{'phrase'} = $phrase;
+ if ($related) {
+ my @NewWords;
+ (undef,@NewWords) = &related_version($phrasedata->{'phrase'});
+ $phrasedata->{'related_words'} = \@NewWords;
+ }
+ push(@NewPhrases,$phrasedata);
+ }
+ #
+ # Actually build the sql query from the phrases
+ my $SQLQuery;
+ foreach my $phrase (@NewPhrases) {
+ my $query;
+ if ($phrase->{'negate'}) {
+ $query .= $field.' NOT LIKE "%'.$phrase->{'phrase'}.'%"';
} else {
- if ($word =~ s/^(\")//) {
- $in_quotes=1;
+ $query .= $field.' LIKE "%'.$phrase->{'phrase'}.'%"';
+ }
+ foreach my $related (@{$phrase->{'related_words'}}) {
+ if ($phrase->{'negate'}) {
+ $query .= ' AND '.$field.' NOT LIKE "%'.$related.'%"';
+ } else {
+ $query .= ' OR '.$field.' LIKE "%'.$related.'%"';
+ }
+ }
+ if ($SQLQuery) {
+ if ($phrase->{'or'}) {
+ $SQLQuery .= ' OR ('.$query.')';
+ } else {
+ $SQLQuery .= ' AND ('.$query.')';
}
- push(@Phrases,$word);
+ } else {
+ $SQLQuery = '('.$query.')';
}
}
#
- #foreach my $p (@Phrases) {
- # &Apache::lonnet::logthis(' subphrase = '.$p);
- #}
+ # &Apache::lonnet::logthis("SQLQuery = $SQLQuery");
#
- return @Phrases;
+ return undef,$SQLQuery;
}
######################################################################
@@ -1581,30 +1734,6 @@
return $result,sort(@Words);
}
-######################################################################
-######################################################################
-
-=pod
-
-=item &build_SQL_query()
-
-Builds a SQL query string from a logical expression with AND/OR keywords
-using Text::Query and &recursive_SQL_query_builder()
-
-=cut
-
-######################################################################
-######################################################################
-sub build_SQL_query {
- my ($field_name,$logic_statement)=@_;
- my $q=new Text::Query('abc',
- -parse => 'Text::Query::ParseAdvanced',
- -build => 'Text::Query::Build');
- $q->prepare($logic_statement);
- my $matchexp=${$q}{'matchexp'}; chomp $matchexp;
- my $sql_query=&recursive_SQL_query_build($field_name,$matchexp);
- return $sql_query;
-}
######################################################################
######################################################################
@@ -1639,47 +1768,6 @@
return $matchexp;
}
-######################################################################
-######################################################################
-
-=pod
-
-=item &recursive_SQL_query_build()
-
-Recursively constructs an SQL query. Takes as input $dkey and $pattern.
-
-=cut
-
-######################################################################
-######################################################################
-sub recursive_SQL_query_build {
- my ($dkey,$pattern)=@_;
- my @matches=($pattern=~/(\[[^\]|\[]*\])/g);
- return $pattern unless @matches;
- foreach my $match (@matches) {
- $match=~/\[ (\w+)\s(.*) \]/;
- my ($key,$value)=($1,$2);
- my $replacement='';
- if ($key eq 'literal') {
- $replacement="($dkey LIKE \"\%$value\%\")";
- } elsif (lc($key) eq 'not') {
- $value=~s/LIKE/NOT LIKE/;
-# $replacement="($dkey not like $value)";
- $replacement="$value";
- } elsif ($key eq 'and') {
- $value=~/(.*[\"|\)]) ([|\(|\^].*)/;
- $replacement="($1 AND $2)";
- } elsif ($key eq 'or') {
- $value=~/(.*[\"|\)]) ([|\(|\^].*)/;
- $replacement="($1 OR $2)";
- }
- substr($pattern,
- index($pattern,$match),
- length($match),
- $replacement);
- }
- &recursive_SQL_query_build($dkey,$pattern);
-}
######################################################################
######################################################################
@@ -2254,9 +2342,10 @@
#
# Loop through the servers we have contacted but do not
# have results from yet, looking for results.
- while (my ($server,$status) = each(%Server_status)) {
+ foreach my $server (keys(%Server_status)) {
last if ($connection->aborted());
&update_seconds($r);
+ my $status = $Server_status{$server};
if ($status eq 'con_lost') {
delete ($Server_status{$server});
next;
@@ -3210,6 +3299,51 @@
=pod
+=item &output_unparsed_phrase_error()
+
+=cut
+
+######################################################################
+######################################################################
+sub output_unparsed_phrase_error {
+ my ($r,$closebutton,$parms,$hidden_fields,$field)=@_;
+ my $errorstring;
+ if ($field eq 'basicexp') {
+ $errorstring = &mt('Unable to understand the search phrase <i>[_1]</i>. Please modify your search.',$ENV{'form.basicexp'});
+ } else {
+ $errorstring = &mt('Unable to understand the search phrase <b>[_1]</b>:<i>[_2]</i>.',$field,$ENV{'form.'.$field});
+ }
+ my $bodytag = &Apache::loncommon::bodytag('Search');
+ my $heading = &mt('Unparsed Field');
+ my $revise = &mt('Revise search request');
+ # make query information persistent to allow for subsequent revision
+ $r->print(<<ENDPAGE);
+<html>
+<head>
+<title>The LearningOnline Network with CAPA</title>
+</head>
+$bodytag
+<form method="post" action="/adm/searchcat">
+$hidden_fields
+$closebutton
+<hr />
+<h2>$heading</h2>
+<p>
+$errorstring
+</p>
+<p>
+<a href="/adm/searchcat?$parms&persistent_db_id=$ENV{'form.persistent_db_id'}">$revise</a>
+</p>
+</body>
+</html>
+ENDPAGE
+}
+
+######################################################################
+######################################################################
+
+=pod
+
=item &output_blank_field_error()
Output a complete page that indicates the user has not filled in enough
@@ -3227,33 +3361,31 @@
######################################################################
sub output_blank_field_error {
my ($r,$closebutton,$parms,$hidden_fields)=@_;
- my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1);
- # make query information persistent to allow for subsequent revision
- $r->print(<<BEGINNING);
+ my $bodytag=&Apache::loncommon::bodytag('Search');
+ my $errormsg = &mt('You did not fill in enough information for the search to be started. You need to fill in relevant fields on the search page in order for a query to be processed.');
+ my $revise = &mt('Revise Search Request');
+ my $heading = &mt('Unactionable Search Queary');
+ $r->print(<<ENDPAGE);
<html>
<head>
<title>The LearningOnline Network with CAPA</title>
-BEGINNING
- $r->print(<<RESULTS);
</head>
$bodytag
-<img align='right' src='/adm/lonIcons/lonlogos.gif' />
-<h1>Search Catalog</h1>
<form method="post" action="/adm/searchcat">
$hidden_fields
-<a href="/adm/searchcat?$parms&persistent_db_id=$ENV{'form.persistent_db_id'}"
->Revise search request</a>
$closebutton
<hr />
-<h3>Unactionable search query.</h3>
+<h2>$heading</h2>
<p>
-You did not fill in enough information for the search to be started.
-You need to fill in relevant fields on the search page in order
-for a query to be processed.
+$errormsg
+</p>
+<p>
+<a href="/adm/searchcat?$parms&persistent_db_id=$ENV{'form.persistent_db_id'}">$revise</a>
</p>
</body>
</html>
-RESULTS
+ENDPAGE
+ return;
}
######################################################################
--matthew1086290589--