telling the user what stopwords have been removed from the
author Joshua Ferraro <jmf@liblime.com>
Thu, 22 Nov 2007 19:47:17 +0000 (13:47 -0600)
committer Joshua Ferraro <jmf@liblime.com>
Sun, 25 Nov 2007 22:25:00 +0000 (16:25 -0600)
query -- can be quite confusing if you don't know!

Signed-off-by: Joshua Ferraro <jmf@liblime.com>
C4/Search.pm
catalogue/search.pl
koha-tmpl/intranet-tmpl/prog/en/modules/catalogue/results.tmpl

index 155f811..028a43b 100644 (file)
@@ -558,6 +558,7 @@ sub getRecords {
 # STOPWORDS
 sub _remove_stopwords {
     my ($operand,$index) = @_;
+       my @stopwords_removed;
     # phrase and exact-qualified indexes shoudln't have stopwords removed
     if ($index!~m/phr|ext/){
     # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
@@ -565,13 +566,16 @@ sub _remove_stopwords {
     #       otherwise, a french word like "leçon" woudl be split into "le" "çon", le 
     #       is an empty word, we get "çon" and wouldn't find anything...
         foreach (keys %{C4::Context->stopwords}) {
-            next if ($_ =~/(and|or|not)/); # don't remove operators 
-            $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i;
-            $operand=~ s/^$_\P{IsAlpha}/ /i;
-            $operand=~ s/\P{IsAlpha}$_$/ /i;
+            next if ($_ =~/(and|or|not)/); # don't remove operators
+                       if ($operand =~ /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/) {
+               $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
+               $operand=~ s/^$_\P{IsAlpha}/ /gi;
+               $operand=~ s/\P{IsAlpha}$_$/ /gi;
+                               push @stopwords_removed, $_;
+                       }
         }
     }
-    return $operand;
+    return ($operand, \@stopwords_removed);
 }
 
 # TRUNCATION
@@ -681,11 +685,13 @@ sub buildQuery {
        my $simple_query = $operands[0];
        my $query_cgi;
        my $query_desc;
+       my $query_type;
 
        my $limit;
        my $limit_cgi;
        my $limit_desc;
 
+       my $stopwords_removed;
 # STEP I: determine if this is a form-based / simple query or if it's nested
 
 # check if this is a known query language query, if it is, return immediately,
@@ -728,9 +734,9 @@ sub buildQuery {
                 my $index_plus_comma="$index," if $index;
 
                 # Remove Stopwords  
-                $operand = _remove_stopwords($operand,$index);
+                ($operand, $stopwords_removed) = _remove_stopwords($operand,$index);
                 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
-
+                               warn "REMOVED STOPWORDS: @$stopwords_removed" if $DEBUG;
                 my $indexes_set;
 
                 # Detect Truncation
@@ -873,7 +879,7 @@ sub buildQuery {
     warn "LIMIT CGI:".$limit_cgi if $DEBUG;
     warn "LIMIT DESC:".$limit_desc if $DEBUG;
 
-       return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc );
+       return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type );
 }
 
 # IMO this subroutine is pretty messy still -- it's responsible for
index 4be9a40..32b016a 100755 (executable)
@@ -395,12 +395,12 @@ my $hits;
 my $expanded_facet = $params->{'expand'};
 
 # Define some global variables
-my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type);
+my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type);
 
 my @results;
 
 ## I. BUILD THE QUERY
-( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by);
+( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by);
 
 ## parse the query_cgi string and put it into a form suitable for <input>s
 my @query_inputs;
@@ -491,6 +491,7 @@ for (my $i=0;$i<=@servers;$i++) {
                        if ($query_desc || $limit_desc) {
                $template->param(searchdesc => 1);
                        }
+                       $template->param(stopwords_removed => "@$stopwords_removed");
             $template->param(results_per_page =>  $results_per_page);
             $template->param(SEARCH_RESULTS => \@newresults);
                        ## Build the page numbers on the bottom of the page
index 6d48827..53cfe44 100644 (file)
@@ -29,6 +29,7 @@
     <h3>
         <!-- TMPL_VAR NAME="total" --> results found for '<!-- TMPL_VAR NAME="query_desc" --><!-- TMPL_VAR NAME="limit_desc" -->'
     </h3>
+       <!-- TMPL_IF NAME="stopwords_removed" --><div><p class="tip">Ignored the following common words: "<!-- TMPL_VAR NAME="stopwords_removed" -->"<p></div><!-- /TMPL_IF -->
     <!-- TMPL_ELSE -->
         <!-- TMPL_IF NAME="searchdesc" -->
             <h3>No results found</h3>