adding a space between query and limit concat

[koha_fer] / C4 / Search.pm
diff --git a/C4/Search.pm b/C4/Search.pm

index 0186eb1..c277ff3 100644 (file)
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -558,20 +558,24 @@ sub getRecords {
  # STOPWORDS
  sub _remove_stopwords {
      my ($operand,$index) = @_;
-    # phrase and exact-qualified indexes shoudln't have stopwords removed
+       my @stopwords_removed;
+    # phrase and exact-qualified indexes shouldn't have stopwords removed
      if ($index!~m/phr|ext/){
      # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
      #       we use IsAlpha unicode definition, to deal correctly with diacritics.
-    #       otherwise, a french word like "leçon" woudl be split into "le" "çon", le 
-    #       is an empty word, we get "çon" and wouldn't find anything...
+    #       otherwise, a French word like "leçon" would be split into "le" "çon", le 
+    #       is an empty word, we'd get "çon" and wouldn't find anything...
          foreach (keys %{C4::Context->stopwords}) {
-            next if ($_ =~/(and|or|not)/); # don't remove operators 
-            $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i;
-            $operand=~ s/^$_\P{IsAlpha}/ /i;
-            $operand=~ s/\P{IsAlpha}$_$/ /i;
+            next if ($_ =~/(and|or|not)/); # don't remove operators
+                       if ($operand =~ /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/) {
+               $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
+               $operand=~ s/^$_\P{IsAlpha}/ /gi;
+               $operand=~ s/\P{IsAlpha}$_$/ /gi;
+                               push @stopwords_removed, $_;
+                       }
          }
      }
-    return $operand;
+    return ($operand, \@stopwords_removed);
  }
  
  # TRUNCATION
@@ -673,37 +677,39 @@ sub buildQuery {
      my @limits    = @$limits    if $limits;
      my @sort_by   = @$sort_by   if $sort_by;
  
-    my $stemming      = C4::Context->preference("QueryStemming")     || 0;
-    my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
-    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
+    my $stemming      = C4::Context->preference("QueryStemming")               || 0;
+       my $auto_truncation = C4::Context->preference("QueryAutoTruncate")              || 0;
+    my $weight_fields = C4::Context->preference("QueryWeightFields")           || 0;
+    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")                          || 0;
+       my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords")  || 0;
  
      my $query = $operands[0];
         my $simple_query = $operands[0];
         my $query_cgi;
-       my $query_search_desc;
+       my $query_desc;
+       my $query_type;
  
         my $limit;
         my $limit_cgi;
         my $limit_desc;
  
-# STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
-# pass it off to zebra directly)
+       my $stopwords_removed;
  
-# check if this is a known query language query, if it is, return immediately,
-# the user is responsible for constructing valid syntax:
+       # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
+       # DIAGNOSTIC ONLY!!
      if ( $query =~ /^ccl=/ ) {
-        return ( undef, $', $', $', '', '', '', 'ccl' );
+        return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
      }
      if ( $query =~ /^cql=/ ) {
-        return ( undef, $', $', $', '', '', '', 'cql' );
+        return ( undef, $', $', $', $', '', '', '', '', 'cql' );
      }
      if ( $query =~ /^pqf=/ ) {
-        return ( undef, $', $', $', '', '', '', 'pqf' );
+        return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
      }
  
-# FIXME: this is bound to be broken now
-    if ( $query =~ /(\(|\))/ ) {    # sorry, too complex, assume CCL
-        return ( undef, $query, $query_cgi, $query_search_desc, $limit, $limit_cgi, $limit_desc, 'ccl' );
+       # pass nested queries directly
+    if ( $query =~ /(\(|\))/ ) {
+        return ( undef, $query, $simple_query, $query_cgi, $query, $limit, $limit_cgi, $limit_desc, $stopwords_removed, 'ccl' );
      }
  
  # form-based queries are limited to non-nested at a specific depth, so we can easily
@@ -720,28 +726,37 @@ sub buildQuery {
              # COMBINE OPERANDS, INDEXES AND OPERATORS
              if ( $operands[$i] ) {
  
-                               $weight_fields = 0 if $operands[$i] =~ /(:|=)/;
+                               # a flag to determine whether or not to add the index to the query
+                               my $indexes_set;
+                               # if the user is sophisticated enough to specify an index, turn off some defaults
+                               if ($operands[$i] =~ /(:|=)/) {
+                                       $weight_fields = 0;
+                                       $stemming = 0;
+                                       $remove_stopwords = 0;
+                               }
                  my $operand = $operands[$i];
                  my $index   = $indexes[$i];
  
-                # if there's no index, don't use one, it will throw a CCL error
+                               # some helpful index modifs
                  my $index_plus = "$index:" if $index;
                  my $index_plus_comma="$index," if $index;
  
-                # Remove Stopwords  
-                $operand = _remove_stopwords($operand,$index);
-                warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
-
-                my $indexes_set;
+                # Remove Stopwords
+                               if ($remove_stopwords) {
+                ($operand, $stopwords_removed) = _remove_stopwords($operand,$index);
+                       warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
+                                       warn "REMOVED STOPWORDS: @$stopwords_removed" if ($stopwords_removed && $DEBUG);
+                               }
  
                  # Detect Truncation
                  my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
                  my $truncated_operand;
                  ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index);
                  warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG;
+
                  # Apply Truncation
-                # Problem is when build_weights gets ahold if this is wraps in quotes which breaks the truncation :/
                  if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
+                                       # don't field weight or add the index to the query, we do it here
                      $indexes_set = 1;
                      undef $weight_fields;
                      my $previous_truncation_operand;
@@ -791,7 +806,7 @@ sub buildQuery {
                                                 $query_cgi .="&op=$operators[$i-1]";
                                                 $query_cgi .="&idx=$index" if $index;
                                                 $query_cgi .="&q=$operands[$i]" if $operands[$i];
-                                               $query_search_desc .=" $operators[$i-1] $index_plus $operands[$i]";
+                                               $query_desc .=" $operators[$i-1] $index_plus $operands[$i]";
                      }
  
                      # the default operator is and
@@ -801,15 +816,16 @@ sub buildQuery {
                          $query .= "$operand";
                                                 $query_cgi .="&op=and&idx=$index" if $index;
                                                 $query_cgi .="&q=$operands[$i]" if $operands[$i];
-                        $query_search_desc .= " and $index_plus $operands[$i]";
+                        $query_desc .= " and $index_plus $operands[$i]";
                      }
                  }
  
+                               # there isn't a previous operand, so we don't need an operator
                  else { 
                                         # field-weighted queries already have indexes set
                                         $query .=" $index_plus " unless $indexes_set;
                                         $query .= $operand;
-                                       $query_search_desc .= " $index_plus $operands[$i]";
+                                       $query_desc .= " $index_plus $operands[$i]";
                                         $query_cgi.="&idx=$index" if $index;
                                         $query_cgi.="&q=$operands[$i]" if $operands[$i];
  
@@ -852,10 +868,10 @@ sub buildQuery {
                 $limit.="($group_OR_limits)";
         }
         # normalize the strings
-       for ($query, $query_search_desc, $limit, $limit_desc) {
+       for ($query, $query_desc, $limit, $limit_desc) {
                 $_ =~ s/  / /g;    # remove extra spaces
         $_ =~ s/^ //g;     # remove any beginning spaces
-               $_ =~ s/ $//g;     # remove any beginning spaces
+               $_ =~ s/ $//g;     # remove any ending spaces
         $_ =~ s/:/=/g;     # causes probs for server
         $_ =~ s/==/=/g;    # remove double == from query
  
@@ -864,16 +880,16 @@ sub buildQuery {
         $query_cgi =~ s/^&//;
  
         # append the limit to the query
-       $query .= $limit;
+       $query .= " ".$limit;
  
      warn "QUERY:".$query if $DEBUG;
         warn "QUERY CGI:".$query_cgi if $DEBUG;
-    warn "QUERY DESC:".$query_search_desc if $DEBUG;
+    warn "QUERY DESC:".$query_desc if $DEBUG;
      warn "LIMIT:".$limit if $DEBUG;
      warn "LIMIT CGI:".$limit_cgi if $DEBUG;
      warn "LIMIT DESC:".$limit_desc if $DEBUG;
  
-       return ( undef, $query,$simple_query,$query_cgi,$query_search_desc,$limit,$limit_cgi,$limit_desc );
+       return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type );
  }
  
  # IMO this subroutine is pretty messy still -- it's responsible for
@@ -1129,7 +1145,7 @@ sub NZgetRecords {
  =head2 NZanalyse
  
    NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
-  the list is builded from inverted index in nozebra SQL table
+  the list is built from an inverted index in the nozebra SQL table
    note that title is here only for convenience : the sorting will be very fast when requested on title
    if the sorting is requested on something else, we will have to reread all results, and that may be longer.
  
@@ -1232,7 +1248,7 @@ sub NZanalyse {
                  my ($biblionumbers,$value);
                  next unless $_;
                  warn "EXECUTE : $server, $left, $_";
-                $sth->execute($server, $left, $_);
+                $sth->execute($server, $left, $_) or warn "execute failed: $!";
                  while (my ($line,$value) = $sth->fetchrow) {
                      # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
                      # otherwise, fill the result