adding a space between query and limit concat

[koha_fer] / C4 / Search.pm
diff --git a/C4/Search.pm b/C4/Search.pm

index 4177490..c277ff3 100644 (file)
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -27,7 +27,7 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  
  # set the version for version checking
  $VERSION = 3.00;
-$DEBUG=0;
+$DEBUG=1;
  
  =head1 NAME
  
@@ -252,7 +252,7 @@ sub SimpleSearch {
  # performs the search
  sub getRecords {
      my (
-        $koha_query,     $federated_query,  $sort_by_ref,
+        $koha_query,     $simple_query,  $sort_by_ref,
          $servers_ref,    $results_per_page, $offset,
          $expanded_facet, $branches,         $query_type,
          $scan
@@ -284,9 +284,11 @@ sub getRecords {
              $query_to_use = $koha_query;
          }
          else {
-            $query_to_use = $federated_query;
+            $query_to_use = $simple_query;
          }
  
+               $query_to_use = $simple_query if $scan;
+
          # check if we've got a query_type defined
          eval {
              if ($query_type)
@@ -318,15 +320,13 @@ sub getRecords {
              }
              else {
                  if ($scan) {
-
-                    #                 warn "preparing to scan";
+                     #               warn "preparing to scan:$query_to_use";
                      $results[$i] =
                        $zconns[$i]->scan(
                          new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
                        );
                  }
                  else {
-
                      #             warn "LAST : $query_to_use";
                      $results[$i] =
                        $zconns[$i]->search(
@@ -409,7 +409,6 @@ sub getRecords {
                      ## This is just an index scan
                      if ($scan) {
                          my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
-
                   # here we create a minimal MARC record and hand it off to the
                   # template just like a normal result ... perhaps not ideal, but
                   # it works for now
@@ -417,7 +416,7 @@ sub getRecords {
                          $tmprecord->encoding('UTF-8');
                          my $tmptitle;
  
-          # srote the minimal record in author/title (depending on MARC flavour)
+                       # srote the minimal record in author/title (depending on MARC flavour)
                          if ( C4::Context->preference("marcflavour") eq
                              "UNIMARC" )
                          {
@@ -559,20 +558,24 @@ sub getRecords {
  # STOPWORDS
  sub _remove_stopwords {
      my ($operand,$index) = @_;
-    # phrase and exact-qualified indexes shoudln't have stopwords removed
+       my @stopwords_removed;
+    # phrase and exact-qualified indexes shouldn't have stopwords removed
      if ($index!~m/phr|ext/){
      # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
      #       we use IsAlpha unicode definition, to deal correctly with diacritics.
-    #       otherwise, a french word like "leçon" woudl be split into "le" "çon", le 
-    #       is an empty word, we get "çon" and wouldn't find anything...
+    #       otherwise, a French word like "leçon" woudl be split into "le" "çon", le 
+    #       is an empty word, we'd get "çon" and wouldn't find anything...
          foreach (keys %{C4::Context->stopwords}) {
-            next if ($_ =~/(and|or|not)/); # don't remove operators 
-            $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i;
-            $operand=~ s/^$_\P{IsAlpha}/ /i;
-            $operand=~ s/\P{IsAlpha}$_$/ /i;
+            next if ($_ =~/(and|or|not)/); # don't remove operators
+                       if ($operand =~ /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/) {
+               $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
+               $operand=~ s/^$_\P{IsAlpha}/ /gi;
+               $operand=~ s/\P{IsAlpha}$_$/ /gi;
+                               push @stopwords_removed, $_;
+                       }
          }
      }
-    return $operand;
+    return ($operand, \@stopwords_removed);
  }
  
  # TRUNCATION
@@ -674,35 +677,39 @@ sub buildQuery {
      my @limits    = @$limits    if $limits;
      my @sort_by   = @$sort_by   if $sort_by;
  
-    my $stemming      = C4::Context->preference("QueryStemming")     || 0;
+    my $stemming      = C4::Context->preference("QueryStemming")               || 0;
+       my $auto_truncation = C4::Context->preference("QueryAutoTruncate")              || 0;
+    my $weight_fields = C4::Context->preference("QueryWeightFields")           || 0;
+    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")                          || 0;
+       my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords")  || 0;
  
-    # only turn on field weighting in simple searches
-    my $weight_fields;
-   # if (@operands==1) {
-        $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
-    #}
-    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
+    my $query = $operands[0];
+       my $simple_query = $operands[0];
+       my $query_cgi;
+       my $query_desc;
+       my $query_type;
  
-    my $human_search_desc;      # a human-readable query
-    my $machine_search_desc;    #a machine-readable query
+       my $limit;
+       my $limit_cgi;
+       my $limit_desc;
  
-    my $query = $operands[0];
-# STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
-# pass it off to zebra directly)
+       my $stopwords_removed;
  
-# check if this is a known query language query, if it is, return immediately,
-# the user is responsible for constructing valid syntax:
+       # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
+       # DIAGNOSTIC ONLY!!
      if ( $query =~ /^ccl=/ ) {
-        return ( undef, $', $', $', 'ccl' );
+        return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
      }
      if ( $query =~ /^cql=/ ) {
-        return ( undef, $', $', $', 'cql' );
+        return ( undef, $', $', $', $', '', '', '', '', 'cql' );
      }
      if ( $query =~ /^pqf=/ ) {
-        return ( undef, $', $', $', 'pqf' );
+        return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
      }
-    if ( $query =~ /(\(|\)|:|=)/ ) {    # sorry, too complex, assume CCL
-        return ( undef, $query, $query, $query, 'ccl' );
+
+       # pass nested queries directly
+    if ( $query =~ /(\(|\))/ ) {
+        return ( undef, $query, $simple_query, $query_cgi, $query, $limit, $limit_cgi, $limit_desc, $stopwords_removed, 'ccl' );
      }
  
  # form-based queries are limited to non-nested at a specific depth, so we can easily
@@ -718,27 +725,38 @@ sub buildQuery {
  
              # COMBINE OPERANDS, INDEXES AND OPERATORS
              if ( $operands[$i] ) {
+
+                               # a flag to determine whether or not to add the index to the query
+                               my $indexes_set;
+                               # if the user is sophisticated enough to specify an index, turn off some defaults
+                               if ($operands[$i] =~ /(:|=)/) {
+                                       $weight_fields = 0;
+                                       $stemming = 0;
+                                       $remove_stopwords = 0;
+                               }
                  my $operand = $operands[$i];
                  my $index   = $indexes[$i];
  
-                # if there's no index, don't use one, it will throw a CCL error
+                               # some helpful index modifs
                  my $index_plus = "$index:" if $index;
                  my $index_plus_comma="$index," if $index;
  
-                # Remove Stopwords  
-                $operand = _remove_stopwords($operand,$index);
-                warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
-
-                my $indexes_set;
+                # Remove Stopwords
+                               if ($remove_stopwords) {
+                ($operand, $stopwords_removed) = _remove_stopwords($operand,$index);
+                       warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
+                                       warn "REMOVED STOPWORDS: @$stopwords_removed" if ($stopwords_removed && $DEBUG);
+                               }
  
                  # Detect Truncation
                  my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
                  my $truncated_operand;
                  ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index);
                  warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG;
+
                  # Apply Truncation
-                # Problem is when build_weights gets ahold if this is wraps in quotes which breaks the truncation :/
                  if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
+                                       # don't field weight or add the index to the query, we do it here
                      $indexes_set = 1;
                      undef $weight_fields;
                      my $previous_truncation_operand;
@@ -782,10 +800,13 @@ sub buildQuery {
  
                      # user-specified operator
                      if ( $operators[$i-1] ) {
-                        $human_search_desc .=" $operators[$i-1] $index_plus $operands[$i]";
                          $query .= " $operators[$i-1] ";
                          $query .= " $index_plus " unless $indexes_set;
                          $query .= " $operand";
+                                               $query_cgi .="&op=$operators[$i-1]";
+                                               $query_cgi .="&idx=$index" if $index;
+                                               $query_cgi .="&q=$operands[$i]" if $operands[$i];
+                                               $query_desc .=" $operators[$i-1] $index_plus $operands[$i]";
                      }
  
                      # the default operator is and
@@ -793,129 +814,82 @@ sub buildQuery {
                          $query .= " and ";
                          $query .= "$index_plus " unless $indexes_set;
                          $query .= "$operand";
-                        $human_search_desc .= " and $index_plus $operands[$i]";
+                                               $query_cgi .="&op=and&idx=$index" if $index;
+                                               $query_cgi .="&q=$operands[$i]" if $operands[$i];
+                        $query_desc .= " and $index_plus $operands[$i]";
                      }
                  }
  
-                # There's no previous operand - FIXME: completely ignoring our $query, no field weighting, no stemming
-                # FIXME: also, doesn't preserve original order
+                               # there isn't a pervious operand, don't need an operator
                  else { 
-                    # if there are terms to fit with truncation
-#                    if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
- #                       # add the non-truncated ones first
-  #                      $query.= "$index_plus @$nontruncated " if (scalar(@$nontruncated)>0);
-   #                     if (scalar(@$righttruncated)>0){
-    #                        $query .= "and $index_plus_comma"."rtrn:@$righttruncated ";
-     #                   }            
-      #                  if (scalar(@$lefttruncated)>0){
-       #                     $query .= "and $index_plus_comma"."ltrn:@$lefttruncated ";
-        #                }            
-         #               if (scalar(@$rightlefttruncated)>0){
-          #                  $query .= "and $index_plus_comma"."rltrn:@$rightlefttruncated ";
-           #             }
-            #            $human_search_desc .= $query;
-             #       } else {
-                        # field-weighted queries already have indexes set
-                        $query.=" $index_plus " unless $indexes_set;
-                        $query             .= $operand;
-                        $human_search_desc .= " $index_plus $operands[$i]";
-              #      }            
+                                       # field-weighted queries already have indexes set
+                                       $query .=" $index_plus " unless $indexes_set;
+                                       $query .= $operand;
+                                       $query_desc .= " $index_plus $operands[$i]";
+                                       $query_cgi.="&idx=$index" if $index;
+                                       $query_cgi.="&q=$operands[$i]" if $operands[$i];
+
                      $previous_operand = 1;
                  }
              }    #/if $operands
          }    # /for
      }
      warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
-    # add limits
-    my $limit_query;
-    my $limit_search_desc;
-    foreach my $limit (@limits) {
-
-        # FIXME: not quite right yet ... will work on this soon -- JF
-        my $type = $1 if $limit =~ m/([^:]+):([^:]*)/;
-        if ( $limit =~ /available/ ) {
-            $limit_query .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
-            #$limit_search_desc.=" and available";
-        }
-        elsif ( ($limit_query) && ( index( $limit_query, $type, 0 ) > 0 ) ) {
-            if ( $limit_query !~ /\(/ ) {
-                $limit_query =
-                    substr( $limit_query, 0, index( $limit_query, $type, 0 ) )
-                  . "("
-                  . substr( $limit_query, index( $limit_query, $type, 0 ) )
-                  . " or $limit )"
-                  if $limit;
-                $limit_search_desc =
-                  substr( $limit_search_desc, 0,
-                    index( $limit_search_desc, $type, 0 ) )
-                  . "("
-                  . substr( $limit_search_desc,
-                    index( $limit_search_desc, $type, 0 ) )
-                  . " or $limit )"
-                  if $limit;
-            }
-            else {
-                chop $limit_query;
-                chop $limit_search_desc;
-                $limit_query       .= " or $limit )" if $limit;
-                $limit_search_desc .= " or $limit )" if $limit;
-            }
-        }
-        elsif ( ($limit_query) && ( $limit =~ /mc/ ) ) {
-            $limit_query       .= " or $limit" if $limit;
-            $limit_search_desc .= " or $limit" if $limit;
-        }
  
-        # these are treated as AND
-        elsif ($limit_query) {
-           if ($limit =~ /branch/){
-                $limit_query       .= " ) and ( $limit" if $limit;
-            $limit_search_desc .= " ) and ( $limit" if $limit;
-        }else{
-            $limit_query       .= " or $limit" if $limit;
-                    $limit_search_desc .= " or $limit" if $limit;
-        }
+    # add limits
+       my $group_OR_limits;
+    foreach my $this_limit (@limits) {
+        if ( $this_limit =~ /available/ ) {
+                       # FIXME: switch to zebra search for null values
+            $limit .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
+                       $limit_cgi .= "&limit=available";
+                       $limit_desc .="";
          }
  
-        # otherwise, there is nothing but the limit
-        else {
-            $limit_query       .= "$limit" if $limit;
-            $limit_search_desc .= "$limit" if $limit;
+               # these are treated as OR
+        elsif ( $this_limit =~ /mc/ ) {
+            $group_OR_limits .= " or " if $group_OR_limits;
+                       $limit_desc .=" or " if $group_OR_limits;
+                       $group_OR_limits .= "$this_limit";
+                       $limit_cgi .="&limit=$this_limit";
+                       $limit_desc .= "$this_limit";
          }
-    }
-
-    # if there's also a query, we need to AND the limits to it
-    if ( ($limit_query) && ($query) ) {
-        $limit_query       = " and (" . $limit_query . ")";
-        $limit_search_desc = " and ($limit_search_desc)" if $limit_search_desc;
  
+               # regular old limits
+               else {
+                       $limit .= " and " if $limit || $query;
+                       $limit .= "$this_limit";
+                       $limit_cgi .="&limit=$this_limit";
+                       $limit_desc .=" and $this_limit";
+               }
      }
-    #warn "LIMIT: $limit_query";
-    $query             .= $limit_query;
-    $human_search_desc .= $limit_search_desc;
-
-    # now normalize the strings
-    $query =~ s/  / /g;    # remove extra spaces
-    $query =~ s/^ //g;     # remove any beginning spaces
-    $query =~ s/:/=/g;     # causes probs for server
-    $query =~ s/==/=/g;    # remove double == from query
-
-    my $federated_query = $human_search_desc;
-    $federated_query =~ s/  / /g;
-    $federated_query =~ s/^ //g;
-    $federated_query =~ s/:/=/g;
-    my $federated_query_opensearch = $federated_query;
-
-#     my $federated_query_RPN = new ZOOM::Query::CCL2RPN( $query , C4::Context->ZConn('biblioserver'));
-
-    $human_search_desc =~ s/  / /g;
-    $human_search_desc =~ s/^ //g;
-    my $koha_query = $query;
-
-    warn "QUERY:".$koha_query if $DEBUG;
-    warn "SEARCHDESC:".$human_search_desc if $DEBUG;
-    warn "FEDERATED QUERY:".$federated_query if $DEBUG;
-    return ( undef, $human_search_desc, $koha_query, $federated_query );
+       if ($group_OR_limits) {
+               $limit.=" and " if ($query || $limit );
+               $limit.="($group_OR_limits)";
+       }
+       # normalize the strings
+       for ($query, $query_desc, $limit, $limit_desc) {
+               $_ =~ s/  / /g;    # remove extra spaces
+       $_ =~ s/^ //g;     # remove any beginning spaces
+               $_ =~ s/ $//g;     # remove any ending spaces
+       $_ =~ s/:/=/g;     # causes probs for server
+       $_ =~ s/==/=/g;    # remove double == from query
+
+       }
+               
+       $query_cgi =~ s/^&//;
+
+       # append the limit to the query
+       $query .= " ".$limit;
+
+    warn "QUERY:".$query if $DEBUG;
+       warn "QUERY CGI:".$query_cgi if $DEBUG;
+    warn "QUERY DESC:".$query_desc if $DEBUG;
+    warn "LIMIT:".$limit if $DEBUG;
+    warn "LIMIT CGI:".$limit_cgi if $DEBUG;
+    warn "LIMIT DESC:".$limit_desc if $DEBUG;
+
+       return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type );
  }
  
  # IMO this subroutine is pretty messy still -- it's responsible for
@@ -1162,22 +1136,16 @@ sub searchResults {
    NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
  
  =cut
-
  sub NZgetRecords {
-    my (
-        $koha_query,     $federated_query,  $sort_by_ref,
-        $servers_ref,    $results_per_page, $offset,
-        $expanded_facet, $branches,         $query_type,
-        $scan
-    ) = @_;
-    my $result = NZanalyse($koha_query);
+    my ($query,$simple_query,$sort_by_ref,$servers_ref,$results_per_page,$offset,$expanded_facet,$branches,$query_type,$scan) = @_;
+    my $result = NZanalyse($query);
      return (undef,NZorder($result,@$sort_by_ref[0],$results_per_page,$offset),undef);
  }
  
  =head2 NZanalyse
  
    NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
-  the list is builded from inverted index in nozebra SQL table
+  the list is built from an inverted index in the nozebra SQL table
    note that title is here only for convenience : the sorting will be very fast when requested on title
    if the sorting is requested on something else, we will have to reread all results, and that may be longer.
  
@@ -1188,12 +1156,13 @@ sub NZanalyse {
      # $server contains biblioserver or authorities, depending on what we search on.
      #warn "querying : $string on $server";
      $server='biblioserver' unless $server;
+
      # if we have a ", replace the content to discard temporarily any and/or/not inside
      my $commacontent;
      if ($string =~/"/) {
          $string =~ s/"(.*?)"/__X__/;
          $commacontent = $1;
-#         print "commacontent : $commacontent\n";
+               warn "commacontent : $commacontent" if $DEBUG;
      }
      # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
      # then, call again NZanalyse with $left and $right
@@ -1201,13 +1170,13 @@ sub NZanalyse {
      $string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/;
      my $left = $1;
      my $right = $3;
-    my $operand = lc($2);
+    my $operand = lc($2); # FIXME: and/or/not are operators, not operands
      # it's not a leaf, we have a and/or/not
      if ($operand) {
          # reintroduce comma content if needed
          $right =~ s/__X__/"$commacontent"/ if $commacontent;
          $left =~ s/__X__/"$commacontent"/ if $commacontent;
-#         warn "node : $left / $operand / $right\n";
+        warn "node : $left / $operand / $right\n" if $DEBUG;
          my $leftresult = NZanalyse($left,$server);
          my $rightresult = NZanalyse($right,$server);
          # OK, we have the results for right and left part of the query
@@ -1248,7 +1217,7 @@ sub NZanalyse {
      } else {
          $string =~  s/__X__/"$commacontent"/ if $commacontent;
          $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
-#         warn "leaf : $string\n";
+         warn "leaf : $string\n" if $DEBUG;
          # parse the string in in operator/operand/value again
          $string =~ /(.*)(>=|<=)(.*)/;
          my $left = $1;
@@ -1279,7 +1248,7 @@ sub NZanalyse {
                  my ($biblionumbers,$value);
                  next unless $_;
                  warn "EXECUTE : $server, $left, $_";
-                $sth->execute($server, $left, $_);
+                $sth->execute($server, $left, $_) or warn "execute failed: $!";
                  while (my ($line,$value) = $sth->fetchrow) {
                      # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
                      # otherwise, fill the result
@@ -1295,10 +1264,10 @@ sub NZanalyse {
                          my $cleaned = $entry;
                          $cleaned =~ s/-\d*$//;
                          # if the entry already in the hash, take it & increase weight
-#                         warn "===== $cleaned =====";
+                         warn "===== $cleaned =====" if $DEBUG;
                          if ($results =~ "$cleaned") {
                              $temp .= "$entry;$entry;";
-#                             warn "INCLUDING $entry";
+                             warn "INCLUDING $entry" if $DEBUG;
                          }
                      }
                      $results = $temp;
@@ -1313,7 +1282,7 @@ sub NZanalyse {
              # split each word, query the DB and build the biblionumbers result
              foreach (split / /,$string) {
                  next if C4::Context->stopwords->{uc($_)}; # skip if stopword
-                #warn "search on all indexes on $_";
+                warn "search on all indexes on $_" if $DEBUG;
                  my $biblionumbers;
                  next unless $_;
                  $sth->execute($server, $_);
@@ -1322,7 +1291,7 @@ sub NZanalyse {
                  }
                  # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
                  if ($results) {
-#                 warn "RES for $_ = $biblionumbers";
+                 warn "RES for $_ = $biblionumbers" if $DEBUG;
                      my @leftresult = split /;/, $biblionumbers;
                      my $temp;
                      foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
@@ -1330,20 +1299,20 @@ sub NZanalyse {
                          my $cleaned = $entry;
                          $cleaned =~ s/-\d*$//;
                          # if the entry already in the hash, take it & increase weight
-#                         warn "===== $cleaned =====";
+                         warn "===== $cleaned =====" if $DEBUG;
                          if ($results =~ "$cleaned") {
                              $temp .= "$entry;$entry;";
-#                             warn "INCLUDING $entry";
+                             warn "INCLUDING $entry" if $DEBUG;
                          }
                      }
                      $results = $temp;
                  } else {
-#                 warn "NEW RES for $_ = $biblionumbers";
+                 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
                      $results = $biblionumbers;
                  }
              }
          }
-#         warn "return : $results for LEAF : $string";
+         warn "return : $results for LEAF : $string" if $DEBUG;
          return $results;
      }
  }