# set the version for version checking
$VERSION = 3.00;
+# debug tracing (the "warn ... if $DEBUG" statements below) is opt-in:
+# set DEBUG to a true value in the environment to enable it
+$DEBUG = ($ENV{DEBUG}) ? 1 : 0;
=head1 NAME
# performs the search
sub getRecords {
my (
- $koha_query, $federated_query, $sort_by_ref,
+ $koha_query, $simple_query, $sort_by_ref,
$servers_ref, $results_per_page, $offset,
$expanded_facet, $branches, $query_type,
$scan
$query_to_use = $koha_query;
}
else {
- $query_to_use = $federated_query;
+ $query_to_use = $simple_query;
}
+ $query_to_use = $simple_query if $scan;
+
# check if we've got a query_type defined
eval {
if ($query_type)
}
else {
if ($scan) {
-
- # warn "preparing to scan";
+ # warn "preparing to scan:$query_to_use";
$results[$i] =
$zconns[$i]->scan(
new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
);
}
else {
-
# warn "LAST : $query_to_use";
$results[$i] =
$zconns[$i]->search(
## This is just an index scan
if ($scan) {
my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
-
# here we create a minimal MARC record and hand it off to the
# template just like a normal result ... perhaps not ideal, but
# it works for now
$tmprecord->encoding('UTF-8');
my $tmptitle;
- # srote the minimal record in author/title (depending on MARC flavour)
+ # store the minimal record in author/title (depending on MARC flavour)
if ( C4::Context->preference("marcflavour") eq
"UNIMARC" )
{
# STOPWORDS
+# _remove_stopwords($operand, $index)
+# Strips the configured stopwords (C4::Context->stopwords) from a search operand.
+#   $operand - the search string to clean
+#   $index   - the index the operand is searched on; when it matches phr or ext
+#              the operand is returned unchanged
+# Returns a two-element list: the cleaned operand and an arrayref holding the
+# stopwords that were removed (empty when nothing was removed).
sub _remove_stopwords {
my ($operand,$index) = @_;
- # phrase and exact-qualified indexes shoudln't have stopwords removed
+ my @stopwords_removed;
+ # phrase and exact-qualified indexes shouldn't have stopwords removed
if ($index!~m/phr|ext/){
# remove stopwords from operand : parse all stopwords & remove them (case insensitive)
# we use IsAlpha unicode definition, to deal correctly with diacritics.
- # otherwise, a french word like "leçon" woudl be split into "le" "çon", le
- # is an empty word, we get "çon" and wouldn't find anything...
+ # otherwise, a French word like "leçon" would be split into "le" "çon"; "le"
+ # is a stopword, so we'd get "çon" and wouldn't find anything...
foreach (keys %{C4::Context->stopwords}) {
- next if ($_ =~/(and|or|not)/); # don't remove operators
- $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i;
- $operand=~ s/^$_\P{IsAlpha}/ /i;
- $operand=~ s/\P{IsAlpha}$_$/ /i;
+ # don't remove operators; the test is anchored and case insensitive so that
+ # only the operators themselves are skipped, not every stopword that merely
+ # contains "and", "or" or "not"
+ next if ($_ =~/^(?:and|or|not)$/i);
+ # stopwords are literal text: escape them before interpolating into a regex
+ my $stopword = quotemeta($_);
+ # this test must be case insensitive, like the substitutions it guards,
+ # otherwise a stopword typed in a different case would be silently kept
+ if ($operand =~ /(\P{IsAlpha}$stopword\P{IsAlpha}|^$stopword\P{IsAlpha}|\P{IsAlpha}$stopword$)/i) {
+ $operand=~ s/\P{IsAlpha}$stopword\P{IsAlpha}/ /gi;
+ $operand=~ s/^$stopword\P{IsAlpha}/ /gi;
+ $operand=~ s/\P{IsAlpha}$stopword$/ /gi;
+ push @stopwords_removed, $_;
+ }
}
}
- return $operand;
+ return ($operand, \@stopwords_removed);
}
# TRUNCATION
my @limits = @$limits if $limits;
my @sort_by = @$sort_by if $sort_by;
- my $stemming = C4::Context->preference("QueryStemming") || 0;
+ my $stemming = C4::Context->preference("QueryStemming") || 0;
+ my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
+ my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
+ my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
+ my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
- # only turn on field weighting in simple searches
- my $weight_fields;
- # if (@operands==1) {
- $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
- #}
- my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
+ my $query = $operands[0];
+ my $simple_query = $operands[0];
+ my $query_cgi;
+ my $query_desc;
+ my $query_type;
- my $human_search_desc; # a human-readable query
- my $machine_search_desc; #a machine-readable query
+ my $limit;
+ my $limit_cgi;
+ my $limit_desc;
- my $query = $operands[0];
-# STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
-# pass it off to zebra directly)
+ my $stopwords_removed;
-# check if this is a known query language query, if it is, return immediately,
-# the user is responsible for constructing valid syntax:
+ # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
+ # DIAGNOSTIC ONLY!!
if ( $query =~ /^ccl=/ ) {
- return ( undef, $', $', $', 'ccl' );
+ return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
}
if ( $query =~ /^cql=/ ) {
- return ( undef, $', $', $', 'cql' );
+ return ( undef, $', $', $', $', '', '', '', '', 'cql' );
}
if ( $query =~ /^pqf=/ ) {
- return ( undef, $', $', $', 'pqf' );
+ return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
}
- if ( $query =~ /(\(|\)|:|=)/ ) { # sorry, too complex, assume CCL
- return ( undef, $query, $query, $query, 'ccl' );
+
+ # pass nested queries directly
+ if ( $query =~ /(\(|\))/ ) {
+ return ( undef, $query, $simple_query, $query_cgi, $query, $limit, $limit_cgi, $limit_desc, $stopwords_removed, 'ccl' );
}
# form-based queries are limited to non-nested at a specific depth, so we can easily
# COMBINE OPERANDS, INDEXES AND OPERATORS
if ( $operands[$i] ) {
+
+ # a flag to determine whether or not to add the index to the query
+ my $indexes_set;
+ # if the user is sophisticated enough to specify an index, turn off some defaults
+ if ($operands[$i] =~ /(:|=)/) {
+ $weight_fields = 0;
+ $stemming = 0;
+ $remove_stopwords = 0;
+ }
my $operand = $operands[$i];
my $index = $indexes[$i];
- # if there's no index, don't use one, it will throw a CCL error
+ # some helpful index modifs
my $index_plus = "$index:" if $index;
my $index_plus_comma="$index," if $index;
- # Remove Stopwords
- $operand = _remove_stopwords($operand,$index);
- warn "OPERAND w/out STOPWORDS: >$operand<";
-
- my $indexes_set;
+ # Remove Stopwords
+ if ($remove_stopwords) {
+ ($operand, $stopwords_removed) = _remove_stopwords($operand,$index);
+ warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
+ warn "REMOVED STOPWORDS: @$stopwords_removed" if ($stopwords_removed && $DEBUG);
+ }
# Detect Truncation
my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
my $truncated_operand;
($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index);
- warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<";
+ warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG;
+
# Apply Truncation
- # Problem is when build_weights gets ahold if this is wraps in quotes which breaks the truncation :/
if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
+ # don't field weight or add the index to the query, we do it here
$indexes_set = 1;
undef $weight_fields;
my $previous_truncation_operand;
}
}
$operand = $truncated_operand if $truncated_operand;
- warn "TRUNCATED OPERAND: >$truncated_operand<";
+ warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
# Handle Stemming
my $stemmed_operand;
$stemmed_operand = _build_stemmed_operand($operand) if $stemming;
- warn "STEMMED OPERAND: >$stemmed_operand<";
+ warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
# Handle Field Weighting
my $weighted_operand;
$weighted_operand = _build_weighted_query($operand,$stemmed_operand,$index) if $weight_fields;
- warn "FIELD WEIGHTED OPERAND: >$weighted_operand<";
+ warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
$operand = $weighted_operand if $weight_fields;
$indexes_set = 1 if $weight_fields;
# user-specified operator
if ( $operators[$i-1] ) {
- $human_search_desc .=" $operators[$i-1] $index_plus $operands[$i]";
$query .= " $operators[$i-1] ";
$query .= " $index_plus " unless $indexes_set;
$query .= " $operand";
+ $query_cgi .="&op=$operators[$i-1]";
+ $query_cgi .="&idx=$index" if $index;
+ $query_cgi .="&q=$operands[$i]" if $operands[$i];
+ $query_desc .=" $operators[$i-1] $index_plus $operands[$i]";
}
# the default operator is and
$query .= " and ";
$query .= "$index_plus " unless $indexes_set;
$query .= "$operand";
- $human_search_desc .= " and $index_plus $operands[$i]";
+ $query_cgi .="&op=and&idx=$index" if $index;
+ $query_cgi .="&q=$operands[$i]" if $operands[$i];
+ $query_desc .= " and $index_plus $operands[$i]";
}
}
- # There's no previous operand - FIXME: completely ignoring our $query, no field weighting, no stemming
- # FIXME: also, doesn't preserve original order
+ # there isn't a previous operand, so we don't need an operator
else {
- # if there are terms to fit with truncation
-# if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
- # # add the non-truncated ones first
- # $query.= "$index_plus @$nontruncated " if (scalar(@$nontruncated)>0);
- # if (scalar(@$righttruncated)>0){
- # $query .= "and $index_plus_comma"."rtrn:@$righttruncated ";
- # }
- # if (scalar(@$lefttruncated)>0){
- # $query .= "and $index_plus_comma"."ltrn:@$lefttruncated ";
- # }
- # if (scalar(@$rightlefttruncated)>0){
- # $query .= "and $index_plus_comma"."rltrn:@$rightlefttruncated ";
- # }
- # $human_search_desc .= $query;
- # } else {
- # field-weighted queries already have indexes set
- $query.=" $index_plus " unless $indexes_set;
- $query .= $operand;
- $human_search_desc .= " $index_plus $operands[$i]";
- # }
+ # field-weighted queries already have indexes set
+ $query .=" $index_plus " unless $indexes_set;
+ $query .= $operand;
+ $query_desc .= " $index_plus $operands[$i]";
+ $query_cgi.="&idx=$index" if $index;
+ $query_cgi.="&q=$operands[$i]" if $operands[$i];
+
$previous_operand = 1;
}
} #/if $operands
} # /for
}
- warn "QUERY BEFORE LIMITS: >$query<";
- # add limits
- my $limit_query;
- my $limit_search_desc;
- foreach my $limit (@limits) {
-
- # FIXME: not quite right yet ... will work on this soon -- JF
- my $type = $1 if $limit =~ m/([^:]+):([^:]*)/;
- if ( $limit =~ /available/ ) {
- $limit_query .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
- #$limit_search_desc.=" and available";
- }
- elsif ( ($limit_query) && ( index( $limit_query, $type, 0 ) > 0 ) ) {
- if ( $limit_query !~ /\(/ ) {
- $limit_query =
- substr( $limit_query, 0, index( $limit_query, $type, 0 ) )
- . "("
- . substr( $limit_query, index( $limit_query, $type, 0 ) )
- . " or $limit )"
- if $limit;
- $limit_search_desc =
- substr( $limit_search_desc, 0,
- index( $limit_search_desc, $type, 0 ) )
- . "("
- . substr( $limit_search_desc,
- index( $limit_search_desc, $type, 0 ) )
- . " or $limit )"
- if $limit;
- }
- else {
- chop $limit_query;
- chop $limit_search_desc;
- $limit_query .= " or $limit )" if $limit;
- $limit_search_desc .= " or $limit )" if $limit;
- }
- }
- elsif ( ($limit_query) && ( $limit =~ /mc/ ) ) {
- $limit_query .= " or $limit" if $limit;
- $limit_search_desc .= " or $limit" if $limit;
- }
+ warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
- # these are treated as AND
- elsif ($limit_query) {
- if ($limit =~ /branch/){
- $limit_query .= " ) and ( $limit" if $limit;
- $limit_search_desc .= " ) and ( $limit" if $limit;
- }else{
- $limit_query .= " or $limit" if $limit;
- $limit_search_desc .= " or $limit" if $limit;
- }
+ # add limits
+ my $group_OR_limits;
+ foreach my $this_limit (@limits) {
+ if ( $this_limit =~ /available/ ) {
+ # FIXME: switch to zebra search for null values
+ $limit .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
+ $limit_cgi .= "&limit=available";
+ $limit_desc .="";
}
- # otherwise, there is nothing but the limit
- else {
- $limit_query .= "$limit" if $limit;
- $limit_search_desc .= "$limit" if $limit;
+ # these are treated as OR
+ elsif ( $this_limit =~ /mc/ ) {
+ $group_OR_limits .= " or " if $group_OR_limits;
+ $limit_desc .=" or " if $group_OR_limits;
+ $group_OR_limits .= "$this_limit";
+ $limit_cgi .="&limit=$this_limit";
+ $limit_desc .= "$this_limit";
}
- }
-
- # if there's also a query, we need to AND the limits to it
- if ( ($limit_query) && ($query) ) {
- $limit_query = " and (" . $limit_query . ")";
- $limit_search_desc = " and ($limit_search_desc)" if $limit_search_desc;
+ # regular old limits
+ else {
+ $limit .= " and " if $limit || $query;
+ $limit .= "$this_limit";
+ $limit_cgi .="&limit=$this_limit";
+ $limit_desc .=" and $this_limit";
+ }
}
- #warn "LIMIT: $limit_query";
- $query .= $limit_query;
- $human_search_desc .= $limit_search_desc;
-
- # now normalize the strings
- $query =~ s/ / /g; # remove extra spaces
- $query =~ s/^ //g; # remove any beginning spaces
- $query =~ s/:/=/g; # causes probs for server
- $query =~ s/==/=/g; # remove double == from query
-
- my $federated_query = $human_search_desc;
- $federated_query =~ s/ / /g;
- $federated_query =~ s/^ //g;
- $federated_query =~ s/:/=/g;
- my $federated_query_opensearch = $federated_query;
-
-# my $federated_query_RPN = new ZOOM::Query::CCL2RPN( $query , C4::Context->ZConn('biblioserver'));
-
- $human_search_desc =~ s/ / /g;
- $human_search_desc =~ s/^ //g;
- my $koha_query = $query;
-
- #warn "QUERY:".$koha_query;
- #warn "SEARCHDESC:".$human_search_desc;
- #warn "FEDERATED QUERY:".$federated_query;
- return ( undef, $human_search_desc, $koha_query, $federated_query );
+ if ($group_OR_limits) {
+ $limit.=" and " if ($query || $limit );
+ $limit.="($group_OR_limits)";
+ }
+ # normalize the strings
+ for ($query, $query_desc, $limit, $limit_desc) {
+ $_ =~ s/ / /g; # remove extra spaces
+ $_ =~ s/^ //g; # remove any beginning spaces
+ $_ =~ s/ $//g; # remove any ending spaces
+ $_ =~ s/:/=/g; # causes probs for server
+ $_ =~ s/==/=/g; # remove double == from query
+
+ }
+
+ $query_cgi =~ s/^&//;
+
+ # append the limit to the query
+ $query .= $limit;
+
+ warn "QUERY:".$query if $DEBUG;
+ warn "QUERY CGI:".$query_cgi if $DEBUG;
+ warn "QUERY DESC:".$query_desc if $DEBUG;
+ warn "LIMIT:".$limit if $DEBUG;
+ warn "LIMIT CGI:".$limit_cgi if $DEBUG;
+ warn "LIMIT DESC:".$limit_desc if $DEBUG;
+
+ return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type );
}
# IMO this subroutine is pretty messy still -- it's responsible for
NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
=cut
-
+# Only $query, $sort_by_ref, $results_per_page and $offset are used here; the
+# remaining parameters are accepted so the signature matches getRecords.
+# Returns the list (undef, <whatever NZorder returns>, undef).
sub NZgetRecords {
- my (
- $koha_query, $federated_query, $sort_by_ref,
- $servers_ref, $results_per_page, $offset,
- $expanded_facet, $branches, $query_type,
- $scan
- ) = @_;
- my $result = NZanalyse($koha_query);
+ my ($query,$simple_query,$sort_by_ref,$servers_ref,$results_per_page,$offset,$expanded_facet,$branches,$query_type,$scan) = @_;
+ my $result = NZanalyse($query);
- return (undef,NZorder($result,@$sort_by_ref[0],$results_per_page,$offset),undef);
+ # only the first sort criterion is passed on; read it as a scalar element
+ # instead of through a one-element array slice
+ return (undef,NZorder($result,$sort_by_ref->[0],$results_per_page,$offset),undef);
}
=head2 NZanalyse
NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
- the list is builded from inverted index in nozebra SQL table
+ the list is built from an inverted index in the nozebra SQL table
note that title is here only for convenience : the sorting will be very fast when requested on title
if the sorting is requested on something else, we will have to reread all results, and that may be longer.
# $server contains biblioserver or authorities, depending on what we search on.
#warn "querying : $string on $server";
$server='biblioserver' unless $server;
+
# if we have a ", replace the content to discard temporarily any and/or/not inside
my $commacontent;
if ($string =~/"/) {
$string =~ s/"(.*?)"/__X__/;
$commacontent = $1;
-# print "commacontent : $commacontent\n";
+ warn "commacontent : $commacontent" if $DEBUG;
}
# split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
# then, call again NZanalyse with $left and $right
$string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/;
my $left = $1;
my $right = $3;
- my $operand = lc($2);
+ my $operand = lc($2); # FIXME: and/or/not are operators, not operands
# it's not a leaf, we have a and/or/not
if ($operand) {
# reintroduce comma content if needed
$right =~ s/__X__/"$commacontent"/ if $commacontent;
$left =~ s/__X__/"$commacontent"/ if $commacontent;
-# warn "node : $left / $operand / $right\n";
+ warn "node : $left / $operand / $right\n" if $DEBUG;
my $leftresult = NZanalyse($left,$server);
my $rightresult = NZanalyse($right,$server);
# OK, we have the results for right and left part of the query
} else {
$string =~ s/__X__/"$commacontent"/ if $commacontent;
$string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
-# warn "leaf : $string\n";
+ warn "leaf : $string\n" if $DEBUG;
# parse the string in in operator/operand/value again
$string =~ /(.*)(>=|<=)(.*)/;
my $left = $1;
my ($biblionumbers,$value);
next unless $_;
warn "EXECUTE : $server, $left, $_";
- $sth->execute($server, $left, $_);
+ $sth->execute($server, $left, $_) or warn "execute failed: $!";
while (my ($line,$value) = $sth->fetchrow) {
# if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
# otherwise, fill the result
my $cleaned = $entry;
$cleaned =~ s/-\d*$//;
# if the entry already in the hash, take it & increase weight
-# warn "===== $cleaned =====";
+ warn "===== $cleaned =====" if $DEBUG;
if ($results =~ "$cleaned") {
$temp .= "$entry;$entry;";
-# warn "INCLUDING $entry";
+ warn "INCLUDING $entry" if $DEBUG;
}
}
$results = $temp;
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$string) {
next if C4::Context->stopwords->{uc($_)}; # skip if stopword
- #warn "search on all indexes on $_";
+ warn "search on all indexes on $_" if $DEBUG;
my $biblionumbers;
next unless $_;
$sth->execute($server, $_);
}
# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
if ($results) {
-# warn "RES for $_ = $biblionumbers";
+ warn "RES for $_ = $biblionumbers" if $DEBUG;
my @leftresult = split /;/, $biblionumbers;
my $temp;
foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
my $cleaned = $entry;
$cleaned =~ s/-\d*$//;
# if the entry already in the hash, take it & increase weight
-# warn "===== $cleaned =====";
+ warn "===== $cleaned =====" if $DEBUG;
if ($results =~ "$cleaned") {
$temp .= "$entry;$entry;";
-# warn "INCLUDING $entry";
+ warn "INCLUDING $entry" if $DEBUG;
}
}
$results = $temp;
} else {
-# warn "NEW RES for $_ = $biblionumbers";
+ warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
$results = $biblionumbers;
}
}
}
-# warn "return : $results for LEAF : $string";
+ warn "return : $results for LEAF : $string" if $DEBUG;
return $results;
}
}