# set the version for version checking
$VERSION = 3.00;
+$DEBUG=1;
=head1 NAME
$result->{title} =~ s /\"//g;
$result->{title} =~ s /\(//g;
$result->{title} =~ s /\)//g;
- # remove valid operators
- $result->{title} =~ s/(and|or|not)//g;
+ # remove valid operators
+ $result->{title} =~ s/(and|or|not)//g;
$query = "ti,ext=$result->{title}";
$query .= " and mt=$result->{itemtype}" if ($result->{itemtype});
if ($result->{author}){
$result->{author} =~ s /\"//g;
$result->{author} =~ s /\(//g;
$result->{author} =~ s /\)//g;
- # remove valid operators
- $result->{author} =~ s/(and|or|not)//g;
+ # remove valid operators
+ $result->{author} =~ s/(and|or|not)//g;
$query .= " and au,ext=$result->{author}";
}
}
# performs the search
sub getRecords {
my (
- $koha_query, $federated_query, $sort_by_ref,
+ $koha_query, $simple_query, $sort_by_ref,
$servers_ref, $results_per_page, $offset,
$expanded_facet, $branches, $query_type,
$scan
$query_to_use = $koha_query;
}
else {
- $query_to_use = $federated_query;
+ $query_to_use = $simple_query;
}
+ $query_to_use = $simple_query if $scan;
+
# check if we've got a query_type defined
eval {
if ($query_type)
}
else {
if ($scan) {
-
- # warn "preparing to scan";
+ # warn "preparing to scan:$query_to_use";
$results[$i] =
$zconns[$i]->scan(
new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
);
}
else {
-
# warn "LAST : $query_to_use";
$results[$i] =
$zconns[$i]->search(
elsif ($sort eq "author_za") {
$sort_by.="1=1003 >i ";
}
- elsif ($sort eq "popularity_asc") {
- $sort_by.="1=9003 <i ";
- }
- elsif ($sort eq "popularity_dsc") {
+ elsif ($sort eq "popularity_asc") {
+ $sort_by.="1=9003 <i ";
+ }
+ elsif ($sort eq "popularity_dsc") {
$sort_by.="1=9003 >i ";
}
- elsif ($sort eq "call_number_asc") {
+ elsif ($sort eq "call_number_asc") {
$sort_by.="1=20 <i ";
}
- elsif ($sort eq "call_number_dsc") {
+ elsif ($sort eq "call_number_dsc") {
$sort_by.="1=20 >i ";
}
- elsif ($sort eq "pubdate_asc") {
+ elsif ($sort eq "pubdate_asc") {
$sort_by.="1=31 <i ";
}
- elsif ($sort eq "pubdate_dsc") {
+ elsif ($sort eq "pubdate_dsc") {
$sort_by.="1=31 >i ";
}
- elsif ($sort eq "acqdate_asc") {
+ elsif ($sort eq "acqdate_asc") {
$sort_by.="1=32 <i ";
}
- elsif ($sort eq "acqdate_dsc") {
+ elsif ($sort eq "acqdate_dsc") {
$sort_by.="1=32 >i ";
}
- elsif ($sort eq "title_az") {
+ elsif ($sort eq "title_az") {
$sort_by.="1=4 <i ";
}
- elsif ($sort eq "title_za") {
+ elsif ($sort eq "title_za") {
$sort_by.="1=4 >i ";
}
}
- if ($sort_by) {
- if ( $results[$i]->sort( "yaz", $sort_by ) < 0) {
- warn "WARNING sort $sort_by failed";
- }
- }
+ if ($sort_by) {
+ if ( $results[$i]->sort( "yaz", $sort_by ) < 0) {
+ warn "WARNING sort $sort_by failed";
+ }
+ }
}
while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
my $ev = $zconns[ $i - 1 ]->last_event();
## This is just an index scan
if ($scan) {
my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
-
# here we create a minimal MARC record and hand it off to the
# template just like a normal result ... perhaps not ideal, but
# it works for now
$tmprecord->encoding('UTF-8');
my $tmptitle;
- # srote the minimal record in author/title (depending on MARC flavour)
+ # store the minimal record in author/title (depending on MARC flavour)
if ( C4::Context->preference("marcflavour") eq
"UNIMARC" )
{
return ( undef, $results_hashref, \@facets_loop );
}
+# STOPWORDS
sub _remove_stopwords {
- my ($operand,$index) = @_;
- # if the index contains more than one qualifier, but not phrase:
- if ($index!~m/phr|ext/){
- # operand may be a wordlist deleting stopwords
- # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
- # we use IsAlpha unicode definition, to deal correctly with diacritics.
- # otherwise, a french word like "leçon" is splitted in "le" "çon", le is an empty word, we get "çon"
- # and don't find anything...
- foreach (keys %{C4::Context->stopwords}) {
- $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i;
- $operand=~ s/^$_\P{IsAlpha}/ /i;
- $operand=~ s/\P{IsAlpha}$_$/ /i;
-
- }
- }
- return $operand;
+ # Strip stopwords from a search operand.
+ # Params : $operand - the search string; $index - the index/qualifier string it is searched on
+ # Returns: ($operand, \@stopwords_removed) - the cleaned operand and an array
+ # reference listing every stopword that was taken out of it
+ my ($operand,$index) = @_;
+ my @stopwords_removed;
+ # phrase and exact-qualified indexes shouldn't have stopwords removed
+ if ($index!~m/phr|ext/){
+ # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
+ # we use IsAlpha unicode definition, to deal correctly with diacritics.
+ # otherwise, a French word like "leçon" would be split into "le" "çon", le
+ # is an empty word, we'd get "çon" and wouldn't find anything...
+ foreach my $stopword (keys %{C4::Context->stopwords}) {
+ # don't remove operators: compare the whole word, ignoring case, so that
+ # stopwords merely containing and/or/not (e.g. "for") are still removed
+ next if ($stopword =~ /^(?:and|or|not)$/i);
+ # quote the stopword so any regex metacharacter in it is matched literally
+ my $word = quotemeta($stopword);
+ # detect case-insensitively, exactly like the substitutions below
+ if ($operand =~ /(\P{IsAlpha}$word\P{IsAlpha}|^$word\P{IsAlpha}|\P{IsAlpha}$word$)/i) {
+ $operand=~ s/\P{IsAlpha}$word\P{IsAlpha}/ /gi;
+ $operand=~ s/^$word\P{IsAlpha}/ /gi;
+ $operand=~ s/\P{IsAlpha}$word$/ /gi;
+ push @stopwords_removed, $stopword;
+ }
+ }
+ }
+ return ($operand, \@stopwords_removed);
}
-sub _add_truncation {
- my ($operand,$index) = @_;
- my (@nontruncated,@righttruncated,@lefttruncated,@rightlefttruncated,@regexpr);
- # if the index contains more than one qualifier, but not phrase, add truncation qualifiers
- #if (index($index,"phr")<0 && index($index,",")>0){
- # warn "ADDING TRUNCATION QUALIFIERS";
- $operand =~s/^ //g;
- my @wordlist= split (/\s/,$operand);
- foreach my $word (@wordlist){
- if ($word=~s/^\*([^\*]+)\*$/$1/){
- push @rightlefttruncated,$word;
- }
- elsif($word=~s/^\*([^\*]+)$/$1/){
- push @lefttruncated,$word;
-
- }
- elsif ($word=~s/^([^\*]+)\*$/$1/){
- push @righttruncated,$word;
- }
- elsif (index($word,"*")<0){
- push @nontruncated,$word;
- }
- else {
- push @regexpr,$word;
-
- }
- }
- #}
- return (\@nontruncated,\@righttruncated,\@lefttruncated,\@rightlefttruncated,\@regexpr);
+# TRUNCATION: sort each whitespace-separated word of the operand into one of five lists according to where its "*" wildcards sit
+sub _detect_truncation {
+ my ($operand,$index) = @_; # NOTE(review): $index is accepted but never used in this sub
+ my (@nontruncated,@righttruncated,@lefttruncated,@rightlefttruncated,@regexpr);
+ $operand =~s/^ //g;
+ my @wordlist= split (/\s/,$operand);
+ foreach my $word (@wordlist){
+ if ($word=~s/^\*([^\*]+)\*$/$1/){
+ push @rightlefttruncated,$word; # "*word*": both "*" have been stripped by the s/// above
+ }
+ elsif($word=~s/^\*([^\*]+)$/$1/){
+ push @lefttruncated,$word; # "*word": leading "*" stripped
+ }
+ elsif ($word=~s/^([^\*]+)\*$/$1/){
+ push @righttruncated,$word; # "word*": trailing "*" stripped
+ }
+ elsif (index($word,"*")<0){
+ push @nontruncated,$word; # no "*" anywhere in the word
+ }
+ else {
+ push @regexpr,$word; # any other "*" placement; the word is kept unchanged
+ }
+ }
+ return (\@nontruncated,\@righttruncated,\@lefttruncated,\@rightlefttruncated,\@regexpr); # five array references, in this order
}
sub _build_stemmed_operand {
- my ($operand) = @_;
- my $stemmed_operand;
- #$operand =~ s/^(and |or |not )//i;
- # STEMMING FIXME: may need to refine the field weighting so stemmed operands don't
- # disrupt the query ranking, this needs more testing
- # FIXME: the locale should be set based on the user's language and/or search choice
- my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
- # FIXME: these should be stored in the db so the librarian can modify the behavior
- $stemmer->add_exceptions(
- {
- 'and' => 'and',
+ my ($operand) = @_; # $operand: the search words, separated by single spaces
+ my $stemmed_operand; # result string, built up stem by stem in the loop below
+ # FIXME: the locale should be set based on the user's language and/or search choice
+ my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
+ # register and/or/not as stemmer exceptions, each mapped to itself; FIXME: these should be stored in the db so the librarian can modify the behavior
+ $stemmer->add_exceptions(
+ {
+ 'and' => 'and',
'or' => 'or',
'not' => 'not',
- }
+ }
- );
- my @words = split( / /, $operand );
- my $stems = $stemmer->stem(@words);
- foreach my $stem (@$stems) {
- $stemmed_operand .= "$stem";
- $stemmed_operand .= "?" unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
- $stemmed_operand .= " ";
- }
- #warn "STEMMED OPERAND: $stemmed_operand";
- return $stemmed_operand;
+ );
+ my @words = split( / /, $operand );
+ my $stems = $stemmer->stem(@words); # used as an array reference by the loop below
+ for my $stem (@$stems) {
+ $stemmed_operand .= "$stem";
+ $stemmed_operand .= "?" unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 ); # NOTE(review): the patterns are only anchored at the end, so stems such as "for" or "hand" get no "?" either
+ $stemmed_operand .= " ";
+ }
+ #warn "STEMMED OPERAND: $stemmed_operand";
+ return $stemmed_operand; # every stem is followed by one space; stays undef when there were no stems
}
sub _build_weighted_query {
- # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
- # pretty well but will work much better when we have an actual query parser
- my ($operand,$stemmed_operand,$index) = @_;
+ # FIELD WEIGHTING - wraps $operand in a ranked "(rk=(...))" clause whose or-ed alternatives depend on $index.
+ # This is largely experimental stuff; it will work much better when we have an actual query parser
+ my ($operand,$stemmed_operand,$index) = @_;
my $stemming = C4::Context->preference("QueryStemming") || 0;
my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; # NOTE(review): read here but not used again in this sub
my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
- my $weighted_query .= " (rk=("; # Specifies that we're applying rank
- # keyword has different weight properties
- if ( ( $index =~ /kw/ ) || ( !$index ) ) {
- # a simple way to find out if this query uses an index
- if ( $operand =~ /(\=|\:)/ ) {
- $weighted_query .= " $operand";
- }
- else {
- $weighted_query .=" Title-cover,ext,r1=\"$operand\""; # title cover as exact
- $weighted_query .=" or ti,ext,r2=\"$operand\""; # exact title elsewhere
- $weighted_query .= " or ti,phr,r3=\"$operand\""; # index as phrase
- #$weighted_query .= " or any,ext,r4=$operand"; # index as exact
- #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # all the words in the query (wordlist)
- $weighted_query .= " or wrd,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy
- $weighted_query .= " or wrd,right-Truncation,r9=\"$stemmed_operand\"" if ($stemming and $stemmed_operand); # add stemming
- # embedded sorting: 0 a-z; 1 z-a
- #$weighted_query .= ") or (sort1,aut=1";
- }
-
- }
- #TODO: build better cases based on specific search indexes
- #elsif ( $index =~ /au/ ) {
- # $weighted_query .=" $index,ext,r1=$operand"; # index label as exact
- # #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
- # $weighted_query .=" or $index,phr,r3=$operand"; # index as phrase
- # $weighted_query .= " or $index,rt,wrd,r3=$operand";
- #}
- #elsif ( $index =~ /ti/ ) {
- # $weighted_query .=" Title-cover,ext,r1=$operand"; # index label as exact
- # $weighted_query .= " or Title-series,ext,r2=$operand";
- # #$weighted_query .= " or ti,ext,r2=$operand";
- # #$weighted_query .= " or ti,phr,r3=$operand";
- # #$weighted_query .= " or ti,wrd,r3=$operand";
- # $weighted_query .=" or (title-sort-az=0 or Title-cover,startswithnt,st-word,r3=$operand #)";
- # $weighted_query .=" or (title-sort-az=0 or Title-cover,phr,r6=$operand)";
- #$weighted_query .= " or Title-cover,wrd,r5=$operand";
- #$weighted_query .= " or ti,ext,r6=$operand";
- #$weighted_query .= " or ti,startswith,phr,r7=$operand";
- #$weighted_query .= " or ti,phr,r8=$operand";
- #$weighted_query .= " or ti,wrd,r9=$operand";
- #$weighted_query .= " or ti,ext,r2=$operand"; # index as exact
- #$weighted_query .= " or ti,phr,r3=$operand"; # index as phrase
- #$weighted_query .= " or any,ext,r4=$operand"; # index as exact
- #$weighted_query .= " or kw,wrd,r5=$operand"; # index as exact
- #}
- else {
- $weighted_query .=" $index,ext,r1=$operand"; # index label as exact
- #$weighted_query .= " or $index,ext,r2=$operand"; # index as exact
- $weighted_query .=" or $index,phr,r3=$operand"; # index as phrase
- $weighted_query .= " or $index,rt,wrd,r3=$operand";
- $weighted_query .=" or $index,wrd,r5=$operand"; # index as word right-truncated
- $weighted_query .= " or $index,wrd,fuzzy,r8=$operand" if $fuzzy_enabled;
- }
- $weighted_query .= "))"; # close rank specification
- return $weighted_query;
+ my $weighted_query .= "(rk=("; # Specifies that we're applying rank; NOTE(review): ".=" on a freshly declared "my" variable behaves like a plain assignment
+
+ # Keyword index, or no index specified: exact/phrase title alternatives plus the optional fuzzy and stemmed ones
+ if ( ( $index eq 'kw' ) || ( !$index ) ) {
+ $weighted_query .= "Title-cover,ext,r1=\"$operand\""; # exact title-cover
+ $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
+ $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
+ #$weighted_query .= " or any,ext,r4=$operand"; # exact any
+ #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
+ $weighted_query .= " or wrd,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy, word list
+ $weighted_query .= " or wrd,right-Truncation,r9=\"$stemmed_operand\"" if ($stemming and $stemmed_operand); # add stemming, right truncation
+ # embedded sorting: 0 a-z; 1 z-a
+ # $weighted_query .= ") or (sort1,aut=1";
+ }
+ # if the index already has more than one qualifier, just wrap the operand
+ # in quotes and pass it back
+ elsif ($index =~ ',') {
+ $weighted_query .=" $index=\"$operand\"";
+ }
+ #TODO: build better cases based on specific search indexes
+ else {
+ $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
+ #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
+ $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
+ $weighted_query .= " or $index,rt,wrd,r3=\"$operand\""; # word list index
+ }
+ $weighted_query .= "))"; # close rank specification
+ return $weighted_query;
}
# build the query itself
my @limits = @$limits if $limits;
my @sort_by = @$sort_by if $sort_by;
- my $stemming = C4::Context->preference("QueryStemming") || 0;
- my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
- my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
+ my $stemming = C4::Context->preference("QueryStemming") || 0;
+ my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
+ my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
+ my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
+ my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
+
+ my $query = $operands[0];
+ my $simple_query = $operands[0];
+ my $query_cgi;
+ my $query_desc;
+ my $query_type;
+
+ my $limit;
+ my $limit_cgi;
+ my $limit_desc;
- my $human_search_desc; # a human-readable query
- my $machine_search_desc; #a machine-readable query
- #warn "OPERATORS: >@operators< INDEXES: >@indexes< OPERANDS: >@operands< LIMITS: >@limits< SORTS: >@sort_by<";
- my $query = $operands[0];
-# STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
-# we can't handle field weighting, stemming until a formal query parser is written
+ my $stopwords_removed;
-# check if this is a known query language query, if it is, return immediately,
-# the user is responsible for constructing valid syntax:
+ # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
+ # DIAGNOSTIC ONLY!!
if ( $query =~ /^ccl=/ ) {
- return ( undef, $', $', $', 'ccl' );
+ return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
}
if ( $query =~ /^cql=/ ) {
- return ( undef, $', $', $', 'cql' );
+ return ( undef, $', $', $', $', '', '', '', '', 'cql' );
}
if ( $query =~ /^pqf=/ ) {
- return ( undef, $', $', $', 'pqf' );
+ return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
}
- if ( $query =~ /(\(|\))/ ) { # sorry, too complex, assume CCL
- return ( undef, $query, $query, $query, 'ccl' );
+
+ # pass nested queries directly
+ if ( $query =~ /(\(|\))/ ) {
+ return ( undef, $query, $simple_query, $query_cgi, $query, $limit, $limit_cgi, $limit_desc, $stopwords_removed, 'ccl' );
}
# form-based queries are limited to non-nested at a specific depth, so we can easily
else {
$query = ""; # clear it out so we can populate properly with field-weighted stemmed query
my $previous_operand; # a flag used to keep track if there was a previous query
- # if there was, we can apply the current operator
- # for every operand
+ # if there was, we can apply the current operator
+ # for every operand
for ( my $i = 0 ; $i <= @operands ; $i++ ) {
- # COMBINE OPERANDS, INDEXES AND OPERATORS
- if ( $operands[$i] ) {
- my $operand = $operands[$i];
- my $index = $indexes[$i];
- # if there's no index, don't use one, it will throw a CCL error
- my $index_plus; $index_plus = "$index:" if $index;
- my $index_plus_comma; $index_plus_comma="$index," if $index;
-
- # Remove Stopwords
- $operand = _remove_stopwords($operand,$index);
-
- # Handle Truncation
- my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
- ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _add_truncation($operand,$index);
- warn "TRUNCATION: NON:@$nontruncated RIGHT:@$righttruncated LEFT:@$lefttruncated RIGHTLEFT:@$rightlefttruncated REGEX:@$regexpr";
+ # COMBINE OPERANDS, INDEXES AND OPERATORS
+ if ( $operands[$i] ) {
+
+ # a flag to determine whether or not to add the index to the query
+ my $indexes_set;
+ # if the user is sophisticated enough to specify an index, turn off some defaults
+ if ($operands[$i] =~ /(:|=)/) {
+ $weight_fields = 0;
+ $stemming = 0;
+ $remove_stopwords = 0;
+ }
+ my $operand = $operands[$i];
+ my $index = $indexes[$i];
+
+ # some helpful index modifs
+ my $index_plus = "$index:" if $index;
+ my $index_plus_comma="$index," if $index;
+
+ # Remove Stopwords
+ if ($remove_stopwords) {
+ ($operand, $stopwords_removed) = _remove_stopwords($operand,$index);
+ warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
+ warn "REMOVED STOPWORDS: @$stopwords_removed" if ($stopwords_removed && $DEBUG);
+ }
+
+ # Detect Truncation
+ my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
+ my $truncated_operand;
+ ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index);
+ warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG;
+
+ # Apply Truncation
+ if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
+ # don't field weight or add the index to the query, we do it here
+ $indexes_set = 1;
+ undef $weight_fields;
+ my $previous_truncation_operand;
+ if (scalar(@$nontruncated)>0) {
+ $truncated_operand.= "$index_plus @$nontruncated ";
+ $previous_truncation_operand = 1;
+ }
+ if (scalar(@$righttruncated)>0){
+ $truncated_operand .= "and " if $previous_truncation_operand;
+ $truncated_operand .= "$index_plus_comma"."rtrn:@$righttruncated ";
+ $previous_truncation_operand = 1;
+ }
+ if (scalar(@$lefttruncated)>0){
+ $truncated_operand .= "and " if $previous_truncation_operand;
+ $truncated_operand .= "$index_plus_comma"."ltrn:@$lefttruncated ";
+ $previous_truncation_operand = 1;
+ }
+ if (scalar(@$rightlefttruncated)>0){
+ $truncated_operand .= "and " if $previous_truncation_operand;
+ $truncated_operand .= "$index_plus_comma"."rltrn:@$rightlefttruncated ";
+ $previous_truncation_operand = 1;
+ }
+ }
+ $operand = $truncated_operand if $truncated_operand;
+ warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
- # Handle Stemming
- my $stemmed_operand;
- $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
+ # Handle Stemming
+ my $stemmed_operand;
+ $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
+ warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
- # Handle Field Weighting
- my $weighted_operand;
+ # Handle Field Weighting
+ my $weighted_operand;
$weighted_operand = _build_weighted_query($operand,$stemmed_operand,$index) if $weight_fields;
-
- # proves we're operating in multi-leaf mode
- # $weighted_operand = "$weighted_operand and $weighted_operand";
- $operand = $weighted_operand if $weight_fields;
+ warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
+ $operand = $weighted_operand if $weight_fields;
+ $indexes_set = 1 if $weight_fields;
# If there's a previous operand, we need to add an operator
if ($previous_operand) {
- if ( $operators[ $i - 1 ] ) {
- $human_search_desc .=" $operators[$i-1] $index_plus $operands[$i]";
- $query .= " $operators[$i-1] $index_plus $operand";
+
+ # user-specified operator
+ if ( $operators[$i-1] ) {
+ $query .= " $operators[$i-1] ";
+ $query .= " $index_plus " unless $indexes_set;
+ $query .= " $operand";
+ $query_cgi .="&op=$operators[$i-1]";
+ $query_cgi .="&idx=$index" if $index;
+ $query_cgi .="&q=$operands[$i]" if $operands[$i];
+ $query_desc .=" $operators[$i-1] $index_plus $operands[$i]";
}
+
# the default operator is and
else {
- $query .= " and $index_plus $operand";
- $human_search_desc .= " and $index_plus $operands[$i]";
+ $query .= " and ";
+ $query .= "$index_plus " unless $indexes_set;
+ $query .= "$operand";
+ $query_cgi .="&op=and&idx=$index" if $index;
+ $query_cgi .="&q=$operands[$i]" if $operands[$i];
+ $query_desc .= " and $index_plus $operands[$i]";
}
}
- # There's no previous operand - FIXME: completely ignoring our $query, no field weighting, no stemming
- # FIXME: also, doesn't preserve original order
+
+ # there isn't a previous operand, so we don't need an operator
else {
- # if there are terms to fit with truncation
- if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
- # add the non-truncated ones first
- $query.= "$index_plus @$nontruncated " if (scalar(@$nontruncated)>0);
- if (scalar(@$righttruncated)>0){
- $query .= "and $index_plus_comma"."rtrn:@$righttruncated ";
- }
- if (scalar(@$lefttruncated)>0){
- $query .= "and $index_plus_comma"."ltrn:@$lefttruncated ";
- }
- if (scalar(@$rightlefttruncated)>0){
- $query .= "and $index_plus_comma"."rltrn:@$rightlefttruncated ";
- }
- $query=~s/^and//; # FIXME: this is cheating :-)
- $human_search_desc .= $query;
- } else {
- $query .= " $index_plus $operand";
- $human_search_desc .= " $index_plus $operands[$i]";
- }
+ # field-weighted queries already have indexes set
+ $query .=" $index_plus " unless $indexes_set;
+ $query .= $operand;
+ $query_desc .= " $index_plus $operands[$i]";
+ $query_cgi.="&idx=$index" if $index;
+ $query_cgi.="&q=$operands[$i]" if $operands[$i];
+
$previous_operand = 1;
}
} #/if $operands
} # /for
}
+ warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
# add limits
- my $limit_query;
- my $limit_search_desc;
- foreach my $limit (@limits) {
-
- # FIXME: not quite right yet ... will work on this soon -- JF
- my $type = $1 if $limit =~ m/([^:]+):([^:]*)/;
- if ( $limit =~ /available/ ) {
- $limit_query .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
- #$limit_search_desc.=" and available";
- }
- elsif ( ($limit_query) && ( index( $limit_query, $type, 0 ) > 0 ) ) {
- if ( $limit_query !~ /\(/ ) {
- $limit_query =
- substr( $limit_query, 0, index( $limit_query, $type, 0 ) )
- . "("
- . substr( $limit_query, index( $limit_query, $type, 0 ) )
- . " or $limit )"
- if $limit;
- $limit_search_desc =
- substr( $limit_search_desc, 0,
- index( $limit_search_desc, $type, 0 ) )
- . "("
- . substr( $limit_search_desc,
- index( $limit_search_desc, $type, 0 ) )
- . " or $limit )"
- if $limit;
- }
- else {
- chop $limit_query;
- chop $limit_search_desc;
- $limit_query .= " or $limit )" if $limit;
- $limit_search_desc .= " or $limit )" if $limit;
- }
- }
- elsif ( ($limit_query) && ( $limit =~ /mc/ ) ) {
- $limit_query .= " or $limit" if $limit;
- $limit_search_desc .= " or $limit" if $limit;
+ my $group_OR_limits;
+ foreach my $this_limit (@limits) {
+ if ( $this_limit =~ /available/ ) {
+ # FIXME: switch to zebra search for null values
+ $limit .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
+ $limit_cgi .= "&limit=available";
+ $limit_desc .="";
}
- # these are treated as AND
- elsif ($limit_query) {
- if ($limit =~ /branch/){
- $limit_query .= " ) and ( $limit" if $limit;
- $limit_search_desc .= " ) and ( $limit" if $limit;
- }else{
- $limit_query .= " or $limit" if $limit;
- $limit_search_desc .= " or $limit" if $limit;
- }
+ # these are treated as OR
+ elsif ( $this_limit =~ /mc/ ) {
+ $group_OR_limits .= " or " if $group_OR_limits;
+ $limit_desc .=" or " if $group_OR_limits;
+ $group_OR_limits .= "$this_limit";
+ $limit_cgi .="&limit=$this_limit";
+ $limit_desc .= "$this_limit";
}
- # otherwise, there is nothing but the limit
- else {
- $limit_query .= "$limit" if $limit;
- $limit_search_desc .= "$limit" if $limit;
- }
+ # regular old limits
+ else {
+ $limit .= " and " if $limit || $query;
+ $limit .= "$this_limit";
+ $limit_cgi .="&limit=$this_limit";
+ $limit_desc .=" and $this_limit";
+ }
}
+ if ($group_OR_limits) {
+ $limit.=" and " if ($query || $limit );
+ $limit.="($group_OR_limits)";
+ }
+ # normalize the strings
+ for ($query, $query_desc, $limit, $limit_desc) {
+ $_ =~ s/ / /g; # remove extra spaces
+ $_ =~ s/^ //g; # remove any beginning spaces
+ $_ =~ s/ $//g; # remove any ending spaces
+ $_ =~ s/:/=/g; # causes probs for server
+ $_ =~ s/==/=/g; # remove double == from query
- # if there's also a query, we need to AND the limits to it
- if ( ($limit_query) && ($query) ) {
- $limit_query = " and (" . $limit_query . ")";
- $limit_search_desc = " and ($limit_search_desc)" if $limit_search_desc;
+ }
+
+ $query_cgi =~ s/^&//;
- }
- #warn "LIMIT: $limit_query";
- $query .= $limit_query;
- $human_search_desc .= $limit_search_desc;
-
- # now normalize the strings
- $query =~ s/ / /g; # remove extra spaces
- $query =~ s/^ //g; # remove any beginning spaces
- $query =~ s/:/=/g; # causes probs for server
- $query =~ s/==/=/g; # remove double == from query
-
- my $federated_query = $human_search_desc;
- $federated_query =~ s/ / /g;
- $federated_query =~ s/^ //g;
- $federated_query =~ s/:/=/g;
- my $federated_query_opensearch = $federated_query;
-
-# my $federated_query_RPN = new ZOOM::Query::CCL2RPN( $query , C4::Context->ZConn('biblioserver'));
-
- $human_search_desc =~ s/ / /g;
- $human_search_desc =~ s/^ //g;
- my $koha_query = $query;
-
- #warn "QUERY:".$koha_query;
- #warn "SEARCHDESC:".$human_search_desc;
- #warn "FEDERATED QUERY:".$federated_query;
- return ( undef, $human_search_desc, $koha_query, $federated_query );
+ # append the limit to the query
+ $query .= " ".$limit;
+
+ warn "QUERY:".$query if $DEBUG;
+ warn "QUERY CGI:".$query_cgi if $DEBUG;
+ warn "QUERY DESC:".$query_desc if $DEBUG;
+ warn "LIMIT:".$limit if $DEBUG;
+ warn "LIMIT CGI:".$limit_cgi if $DEBUG;
+ warn "LIMIT DESC:".$limit_desc if $DEBUG;
+
+ return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type );
}
# IMO this subroutine is pretty messy still -- it's responsible for
my $old_term = $term;
if ( length($term) > 3 ) {
$term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\])//g;
- $term =~ s/\\//g;
- $term =~ s/\*//g;
+ $term =~ s/\\//g;
+ $term =~ s/\*//g;
#FIXME: is there a better way to do this?
- $oldbiblio->{'title'} =~ s/$term/<span class=term>$&<\/span>/gi;
+ $oldbiblio->{'title'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
$oldbiblio->{'subtitle'} =~
- s/$term/<span class=term>$&<\/span>/gi;
-
- $oldbiblio->{'author'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'publishercode'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'place'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'pages'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'notes'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'size'} =~ s/$term/<span class=term>$&<\/span>/gi;
+ s/$term/<span class=\"term\">$&<\/span>/gi;
+
+ $oldbiblio->{'author'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'publishercode'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'place'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'pages'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'notes'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'size'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
}
}
NZgetRecords has the same API as zebra getRecords, even if some parameters are not managed
=cut
-
sub NZgetRecords {
- my (
- $koha_query, $federated_query, $sort_by_ref,
- $servers_ref, $results_per_page, $offset,
- $expanded_facet, $branches, $query_type,
- $scan
- ) = @_;
- my $result = NZanalyse($koha_query);
+ my ($query,$simple_query,$sort_by_ref,$servers_ref,$results_per_page,$offset,$expanded_facet,$branches,$query_type,$scan) = @_; # only $query, $sort_by_ref, $results_per_page and $offset are used below
+ my $result = NZanalyse($query); # NZanalyse is called without a server argument here
return (undef,NZorder($result,@$sort_by_ref[0],$results_per_page,$offset),undef); # first and third return slots are always undef; only the first sort key is passed to NZorder
}
=head2 NZanalyse
NZanalyse : takes a CQL string as parameter and returns a list of biblionumber;title,biblionumber;title,...
- the list is builded from inverted index in nozebra SQL table
+ the list is built from an inverted index in the nozebra SQL table
Note that the title is included only for convenience: sorting is very fast when it is requested on title.
If sorting is requested on anything else, all the results have to be re-read, which may take longer.
sub NZanalyse {
my ($string,$server) = @_;
# $server contains biblioserver or authorities, depending on what we search on.
- warn "querying : $string on $server";
+ #warn "querying : $string on $server";
$server='biblioserver' unless $server;
+
# if we have a ", replace the content to discard temporarily any and/or/not inside
my $commacontent;
if ($string =~/"/) {
$string =~ s/"(.*?)"/__X__/;
$commacontent = $1;
-# print "commacontent : $commacontent\n";
+ warn "commacontent : $commacontent" if $DEBUG;
}
# split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
# then, call again NZanalyse with $left and $right
$string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/;
my $left = $1;
my $right = $3;
- my $operand = lc($2);
+ my $operand = lc($2); # FIXME: and/or/not are operators, not operands
# it's not a leaf, we have a and/or/not
if ($operand) {
# reintroduce comma content if needed
$right =~ s/__X__/"$commacontent"/ if $commacontent;
$left =~ s/__X__/"$commacontent"/ if $commacontent;
-# warn "node : $left / $operand / $right\n";
+ warn "node : $left / $operand / $right\n" if $DEBUG;
my $leftresult = NZanalyse($left,$server);
my $rightresult = NZanalyse($right,$server);
# OK, we have the results for right and left part of the query
# it's a leaf, do the real SQL query and return the result
} else {
$string =~ s/__X__/"$commacontent"/ if $commacontent;
- $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\// /g;
-# warn "leaf : $string\n";
+ $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
+ warn "leaf : $string\n" if $DEBUG;
# parse the string into operator/operand/value again
- $string =~ /(.*)(=|>|>=|<|<=)(.*)/;
+ $string =~ /(.*)(>=|<=)(.*)/;
my $left = $1;
my $operator = $2;
my $right = $3;
+ unless ($operator) {
+ $string =~ /(.*)(>|<|=)(.*)/;
+ $left = $1;
+ $operator = $2;
+ $right = $3;
+ }
my $results;
# automatic replace for short operators
$left='title' if $left =~ '^ti';
#do a specific search
my $dbh = C4::Context->dbh;
$operator='LIKE' if $operator eq '=' and $right=~ /%/;
- my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value $operator ?");
- # warn "$left / $operator / $right\n";
+ my $sth = $dbh->prepare("SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?");
+ warn "$left / $operator / $right\n";
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$right) {
- my $biblionumbers;
+ my ($biblionumbers,$value);
next unless $_;
-# warn "EXECUTE : $server, $left, $_";
- $sth->execute($server, $left, $_);
- while (my $line = $sth->fetchrow) {
- $biblionumbers .= $line;
-# warn "result : $line";
+ warn "EXECUTE : $server, $left, $_";
+ $sth->execute($server, $left, $_) or warn "execute failed: $!";
+ while (my ($line,$value) = $sth->fetchrow) {
+ # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
+ # otherwise, fill the result
+ $biblionumbers .= $line unless ($right =~ /\d/ && $value =~ /\D/);
+ warn "result : $value ". ($right =~ /\d/) . "==".(!$value =~ /\d/) ;#= $line";
}
# AND with the existing list if there is one; otherwise, use the biblionumbers list as the first result list
if ($results) {
my $cleaned = $entry;
$cleaned =~ s/-\d*$//;
# if the entry already in the hash, take it & increase weight
-# warn "===== $cleaned =====";
+ warn "===== $cleaned =====" if $DEBUG;
if ($results =~ "$cleaned") {
$temp .= "$entry;$entry;";
-# warn "INCLUDING $entry";
+ warn "INCLUDING $entry" if $DEBUG;
}
}
$results = $temp;
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$string) {
next if C4::Context->stopwords->{uc($_)}; # skip if stopword
- #warn "search on all indexes on $_";
+ warn "search on all indexes on $_" if $DEBUG;
my $biblionumbers;
next unless $_;
$sth->execute($server, $_);
}
# AND with the existing list if there is one; otherwise, use the biblionumbers list as the first result list
if ($results) {
-# warn "RES for $_ = $biblionumbers";
+ warn "RES for $_ = $biblionumbers" if $DEBUG;
my @leftresult = split /;/, $biblionumbers;
my $temp;
foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
my $cleaned = $entry;
$cleaned =~ s/-\d*$//;
# if the entry is already in the result list, keep it and increase its weight (it is appended twice)
-# warn "===== $cleaned =====";
+ warn "===== $cleaned =====" if $DEBUG;
if ($results =~ "$cleaned") {
$temp .= "$entry;$entry;";
-# warn "INCLUDING $entry";
+ warn "INCLUDING $entry" if $DEBUG;
}
}
$results = $temp;
} else {
-# warn "NEW RES for $_ = $biblionumbers";
+ warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
$results = $biblionumbers;
}
}
}
-# warn "return : $results for LEAF : $string";
+ warn "return : $results for LEAF : $string" if $DEBUG;
return $results;
}
}
#
# order by POPULARITY
#
- if ($ordering =~ /1=9523/) {
+ if ($ordering =~ /popularity/) {
my %result;
my %popularity;
# popularity is not in the MARC record; it is built from a specific query
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=9523 >i') { # sort popularity DESC
+ if ($ordering eq 'popularity_dsc') { # sort popularity DESC
foreach my $key (sort {$b cmp $a} (keys %popularity)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc();
}
#
# ORDER BY author
#
- } elsif ($ordering eq '1=1003 <i'){
+ } elsif ($ordering =~/author/){
my %result;
foreach (split /;/,$biblionumbers) {
my ($biblionumber,$title) = split /,/,$_;
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=1003 <i') { # sort by author desc
- foreach my $key (sort (keys %result)) {
+ if ($ordering eq 'author_za') { # sort by author desc
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
} else { # sort by author ASC
- foreach my $key (sort { $a cmp $b } (keys %result)) {
+ foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
}
#
# ORDER BY callnumber
#
- } elsif ($ordering eq '1=20 <i'){
+ } elsif ($ordering =~/callnumber/){
my %result;
foreach (split /;/,$biblionumbers) {
my ($biblionumber,$title) = split /,/,$_;
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=1003 <i') { # sort by title desc
- foreach my $key (sort (keys %result)) {
+ if ($ordering eq 'call_number_dsc') { # sort by call number desc
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
} else { # sort by call number ASC
$result_hash->{'hits'} = $numbers;
$finalresult->{'biblioserver'} = $result_hash;
return $finalresult;
- } elsif ($ordering =~ /1=31/){ #pub year
+ } elsif ($ordering =~ /pubdate/){ #pub year
my %result;
foreach (split /;/,$biblionumbers) {
my ($biblionumber,$title) = split /,/,$_;
my $record=GetMarcBiblio($biblionumber);
- my ($publicationyear_tag,$publicationyear_subfield)=GetMarcFromKohaField($dbh,'biblioitems.publicationyear');
+ my ($publicationyear_tag,$publicationyear_subfield)=GetMarcFromKohaField('biblioitems.publicationyear','');
my $publicationyear=$record->subfield($publicationyear_tag,$publicationyear_subfield);
# hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
# and we don't want to get only 1 result for each of them !!!
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=31 <i') { # sort by pubyear desc
- foreach my $key (sort (keys %result)) {
+ if ($ordering eq 'pubdate_dsc') { # sort by pubyear desc
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
} else { # sort by pub year ASC
- foreach my $key (sort { $b cmp $a } (keys %result)) {
+ foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
}
#
# ORDER BY title
#
- } elsif ($ordering =~ /1=4/) {
+ } elsif ($ordering =~ /title/) {
# the title is in the biblionumbers string, so we just need to build a hash, sort it and return
my %result;
foreach (split /;/,$biblionumbers) {
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=4 <i') { # sort by title desc
+ if ($ordering eq 'title_az') { # sort by title ASC
foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key};
}
# for the requested page, replace biblionumber by the complete record
# speed improvement: avoid reading more records than needed
for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) {
- $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc;
+ $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc if $result_hash->{'RECORDS'}[$counter];
}
my $finalresult=();
$result_hash->{'hits'} = $numbers;