# set the version for version checking
$VERSION = 3.00;
+# Debug tracing is opt-in: run with DEBUG=1 in the environment to enable the
+# "warn ... if $DEBUG" diagnostics in this file.  It is off by default so
+# that ordinary searches do not flood the error log.
+$DEBUG = $ENV{DEBUG} ? 1 : 0;
=head1 NAME
$result->{title} =~ s /\"//g;
$result->{title} =~ s /\(//g;
$result->{title} =~ s /\)//g;
- # remove valid operators
- $result->{title} =~ s/(and|or|not)//g;
+ # remove valid operators
+ $result->{title} =~ s/(and|or|not)//g;
$query = "ti,ext=$result->{title}";
- $query .= " and mt=$result->{itemtype}" if ($result->{itemtype});
+ $query .= " and itemtype=$result->{itemtype}" if ($result->{itemtype});
if ($result->{author}){
$result->{author} =~ s /\\//g;
$result->{author} =~ s /\"//g;
$result->{author} =~ s /\(//g;
$result->{author} =~ s /\)//g;
- # remove valid operators
- $result->{author} =~ s/(and|or|not)//g;
+ # remove valid operators
+ $result->{author} =~ s/(and|or|not)//g;
$query .= " and au,ext=$result->{author}";
}
}
# performs the search
sub getRecords {
my (
- $koha_query, $federated_query, $sort_by_ref,
+ $koha_query, $simple_query, $sort_by_ref,
$servers_ref, $results_per_page, $offset,
$expanded_facet, $branches, $query_type,
$scan
$query_to_use = $koha_query;
}
else {
- $query_to_use = $federated_query;
+ $query_to_use = $simple_query;
}
+ #$query_to_use = $simple_query if $scan;
+ #warn $simple_query if ($scan && $DEBUG);
# check if we've got a query_type defined
eval {
if ($query_type)
}
else {
if ($scan) {
-
- # warn "preparing to scan";
+ # warn "preparing to scan:$query_to_use";
$results[$i] =
$zconns[$i]->scan(
new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
);
}
else {
-
# warn "LAST : $query_to_use";
$results[$i] =
$zconns[$i]->search(
my $sort_by;
foreach my $sort (@sort_by) {
if ($sort eq "author_az") {
- $sort_by.="1=1003 <i ";
+ $sort_by.="1=1003 <i ";
}
elsif ($sort eq "author_za") {
- $sort_by.="1=1003 >i ";
+ $sort_by.="1=1003 >i ";
+ }
+ elsif ($sort eq "popularity_asc") {
+ $sort_by.="1=9003 <i ";
+ }
+ elsif ($sort eq "popularity_dsc") {
+ $sort_by.="1=9003 >i ";
+ }
+ elsif ($sort eq "call_number_asc") {
+ $sort_by.="1=20 <i ";
+ }
+ elsif ($sort eq "call_number_dsc") {
+ $sort_by.="1=20 >i ";
+ }
+ elsif ($sort eq "pubdate_asc") {
+ $sort_by.="1=31 <i ";
+ }
+ elsif ($sort eq "pubdate_dsc") {
+ $sort_by.="1=31 >i ";
+ }
+ elsif ($sort eq "acqdate_asc") {
+ $sort_by.="1=32 <i ";
+ }
+ elsif ($sort eq "acqdate_dsc") {
+ $sort_by.="1=32 >i ";
+ }
+ elsif ($sort eq "title_az") {
+ $sort_by.="1=4 <i ";
+ }
+ elsif ($sort eq "title_za") {
+ $sort_by.="1=4 >i ";
+ }
+ }
+ if ($sort_by) {
+ if ( $results[$i]->sort( "yaz", $sort_by ) < 0) {
+ warn "WARNING sort $sort_by failed";
}
-
- #$sort_by .= $sort . " "; # used to be $sort,
}
- warn "SORTING: $sort_by";
- $results[$i]->sort( "yaz", $sort_by ) if $sort_by;
}
while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
my $ev = $zconns[ $i - 1 ]->last_event();
## This is just an index scan
if ($scan) {
my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
-
# here we create a minimal MARC record and hand it off to the
# template just like a normal result ... perhaps not ideal, but
# it works for now
$tmprecord->encoding('UTF-8');
my $tmptitle;
- # srote the minimal record in author/title (depending on MARC flavour)
+ # store the minimal record in author/title (depending on MARC flavour)
if ( C4::Context->preference("marcflavour") eq
"UNIMARC" )
{
return ( undef, $results_hashref, \@facets_loop );
}
+# STOPWORDS
+sub _remove_stopwords {
+    # Strip stopwords from a search operand.
+    #
+    # Parameters:
+    #   $operand - the search string to clean
+    #   $index   - the index qualifier used with the operand (may be empty)
+    #
+    # Returns a two-element list: the cleaned operand, and a reference to an
+    # array holding every stopword (spelled as it is keyed in
+    # C4::Context->stopwords) that was removed.
+    my ( $operand, $index ) = @_;
+    my @stopwords_removed;
+
+    # phrase and exact-qualified indexes shouldn't have stopwords removed
+    return ( $operand, \@stopwords_removed )
+      if defined $index && $index =~ m/phr|ext/;
+
+    # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
+    # we use IsAlpha unicode definition, to deal correctly with diacritics.
+    # otherwise, a French word like "leçon" would be split into "le" "çon", le
+    # is an empty word, we'd get "çon" and wouldn't find anything...
+    foreach my $stopword ( keys %{ C4::Context->stopwords } ) {
+
+        # Don't remove operators.  Only the exact words and/or/not are
+        # protected (in any case); a stopword that merely contains one of
+        # them, such as "for", is still removed.
+        next if $stopword =~ /^(?:and|or|not)$/i;
+
+        # The stopword is data, not a pattern, hence \Q..\E.  The match is
+        # case-insensitive both when detecting and when substituting: the
+        # stored key may be cased differently from what the user typed
+        # (elsewhere in this file stopwords are looked up via uc()).
+        my $word = qr/\Q$stopword\E/i;
+        if ( $operand =~ /\P{IsAlpha}$word\P{IsAlpha}|^$word\P{IsAlpha}|\P{IsAlpha}$word$/ ) {
+            $operand =~ s/\P{IsAlpha}$word\P{IsAlpha}/ /g;
+            $operand =~ s/^$word\P{IsAlpha}/ /g;
+            $operand =~ s/\P{IsAlpha}$word$/ /g;
+            push @stopwords_removed, $stopword;
+        }
+    }
+    return ( $operand, \@stopwords_removed );
+}
+
+# TRUNCATION
+sub _detect_truncation {
+    # Sort the words of an operand into buckets according to where the user
+    # placed the '*' truncation character.
+    #
+    # Parameters:
+    #   $operand - the search string; words are separated by whitespace
+    #   $index   - accepted for symmetry with the other helpers, not used
+    #
+    # Returns five array references, in this order:
+    #   words without '*', words ending in '*' (right truncation), words
+    #   starting with '*' (left truncation), words wrapped in '*' (both),
+    #   and words with '*' in any other position.
+    # The leading/trailing '*' is stripped from the truncated words; words in
+    # the last bucket are returned unchanged.
+    my ( $operand, $index ) = @_;
+    my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated, @regexpr );
+    $operand = '' unless defined $operand;
+
+    # split ' ' ignores leading whitespace and treats a run of whitespace as
+    # a single separator, so no empty "word" is ever classified (an empty
+    # word would otherwise end up as a blank non-truncated term).
+    foreach my $word ( split ' ', $operand ) {
+        if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
+            push @rightlefttruncated, $word;
+        }
+        elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
+            push @lefttruncated, $word;
+        }
+        elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
+            push @righttruncated, $word;
+        }
+        elsif ( index( $word, "*" ) < 0 ) {
+            push @nontruncated, $word;
+        }
+        else {
+            push @regexpr, $word;
+        }
+    }
+    return ( \@nontruncated, \@righttruncated, \@lefttruncated, \@rightlefttruncated, \@regexpr );
+}
+
+sub _build_stemmed_operand {
+    # Build a stemmed version of a search operand.
+    #
+    # Parameter:
+    #   $operand - the search string; words are separated by single spaces
+    #
+    # Returns a string in which every word is replaced by its stem followed
+    # by a space.  A '?' is appended to each stem, except for the boolean
+    # operators (and/or/not) and for stems shorter than three characters.
+    # Returns the empty string when the operand contains no words.
+    my ($operand) = @_;
+    my $stemmed_operand = '';
+
+    # FIXME: the locale should be set based on the user's language and/or search choice
+    my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
+
+    # FIXME: these should be stored in the db so the librarian can modify the behavior
+    $stemmer->add_exceptions(
+        {
+            'and' => 'and',
+            'or'  => 'or',
+            'not' => 'not',
+        }
+    );
+    my @words = split( / /, $operand );
+    my $stems = $stemmer->stem(@words);
+    for my $stem (@$stems) {
+
+        # Only the operators themselves are exempt from truncation; the
+        # pattern is anchored at both ends so that stems which merely end in
+        # "and", "or" or "not" (e.g. "doctor") still get their '?'.
+        my $is_operator = $stem =~ /^(?:and|or|not)$/;
+        $stemmed_operand .= "$stem";
+        $stemmed_operand .= "?" unless $is_operator || ( length($stem) < 3 );
+        $stemmed_operand .= " ";
+    }
+
+    #warn "STEMMED OPERAND: $stemmed_operand";
+    return $stemmed_operand;
+}
+
+sub _build_weighted_query {
+    # FIELD WEIGHTING - This is largely experimental stuff.  What I'm committing works
+    # pretty well but will work much better when we have an actual query parser
+    #
+    # Wrap an operand in a rank specification, "(rk=( ... ))", whose contents
+    # depend on the index the operand is searched in.
+    #
+    # Parameters:
+    #   $operand         - the search term(s)
+    #   $stemmed_operand - stemmed form of the operand; only used for keyword
+    #                      searches when the QueryStemming preference is set
+    #   $index           - index qualifier; empty or 'kw' means keyword search
+    #
+    # Returns the weighted query string.
+    my ( $operand, $stemmed_operand, $index ) = @_;
+    my $stemming      = C4::Context->preference("QueryStemming") || 0;
+    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")    || 0;
+
+    my $weighted_query = "(rk=(";    # Specifies that we're applying rank
+
+    # Keyword, or, no index specified (emptiness is tested first so an
+    # undefined index is never compared as a string)
+    if ( !$index || $index eq 'kw' ) {
+        $weighted_query .= "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
+        $weighted_query .= " or ti,ext,r2=\"$operand\"";         # exact title
+        $weighted_query .= " or ti,phr,r3=\"$operand\"";         # phrase title
+        #$weighted_query .= " or any,ext,r4=$operand"; # exact any
+        #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
+        $weighted_query .= " or wrd,fuzzy,r8=\"$operand\""
+          if $fuzzy_enabled;                                     # add fuzzy, word list
+        $weighted_query .= " or wrd,right-Truncation,r9=\"$stemmed_operand\""
+          if ( $stemming and $stemmed_operand );                 # add stemming, right truncation
+        # embedded sorting: 0 a-z; 1 z-a
+        # $weighted_query .= ") or (sort1,aut=1";
+    }
+    elsif ( $index eq 'bc' ) {
+        $weighted_query .= "bc=\"$operand\"";
+    }
+
+    # if the index already has more than one qualifier, just wrap the operand
+    # in quotes and pass it back
+    elsif ( index( $index, ',' ) >= 0 ) {
+        $weighted_query .= " $index=\"$operand\"";
+    }
+
+    #TODO: build better cases based on specific search indexes
+    else {
+        $weighted_query .= " $index,ext,r1=\"$operand\"";        # exact index
+        #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
+        $weighted_query .= " or $index,phr,r3=\"$operand\"";     # phrase index
+        $weighted_query .= " or $index,rt,wrd,r3=\"$operand\"";  # word list index
+    }
+    $weighted_query .= "))";    # close rank specification
+    return $weighted_query;
+}
+
# build the query itself
sub buildQuery {
- my ( $query, $operators, $operands, $indexes, $limits, $sort_by ) = @_;
+ my ( $operators, $operands, $indexes, $limits, $sort_by, $scan) = @_;
my @operators = @$operators if $operators;
my @indexes = @$indexes if $indexes;
my @limits = @$limits if $limits;
my @sort_by = @$sort_by if $sort_by;
- my $human_search_desc; # a human-readable query
- my $machine_search_desc; #a machine-readable query
+ my $stemming = C4::Context->preference("QueryStemming") || 0;
+ my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
+ my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
+ my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
+ my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
+
+ my $query = $operands[0];
+ my $simple_query = $operands[0];
+ my $query_cgi;
+ my $query_desc;
+ my $query_type;
+
+ my $limit;
+ my $limit_cgi;
+ my $limit_desc;
-# STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
-# we can't handle field weighting, stemming until a formal query parser is written
-# I'll work on this soon -- JF
-#if (!$query) { # form-based
-# check if this is a known query language query, if it is, return immediately:
+ my $stopwords_removed;
+
+ # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
+ # DIAGNOSTIC ONLY!!
if ( $query =~ /^ccl=/ ) {
- return ( undef, $', $', $', 'ccl' );
+ return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
}
if ( $query =~ /^cql=/ ) {
- return ( undef, $', $', $', 'cql' );
+ return ( undef, $', $', $', $', '', '', '', '', 'cql' );
}
if ( $query =~ /^pqf=/ ) {
- return ( undef, $', $', $', 'pqf' );
+ return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
}
- if ( $query =~ /(\(|\))/ ) { # sorry, too complex
- return ( undef, $query, $query, $query, 'ccl' );
+
+ # pass nested queries directly
+ if ( $query =~ /(\(|\))/ ) {
+ return ( undef, $query, $simple_query, $query_cgi, $query, $limit, $limit_cgi, $limit_desc, $stopwords_removed, 'ccl' );
}
-# form-based queries are limited to non-nested a specific depth, so we can easily
+# form-based queries are limited to non-nested at a specific depth, so we can easily
# modify the incoming query operands and indexes to do stemming and field weighting
# Once we do so, we'll end up with a value in $query, just like if we had an
# incoming $query from the user
else {
- $query = ""
- ; # clear it out so we can populate properly with field-weighted stemmed query
- my $previous_operand
- ; # a flag used to keep track if there was a previous query
- # if there was, we can apply the current operator
+ $query = ""; # clear it out so we can populate properly with field-weighted stemmed query
+ my $previous_operand; # a flag used to keep track if there was a previous query
+ # if there was, we can apply the current operator
+ # for every operand
for ( my $i = 0 ; $i <= @operands ; $i++ ) {
- my $operand = $operands[$i];
- # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
- # we use IsAlpha unicode definition, to deal correctly with diacritics.
- # otherwise, a french word like "leçon" is splitted in "le" "çon", le is an empty word, we get "çon"
- # and don't find anything...
- my $stemmed_operand;
- my $stemming = C4::Context->preference("QueryStemming") || 0;
- my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
- my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
-
- # We Have to do this more carefully.
- #Since Phrase Search Is Phrase search.
- #phrase "Physics In Collision" will not be found if we do it like that.
- my $index = $indexes[$i];
- my (@nontruncated,@righttruncated,@lefttruncated,@rightlefttruncated,@regexpr);
-
- # if the operator contains more than one qualifier, but not phrase
- if (index($index,"phr")<0 && index($index,",")>0){
- #operand may be a wordlist deleting stopwords
- foreach (keys %{C4::Context->stopwords}) {
- $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i;
- $operand=~ s/^$_\P{IsAlpha}/ /i;
- $operand=~ s/\P{IsAlpha}$_$/ /i;
- }
- #now coping with words
- my @wordlist= split (/\s/,$operand);
- foreach my $word (@wordlist){
- if (index($word,"*")==0 && index($word,"*",1)==length($word)-2){
- $word=~s/\*//;
- push @rightlefttruncated,$word;
- } elsif(index($word,"*")==0 && index($word,"*",1)<0){
- $word=~s/\*//;
- push @lefttruncated,$word;
- } elsif (index($word,"*")==length($word)-1){
- $word=~s/\*//;
- push @righttruncated,$word;
- } elsif (index($word,"*")<0){
- push @nontruncated,$word;
- } else {
- push @regexpr,$word;
- }
- }
- }
-
- if ( $operands[$i] ) {
- $operand =~ s/^(and |or |not )//i;
-
-# STEMMING FIXME: need to refine the field weighting so stemmed operands don't disrupt the query ranking
- if ($stemming) {
- # FIXME: the locale should be set based on the user's language and/or search choice
- my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
- # FIXME: these should be stored in the db so the librarian can modify the behavior
- $stemmer->add_exceptions(
- {
- 'and' => 'and',
- 'or' => 'or',
- 'not' => 'not',
- }
- );
-
- my @words = split( / /, $operands[$i] );
- my $stems = $stemmer->stem(@words);
- foreach my $stem (@$stems) {
- $stemmed_operand .= "$stem";
- $stemmed_operand .= "?"
- unless ( $stem =~ /(and$|or$|not$)/ )
- || ( length($stem) < 3 );
- $stemmed_operand .= " ";
- $stemmed_operand =~ s/(and|or|not)//g;
- #warn "STEM: $stemmed_operand";
- }
- #$operand = $stemmed_operand;
- }
+ # COMBINE OPERANDS, INDEXES AND OPERATORS
+ if ( $operands[$i] ) {
-# FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
-# pretty well but will work much better when we have an actual query parser
- my $weighted_query;
- if ($weight_fields) {
- $weighted_query .=
- " rk=("; # Specifies that we're applying rank
- # keyword has different weight properties
- if ( ( $index =~ /kw/ ) || ( !$index ) )
- { # FIXME: do I need to add right-truncation in the case of stemming?
- # a simple way to find out if this query uses an index
- if ( $operand =~ /(\=|\:)/ ) {
- $weighted_query .= " $operand";
- }
- else {
- $weighted_query .=" Title-cover,ext,r1=\"$operand\""; # title cover as exact
- $weighted_query .=" or ti,ext,r2=\"$operand\""; # exact title elsewhere
- #$weighted_query .= " or ti,phr,r3=$operand"; # index as phrase
- #$weighted_query .= " or any,ext,r4=$operand"; # index as exact
- $weighted_query .=" or kw,wrdl,r5=\"$operand\""; # all the words in the query (wordlist)
- $weighted_query .= " or wrd,fuzzy,r9=$operand" if $fuzzy_enabled; # add fuzzy
- $weighted_query .= " or wrd,right-Truncation=$stemmed_operand" if $stemming; # add stemming
- # embedded sorting: 0 a-z; 1 z-a
- #$weighted_query .= ") or (sort1,aut=1";
- }
+ # a flag to determine whether or not to add the index to the query
+ my $indexes_set;
+ # if the user is sophisticated enough to specify an index, turn off some defaults
+ if ($operands[$i] =~ /(:|=)/ || $scan) {
+ $weight_fields = 0;
+ $stemming = 0;
+ $remove_stopwords = 0;
+ }
+ my $operand = $operands[$i];
+ my $index = $indexes[$i];
+
+ # some helpful index modifs
+ my $index_plus = "$index:" if $index;
+ my $index_plus_comma="$index," if $index;
+
+ # Remove Stopwords
+ if ($remove_stopwords) {
+ ($operand, $stopwords_removed) = _remove_stopwords($operand,$index);
+ warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
+ warn "REMOVED STOPWORDS: @$stopwords_removed" if ($stopwords_removed && $DEBUG);
+ }
+
+ # Detect Truncation
+ my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
+ my $truncated_operand;
+ ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index);
+ warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG;
+
+ # Apply Truncation
+ if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
+ # don't field weight or add the index to the query, we do it here
+ $indexes_set = 1;
+ undef $weight_fields;
+ my $previous_truncation_operand;
+ if (scalar(@$nontruncated)>0) {
+ $truncated_operand.= "$index_plus @$nontruncated ";
+ $previous_truncation_operand = 1;
}
- elsif ( $index =~ /au/ ) {
- $weighted_query .=
- " $index,ext,r1=$operand"; # index label as exact
- #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
- $weighted_query .=
- " or $index,phr,r3=$operand"; # index as phrase
- $weighted_query .= " or $index,rt,wrd,r3=$operand";
+ if (scalar(@$righttruncated)>0){
+ $truncated_operand .= "and " if $previous_truncation_operand;
+ $truncated_operand .= "$index_plus_comma"."rtrn:@$righttruncated ";
+ $previous_truncation_operand = 1;
}
- elsif ( $index =~ /ti/ ) {
- $weighted_query .=
- " Title-cover,ext,r1=$operand"; # index label as exact
- $weighted_query .= " or Title-series,ext,r2=$operand";
-
- #$weighted_query .= " or ti,ext,r2=$operand";
- #$weighted_query .= " or ti,phr,r3=$operand";
- #$weighted_query .= " or ti,wrd,r3=$operand";
- $weighted_query .=" or (title-sort-az=0 or Title-cover,startswithnt,st-word,r3=$operand #)";
- $weighted_query .=" or (title-sort-az=0 or Title-cover,phr,r6=$operand)";
-
- #$weighted_query .= " or Title-cover,wrd,r5=$operand";
- #$weighted_query .= " or ti,ext,r6=$operand";
- #$weighted_query .= " or ti,startswith,phr,r7=$operand";
- #$weighted_query .= " or ti,phr,r8=$operand";
- #$weighted_query .= " or ti,wrd,r9=$operand";
-
- #$weighted_query .= " or ti,ext,r2=$operand"; # index as exact
- #$weighted_query .= " or ti,phr,r3=$operand"; # index as phrase
- #$weighted_query .= " or any,ext,r4=$operand"; # index as exact
- #$weighted_query .= " or kw,wrd,r5=$operand"; # index as exact
+ if (scalar(@$lefttruncated)>0){
+ $truncated_operand .= "and " if $previous_truncation_operand;
+ $truncated_operand .= "$index_plus_comma"."ltrn:@$lefttruncated ";
+ $previous_truncation_operand = 1;
}
- else {
- $weighted_query .=
- " $index,ext,r1=$operand"; # index label as exact
- #$weighted_query .= " or $index,ext,r2=$operand"; # index as exact
- $weighted_query .=
- " or $index,phr,r3=$operand"; # index as phrase
- $weighted_query .= " or $index,rt,wrd,r3=$operand";
- $weighted_query .=
- " or $index,wrd,r5=$operand"
- ; # index as word right-truncated
- $weighted_query .= " or $index,wrd,fuzzy,r8=$operand";
+ if (scalar(@$rightlefttruncated)>0){
+ $truncated_operand .= "and " if $previous_truncation_operand;
+ $truncated_operand .= "$index_plus_comma"."rltrn:@$rightlefttruncated ";
+ $previous_truncation_operand = 1;
}
- $weighted_query .= ")"; # close rank specification
- $operand = $weighted_query;
}
-
- # only add an operator if there is a previous operand
+ $operand = $truncated_operand if $truncated_operand;
+ warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
+
+ # Handle Stemming
+ my $stemmed_operand;
+ $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
+ warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
+
+ # Handle Field Weighting
+ my $weighted_operand;
+ $weighted_operand = _build_weighted_query($operand,$stemmed_operand,$index) if $weight_fields;
+ warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
+ $operand = $weighted_operand if $weight_fields;
+ $indexes_set = 1 if $weight_fields;
+
+ # If there's a previous operand, we need to add an operator
if ($previous_operand) {
- if ( $operators[ $i - 1 ] ) {
- $query .= " $operators[$i-1] $index: $operand";
- if ( !$index ) {
- $human_search_desc .=
- " $operators[$i-1] $operands[$i]";
- }
- else {
- $human_search_desc .=
- " $operators[$i-1] $index: $operands[$i]";
- }
+
+ # user-specified operator
+ if ( $operators[$i-1] ) {
+ $query .= " $operators[$i-1] ";
+ $query .= " $index_plus " unless $indexes_set;
+ $query .= " $operand";
+ $query_cgi .="&op=$operators[$i-1]";
+ $query_cgi .="&idx=$index" if $index;
+ $query_cgi .="&q=$operands[$i]" if $operands[$i];
+ $query_desc .=" $operators[$i-1] $index_plus $operands[$i]";
}
# the default operator is and
else {
- $query .= " and $index: $operand";
- $human_search_desc .= " and $index: $operands[$i]";
+ $query .= " and ";
+ $query .= "$index_plus " unless $indexes_set;
+ $query .= "$operand";
+ $query_cgi .="&op=and&idx=$index" if $index;
+ $query_cgi .="&q=$operands[$i]" if $operands[$i];
+ $query_desc .= " and $index_plus $operands[$i]";
}
}
- else {
- if ( !$index ) {
- $query .= " $operand";
- $human_search_desc .= " $operands[$i]";
- }
- else {
- if (scalar(@righttruncated)+scalar(@lefttruncated)+scalar(@rightlefttruncated)>0){
- $query.= "$index: @nontruncated " if (scalar(@nontruncated)>0);
- if (scalar(@righttruncated)>0){
- $query .= "and $index,rtrn:@righttruncated ";
- }
- if (scalar(@lefttruncated)>0){
- $query .= "and $index,ltrn:@lefttruncated ";
- }
- if (scalar(@rightlefttruncated)>0){
- $query .= "and $index,rltrn:@rightlefttruncated ";
- }
- $query=~s/^and//;
- $human_search_desc .= $query;
- } else {
- $query .= " $index: $operand";
- $human_search_desc .= " $index: $operands[$i]";
- }
- }
+
+ # there isn't a previous operand, so no operator is needed
+ else {
+ # field-weighted queries already have indexes set
+ $query .=" $index_plus " unless $indexes_set;
+ $query .= $operand;
+ $query_desc .= " $index_plus $operands[$i]";
+ $query_cgi.="&idx=$index" if $index;
+ $query_cgi.="&q=$operands[$i]" if $operands[$i];
+
$previous_operand = 1;
}
} #/if $operands
} # /for
}
+ warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
# add limits
- my $limit_query;
- my $limit_search_desc;
- foreach my $limit (@limits) {
-
- # FIXME: not quite right yet ... will work on this soon -- JF
- my $type = $1 if $limit =~ m/([^:]+):([^:]*)/;
- if ( $limit =~ /available/ ) {
- $limit_query .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
- #$limit_search_desc.=" and available";
- }
- elsif ( ($limit_query) && ( index( $limit_query, $type, 0 ) > 0 ) ) {
- if ( $limit_query !~ /\(/ ) {
- $limit_query =
- substr( $limit_query, 0, index( $limit_query, $type, 0 ) )
- . "("
- . substr( $limit_query, index( $limit_query, $type, 0 ) )
- . " or $limit )"
- if $limit;
- $limit_search_desc =
- substr( $limit_search_desc, 0,
- index( $limit_search_desc, $type, 0 ) )
- . "("
- . substr( $limit_search_desc,
- index( $limit_search_desc, $type, 0 ) )
- . " or $limit )"
- if $limit;
- }
- else {
- chop $limit_query;
- chop $limit_search_desc;
- $limit_query .= " or $limit )" if $limit;
- $limit_search_desc .= " or $limit )" if $limit;
- }
- }
- elsif ( ($limit_query) && ( $limit =~ /mc/ ) ) {
- $limit_query .= " or $limit" if $limit;
- $limit_search_desc .= " or $limit" if $limit;
+ my $group_OR_limits;
+ my $availability_limit;
+ foreach my $this_limit (@limits) {
+ if ( $this_limit =~ /available/ ) {
+ # available is defined as (items.notloan is NULL) and (items.itemlost > 0 or NULL) (last clause handles NULL values for lost in zebra)
+ $availability_limit .="( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and ((lost,st-numeric gt 0) or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
+ $limit_cgi .= "&limit=available";
+ $limit_desc .="";
}
- # these are treated as AND
- elsif ($limit_query) {
- if ($limit =~ /branch/){
- $limit_query .= " ) and ( $limit" if $limit;
- $limit_search_desc .= " ) and ( $limit" if $limit;
- }else{
- $limit_query .= " or $limit" if $limit;
- $limit_search_desc .= " or $limit" if $limit;
- }
+ # these are treated as OR
+ elsif ( $this_limit =~ /mc/ ) {
+ $group_OR_limits .= " or " if $group_OR_limits;
+ $limit_desc .=" or " if $group_OR_limits;
+ $group_OR_limits .= "$this_limit";
+ $limit_cgi .="&limit=$this_limit";
+ $limit_desc .= "$this_limit";
}
- # otherwise, there is nothing but the limit
- else {
- $limit_query .= "$limit" if $limit;
- $limit_search_desc .= "$limit" if $limit;
- }
- }
-
- # if there's also a query, we need to AND the limits to it
- if ( ($limit_query) && ($query) ) {
- $limit_query = " and (" . $limit_query . ")";
- $limit_search_desc = " and ($limit_search_desc)" if $limit_search_desc;
-
+ # regular old limits
+ else {
+ $limit .= " and " if $limit || $query;
+ $limit .= "$this_limit";
+ $limit_cgi .="&limit=$this_limit";
+ $limit_desc .=" and $this_limit";
+ }
}
- warn "LIMIT: $limit_query";
- $query .= $limit_query;
- $human_search_desc .= $limit_search_desc;
-
- # now normalize the strings
- $query =~ s/ / /g; # remove extra spaces
- $query =~ s/^ //g; # remove any beginning spaces
- $query =~ s/:/=/g; # causes probs for server
- $query =~ s/==/=/g; # remove double == from query
-
- my $federated_query = $human_search_desc;
- $federated_query =~ s/ / /g;
- $federated_query =~ s/^ //g;
- $federated_query =~ s/:/=/g;
- my $federated_query_opensearch = $federated_query;
-
-# my $federated_query_RPN = new ZOOM::Query::CCL2RPN( $query , C4::Context->ZConn('biblioserver'));
-
- $human_search_desc =~ s/ / /g;
- $human_search_desc =~ s/^ //g;
- my $koha_query = $query;
-
- #warn "QUERY:".$koha_query;
- #warn "SEARCHDESC:".$human_search_desc;
- #warn "FEDERATED QUERY:".$federated_query;
- return ( undef, $human_search_desc, $koha_query, $federated_query );
+ if ($group_OR_limits) {
+ $limit.=" and " if ($query || $limit );
+ $limit.="($group_OR_limits)";
+ }
+ if ($availability_limit) {
+ $limit.=" not " if ($query || $limit );
+ $limit.="$availability_limit";
+ }
+ # normalize the strings
+ $query =~ s/:/=/g;
+ $limit =~ s/:/=/g;
+ for ($query, $query_desc, $limit, $limit_desc) {
+ $_ =~ s/ / /g; # remove extra spaces
+ $_ =~ s/^ //g; # remove any beginning spaces
+ $_ =~ s/ $//g; # remove any ending spaces
+ $_ =~ s/==/=/g; # remove double == from query
+
+ }
+ $query_cgi =~ s/^&//;
+
+ # append the limit to the query
+ $query .= " ".$limit;
+
+ warn "QUERY:".$query if $DEBUG;
+ warn "QUERY CGI:".$query_cgi if $DEBUG;
+ warn "QUERY DESC:".$query_desc if $DEBUG;
+ warn "LIMIT:".$limit if $DEBUG;
+ warn "LIMIT CGI:".$limit_cgi if $DEBUG;
+ warn "LIMIT DESC:".$limit_desc if $DEBUG;
+
+ return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type );
}
# IMO this subroutine is pretty messy still -- it's responsible for
my $marcrecord;
$marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
+ $oldbiblio->{result_number} = $i+1;
# add image url if there is one
if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
$oldbiblio->{imageurl} =
$summary =~ s/\n/<br>/g;
$oldbiblio->{summary} = $summary;
}
- # add spans to search term in results
+ # add spans to search term in results for search term highlighting
foreach my $term ( keys %$span_terms_hashref ) {
-
- #warn "term: $term";
my $old_term = $term;
if ( length($term) > 3 ) {
$term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\])//g;
- $term =~ s/\\//g;
+ $term =~ s/\\//g;
+ $term =~ s/\*//g;
#FIXME: is there a better way to do this?
- $oldbiblio->{'title'} =~ s/$term/<span class=term>$&<\/span>/gi;
+ $oldbiblio->{'title'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
$oldbiblio->{'subtitle'} =~
- s/$term/<span class=term>$&<\/span>/gi;
-
- $oldbiblio->{'author'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'publishercode'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'place'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'pages'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'notes'} =~ s/$term/<span class=term>$&<\/span>/gi;
- $oldbiblio->{'size'} =~ s/$term/<span class=term>$&<\/span>/gi;
+ s/$term/<span class=\"term\">$&<\/span>/gi;
+
+ $oldbiblio->{'author'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'publishercode'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'place'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'pages'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'notes'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
+ $oldbiblio->{'size'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
}
}
# last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
$norequests = 1 if $itemtypes{$oldbiblio->{itemtype}}->{notforloan};
-
+ my $itemscount;
for my $key ( sort keys %$items ) {
+ $itemscount++;
my $this_item = {
branchname => $branches{$items->{$key}->{branchcode}},
branchcode => $items->{$key}->{branchcode},
wthdrawn => $items->{$key}->{wthdrawn},
lost => $items->{$key}->{itemlost},
};
- push @items_loop, $this_item;
+ # only show the number specified by the user
+ my $maxitems = (C4::Context->preference('maxItemsinSearchResults')) ? C4::Context->preference('maxItemsinSearchResults')- 1 : 1;
+ push @items_loop, $this_item unless $itemscount > $maxitems;;
}
$oldbiblio->{norequests} = $norequests;
$oldbiblio->{items_count} = $items_count;
NZgetRecords has the same API as the zebra getRecords, even though some parameters are not used
=cut
-
sub NZgetRecords {
- my (
- $koha_query, $federated_query, $sort_by_ref,
- $servers_ref, $results_per_page, $offset,
- $expanded_facet, $branches, $query_type,
- $scan
- ) = @_;
- my $result = NZanalyse($koha_query);
+ my ($query,$simple_query,$sort_by_ref,$servers_ref,$results_per_page,$offset,$expanded_facet,$branches,$query_type,$scan) = @_; # ten positional parameters; only $query, $sort_by_ref, $results_per_page and $offset are used below
+ my $result = NZanalyse($query); # $result is handed to NZorder with the first sort key, the page size and the offset; the caller receives (undef, <NZorder output>, undef)
return (undef,NZorder($result,@$sort_by_ref[0],$results_per_page,$offset),undef);
}
=head2 NZanalyse
NZanalyse : takes a CQL string as parameter and returns a list of biblionumber;title,biblionumber;title,...
- the list is builded from inverted index in nozebra SQL table
+ the list is built from an inverted index in the nozebra SQL table
Note that the title is included only for convenience: sorting is very fast when it is requested on title.
If sorting is requested on anything else, all the results have to be read again, which may take longer.
# $server contains biblioserver or authorities, depending on what we search on.
#warn "querying : $string on $server";
$server='biblioserver' unless $server;
+
# if we have a ", replace the content to discard temporarily any and/or/not inside
my $commacontent;
if ($string =~/"/) {
$string =~ s/"(.*?)"/__X__/;
$commacontent = $1;
-# print "commacontent : $commacontent\n";
+ warn "commacontent : $commacontent" if $DEBUG;
}
# split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
# then, call again NZanalyse with $left and $right
$string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/;
my $left = $1;
my $right = $3;
- my $operand = lc($2);
+ my $operand = lc($2); # FIXME: and/or/not are operators, not operands
# it's not a leaf, we have a and/or/not
if ($operand) {
# reintroduce comma content if needed
$right =~ s/__X__/"$commacontent"/ if $commacontent;
$left =~ s/__X__/"$commacontent"/ if $commacontent;
-# warn "node : $left / $operand / $right\n";
+ warn "node : $left / $operand / $right\n" if $DEBUG;
my $leftresult = NZanalyse($left,$server);
my $rightresult = NZanalyse($right,$server);
# OK, we have the results for right and left part of the query
# it's a leaf, do the real SQL query and return the result
} else {
$string =~ s/__X__/"$commacontent"/ if $commacontent;
- $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\// /g;
-# warn "leaf : $string\n";
+ $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
+ warn "leaf : $string\n" if $DEBUG;
# parse the string into operator/operand/value again
- $string =~ /(.*)(=|>|>=|<|<=)(.*)/;
+ $string =~ /(.*)(>=|<=)(.*)/;
my $left = $1;
my $operator = $2;
my $right = $3;
+ unless ($operator) {
+ $string =~ /(.*)(>|<|=)(.*)/;
+ $left = $1;
+ $operator = $2;
+ $right = $3;
+ }
my $results;
# automatic replace for short operators
- $left='title' if $left eq 'ti';
- $left='author' if $left eq 'au';
- $left='publisher' if $left eq 'pb';
- $left='subject' if $left eq 'su';
- $left='koha-Auth-Number' if $left eq 'an';
- $left='keyword' if $left eq 'kw';
+ $left='title' if $left =~ '^ti';
+ $left='author' if $left =~ '^au';
+ $left='publisher' if $left =~ '^pb';
+ $left='subject' if $left =~ '^su';
+ $left='koha-Auth-Number' if $left =~ '^an';
+ $left='keyword' if $left =~ '^kw';
if ($operator) {
#do a specific search
my $dbh = C4::Context->dbh;
$operator='LIKE' if $operator eq '=' and $right=~ /%/;
- my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND indexname=? AND value $operator ?");
- # warn "$left / $operator / $right\n";
+ my $sth = $dbh->prepare("SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?");
+ warn "$left / $operator / $right\n";
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$right) {
- my $biblionumbers;
+ my ($biblionumbers,$value);
next unless $_;
-# warn "EXECUTE : $server, $left, $_";
- $sth->execute($server, $left, $_);
- while (my $line = $sth->fetchrow) {
- $biblionumbers .= $line;
-# warn "result : $line";
+ warn "EXECUTE : $server, $left, $_";
+ $sth->execute($server, $left, $_) or warn "execute failed: $!";
+ while (my ($line,$value) = $sth->fetchrow) {
+ # when the search term contains a digit (typically a >=, <=, > or < comparison),
+ # keep only rows whose indexed value is purely numeric; otherwise keep every row
+ $biblionumbers .= $line unless ($right =~ /\d/ && $value =~ /\D/);
+ warn "result : $value ". ($right =~ /\d/) . "==".(!$value =~ /\d/) ;#= $line";
}
# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
if ($results) {
my @leftresult = split /;/, $biblionumbers;
my $temp;
- foreach (@leftresult) {
- if ($results =~ "$_;") {
- $temp .= "$_;$_;";
+ foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
+ # remove weight at the end
+ my $cleaned = $entry;
+ $cleaned =~ s/-\d*$//;
+ # if the entry is already in the existing result string, keep it and add it twice to increase its weight
+ warn "===== $cleaned =====" if $DEBUG;
+ if ($results =~ "$cleaned") {
+ $temp .= "$entry;$entry;";
+ warn "INCLUDING $entry" if $DEBUG;
}
}
$results = $temp;
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$string) {
next if C4::Context->stopwords->{uc($_)}; # skip if stopword
- #warn "search on all indexes on $_";
+ warn "search on all indexes on $_" if $DEBUG;
my $biblionumbers;
next unless $_;
$sth->execute($server, $_);
}
# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
if ($results) {
+ warn "RES for $_ = $biblionumbers" if $DEBUG;
my @leftresult = split /;/, $biblionumbers;
my $temp;
- foreach (@leftresult) {
- if ($results =~ "$_;") {
- $temp .= "$_;$_;";
+ foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
+ # remove weight at the end
+ my $cleaned = $entry;
+ $cleaned =~ s/-\d*$//;
+ # if the entry is already in the existing result string, keep it and add it twice to increase its weight
+ warn "===== $cleaned =====" if $DEBUG;
+ if ($results =~ "$cleaned") {
+ $temp .= "$entry;$entry;";
+ warn "INCLUDING $entry" if $DEBUG;
}
}
$results = $temp;
} else {
+ warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
$results = $biblionumbers;
}
}
}
-# warn "return : $results for LEAF : $string";
+ warn "return : $results for LEAF : $string" if $DEBUG;
return $results;
}
}
#
# order by POPULARITY
#
- if ($ordering =~ /1=9523/) {
+ if ($ordering =~ /popularity/) {
my %result;
my %popularity;
# popularity is not in MARC record, it's builded from a specific query
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=9523 >i') { # sort popularity DESC
+ if ($ordering eq 'popularity_dsc') { # sort popularity DESC
foreach my $key (sort {$b cmp $a} (keys %popularity)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc();
}
#
# ORDER BY author
#
- } elsif ($ordering eq '1=1003 <i'){
+ } elsif ($ordering =~/author/){
my %result;
foreach (split /;/,$biblionumbers) {
my ($biblionumber,$title) = split /,/,$_;
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=1003 <i') { # sort by author desc
- foreach my $key (sort (keys %result)) {
+ if ($ordering eq 'author_za') { # sort by author desc
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
} else { # sort by author ASC
- foreach my $key (sort { $a cmp $b } (keys %result)) {
+ foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
}
#
# ORDER BY callnumber
#
- } elsif ($ordering eq '1=20 <i'){
+ } elsif ($ordering =~/callnumber/){
my %result;
foreach (split /;/,$biblionumbers) {
my ($biblionumber,$title) = split /,/,$_;
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=1003 <i') { # sort by title desc
- foreach my $key (sort (keys %result)) {
+ if ($ordering eq 'call_number_dsc') { # sort by title desc
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
} else { # sort by title ASC
$result_hash->{'hits'} = $numbers;
$finalresult->{'biblioserver'} = $result_hash;
return $finalresult;
- } elsif ($ordering =~ /1=31/){ #pub year
+ } elsif ($ordering =~ /pubdate/){ #pub year
my %result;
foreach (split /;/,$biblionumbers) {
my ($biblionumber,$title) = split /,/,$_;
my $record=GetMarcBiblio($biblionumber);
- my ($publicationyear_tag,$publicationyear_subfield)=GetMarcFromKohaField($dbh,'biblioitems.publicationyear');
+ my ($publicationyear_tag,$publicationyear_subfield)=GetMarcFromKohaField('biblioitems.publicationyear','');
my $publicationyear=$record->subfield($publicationyear_tag,$publicationyear_subfield);
# hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
# and we don't want to get only 1 result for each of them !!!
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=31 <i') { # sort by pubyear desc
- foreach my $key (sort (keys %result)) {
+ if ($ordering eq 'pubdate_dsc') { # sort by pubyear desc
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
} else { # sort by pub year ASC
- foreach my $key (sort { $b cmp $a } (keys %result)) {
+ foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
}
}
#
# ORDER BY title
#
- } elsif ($ordering =~ /1=4/) {
+ } elsif ($ordering =~ /title/) {
# the title is in the biblionumbers string, so we just need to build a hash, sort it and return
my %result;
foreach (split /;/,$biblionumbers) {
# sort the hash and return the same structure as GetRecords (Zebra querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=4 <i') { # sort by title desc
+ if ($ordering eq 'title_az') { # sort by title desc
foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key};
}
# for the requested page, replace biblionumber by the complete record
# speed improvement : avoid reading too much things
for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) {
- $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc;
+ $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc if $result_hash->{'RECORDS'}[$counter];
}
my $finalresult=();
$result_hash->{'hits'} = $numbers;