3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
19 # use warnings; # FIXME
22 use C4::Biblio; # GetMarcFromKohaField
23 use C4::Koha; # getFacets
25 use C4::Search::PazPar2;
27 use C4::Dates qw(format_date);
31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
33 # set the version for version checking
36 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
41 C4::Search - Functions for searching the Koha catalog.
45 See opac/opac-search.pl or catalogue/search.pl for example of usage
49 This module provides searching functions for Koha's bibliographic databases
65 # make all your functions, whether exported or not;
69 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
71 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
77 my $dbh = C4::Context->dbh;
78 my $result = TransformMarcToKoha( $dbh, $record, '' );
83 my ( $biblionumber, $title );
85 # search duplicate on ISBN, easy and fast..
87 if ( $result->{isbn} ) {
88 $result->{isbn} =~ s/\(.*$//;
89 $result->{isbn} =~ s/\s+$//;
90 $query = "isbn=$result->{isbn}";
93 $result->{title} =~ s /\\//g;
94 $result->{title} =~ s /\"//g;
95 $result->{title} =~ s /\(//g;
96 $result->{title} =~ s /\)//g;
98 # FIXME: instead of removing operators, could just do
99 # quotes around the value
100 $result->{title} =~ s/(and|or|not)//g;
101 $query = "ti,ext=$result->{title}";
102 $query .= " and itemtype=$result->{itemtype}"
103 if ( $result->{itemtype} );
104 if ( $result->{author} ) {
105 $result->{author} =~ s /\\//g;
106 $result->{author} =~ s /\"//g;
107 $result->{author} =~ s /\(//g;
108 $result->{author} =~ s /\)//g;
110 # remove valid operators
111 $result->{author} =~ s/(and|or|not)//g;
112 $query .= " and au,ext=$result->{author}";
116 # FIXME: add error handling
117 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
119 foreach my $possible_duplicate_record (@$searchresults) {
121 MARC::Record->new_from_usmarc($possible_duplicate_record);
122 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
124 # FIXME :: why 2 $biblionumber ?
126 push @results, $result->{'biblionumber'};
127 push @results, $result->{'title'};
135 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
137 This function provides a simple search API on the bibliographic catalog
143 * $query can be a simple keyword or a complete CCL query
144 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
145 * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
146 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
151 * $error is a empty unless an error is detected
152 * \@results is an array of records.
153 * $total_hits is the number of hits that would have been returned with no limit
155 =item C<usage in the script:>
159 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
161 if (defined $error) {
162 $template->param(query_error => $error);
163 warn "error: ".$error;
164 output_html_with_http_headers $input, $cookie, $template->output;
168 my $hits = scalar @$marcresults;
171 for my $i (0..$hits) {
173 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
174 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
176 #build the hash for the template.
177 $resultsloop{highlight} = ($i % 2)?(1):(0);
178 $resultsloop{title} = $biblio->{'title'};
179 $resultsloop{subtitle} = $biblio->{'subtitle'};
180 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
181 $resultsloop{author} = $biblio->{'author'};
182 $resultsloop{publishercode} = $biblio->{'publishercode'};
183 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
185 push @results, \%resultsloop;
188 $template->param(result=>\@results);
193 my ( $query, $offset, $max_results, $servers ) = @_;
195 if ( C4::Context->preference('NoZebra') ) {
196 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
199 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
200 return ( undef, $search_result, scalar($result->{hits}) );
203 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
204 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
210 return ( "No query entered", undef, undef ) unless $query;
212 # Initialize & Search Zebra
213 for ( my $i = 0 ; $i < @servers ; $i++ ) {
215 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
216 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
217 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
221 $zconns[$i]->errmsg() . " ("
222 . $zconns[$i]->errcode() . ") "
223 . $zconns[$i]->addinfo() . " "
224 . $zconns[$i]->diagset();
226 return ( $error, undef, undef ) if $zconns[$i]->errcode();
230 # caught a ZOOM::Exception
234 . $@->addinfo() . " "
237 return ( $error, undef, undef );
240 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
241 my $event = $zconns[ $i - 1 ]->last_event();
242 if ( $event == ZOOM::Event::ZEND ) {
244 my $first_record = defined( $offset ) ? $offset+1 : 1;
245 my $hits = $tmpresults[ $i - 1 ]->size();
246 $total_hits += $hits;
247 my $last_record = $hits;
248 if ( defined $max_results && $offset + $max_results < $hits ) {
249 $last_record = $offset + $max_results;
252 for my $j ( $first_record..$last_record ) {
253 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
254 push @results, $record;
259 foreach my $result (@tmpresults) {
262 foreach my $zoom_query (@zoom_queries) {
263 $zoom_query->destroy();
266 return ( undef, \@results, $total_hits );
272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
274 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
275 $results_per_page, $offset, $expanded_facet, $branches,
279 The all singing, all dancing, multi-server, asynchronous, scanning,
280 searching, record nabbing, facet-building
282 See verbse embedded documentation.
288 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
289 $results_per_page, $offset, $expanded_facet, $branches,
293 my @servers = @$servers_ref;
294 my @sort_by = @$sort_by_ref;
296 # Initialize variables for the ZOOM connection and results object
300 my $results_hashref = ();
302 # Initialize variables for the faceted results objects
303 my $facets_counter = ();
304 my $facets_info = ();
305 my $facets = getFacets();
307 my @facets_loop; # stores the ref to array of hashes for template facets loop
309 ### LOOP THROUGH THE SERVERS
310 for ( my $i = 0 ; $i < @servers ; $i++ ) {
311 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
313 # perform the search, create the results objects
314 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
315 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
317 #$query_to_use = $simple_query if $scan;
318 warn $simple_query if ( $scan and $DEBUG );
320 # Check if we've got a query_type defined, if so, use it
324 if ( $query_type =~ /^ccl/ ) {
326 s/\:/\=/g; # change : to = last minute (FIXME)
329 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
332 elsif ( $query_type =~ /^cql/ ) {
335 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
337 elsif ( $query_type =~ /^pqf/ ) {
340 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
347 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
353 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
359 warn "WARNING: query problem with $query_to_use " . $@;
362 # Concatenate the sort_by limits and pass them to the results object
363 # Note: sort will override rank
365 foreach my $sort (@sort_by) {
366 if ( $sort eq "author_az" ) {
367 $sort_by .= "1=1003 <i ";
369 elsif ( $sort eq "author_za" ) {
370 $sort_by .= "1=1003 >i ";
372 elsif ( $sort eq "popularity_asc" ) {
373 $sort_by .= "1=9003 <i ";
375 elsif ( $sort eq "popularity_dsc" ) {
376 $sort_by .= "1=9003 >i ";
378 elsif ( $sort eq "call_number_asc" ) {
379 $sort_by .= "1=20 <i ";
381 elsif ( $sort eq "call_number_dsc" ) {
382 $sort_by .= "1=20 >i ";
384 elsif ( $sort eq "pubdate_asc" ) {
385 $sort_by .= "1=31 <i ";
387 elsif ( $sort eq "pubdate_dsc" ) {
388 $sort_by .= "1=31 >i ";
390 elsif ( $sort eq "acqdate_asc" ) {
391 $sort_by .= "1=32 <i ";
393 elsif ( $sort eq "acqdate_dsc" ) {
394 $sort_by .= "1=32 >i ";
396 elsif ( $sort eq "title_az" ) {
397 $sort_by .= "1=4 <i ";
399 elsif ( $sort eq "title_za" ) {
400 $sort_by .= "1=4 >i ";
403 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
407 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
408 warn "WARNING sort $sort_by failed";
411 } # finished looping through servers
413 # The big moment: asynchronously retrieve results from all servers
414 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
415 my $ev = $zconns[ $i - 1 ]->last_event();
416 if ( $ev == ZOOM::Event::ZEND ) {
417 next unless $results[ $i - 1 ];
418 my $size = $results[ $i - 1 ]->size();
422 # loop through the results
423 $results_hash->{'hits'} = $size;
425 if ( $offset + $results_per_page <= $size ) {
426 $times = $offset + $results_per_page;
431 for ( my $j = $offset ; $j < $times ; $j++ ) {
436 ## Check if it's an index scan
438 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
440 # here we create a minimal MARC record and hand it off to the
441 # template just like a normal result ... perhaps not ideal, but
443 my $tmprecord = MARC::Record->new();
444 $tmprecord->encoding('UTF-8');
448 # the minimal record in author/title (depending on MARC flavour)
449 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
450 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
451 $tmprecord->append_fields($tmptitle);
453 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
454 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
455 $tmprecord->append_fields($tmptitle);
456 $tmprecord->append_fields($tmpauthor);
458 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
463 $record = $results[ $i - 1 ]->record($j)->raw();
465 # warn "RECORD $j:".$record;
466 $results_hash->{'RECORDS'}[$j] = $record;
468 # Fill the facets while we're looping, but only for the biblioserver
469 $facet_record = MARC::Record->new_from_usmarc($record)
470 if $servers[ $i - 1 ] =~ /biblioserver/;
472 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
474 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
475 ($facets->[$k]) or next;
476 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
477 for my $field (@fields) {
478 my @subfields = $field->subfields();
479 for my $subfield (@subfields) {
480 my ( $code, $data ) = @$subfield;
481 ($code eq $facets->[$k]->{'subfield'}) or next;
482 $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
485 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
486 $facets->[$k]->{'label_value'};
487 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
488 $facets->[$k]->{'expanded'};
493 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
496 # warn "connection ", $i-1, ": $size hits";
497 # warn $results[$i-1]->record(0)->render() if $size > 0;
500 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
502 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
503 keys %$facets_counter )
506 my $number_of_facets;
507 my @this_facets_array;
510 $facets_counter->{$link_value}
511 ->{$b} <=> $facets_counter->{$link_value}->{$a}
512 } keys %{ $facets_counter->{$link_value} }
516 if ( ( $number_of_facets < 6 )
517 || ( $expanded_facet eq $link_value )
518 || ( $facets_info->{$link_value}->{'expanded'} ) )
521 # Sanitize the link value ), ( will cause errors with CCL,
522 my $facet_link_value = $one_facet;
523 $facet_link_value =~ s/(\(|\))/ /g;
525 # fix the length that will display in the label,
526 my $facet_label_value = $one_facet;
528 substr( $one_facet, 0, 20 ) . "..."
529 unless length($facet_label_value) <= 20;
531 # if it's a branch, label by the name, not the code,
532 if ( $link_value =~ /branch/ ) {
534 $branches->{$one_facet}->{'branchname'};
537 # but we're down with the whole label being in the link's title.
538 my $facet_title_value = $one_facet;
540 push @this_facets_array,
544 $facets_counter->{$link_value}
546 facet_label_value => $facet_label_value,
547 facet_title_value => $facet_title_value,
548 facet_link_value => $facet_link_value,
549 type_link_value => $link_value,
555 # handle expanded option
556 unless ( $facets_info->{$link_value}->{'expanded'} ) {
558 if ( ( $number_of_facets > 6 )
559 && ( $expanded_facet ne $link_value ) );
564 type_link_value => $link_value,
565 type_id => $link_value . "_id",
566 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
567 facets => \@this_facets_array,
568 expandable => $expandable,
569 expand => $link_value,
571 ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
576 return ( undef, $results_hashref, \@facets_loop );
581 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
582 $results_per_page, $offset, $expanded_facet, $branches,
586 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
588 $paz->search($simple_query);
589 sleep 1; # FIXME: WHY?
592 my $results_hashref = {};
593 my $stats = XMLin($paz->stat);
594 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
596 # for a grouped search result, the number of hits
597 # is the number of groups returned; 'bib_hits' will have
598 # the total number of bibs.
599 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
600 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
602 HIT: foreach my $hit (@{ $results->{'hit'} }) {
603 my $recid = $hit->{recid}->[0];
605 my $work_title = $hit->{'md-work-title'}->[0];
607 if (exists $hit->{'md-work-author'}) {
608 $work_author = $hit->{'md-work-author'}->[0];
610 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
612 my $result_group = {};
613 $result_group->{'group_label'} = $group_label;
614 $result_group->{'group_merge_key'} = $recid;
617 if (exists $hit->{count}) {
618 $count = $hit->{count}->[0];
620 $result_group->{'group_count'} = $count;
622 for (my $i = 0; $i < $count; $i++) {
623 # FIXME -- may need to worry about diacritics here
624 my $rec = $paz->record($recid, $i);
625 push @{ $result_group->{'RECORDS'} }, $rec;
628 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
631 # pass through facets
632 my $termlist_xml = $paz->termlist('author,subject');
633 my $terms = XMLin($termlist_xml, forcearray => 1);
634 my @facets_loop = ();
635 #die Dumper($results);
636 # foreach my $list (sort keys %{ $terms->{'list'} }) {
638 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
640 # facet_label_value => $facet->{'name'}->[0],
643 # push @facets_loop, ( {
644 # type_label => $list,
645 # facets => \@facets,
649 return ( undef, $results_hashref, \@facets_loop );
653 sub _remove_stopwords {
654 my ( $operand, $index ) = @_;
655 my @stopwords_removed;
657 # phrase and exact-qualified indexes shouldn't have stopwords removed
658 if ( $index !~ m/phr|ext/ ) {
660 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
661 # we use IsAlpha unicode definition, to deal correctly with diacritics.
662 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
663 # is a stopword, we'd get "çon" and wouldn't find anything...
664 foreach ( keys %{ C4::Context->stopwords } ) {
665 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
667 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
669 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
670 $operand =~ s/^$_\P{IsAlpha}/ /gi;
671 $operand =~ s/\P{IsAlpha}$_$/ /gi;
672 $operand =~ s/$1//gi;
673 push @stopwords_removed, $_;
677 return ( $operand, \@stopwords_removed );
681 sub _detect_truncation {
682 my ( $operand, $index ) = @_;
683 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
686 my @wordlist = split( /\s/, $operand );
687 foreach my $word (@wordlist) {
688 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
689 push @rightlefttruncated, $word;
691 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
692 push @lefttruncated, $word;
694 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
695 push @righttruncated, $word;
697 elsif ( index( $word, "*" ) < 0 ) {
698 push @nontruncated, $word;
701 push @regexpr, $word;
705 \@nontruncated, \@righttruncated, \@lefttruncated,
706 \@rightlefttruncated, \@regexpr
711 sub _build_stemmed_operand {
715 # If operand contains a digit, it is almost certainly an identifier, and should
716 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
717 # can contain the letter "X" - for example, _build_stemmend_operand would reduce
718 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
719 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
720 return $operand if $operand =~ /\d/;
722 # FIXME: the locale should be set based on the user's language and/or search choice
723 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
725 # FIXME: these should be stored in the db so the librarian can modify the behavior
726 $stemmer->add_exceptions(
733 my @words = split( / /, $operand );
734 my $stems = $stemmer->stem(@words);
735 for my $stem (@$stems) {
736 $stemmed_operand .= "$stem";
737 $stemmed_operand .= "?"
738 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
739 $stemmed_operand .= " ";
741 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
742 return $stemmed_operand;
746 sub _build_weighted_query {
748 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
749 # pretty well but could work much better if we had a smarter query parser
750 my ( $operand, $stemmed_operand, $index ) = @_;
751 my $stemming = C4::Context->preference("QueryStemming") || 0;
752 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
753 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
755 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
757 # Keyword, or, no index specified
758 if ( ( $index eq 'kw' ) || ( !$index ) ) {
760 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
761 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
762 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
763 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
764 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
765 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
766 if $fuzzy_enabled; # add fuzzy, word list
767 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
768 if ( $stemming and $stemmed_operand )
769 ; # add stemming, right truncation
770 $weighted_query .= " or wrdl,r9=\"$operand\"";
772 # embedded sorting: 0 a-z; 1 z-a
773 # $weighted_query .= ") or (sort1,aut=1";
776 # Barcode searches should skip this process
777 elsif ( $index eq 'bc' ) {
778 $weighted_query .= "bc=\"$operand\"";
781 # Authority-number searches should skip this process
782 elsif ( $index eq 'an' ) {
783 $weighted_query .= "an=\"$operand\"";
786 # If the index already has more than one qualifier, wrap the operand
787 # in quotes and pass it back (assumption is that the user knows what they
788 # are doing and won't appreciate us mucking up their query
789 elsif ( $index =~ ',' ) {
790 $weighted_query .= " $index=\"$operand\"";
793 #TODO: build better cases based on specific search indexes
795 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
796 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
797 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
799 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
802 $weighted_query .= "))"; # close rank specification
803 return $weighted_query;
809 $simple_query, $query_cgi,
811 $limit_cgi, $limit_desc,
812 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
814 Build queries and limits in CCL, CGI, Human,
815 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
817 See verbose embedded documentation.
823 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
825 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
828 my @operators = @$operators if $operators;
829 my @indexes = @$indexes if $indexes;
830 my @operands = @$operands if $operands;
831 my @limits = @$limits if $limits;
832 my @sort_by = @$sort_by if $sort_by;
834 my $stemming = C4::Context->preference("QueryStemming") || 0;
835 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
836 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
837 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
838 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
840 # no stemming/weight/fuzzy in NoZebra
841 if ( C4::Context->preference("NoZebra") ) {
847 my $query = $operands[0];
848 my $simple_query = $operands[0];
850 # initialize the variables we're passing back
859 my $stopwords_removed; # flag to determine if stopwords have been removed
861 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
863 if ( $query =~ /^ccl=/ ) {
864 return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
866 if ( $query =~ /^cql=/ ) {
867 return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
869 if ( $query =~ /^pqf=/ ) {
870 return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
873 # pass nested queries directly
874 # FIXME: need better handling of some of these variables in this case
875 if ( $query =~ /(\(|\))/ ) {
877 undef, $query, $simple_query, $query_cgi,
878 $query, $limit, $limit_cgi, $limit_desc,
879 $stopwords_removed, 'ccl'
883 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
884 # query operands and indexes and add stemming, truncation, field weighting, etc.
885 # Once we do so, we'll end up with a value in $query, just like if we had an
886 # incoming $query from the user
889 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
891 ; # a flag used to keep track if there was a previous query
892 # if there was, we can apply the current operator
894 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
896 # COMBINE OPERANDS, INDEXES AND OPERATORS
897 if ( $operands[$i] ) {
899 # A flag to determine whether or not to add the index to the query
902 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
903 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
906 $remove_stopwords = 0;
908 my $operand = $operands[$i];
909 my $index = $indexes[$i];
911 # Add index-specific attributes
912 # Date of Publication
913 if ( $index eq 'yr' ) {
914 $index .= ",st-numeric";
916 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
919 # Date of Acquisition
920 elsif ( $index eq 'acqdate' ) {
921 $index .= ",st-date-normalized";
923 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
925 # ISBN,ISSN,Standard Number, don't need special treatment
926 elsif ( $index eq 'nb' || $index eq 'ns' ) {
929 $stemming, $auto_truncation,
930 $weight_fields, $fuzzy_enabled,
932 ) = ( 0, 0, 0, 0, 0 );
935 # Set default structure attribute (word list)
937 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
938 $struct_attr = ",wrdl";
941 # Some helpful index variants
942 my $index_plus = $index . $struct_attr . ":" if $index;
943 my $index_plus_comma = $index . $struct_attr . "," if $index;
944 if ($auto_truncation){
945 # FIXME Auto Truncation is only valid for LTR languages
947 # use C4::Languages qw(regex_lang_subtags get_bidi);
948 # $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
949 # my $current_lang = regex_lang_subtags($lang);
951 # $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
952 $index_plus_comma .= "rtrn:";
956 if ($remove_stopwords) {
957 ( $operand, $stopwords_removed ) =
958 _remove_stopwords( $operand, $index );
959 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
960 warn "REMOVED STOPWORDS: @$stopwords_removed"
961 if ( $stopwords_removed && $DEBUG );
965 my $truncated_operand;
966 my( $nontruncated, $righttruncated, $lefttruncated,
967 $rightlefttruncated, $regexpr
968 ) = _detect_truncation( $operand, $index );
970 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
975 scalar(@$righttruncated) + scalar(@$lefttruncated) +
976 scalar(@$rightlefttruncated) > 0 )
979 # Don't field weight or add the index to the query, we do it here
981 undef $weight_fields;
982 my $previous_truncation_operand;
983 if ( scalar(@$nontruncated) > 0 ) {
984 $truncated_operand .= "$index_plus @$nontruncated ";
985 $previous_truncation_operand = 1;
987 if ( scalar(@$righttruncated) > 0 ) {
988 $truncated_operand .= "and "
989 if $previous_truncation_operand;
990 $truncated_operand .=
991 "$index_plus_comma" . "rtrn:@$righttruncated ";
992 $previous_truncation_operand = 1;
994 if ( scalar(@$lefttruncated) > 0 ) {
995 $truncated_operand .= "and "
996 if $previous_truncation_operand;
997 $truncated_operand .=
998 "$index_plus_comma" . "ltrn:@$lefttruncated ";
999 $previous_truncation_operand = 1;
1001 if ( scalar(@$rightlefttruncated) > 0 ) {
1002 $truncated_operand .= "and "
1003 if $previous_truncation_operand;
1004 $truncated_operand .=
1005 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1006 $previous_truncation_operand = 1;
1009 $operand = $truncated_operand if $truncated_operand;
1010 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1013 my $stemmed_operand;
1014 $stemmed_operand = _build_stemmed_operand($operand)
1016 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1018 # Handle Field Weighting
1019 my $weighted_operand;
1021 _build_weighted_query( $operand, $stemmed_operand, $index )
1023 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1024 $operand = $weighted_operand if $weight_fields;
1025 $indexes_set = 1 if $weight_fields;
1027 # If there's a previous operand, we need to add an operator
1028 if ($previous_operand) {
1030 # User-specified operator
1031 if ( $operators[ $i - 1 ] ) {
1032 $query .= " $operators[$i-1] ";
1033 $query .= " $index_plus " unless $indexes_set;
1034 $query .= " $operand";
1035 $query_cgi .= "&op=$operators[$i-1]";
1036 $query_cgi .= "&idx=$index" if $index;
1037 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1039 " $operators[$i-1] $index_plus $operands[$i]";
1042 # Default operator is and
1045 $query .= "$index_plus " unless $indexes_set;
1046 $query .= "$operand";
1047 $query_cgi .= "&op=and&idx=$index" if $index;
1048 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1049 $query_desc .= " and $index_plus $operands[$i]";
1053 # There isn't a pervious operand, don't need an operator
1056 # Field-weighted queries already have indexes set
1057 $query .= " $index_plus " unless $indexes_set;
1059 $query_desc .= " $index_plus $operands[$i]";
1060 $query_cgi .= "&idx=$index" if $index;
1061 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1062 $previous_operand = 1;
1067 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1070 my $group_OR_limits;
1071 my $availability_limit;
1072 foreach my $this_limit (@limits) {
1073 if ( $this_limit =~ /available/ ) {
1075 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1077 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1078 $availability_limit .=
1079 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1080 $limit_cgi .= "&limit=available";
1084 # group_OR_limits, prefixed by mc-
1085 # OR every member of the group
1086 elsif ( $this_limit =~ /mc/ ) {
1087 $group_OR_limits .= " or " if $group_OR_limits;
1088 $limit_desc .= " or " if $group_OR_limits;
1089 $group_OR_limits .= "$this_limit";
1090 $limit_cgi .= "&limit=$this_limit";
1091 $limit_desc .= " $this_limit";
1094 # Regular old limits
1096 $limit .= " and " if $limit || $query;
1097 $limit .= "$this_limit";
1098 $limit_cgi .= "&limit=$this_limit";
1099 if ($this_limit =~ /^branch:(.+)/) {
1100 my $branchcode = $1;
1101 my $branchname = GetBranchName($branchcode);
1102 if (defined $branchname) {
1103 $limit_desc .= " branch:$branchname";
1105 $limit_desc .= " $this_limit";
1108 $limit_desc .= " $this_limit";
1112 if ($group_OR_limits) {
1113 $limit .= " and " if ( $query || $limit );
1114 $limit .= "($group_OR_limits)";
1116 if ($availability_limit) {
1117 $limit .= " and " if ( $query || $limit );
1118 $limit .= "($availability_limit)";
1121 # Normalize the query and limit strings
1124 for ( $query, $query_desc, $limit, $limit_desc ) {
1125 $_ =~ s/ / /g; # remove extra spaces
1126 $_ =~ s/^ //g; # remove any beginning spaces
1127 $_ =~ s/ $//g; # remove any ending spaces
1128 $_ =~ s/==/=/g; # remove double == from query
1130 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1132 for ($query_cgi,$simple_query) {
1135 # append the limit to the query
1136 $query .= " " . $limit;
1140 warn "QUERY:" . $query;
1141 warn "QUERY CGI:" . $query_cgi;
1142 warn "QUERY DESC:" . $query_desc;
1143 warn "LIMIT:" . $limit;
1144 warn "LIMIT CGI:" . $limit_cgi;
1145 warn "LIMIT DESC:" . $limit_desc;
1146 warn "---------\nLeave buildQuery\n---------";
1149 undef, $query, $simple_query, $query_cgi,
1150 $query_desc, $limit, $limit_cgi, $limit_desc,
1151 $stopwords_removed, $query_type
1155 =head2 searchResults
1157 Format results in a form suitable for passing to the template
1161 # IMO this subroutine is pretty messy still -- it's responsible for
1162 # building the HTML output for the template
1164 my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1165 my $dbh = C4::Context->dbh;
1169 # add search-term highlighting via <span>s on the search terms
1170 my $span_terms_hashref;
1171 for my $span_term ( split( / /, $searchdesc ) ) {
1172 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1173 $span_terms_hashref->{$span_term}++;
1176 #Build branchnames hash
1178 #get branch information.....
1181 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1182 ; # FIXME : use C4::Koha::GetBranches
1184 while ( my $bdata = $bsth->fetchrow_hashref ) {
1185 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1187 # FIXME - We build an authorised values hash here, using the default framework
1188 # though it is possible to have different authvals for different fws.
1190 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1192 # get notforloan authorised value list (see $shelflocations FIXME)
1193 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1195 #Build itemtype hash
1196 #find itemtype & itemtype image
1200 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1203 while ( my $bdata = $bsth->fetchrow_hashref ) {
1204 foreach (qw(description imageurl summary notforloan)) {
1205 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1209 #search item field code
1212 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1215 my ($itemtag) = $sth->fetchrow;
1217 ## find column names of items related to MARC
1218 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1220 my %subfieldstosearch;
1221 while ( ( my $column ) = $sth2->fetchrow ) {
1222 my ( $tagfield, $tagsubfield ) =
1223 &GetMarcFromKohaField( "items." . $column, "" );
1224 $subfieldstosearch{$column} = $tagsubfield;
1227 # handle which records to actually retrieve
1229 if ( $hits && $offset + $results_per_page <= $hits ) {
1230 $times = $offset + $results_per_page;
1233 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1236 my $marcflavour = C4::Context->preference("marcflavour");
1237 # loop through all of the records we've retrieved
1238 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1239 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1240 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1241 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1242 $oldbiblio->{result_number} = $i + 1;
1244 # add imageurl to itemtype if there is one
1245 $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1247 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1248 $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1249 $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1250 $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1251 $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1252 $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1253 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1254 # Build summary if there is one (the summary is defined in the itemtypes table)
1255 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1256 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1257 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1258 my @fields = $marcrecord->fields();
1259 foreach my $field (@fields) {
1260 my $tag = $field->tag();
1261 my $tagvalue = $field->as_string();
1263 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1264 unless ( $tag < 10 ) {
1265 my @subf = $field->subfields;
1266 for my $i ( 0 .. $#subf ) {
1267 my $subfieldcode = $subf[$i][0];
1268 my $subfieldvalue = $subf[$i][1];
1269 my $tagsubf = $tag . $subfieldcode;
1271 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1276 $summary =~ s/\[(.*?)]//g;
1277 $summary =~ s/\n/<br\/>/g;
1278 $oldbiblio->{summary} = $summary;
1281 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1282 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1283 $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1284 $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1285 # Add search-term highlighting to the whole record where they match using <span>s
1286 if (C4::Context->preference("OpacHighlightedWords")){
1287 my $searchhighlightblob;
1288 for my $highlight_field ( $marcrecord->fields ) {
1290 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1291 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1292 for my $subfield ($highlight_field->subfields()) {
1294 next if $subfield->[0] eq '9';
1295 my $field = $subfield->[1];
1296 for my $term ( keys %$span_terms_hashref ) {
1297 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1298 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1302 $searchhighlightblob .= $field . " ... " if $match;
1306 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1307 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1310 # Add search-term highlighting to the title, subtitle, etc. fields
1311 for my $term ( keys %$span_terms_hashref ) {
1312 my $old_term = $term;
1313 if ( length($term) > 3 ) {
1314 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1315 foreach(qw(title subtitle author publishercode place pages notes size)) {
1316 $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1321 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1323 # Pull out the items fields
1324 my @fields = $marcrecord->field($itemtag);
1326 # Setting item statuses for display
1327 my @available_items_loop;
1328 my @onloan_items_loop;
1329 my @other_items_loop;
1331 my $available_items;
1335 my $ordered_count = 0;
1336 my $available_count = 0;
1337 my $onloan_count = 0;
1338 my $longoverdue_count = 0;
1339 my $other_count = 0;
1340 my $wthdrawn_count = 0;
1341 my $itemlost_count = 0;
1342 my $itembinding_count = 0;
1343 my $itemdamaged_count = 0;
1344 my $item_in_transit_count = 0;
1345 my $can_place_holds = 0;
1346 my $items_count = scalar(@fields);
1348 ( C4::Context->preference('maxItemsinSearchResults') )
1349 ? C4::Context->preference('maxItemsinSearchResults') - 1
1352 # loop through every item
1353 foreach my $field (@fields) {
1356 # populate the items hash
1357 foreach my $code ( keys %subfieldstosearch ) {
1358 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1360 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1361 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1362 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1363 if ($item->{$hbranch}) {
1364 $item->{'branchname'} = $branches{$item->{$hbranch}};
1366 elsif ($item->{$otherbranch}) { # Last resort
1367 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1370 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1371 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1372 if ( $item->{onloan} ) {
1374 my $key = $prefix . $item->{due_date};
1375 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1376 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1377 $onloan_items->{$key}->{branchname} = $item->{branchname};
1378 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1379 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1380 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1381 # if something's checked out and lost, mark it as 'long overdue'
1382 if ( $item->{itemlost} ) {
1383 $onloan_items->{$prefix}->{longoverdue}++;
1384 $longoverdue_count++;
1385 } else { # can place holds as long as item isn't lost
1386 $can_place_holds = 1;
1390 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1394 if ( $item->{notforloan} == -1 ) {
1398 # is item in transit?
1399 my $transfertwhen = '';
1400 my ($transfertfrom, $transfertto);
1402 unless ($item->{wthdrawn}
1403 || $item->{itemlost}
1405 || $item->{notforloan}
1406 || $items_count > 20) {
1408 # A couple heuristics to limit how many times
1409 # we query the database for item transfer information, sacrificing
1410 # accuracy in some cases for speed;
1412 # 1. don't query if item has one of the other statuses
1413 # 2. don't check transit status if the bib has
1414 # more than 20 items
1416 # FIXME: to avoid having the query the database like this, and to make
1417 # the in transit status count as unavailable for search limiting,
1418 # should map transit status to record indexed in Zebra.
1420 ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1423 # item is withdrawn, lost or damaged
1424 if ( $item->{wthdrawn}
1425 || $item->{itemlost}
1427 || $item->{notforloan}
1428 || ($transfertwhen ne ''))
1430 $wthdrawn_count++ if $item->{wthdrawn};
1431 $itemlost_count++ if $item->{itemlost};
1432 $itemdamaged_count++ if $item->{damaged};
1433 $item_in_transit_count++ if $transfertwhen ne '';
1434 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1437 my $key = $prefix . $item->{status};
1438 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1439 $other_items->{$key}->{$_} = $item->{$_};
1441 $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1442 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1443 $other_items->{$key}->{count}++ if $item->{$hbranch};
1444 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1445 $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1449 $can_place_holds = 1;
1451 $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1452 foreach (qw(branchname itemcallnumber)) {
1453 $available_items->{$prefix}->{$_} = $item->{$_};
1455 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1456 $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1459 } # notforloan, item level and biblioitem level
1460 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1462 ( C4::Context->preference('maxItemsinSearchResults') )
1463 ? C4::Context->preference('maxItemsinSearchResults') - 1
1465 for my $key ( sort keys %$onloan_items ) {
1466 (++$onloanitemscount > $maxitems) and last;
1467 push @onloan_items_loop, $onloan_items->{$key};
1469 for my $key ( sort keys %$other_items ) {
1470 (++$otheritemscount > $maxitems) and last;
1471 push @other_items_loop, $other_items->{$key};
1473 for my $key ( sort keys %$available_items ) {
1474 (++$availableitemscount > $maxitems) and last;
1475 push @available_items_loop, $available_items->{$key}
1478 # XSLT processing of some stuff
1479 if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1480 $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1481 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1484 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1485 $can_place_holds = 0
1486 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1487 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1488 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1489 $oldbiblio->{items_count} = $items_count;
1490 $oldbiblio->{available_items_loop} = \@available_items_loop;
1491 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1492 $oldbiblio->{other_items_loop} = \@other_items_loop;
1493 $oldbiblio->{availablecount} = $available_count;
1494 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1495 $oldbiblio->{onloancount} = $onloan_count;
1496 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1497 $oldbiblio->{othercount} = $other_count;
1498 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1499 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1500 $oldbiblio->{itemlostcount} = $itemlost_count;
1501 $oldbiblio->{damagedcount} = $itemdamaged_count;
1502 $oldbiblio->{intransitcount} = $item_in_transit_count;
1503 $oldbiblio->{orderedcount} = $ordered_count;
1504 push( @newresults, $oldbiblio );
1509 #----------------------------------------------------------------------
1511 # Non-Zebra GetRecords#
1512 #----------------------------------------------------------------------
1516 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1522 $query, $simple_query, $sort_by_ref, $servers_ref,
1523 $results_per_page, $offset, $expanded_facet, $branches,
1526 warn "query =$query" if $DEBUG;
1527 my $result = NZanalyse($query);
1528 warn "results =$result" if $DEBUG;
1530 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1536 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1537 the list is built from an inverted index in the nozebra SQL table
1538 note that title is here only for convenience : the sorting will be very fast when requested on title
1539 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1544 my ( $string, $server ) = @_;
1545 # warn "---------" if $DEBUG;
1546 warn " NZanalyse" if $DEBUG;
1547 # warn "---------" if $DEBUG;
1549 # $server contains biblioserver or authorities, depending on what we search on.
1550 #warn "querying : $string on $server";
1551 $server = 'biblioserver' unless $server;
1553 # if we have a ", replace the content to discard temporarily any and/or/not inside
1555 if ( $string =~ /"/ ) {
1556 $string =~ s/"(.*?)"/__X__/;
1558 warn "commacontent : $commacontent" if $DEBUG;
1561 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1562 # then, call again NZanalyse with $left and $right
1563 # (recursive until we find a leaf (=> something without and/or/not)
1564 # delete repeated operator... Would then go in infinite loop
1565 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1568 #process parenthesis before.
1569 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1572 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1574 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1576 my $leftresult = NZanalyse( $left, $server );
1578 my $rightresult = NZanalyse( $right, $server );
1580 # OK, we have the results for right and left part of the query
1581 # depending of operand, intersect, union or exclude both lists
1582 # to get a result list
1583 if ( $operator eq ' and ' ) {
1584 return NZoperatorAND($leftresult,$rightresult);
1586 elsif ( $operator eq ' or ' ) {
1588 # just merge the 2 strings
1589 return $leftresult . $rightresult;
1591 elsif ( $operator eq ' not ' ) {
1592 return NZoperatorNOT($leftresult,$rightresult);
1596 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1600 warn "string :" . $string if $DEBUG;
1604 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1607 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1609 warn "no parenthesis. left : $left operator: $operator right: $right"
1612 # it's not a leaf, we have a and/or/not
1615 # reintroduce comma content if needed
1616 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1617 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1618 warn "node : $left / $operator / $right\n" if $DEBUG;
1619 my $leftresult = NZanalyse( $left, $server );
1620 my $rightresult = NZanalyse( $right, $server );
1621 warn " leftresult : $leftresult" if $DEBUG;
1622 warn " rightresult : $rightresult" if $DEBUG;
1623 # OK, we have the results for right and left part of the query
1624 # depending of operand, intersect, union or exclude both lists
1625 # to get a result list
1626 if ( $operator eq ' and ' ) {
1628 return NZoperatorAND($leftresult,$rightresult);
1630 elsif ( $operator eq ' or ' ) {
1632 # just merge the 2 strings
1633 return $leftresult . $rightresult;
1635 elsif ( $operator eq ' not ' ) {
1636 return NZoperatorNOT($leftresult,$rightresult);
1640 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1641 die "error : operand unknown : $operator for $string";
1644 # it's a leaf, do the real SQL query and return the result
1647 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1648 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1649 #remove trailing blank at the beginning
1651 warn "leaf:$string" if $DEBUG;
1653 # parse the string in in operator/operand/value again
1657 if ($string =~ /(.*)(>=|<=)(.*)/) {
1664 # warn "handling leaf... left:$left operator:$operator right:$right"
1666 unless ($operator) {
1667 if ($string =~ /(.*)(>|<|=)(.*)/) {
1672 "handling unless (operator)... left:$left operator:$operator right:$right"
1680 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1683 # automatic replace for short operators
1684 $left = 'title' if $left =~ '^ti$';
1685 $left = 'author' if $left =~ '^au$';
1686 $left = 'publisher' if $left =~ '^pb$';
1687 $left = 'subject' if $left =~ '^su$';
1688 $left = 'koha-Auth-Number' if $left =~ '^an$';
1689 $left = 'keyword' if $left =~ '^kw$';
1690 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1691 if ( $operator && $left ne 'keyword' ) {
1693 #do a specific search
1694 my $dbh = C4::Context->dbh;
1695 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1698 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1700 warn "$left / $operator / $right\n" if $DEBUG;
1702 # split each word, query the DB and build the biblionumbers result
1703 #sanitizing leftpart
1704 $left =~ s/^\s+|\s+$//;
1705 foreach ( split / /, $right ) {
1707 $_ =~ s/^\s+|\s+$//;
1709 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1710 $sth->execute( $server, $left, $_ )
1711 or warn "execute failed: $!";
1712 while ( my ( $line, $value ) = $sth->fetchrow ) {
1714 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1715 # otherwise, fill the result
1716 $biblionumbers .= $line
1717 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1718 warn "result : $value "
1719 . ( $right =~ /\d/ ) . "=="
1720 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1723 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1725 warn "NZAND" if $DEBUG;
1726 $results = NZoperatorAND($biblionumbers,$results);
1729 $results = $biblionumbers;
1735 #do a complete search (all indexes), if index='kw' do complete search too.
1736 my $dbh = C4::Context->dbh;
1739 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1742 # split each word, query the DB and build the biblionumbers result
1743 foreach ( split / /, $string ) {
1744 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1745 warn "search on all indexes on $_" if $DEBUG;
1748 $sth->execute( $server, $_ );
1749 while ( my $line = $sth->fetchrow ) {
1750 $biblionumbers .= $line;
1753 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1755 $results = NZoperatorAND($biblionumbers,$results);
1758 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1759 $results = $biblionumbers;
1763 warn "return : $results for LEAF : $string" if $DEBUG;
1766 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1770 my ($rightresult, $leftresult)=@_;
1772 my @leftresult = split /;/, $leftresult;
1773 warn " @leftresult / $rightresult \n" if $DEBUG;
1775 # my @rightresult = split /;/,$leftresult;
1778 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1779 # the result is stored twice, to have the same weight for AND than OR.
1780 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1781 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1782 foreach (@leftresult) {
1785 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1786 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1787 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1789 "$value-$countvalue;$value-$countvalue;";
1792 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1793 return $finalresult;
1797 my ($rightresult, $leftresult)=@_;
1798 return $rightresult.$leftresult;
1802 my ($leftresult, $rightresult)=@_;
1804 my @leftresult = split /;/, $leftresult;
1806 # my @rightresult = split /;/,$leftresult;
1808 foreach (@leftresult) {
1810 $value=$1 if $value=~m/(.*)-\d+$/;
1811 unless ($rightresult =~ "$value-") {
1812 $finalresult .= "$_;";
1815 return $finalresult;
1820 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1827 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1828 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1830 # order title asc by default
1831 # $ordering = '1=36 <i' unless $ordering;
1832 $results_per_page = 20 unless $results_per_page;
1833 $offset = 0 unless $offset;
1834 my $dbh = C4::Context->dbh;
1837 # order by POPULARITY
1839 if ( $ordering =~ /popularity/ ) {
1843 # popularity is not in MARC record, it's builded from a specific query
1845 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1846 foreach ( split /;/, $biblionumbers ) {
1847 my ( $biblionumber, $title ) = split /,/, $_;
1848 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1849 $sth->execute($biblionumber);
1850 my $popularity = $sth->fetchrow || 0;
1852 # hint : the key is popularity.title because we can have
1853 # many results with the same popularity. In this cas, sub-ordering is done by title
1854 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1855 # (un-frequent, I agree, but we won't forget anything that way ;-)
1856 $popularity{ sprintf( "%10d", $popularity ) . $title
1857 . $biblionumber } = $biblionumber;
1860 # sort the hash and return the same structure as GetRecords (Zebra querying)
1863 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1864 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1865 $result_hash->{'RECORDS'}[ $numbers++ ] =
1866 $result{ $popularity{$key} }->as_usmarc();
1869 else { # sort popularity ASC
1870 foreach my $key ( sort ( keys %popularity ) ) {
1871 $result_hash->{'RECORDS'}[ $numbers++ ] =
1872 $result{ $popularity{$key} }->as_usmarc();
1875 my $finalresult = ();
1876 $result_hash->{'hits'} = $numbers;
1877 $finalresult->{'biblioserver'} = $result_hash;
1878 return $finalresult;
1884 elsif ( $ordering =~ /author/ ) {
1886 foreach ( split /;/, $biblionumbers ) {
1887 my ( $biblionumber, $title ) = split /,/, $_;
1888 my $record = GetMarcBiblio($biblionumber);
1890 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1891 $author = $record->subfield( '200', 'f' );
1892 $author = $record->subfield( '700', 'a' ) unless $author;
1895 $author = $record->subfield( '100', 'a' );
1898 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1899 # and we don't want to get only 1 result for each of them !!!
1900 $result{ $author . $biblionumber } = $record;
1903 # sort the hash and return the same structure as GetRecords (Zebra querying)
1906 if ( $ordering eq 'author_za' ) { # sort by author desc
1907 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1908 $result_hash->{'RECORDS'}[ $numbers++ ] =
1909 $result{$key}->as_usmarc();
1912 else { # sort by author ASC
1913 foreach my $key ( sort ( keys %result ) ) {
1914 $result_hash->{'RECORDS'}[ $numbers++ ] =
1915 $result{$key}->as_usmarc();
1918 my $finalresult = ();
1919 $result_hash->{'hits'} = $numbers;
1920 $finalresult->{'biblioserver'} = $result_hash;
1921 return $finalresult;
1924 # ORDER BY callnumber
1927 elsif ( $ordering =~ /callnumber/ ) {
1929 foreach ( split /;/, $biblionumbers ) {
1930 my ( $biblionumber, $title ) = split /,/, $_;
1931 my $record = GetMarcBiblio($biblionumber);
1933 my $frameworkcode = GetFrameworkCode($biblionumber);
1934 my ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField( 'items.itemcallnumber', $frameworkcode);
1935 ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField('biblioitems.callnumber', $frameworkcode)
1936 unless $callnumber_tag;
1937 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1938 $callnumber = $record->subfield( '200', 'f' );
1940 $callnumber = $record->subfield( '100', 'a' );
1943 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1944 # and we don't want to get only 1 result for each of them !!!
1945 $result{ $callnumber . $biblionumber } = $record;
1948 # sort the hash and return the same structure as GetRecords (Zebra querying)
1951 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1952 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1953 $result_hash->{'RECORDS'}[ $numbers++ ] =
1954 $result{$key}->as_usmarc();
1957 else { # sort by title ASC
1958 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1959 $result_hash->{'RECORDS'}[ $numbers++ ] =
1960 $result{$key}->as_usmarc();
1963 my $finalresult = ();
1964 $result_hash->{'hits'} = $numbers;
1965 $finalresult->{'biblioserver'} = $result_hash;
1966 return $finalresult;
1968 elsif ( $ordering =~ /pubdate/ ) { #pub year
1970 foreach ( split /;/, $biblionumbers ) {
1971 my ( $biblionumber, $title ) = split /,/, $_;
1972 my $record = GetMarcBiblio($biblionumber);
1973 my ( $publicationyear_tag, $publicationyear_subfield ) =
1974 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1975 my $publicationyear =
1976 $record->subfield( $publicationyear_tag,
1977 $publicationyear_subfield );
1979 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1980 # and we don't want to get only 1 result for each of them !!!
1981 $result{ $publicationyear . $biblionumber } = $record;
1984 # sort the hash and return the same structure as GetRecords (Zebra querying)
1987 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1988 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1989 $result_hash->{'RECORDS'}[ $numbers++ ] =
1990 $result{$key}->as_usmarc();
1993 else { # sort by pub year ASC
1994 foreach my $key ( sort ( keys %result ) ) {
1995 $result_hash->{'RECORDS'}[ $numbers++ ] =
1996 $result{$key}->as_usmarc();
1999 my $finalresult = ();
2000 $result_hash->{'hits'} = $numbers;
2001 $finalresult->{'biblioserver'} = $result_hash;
2002 return $finalresult;
2008 elsif ( $ordering =~ /title/ ) {
2010 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2012 foreach ( split /;/, $biblionumbers ) {
2013 my ( $biblionumber, $title ) = split /,/, $_;
2015 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2016 # and we don't want to get only 1 result for each of them !!!
2017 # hint & speed improvement : we can order without reading the record
2018 # so order, and read records only for the requested page !
2019 $result{ $title . $biblionumber } = $biblionumber;
2022 # sort the hash and return the same structure as GetRecords (Zebra querying)
2025 if ( $ordering eq 'title_az' ) { # sort by title desc
2026 foreach my $key ( sort ( keys %result ) ) {
2027 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2030 else { # sort by title ASC
2031 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2032 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2036 # limit the $results_per_page to result size if it's more
2037 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2039 # for the requested page, replace biblionumber by the complete record
2040 # speed improvement : avoid reading too much things
2042 my $counter = $offset ;
2043 $counter <= $offset + $results_per_page ;
2047 $result_hash->{'RECORDS'}[$counter] =
2048 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2050 my $finalresult = ();
2051 $result_hash->{'hits'} = $numbers;
2052 $finalresult->{'biblioserver'} = $result_hash;
2053 return $finalresult;
2060 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2063 foreach ( split /;/, $biblionumbers ) {
2064 my ( $biblionumber, $title ) = split /,/, $_;
2065 $title =~ /(.*)-(\d)/;
2070 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2071 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2072 # biblio N has ranking = 6
2073 $count_ranking{$biblionumber} += $ranking;
2076 # build the result by "inverting" the count_ranking hash
2077 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2079 foreach ( keys %count_ranking ) {
2080 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2083 # sort the hash and return the same structure as GetRecords (Zebra querying)
2086 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2087 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2090 # limit the $results_per_page to result size if it's more
2091 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2093 # for the requested page, replace biblionumber by the complete record
2094 # speed improvement : avoid reading too much things
2096 my $counter = $offset ;
2097 $counter <= $offset + $results_per_page ;
2101 $result_hash->{'RECORDS'}[$counter] =
2102 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2103 if $result_hash->{'RECORDS'}[$counter];
2105 my $finalresult = ();
2106 $result_hash->{'hits'} = $numbers;
2107 $finalresult->{'biblioserver'} = $result_hash;
2108 return $finalresult;
2112 END { } # module clean-up code here (global destructor)
2119 Koha Developement team <info@koha.org>