3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
21 use C4::Biblio; # GetMarcFromKohaField
22 use C4::Koha; # getFacets
24 use C4::Dates qw(format_date);
26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
28 # set the version for version checking
31 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
36 C4::Search - Functions for searching the Koha catalog.
40 see opac/opac-search.pl or catalogue/search.pl for example of usage
44 This module provides searching functions for Koha's bibliographic databases
62 # make all your functions, whether exported or not;
64 =head2 findseealso($dbh,$fields);
66 C<$dbh> is a link to the DB handler.
69 my $dbh =C4::Context->dbh;
71 C<$fields> is a reference to the fields array
73 This function modify the @$fields array and add related fields to search on.
78 my ( $dbh, $fields ) = @_;
79 my $tagslib = GetMarcStructure( 1 );
80 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
81 my ($tag) = substr( @$fields[$i], 1, 3 );
82 my ($subfield) = substr( @$fields[$i], 4, 1 );
83 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
84 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
90 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
96 my $dbh = C4::Context->dbh;
97 my $result = TransformMarcToKoha( $dbh, $record, '' );
102 my ( $biblionumber, $title );
104 # search duplicate on ISBN, easy and fast..
105 # ... normalize first
106 if ( $result->{isbn} ) {
107 $result->{isbn} =~ s/\(.*$//;
108 $result->{isbn} =~ s/\s+$//;
110 #$search->{'avoidquerylog'}=1;
111 if ( $result->{isbn} ) {
112 $query = "isbn=$result->{isbn}";
115 $result->{title} =~ s /\\//g;
116 $result->{title} =~ s /\"//g;
117 $result->{title} =~ s /\(//g;
118 $result->{title} =~ s /\)//g;
119 # remove valid operators
120 $result->{title} =~ s/(and|or|not)//g;
121 $query = "ti,ext=$result->{title}";
122 $query .= " and itemtype=$result->{itemtype}" if ($result->{itemtype});
123 if ($result->{author}){
124 $result->{author} =~ s /\\//g;
125 $result->{author} =~ s /\"//g;
126 $result->{author} =~ s /\(//g;
127 $result->{author} =~ s /\)//g;
128 # remove valid operators
129 $result->{author} =~ s/(and|or|not)//g;
130 $query .= " and au,ext=$result->{author}";
133 my ($error,$searchresults) =
134 SimpleSearch($query); # FIXME :: hardcoded !
136 foreach my $possible_duplicate_record (@$searchresults) {
138 MARC::Record->new_from_usmarc($possible_duplicate_record);
139 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
141 # FIXME :: why 2 $biblionumber ?
143 push @results, $result->{'biblionumber'};
144 push @results, $result->{'title'};
152 ($error,$results) = SimpleSearch($query,@servers);
154 This function provides a simple search API on the bibliographic catalog
160 * $query can be a simple keyword or a complete CCL query configured with your ccl.properties
161 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
164 * $error is a empty unless an error is detected
165 * \@results is an array of records.
167 =item C<usage in the script:>
171 my ($error, $marcresults) = SimpleSearch($query);
173 if (defined $error) {
174 $template->param(query_error => $error);
175 warn "error: ".$error;
176 output_html_with_http_headers $input, $cookie, $template->output;
180 my $hits = scalar @$marcresults;
183 for(my $i=0;$i<$hits;$i++) {
185 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
186 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
188 #build the hash for the template.
189 $resultsloop{highlight} = ($i % 2)?(1):(0);
190 $resultsloop{title} = $biblio->{'title'};
191 $resultsloop{subtitle} = $biblio->{'subtitle'};
192 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
193 $resultsloop{author} = $biblio->{'author'};
194 $resultsloop{publishercode} = $biblio->{'publishercode'};
195 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
197 push @results, \%resultsloop;
199 $template->param(result=>\@results);
205 if (C4::Context->preference('NoZebra')) {
206 my $result = NZorder(NZanalyse($query))->{'biblioserver'};
207 my $search_result = ( $result->{hits} && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
208 return (undef,$search_result);
214 return ( "No query entered", undef ) unless $query;
216 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
217 @servers =("biblioserver") unless @servers;
219 # Initialize & Search Zebra
220 for ( my $i = 0 ; $i < @servers ; $i++ ) {
222 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
223 $tmpresults[$i] = $zconns[$i]->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
227 $zconns[$i]->errmsg() . " ("
228 . $zconns[$i]->errcode() . ") "
229 . $zconns[$i]->addinfo() . " "
230 . $zconns[$i]->diagset();
232 return ( $error, undef ) if $zconns[$i]->errcode();
235 # caught a ZOOM::Exception
239 . $@->addinfo() . " "
242 return ( $error, undef );
247 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
248 $ev = $zconns[ $i - 1 ]->last_event();
249 if ( $ev == ZOOM::Event::ZEND ) {
250 $hits = $tmpresults[ $i - 1 ]->size();
253 for ( my $j = 0 ; $j < $hits ; $j++ ) {
254 my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
255 push @results, $record;
259 return ( undef, \@results );
265 ($error,$results) = getRecords($query,@servers);
267 The all singing, all dancing, multi-server, asynchronous, scanning,
268 searching, record nabbing, facet-building function
270 See verbse embedded documentation.
277 $koha_query, $simple_query, $sort_by_ref,
278 $servers_ref, $results_per_page, $offset,
279 $expanded_facet, $branches, $query_type,
283 my @servers = @$servers_ref;
284 my @sort_by = @$sort_by_ref;
286 # Create the zoom connection and query object
290 my $results_hashref = ();
293 my $facets_counter = ();
294 my $facets_info = ();
295 my $facets = getFacets();
297 #### INITIALIZE SOME VARS USED FOR FACETED RESULTS
298 my @facets_loop; # stores the ref to array of hashes for template facets loop
299 for ( my $i = 0 ; $i < @servers ; $i++ ) {
300 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
302 # perform the search, create the results objects
303 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
305 if ( $servers[$i] =~ /biblioserver/ ) {
306 $query_to_use = $koha_query;
309 $query_to_use = $simple_query;
312 #$query_to_use = $simple_query if $scan;
313 warn $simple_query if ($scan and $DEBUG);
315 # Check if we've got a query_type defined, if so, use it
319 if ( $query_type =~ /^ccl/ ) {
320 $query_to_use =~ s/\:/\=/g; # change : to = last minute (FIXME)
321 $results[$i] = $zconns[$i]->search( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) );
323 elsif ( $query_type =~ /^cql/ ) {
324 $results[$i] = $zconns[$i]->search( new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
326 elsif ( $query_type =~ /^pqf/ ) {
327 $results[$i] = $zconns[$i]->search( new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
332 $results[$i] = $zconns[$i]->scan( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) );
335 $results[$i] = $zconns[$i]->search( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ));
340 warn "WARNING: query problem with $query_to_use " . $@;
343 # Concatenate the sort_by limits and pass them to the results object
344 # Note: sort will override rank
346 foreach my $sort (@sort_by) {
347 if ($sort eq "author_az") {
348 $sort_by.="1=1003 <i ";
350 elsif ($sort eq "author_za") {
351 $sort_by.="1=1003 >i ";
353 elsif ($sort eq "popularity_asc") {
354 $sort_by.="1=9003 <i ";
356 elsif ($sort eq "popularity_dsc") {
357 $sort_by.="1=9003 >i ";
359 elsif ($sort eq "call_number_asc") {
360 $sort_by.="1=20 <i ";
362 elsif ($sort eq "call_number_dsc") {
363 $sort_by.="1=20 >i ";
365 elsif ($sort eq "pubdate_asc") {
366 $sort_by.="1=31 <i ";
368 elsif ($sort eq "pubdate_dsc") {
369 $sort_by.="1=31 >i ";
371 elsif ($sort eq "acqdate_asc") {
372 $sort_by.="1=32 <i ";
374 elsif ($sort eq "acqdate_dsc") {
375 $sort_by.="1=32 >i ";
377 elsif ($sort eq "title_az") {
380 elsif ($sort eq "title_za") {
385 if ( $results[$i]->sort( "yaz", $sort_by ) < 0) {
386 warn "WARNING sort $sort_by failed";
391 # The big moment: asynchronously retrieve results from all servers
392 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
393 my $ev = $zconns[ $i - 1 ]->last_event();
394 if ( $ev == ZOOM::Event::ZEND ) {
395 next unless $results[ $i - 1 ];
396 my $size = $results[ $i - 1 ]->size();
399 #$results_hash->{'server'} = $servers[$i-1];
401 # loop through the results
402 $results_hash->{'hits'} = $size;
404 if ( $offset + $results_per_page <= $size ) {
405 $times = $offset + $results_per_page;
410 for ( my $j = $offset ; $j < $times ; $j++ )
411 { #(($offset+$count<=$size) ? ($offset+$count):$size) ; $j++){
416 ## This is just an index scan
418 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
419 # here we create a minimal MARC record and hand it off to the
420 # template just like a normal result ... perhaps not ideal, but
422 my $tmprecord = MARC::Record->new();
423 $tmprecord->encoding('UTF-8');
426 # the minimal record in author/title (depending on MARC flavour)
427 if ( C4::Context->preference("marcflavour") eq
430 $tmptitle = MARC::Field->new(
437 $tmptitle = MARC::Field->new('245', ' ', ' ',a => $term,);
438 $tmpauthor = MARC::Field->new('100', ' ', ' ',a => $occ,);
440 $tmprecord->append_fields($tmptitle);
441 $tmprecord->append_fields($tmpauthor);
442 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
447 $record = $results[ $i - 1 ]->record($j)->raw();
448 # warn "RECORD $j:".$record;
449 $results_hash->{'RECORDS'}[$j] =
450 $record; # making a reference to a hash
451 # Fill the facets while we're looping
452 $facet_record = MARC::Record->new_from_usmarc($record);
454 # warn $servers[$i-1].$facet_record->title();
455 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
456 if ( $facets->[$k] ) {
458 for my $tag ( @{ $facets->[$k]->{'tags'} } ) {
459 push @fields, $facet_record->field($tag);
461 for my $field (@fields) {
462 my @subfields = $field->subfields();
463 for my $subfield (@subfields) {
464 my ( $code, $data ) = @$subfield;
466 $facets->[$k]->{'subfield'} )
468 $facets_counter->{ $facets->[$k]
469 ->{'link_value'} }->{$data}++;
473 $facets_info->{ $facets->[$k]->{'link_value'} }
475 $facets->[$k]->{'label_value'};
476 $facets_info->{ $facets->[$k]->{'link_value'} }
477 ->{'expanded'} = $facets->[$k]->{'expanded'};
482 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
485 # warn "connection ", $i-1, ": $size hits";
486 # warn $results[$i-1]->record(0)->render() if $size > 0;
490 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
491 keys %$facets_counter
495 my $number_of_facets;
496 my @this_facets_array;
499 $facets_counter->{$link_value}
500 ->{$b} <=> $facets_counter->{$link_value}->{$a}
501 } keys %{ $facets_counter->{$link_value} }
505 if ( ( $number_of_facets < 6 )
506 || ( $expanded_facet eq $link_value )
507 || ( $facets_info->{$link_value}->{'expanded'} ) )
510 # Sanitize the link value ), ( will cause errors with CCL,
511 my $facet_link_value = $one_facet;
512 $facet_link_value =~ s/(\(|\))/ /g;
514 # fix the length that will display in the label,
515 my $facet_label_value = $one_facet;
516 $facet_label_value = substr( $one_facet, 0, 20 ) . "..."
517 unless length($facet_label_value) <= 20;
519 # if it's a branch, label by the name, not the code,
520 if ( $link_value =~ /branch/ ) {
522 $branches->{$one_facet}->{'branchname'};
525 # but we're down with the whole label being in the link's title.
526 my $facet_title_value = $one_facet;
528 push @this_facets_array,
532 $facets_counter->{$link_value}->{$one_facet},
533 facet_label_value => $facet_label_value,
534 facet_title_value => $facet_title_value,
535 facet_link_value => $facet_link_value,
536 type_link_value => $link_value,
542 # handle expanded option
543 unless ( $facets_info->{$link_value}->{'expanded'} ) {
545 if ( ( $number_of_facets > 6 )
546 && ( $expanded_facet ne $link_value ) );
551 type_link_value => $link_value,
552 type_id => $link_value . "_id",
554 $facets_info->{$link_value}->{'label_value'},
555 facets => \@this_facets_array,
556 expandable => $expandable,
557 expand => $link_value,
563 return ( undef, $results_hashref, \@facets_loop );
567 sub _remove_stopwords {
568 my ($operand,$index) = @_;
569 my @stopwords_removed;
571 # phrase and exact-qualified indexes shouldn't have stopwords removed
572 if ($index!~m/phr|ext/){
574 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
575 # we use IsAlpha unicode definition, to deal correctly with diacritics.
576 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
577 # is a stopword, we'd get "çon" and wouldn't find anything...
578 foreach (keys %{C4::Context->stopwords}) {
579 next if ($_ =~/(and|or|not)/); # don't remove operators
580 if ($operand =~ /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/) {
581 $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
582 $operand=~ s/^$_\P{IsAlpha}/ /gi;
583 $operand=~ s/\P{IsAlpha}$_$/ /gi;
584 push @stopwords_removed, $_;
588 return ($operand, \@stopwords_removed);
592 sub _detect_truncation {
593 my ($operand,$index) = @_;
594 my (@nontruncated,@righttruncated,@lefttruncated,@rightlefttruncated,@regexpr);
596 my @wordlist= split (/\s/,$operand);
597 foreach my $word (@wordlist){
598 if ($word=~s/^\*([^\*]+)\*$/$1/){
599 push @rightlefttruncated,$word;
601 elsif($word=~s/^\*([^\*]+)$/$1/){
602 push @lefttruncated,$word;
604 elsif ($word=~s/^([^\*]+)\*$/$1/){
605 push @righttruncated,$word;
607 elsif (index($word,"*")<0){
608 push @nontruncated,$word;
614 return (\@nontruncated,\@righttruncated,\@lefttruncated,\@rightlefttruncated,\@regexpr);
618 sub _build_stemmed_operand {
621 # FIXME: the locale should be set based on the user's language and/or search choice
622 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
623 # FIXME: these should be stored in the db so the librarian can modify the behavior
624 $stemmer->add_exceptions(
631 my @words = split( / /, $operand );
632 my $stems = $stemmer->stem(@words);
633 for my $stem (@$stems) {
634 $stemmed_operand .= "$stem";
635 $stemmed_operand .= "?" unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
636 $stemmed_operand .= " ";
638 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
639 return $stemmed_operand;
643 sub _build_weighted_query {
644 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
645 # pretty well but could work much better if we had a smarter query parser
646 my ($operand,$stemmed_operand,$index) = @_;
647 my $stemming = C4::Context->preference("QueryStemming") || 0;
648 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
649 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
651 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
653 # Keyword, or, no index specified
654 if ( ( $index eq 'kw' ) || ( !$index ) ) {
655 $weighted_query .= "Title-cover,ext,r1=\"$operand\""; # exact title-cover
656 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
657 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
658 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
659 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
660 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy, word list
661 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\"" if ($stemming and $stemmed_operand); # add stemming, right truncation
662 $weighted_query .= " or wrdl,r9=\"$operand\"";
663 # embedded sorting: 0 a-z; 1 z-a
664 # $weighted_query .= ") or (sort1,aut=1";
667 # Barcode searches should skip this process
668 elsif ( $index eq 'bc' ) {
669 $weighted_query .= "bc=\"$operand\"";
672 # if the index already has more than one qualifier, wrap the operand
673 # in quotes and pass it back (assumption is that the user knows what they
674 # are doing and won't appreciate us mucking up their query
675 elsif ($index =~ ',') {
676 $weighted_query .=" $index=\"$operand\"";
679 #TODO: build better cases based on specific search indexes
681 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
682 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
683 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
684 $weighted_query .= " or $index,rt,wrdl,r3=\"$operand\""; # word list index
687 $weighted_query .= "))"; # close rank specification
688 return $weighted_query;
694 $simple_query, $query_cgi,
696 $limit_cgi, $limit_desc,
697 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
699 Build queries and limits in CCL, CGI, Human,
700 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
702 See verbose embedded documentation.
708 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan) = @_;
710 warn "---------" if $DEBUG;
711 warn "Enter buildQuery" if $DEBUG;
712 warn "---------" if $DEBUG;
715 my @operators = @$operators if $operators;
716 my @indexes = @$indexes if $indexes;
717 my @operands = @$operands if $operands;
718 my @limits = @$limits if $limits;
719 my @sort_by = @$sort_by if $sort_by;
721 my $stemming = C4::Context->preference("QueryStemming") || 0;
722 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
723 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
724 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
725 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
727 # no stemming/weight/fuzzy in NoZebra
728 if (C4::Context->preference("NoZebra")) {
734 my $query = $operands[0];
735 my $simple_query = $operands[0];
737 # initialize the variables we're passing back
746 my $stopwords_removed; # flag to determine if stopwords have been removed
748 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
750 if ( $query =~ /^ccl=/ ) {
751 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
753 if ( $query =~ /^cql=/ ) {
754 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
756 if ( $query =~ /^pqf=/ ) {
757 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
760 # pass nested queries directly
761 # FIXME: need better handling of some of these variables in this case
762 if ( $query =~ /(\(|\))/ ) {
763 return ( undef, $query, $simple_query, $query_cgi, $query, $limit, $limit_cgi, $limit_desc, $stopwords_removed, 'ccl' );
766 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
767 # query operands and indexes and add stemming, truncation, field weighting, etc.
768 # Once we do so, we'll end up with a value in $query, just like if we had an
769 # incoming $query from the user
771 $query = ""; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
772 my $previous_operand; # a flag used to keep track if there was a previous query
773 # if there was, we can apply the current operator
775 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
777 # COMBINE OPERANDS, INDEXES AND OPERATORS
778 if ( $operands[$i] ) {
780 # A flag to determine whether or not to add the index to the query
783 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
784 if ($operands[$i] =~ /(:|=)/ || $scan) {
787 $remove_stopwords = 0;
789 my $operand = $operands[$i];
790 my $index = $indexes[$i];
792 # Add index-specific attributes
793 # Date of Publication
794 if ($index eq 'yr') {
795 $index .=",st-numeric";
797 ($stemming,$auto_truncation,$weight_fields, $fuzzy_enabled, $remove_stopwords) = (0,0,0,0,0);
799 # Date of Acquisition
800 elsif ($index eq 'acqdate') {
801 $index.=",st-date-normalized";
803 ($stemming,$auto_truncation,$weight_fields, $fuzzy_enabled, $remove_stopwords) = (0,0,0,0,0);
806 # Set default structure attribute (word list)
808 unless (!$index || $index =~ /(st-|phr|ext|wrdl)/) {
809 $struct_attr = ",wrdl";
812 # Some helpful index variants
813 my $index_plus = $index.$struct_attr.":" if $index;
814 my $index_plus_comma=$index.$struct_attr."," if $index;
817 if ($remove_stopwords) {
818 ($operand, $stopwords_removed) = _remove_stopwords($operand,$index);
819 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
820 warn "REMOVED STOPWORDS: @$stopwords_removed" if ($stopwords_removed && $DEBUG);
824 my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
825 my $truncated_operand;
826 ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index);
827 warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG;
830 if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
831 # Don't field weight or add the index to the query, we do it here
833 undef $weight_fields;
834 my $previous_truncation_operand;
835 if (scalar(@$nontruncated)>0) {
836 $truncated_operand.= "$index_plus @$nontruncated ";
837 $previous_truncation_operand = 1;
839 if (scalar(@$righttruncated)>0){
840 $truncated_operand .= "and " if $previous_truncation_operand;
841 $truncated_operand .= "$index_plus_comma"."rtrn:@$righttruncated ";
842 $previous_truncation_operand = 1;
844 if (scalar(@$lefttruncated)>0){
845 $truncated_operand .= "and " if $previous_truncation_operand;
846 $truncated_operand .= "$index_plus_comma"."ltrn:@$lefttruncated ";
847 $previous_truncation_operand = 1;
849 if (scalar(@$rightlefttruncated)>0){
850 $truncated_operand .= "and " if $previous_truncation_operand;
851 $truncated_operand .= "$index_plus_comma"."rltrn:@$rightlefttruncated ";
852 $previous_truncation_operand = 1;
855 $operand = $truncated_operand if $truncated_operand;
856 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
860 $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
861 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
863 # Handle Field Weighting
864 my $weighted_operand;
865 $weighted_operand = _build_weighted_query($operand,$stemmed_operand,$index) if $weight_fields;
866 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
867 $operand = $weighted_operand if $weight_fields;
868 $indexes_set = 1 if $weight_fields;
870 # If there's a previous operand, we need to add an operator
871 if ($previous_operand) {
873 # User-specified operator
874 if ( $operators[$i-1] ) {
875 $query .= " $operators[$i-1] ";
876 $query .= " $index_plus " unless $indexes_set;
877 $query .= " $operand";
878 $query_cgi .="&op=$operators[$i-1]";
879 $query_cgi .="&idx=$index" if $index;
880 $query_cgi .="&q=$operands[$i]" if $operands[$i];
881 $query_desc .=" $operators[$i-1] $index_plus $operands[$i]";
884 # Default operator is and
887 $query .= "$index_plus " unless $indexes_set;
888 $query .= "$operand";
889 $query_cgi .="&op=and&idx=$index" if $index;
890 $query_cgi .="&q=$operands[$i]" if $operands[$i];
891 $query_desc .= " and $index_plus $operands[$i]";
895 # There isn't a pervious operand, don't need an operator
897 # Field-weighted queries already have indexes set
898 $query .=" $index_plus " unless $indexes_set;
900 $query_desc .= " $index_plus $operands[$i]";
901 $query_cgi.="&idx=$index" if $index;
902 $query_cgi.="&q=$operands[$i]" if $operands[$i];
903 $previous_operand = 1;
908 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
912 my $availability_limit;
913 foreach my $this_limit (@limits) {
914 if ( $this_limit =~ /available/ ) {
915 # available is defined as (items.notloan is NULL) and (items.itemlost > 0 or NULL) (last clause handles NULL values for lost in zebra)
916 # all records not indexed in the onloan register and allrecords not indexed in the lost register, or where the value of lost is equal to or less than 0
917 $availability_limit .="( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and ((lost,st-numeric <= 0) or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
918 $limit_cgi .= "&limit=available";
922 # group_OR_limits, prefixed by mc-
923 # OR every member of the group
924 elsif ( $this_limit =~ /mc/ ) {
925 $group_OR_limits .= " or " if $group_OR_limits;
926 $limit_desc .=" or " if $group_OR_limits;
927 $group_OR_limits .= "$this_limit";
928 $limit_cgi .="&limit=$this_limit";
929 $limit_desc .= " $this_limit";
934 $limit .= " and " if $limit || $query;
935 $limit .= "$this_limit";
936 $limit_cgi .="&limit=$this_limit";
937 $limit_desc .=" $this_limit";
940 if ($group_OR_limits) {
941 $limit.=" and " if ($query || $limit );
942 $limit.="($group_OR_limits)";
944 if ($availability_limit) {
945 $limit.=" and " if ($query || $limit );
946 $limit.="($availability_limit)";
949 # Normalize the query and limit strings
952 for ($query, $query_desc, $limit, $limit_desc) {
953 $_ =~ s/ / /g; # remove extra spaces
954 $_ =~ s/^ //g; # remove any beginning spaces
955 $_ =~ s/ $//g; # remove any ending spaces
956 $_ =~ s/==/=/g; # remove double == from query
959 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
961 # append the limit to the query
966 warn "QUERY:".$query;
967 warn "QUERY CGI:".$query_cgi;
968 warn "QUERY DESC:".$query_desc;
969 warn "LIMIT:".$limit;
970 warn "LIMIT CGI:".$limit_cgi;
971 warn "LIMIT DESC:".$limit_desc;
973 warn "Leave buildQuery";
976 return ( undef, $query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type );
981 Format results in a form suitable for passing to the template
985 # IMO this subroutine is pretty messy still -- it's responsible for
986 # building the HTML output for the template
988 my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
989 my $dbh = C4::Context->dbh;
994 # add search-term highlighting via <span>s on the search terms
995 my $span_terms_hashref;
996 for my $span_term ( split( / /, $searchdesc ) ) {
997 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
998 $span_terms_hashref->{$span_term}++;
1001 #Build branchnames hash
1003 #get branch information.....
1006 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1007 ; # FIXME : use C4::Koha::GetBranches
1009 while ( my $bdata = $bsth->fetchrow_hashref ) {
1010 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1013 my $lsch = $dbh->prepare("SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'");
1015 while (my $ldata = $lsch->fetchrow_hashref ) {
1016 $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1019 #Build itemtype hash
1020 #find itemtype & itemtype image
1023 $dbh->prepare("SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes");
1025 while ( my $bdata = $bsth->fetchrow_hashref ) {
1026 $itemtypes{ $bdata->{'itemtype'} }->{description} =
1027 $bdata->{'description'};
1028 $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1029 $itemtypes{ $bdata->{'itemtype'} }->{summary} = $bdata->{'summary'};
1030 $itemtypes{ $bdata->{'itemtype'} }->{notforloan} = $bdata->{'notforloan'};
1033 #search item field code
1034 my $sth = $dbh->prepare("SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'");
1036 my ($itemtag) = $sth->fetchrow;
1038 # get notforloan authorised value list
1039 $sth = $dbh->prepare("SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''");
1041 my ($notforloan_authorised_value) = $sth->fetchrow;
1043 ## find column names of items related to MARC
1044 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1046 my %subfieldstosearch;
1047 while ( ( my $column ) = $sth2->fetchrow ) {
1048 my ( $tagfield, $tagsubfield ) =
1049 &GetMarcFromKohaField( "items." . $column, "" );
1050 $subfieldstosearch{$column} = $tagsubfield;
1053 # handle which records to actually retrieve
1055 if ( $hits && $offset + $results_per_page <= $hits ) {
1056 $times = $offset + $results_per_page;
1062 # loop through all of the records we've retrieved
1063 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1065 $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1066 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1067 $oldbiblio->{result_number} = $i+1;
1069 # add imageurl to itemtype if there is one
1070 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1071 $oldbiblio->{imageurl} =
1072 $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1073 $oldbiblio->{description} =
1074 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1077 $oldbiblio->{imageurl} =
1078 getitemtypeimagesrc() . "/"
1079 . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1080 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1081 $oldbiblio->{description} =
1082 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1085 # Build summary if there is one (the summary is defined in the itemtypes table)
1086 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1087 if ($itemtypes{ $oldbiblio->{itemtype} }->{summary}) {
1088 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1089 my @fields = $marcrecord->fields();
1090 foreach my $field (@fields) {
1091 my $tag = $field->tag();
1092 my $tagvalue = $field->as_string();
1093 $summary =~ s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1095 my @subf = $field->subfields;
1096 for my $i (0..$#subf) {
1097 my $subfieldcode = $subf[$i][0];
1098 my $subfieldvalue = $subf[$i][1];
1099 my $tagsubf = $tag.$subfieldcode;
1100 $summary =~ s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1104 $summary =~ s/\[(.*?)]//g;
1105 $summary =~ s/\n/<br>/g;
1106 $oldbiblio->{summary} = $summary;
1109 # Add search-term highlighting to the whole record where they match using <span>s
1110 my $searchhighlightblob;
1111 for my $highlight_field ($marcrecord->fields) {
1112 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1113 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1115 my $field = $highlight_field->as_string();
1116 for my $term ( keys %$span_terms_hashref ) {
1117 if (($field =~ /$term/i) && (length($term) > 3)) {
1118 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1122 # FIXME: we might want to limit the size of these fields if we
1124 $searchhighlightblob .= $field." ... " if $match;
1126 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1128 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1129 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1131 # Add search-term highlighting to the title, subtitle, etc. fields
1132 for my $term ( keys %$span_terms_hashref ) {
1133 my $old_term = $term;
1134 if ( length($term) > 3 ) {
1135 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1136 $oldbiblio->{'title'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1137 $oldbiblio->{'subtitle'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1138 $oldbiblio->{'author'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1139 $oldbiblio->{'publishercode'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1140 $oldbiblio->{'place'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1141 $oldbiblio->{'pages'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1142 $oldbiblio->{'notes'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1143 $oldbiblio->{'size'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1148 # surely there's a better way to handle this
1150 $toggle = "#ffffcc";
1155 $oldbiblio->{'toggle'} = $toggle;
1157 # Pull out the items fields
1158 my @fields = $marcrecord->field($itemtag);
1160 # Setting item statuses for display
1161 my @available_items_loop;
1162 my @onloan_items_loop;
1163 my @other_items_loop;
1165 my $available_items;
1169 my $ordered_count = 0;
1170 my $available_count = 0;
1171 my $onloan_count = 0;
1172 my $longoverdue_count = 0;
1173 my $other_count = 0;
1174 my $wthdrawn_count = 0;
1175 my $itemlost_count = 0;
1176 my $itembinding_count = 0;
1177 my $itemdamaged_count = 0;
1178 my $can_place_holds = 0;
1179 my $items_count=scalar(@fields);
1181 my $maxitems = (C4::Context->preference('maxItemsinSearchResults')) ? C4::Context->preference('maxItemsinSearchResults')- 1 : 1;
1183 # loop through every item
1184 foreach my $field (@fields) {
1188 # populate the items hash
1189 foreach my $code ( keys %subfieldstosearch ) {
1190 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1192 # set item's branch name, use homebranch first, fall back to holdingbranch
1193 if ($item->{'homebranch'}) {
1194 $item->{'branchname'} = $branches{$item->{homebranch}};
1197 elsif ($item->{'holdingbranch'}) {
1198 $item->{'branchname'} = $branches{$item->{holdingbranch}};
1201 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1202 if ($item->{onloan}) {
1204 $onloan_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{due_date} }->{due_date} = format_date($item->{onloan});
1205 $onloan_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{due_date} }->{count}++ if $item->{'homebranch'};
1206 $onloan_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{due_date} }->{branchname} = $item->{'branchname'};
1207 $onloan_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{due_date} }->{location} = $locations{$item->{location}};
1208 $onloan_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{due_date} }->{itemcallnumber} = $item->{itemcallnumber};
1210 # if something's checked out and lost, mark it as 'long overdue'
1211 if ( $item->{itemlost} ) {
1212 $onloan_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{due_date} }->{longoverdue}++;
1213 $longoverdue_count++;
1215 # can place holds as long as this item isn't lost
1217 $can_place_holds = 1;
1221 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1224 if ( $item->{notforloan} == -1) {
1228 # item is withdrawn, lost or damaged
1229 if ( $item->{wthdrawn} || $item->{itemlost} || $item->{damaged} || $item->{notforloan} ) {
1230 $wthdrawn_count++ if $item->{wthdrawn};
1231 $itemlost_count++ if $item->{itemlost};
1232 $itemdamaged_count++ if $item->{damaged};
1233 $item->{status} = $item->{wthdrawn}."-".$item->{itemlost}."-".$item->{damaged}."-".$item->{notforloan};
1236 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{wthdrawn} = $item->{wthdrawn};
1237 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{itemlost} = $item->{itemlost};
1238 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{damaged} = $item->{damaged};
1239 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1241 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{count}++ if $item->{'homebranch'};
1242 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{branchname} = $item->{'branchname'};
1243 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{location} = $locations{$item->{location}};
1244 $other_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'}.$item->{status} }->{itemcallnumber} = $item->{itemcallnumber};
1249 $can_place_holds = 1;
1251 $available_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'} }->{count}++ if $item->{'homebranch'};
1252 $available_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'} }->{branchname} = $item->{'branchname'};
1253 $available_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'} }->{location} = $locations{$item->{location}};
1254 $available_items->{ $item->{'homebranch'}.'--'.$item->{location}.$item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber};
1257 } # notforloan, item level and biblioitem level
1258 my ($availableitemscount, $onloanitemscount, $otheritemscount);
1259 my $maxitems = (C4::Context->preference('maxItemsinSearchResults')) ? C4::Context->preference('maxItemsinSearchResults')- 1 : 1;
1260 for my $key ( sort keys %$onloan_items ) {
1261 $onloanitemscount++;
1262 push @onloan_items_loop, $onloan_items->{$key} unless $onloanitemscount > $maxitems;
1264 for my $key ( sort keys %$other_items ) {
1266 push @other_items_loop, $other_items->{$key} unless $otheritemscount > $maxitems;
1268 for my $key ( sort keys %$available_items ) {
1269 $availableitemscount++;
1270 push @available_items_loop, $available_items->{$key} unless $availableitemscount > $maxitems;
1273 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1274 $can_place_holds = 0 if $itemtypes{$oldbiblio->{itemtype}}->{notforloan};
1275 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1276 $oldbiblio->{itemsplural} = 1 if $items_count>1;
1277 $oldbiblio->{items_count} = $items_count;
1278 $oldbiblio->{available_items_loop} = \@available_items_loop;
1279 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1280 $oldbiblio->{other_items_loop} = \@other_items_loop;
1281 $oldbiblio->{availablecount} = $available_count;
1282 $oldbiblio->{availableplural} = 1 if $available_count>1;
1283 $oldbiblio->{onloancount} = $onloan_count;
1284 $oldbiblio->{onloanplural} = 1 if $onloan_count>1;
1285 $oldbiblio->{othercount} = $other_count;
1286 $oldbiblio->{otherplural} = 1 if $other_count>1;
1287 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1288 $oldbiblio->{itemlostcount} = $itemlost_count;
1289 $oldbiblio->{damagedcount} = $itemdamaged_count;
1290 $oldbiblio->{orderedcount} = $ordered_count;
1291 $oldbiblio->{isbn} =~ s/-//g; # deleting - in isbn to enable amazon content
1292 push( @newresults, $oldbiblio );
1299 #----------------------------------------------------------------------
1301 # Non-Zebra GetRecords#
1302 #----------------------------------------------------------------------
1306 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1310 my ($query,$simple_query,$sort_by_ref,$servers_ref,$results_per_page,$offset,$expanded_facet,$branches,$query_type,$scan) = @_;
1311 warn "query =$query" if $DEBUG;
1312 my $result = NZanalyse($query);
1313 warn "results =$result" if $DEBUG;
1314 return (undef,NZorder($result,@$sort_by_ref[0],$results_per_page,$offset),undef);
1319 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1320 the list is built from an inverted index in the nozebra SQL table
1321 note that title is here only for convenience : the sorting will be very fast when requested on title
1322 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1327 my ($string,$server) = @_;
1328 warn "---------" if $DEBUG;
1329 warn "Enter NZanalyse" if $DEBUG;
1330 warn "---------" if $DEBUG;
1332 # $server contains biblioserver or authorities, depending on what we search on.
1333 #warn "querying : $string on $server";
1334 $server='biblioserver' unless $server;
1336 # if we have a ", replace the content to discard temporarily any and/or/not inside
1338 if ($string =~/"/) {
1339 $string =~ s/"(.*?)"/__X__/;
1341 warn "commacontent : $commacontent" if $DEBUG;
1343 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1344 # then, call again NZanalyse with $left and $right
1345 # (recursive until we find a leaf (=> something without and/or/not)
1346 # delete repeated operator... Would then go in infinite loop
1347 while ($string =~s/( and| or| not| AND| OR| NOT)\1/$1/g){
1349 #process parenthesis before.
1350 if ($string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/){
1353 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1354 warn "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right" if $DEBUG;
1355 my $leftresult = NZanalyse($left,$server);
1357 my $rightresult = NZanalyse($right,$server);
1358 # OK, we have the results for right and left part of the query
1359 # depending of operand, intersect, union or exclude both lists
1360 # to get a result list
1361 if ($operator eq ' and ') {
1362 my @leftresult = split /;/, $leftresult;
1363 warn " @leftresult / $rightresult \n" if $DEBUG;
1364 # my @rightresult = split /;/,$leftresult;
1366 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1367 # the result is stored twice, to have the same weight for AND than OR.
1368 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1369 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1370 foreach (@leftresult) {
1373 ($value,$countvalue)=($1,$2) if $value=~m/(.*)-(\d+)$/;
1374 if ($rightresult =~ /$value-(\d+);/) {
1375 $countvalue=($1>$countvalue?$countvalue:$1);
1376 $finalresult .= "$value-$countvalue;$value-$countvalue;";
1379 warn " $finalresult \n" if $DEBUG;
1380 return $finalresult;
1381 } elsif ($operator eq ' or ') {
1382 # just merge the 2 strings
1383 return $leftresult.$rightresult;
1384 } elsif ($operator eq ' not ') {
1385 my @leftresult = split /;/, $leftresult;
1386 # my @rightresult = split /;/,$leftresult;
1388 foreach (@leftresult) {
1390 $value=$1 if $value=~m/(.*)-\d+$/;
1391 unless ($rightresult =~ "$value-") {
1394 return $finalresult;
1396 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1402 warn "string :".$string if $DEBUG;
1403 $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1406 my $operator = lc($2); # FIXME: and/or/not are operators, not operands
1407 warn "dealing w/parenthesis. left :$left operator:$operator right:$right" if $DEBUG;
1408 # it's not a leaf, we have a and/or/not
1410 # reintroduce comma content if needed
1411 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1412 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1413 warn "node : $left / $operator / $right\n" if $DEBUG;
1414 my $leftresult = NZanalyse($left,$server);
1415 my $rightresult = NZanalyse($right,$server);
1416 # OK, we have the results for right and left part of the query
1417 # depending of operand, intersect, union or exclude both lists
1418 # to get a result list
1419 if ($operator eq ' and ') {
1420 my @leftresult = split /;/, $leftresult;
1421 # my @rightresult = split /;/,$leftresult;
1423 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1424 # the result is stored twice, to have the same weight for AND than OR.
1425 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1426 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1427 foreach (@leftresult) {
1428 if ($rightresult =~ "$_;") {
1429 $finalresult .= "$_;$_;";
1432 return $finalresult;
1433 } elsif ($operator eq ' or ') {
1434 # just merge the 2 strings
1435 return $leftresult.$rightresult;
1436 } elsif ($operator eq ' not ') {
1437 my @leftresult = split /;/, $leftresult;
1438 # my @rightresult = split /;/,$leftresult;
1440 foreach (@leftresult) {
1441 unless ($rightresult =~ "$_;") {
1442 $finalresult .= "$_;";
1445 return $finalresult;
1447 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1448 die "error : operand unknown : $operator for $string";
1450 # it's a leaf, do the real SQL query and return the result
1452 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1453 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1454 warn "leaf:$string" if $DEBUG;
1455 # parse the string in in operator/operand/value again
1456 $string =~ /(.*)(>=|<=)(.*)/;
1460 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1461 unless ($operator) {
1462 $string =~ /(.*)(>|<|=)(.*)/;
1466 warn "handling unless (operator)... left:$left operator:$operator right:$right" if $DEBUG;
1469 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1470 $left =~ s/[ ,].*$//;
1471 # automatic replace for short operators
1472 $left='title' if $left =~ '^ti$';
1473 $left='author' if $left =~ '^au$';
1474 $left='publisher' if $left =~ '^pb$';
1475 $left='subject' if $left =~ '^su$';
1476 $left='koha-Auth-Number' if $left =~ '^an$';
1477 $left='keyword' if $left =~ '^kw$';
1478 if ($operator && $left ne 'keyword' ) {
1479 #do a specific search
1480 my $dbh = C4::Context->dbh;
1481 $operator='LIKE' if $operator eq '=' and $right=~ /%/;
1482 my $sth = $dbh->prepare("SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?");
1483 warn "$left / $operator / $right\n";
1484 # split each word, query the DB and build the biblionumbers result
1485 #sanitizing leftpart
1486 $left=~s/^\s+|\s+$//;
1487 foreach (split / /,$right) {
1491 warn "EXECUTE : $server, $left, $_";
1492 $sth->execute($server, $left, $_) or warn "execute failed: $!";
1493 while (my ($line,$value) = $sth->fetchrow) {
1494 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1495 # otherwise, fill the result
1496 $biblionumbers .= $line unless ($right =~ /^\d+$/ && $value =~ /\D/);
1497 warn "result : $value ". ($right =~ /\d/) . "==".(!$value =~ /\d/) ;#= $line";
1499 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1501 my @leftresult = split /;/, $biblionumbers;
1503 foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
1504 # remove weight at the end
1505 my $cleaned = $entry;
1506 $cleaned =~ s/-\d*$//;
1507 # if the entry already in the hash, take it & increase weight
1508 warn "===== $cleaned =====" if $DEBUG;
1509 if ($results =~ "$cleaned") {
1510 $temp .= "$entry;$entry;";
1511 warn "INCLUDING $entry" if $DEBUG;
1516 $results = $biblionumbers;
1520 #do a complete search (all indexes), if index='kw' do complete search too.
1521 my $dbh = C4::Context->dbh;
1522 my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?");
1523 # split each word, query the DB and build the biblionumbers result
1524 foreach (split / /,$string) {
1525 next if C4::Context->stopwords->{uc($_)}; # skip if stopword
1526 warn "search on all indexes on $_" if $DEBUG;
1529 $sth->execute($server, $_);
1530 while (my $line = $sth->fetchrow) {
1531 $biblionumbers .= $line;
1533 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1535 warn "RES for $_ = $biblionumbers" if $DEBUG;
1536 my @leftresult = split /;/, $biblionumbers;
1538 foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
1539 # remove weight at the end
1540 my $cleaned = $entry;
1541 $cleaned =~ s/-\d*$//;
1542 # if the entry already in the hash, take it & increase weight
1543 # warn "===== $cleaned =====" if $DEBUG;
1544 if ($results =~ "$cleaned") {
1545 $temp .= "$entry;$entry;";
1546 # warn "INCLUDING $entry" if $DEBUG;
1551 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1552 $results = $biblionumbers;
1556 warn "return : $results for LEAF : $string" if $DEBUG;
1559 warn "---------" if $DEBUG;
1560 warn "Leave NZanalyse" if $DEBUG;
1561 warn "---------" if $DEBUG;
1566 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1574 my ($biblionumbers, $ordering,$results_per_page,$offset) = @_;
1575 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1576 # order title asc by default
1577 # $ordering = '1=36 <i' unless $ordering;
1578 $results_per_page=20 unless $results_per_page;
1579 $offset = 0 unless $offset;
1580 my $dbh = C4::Context->dbh;
1582 # order by POPULARITY
1584 if ($ordering =~ /popularity/) {
1587 # popularity is not in MARC record, it's builded from a specific query
1588 my $sth = $dbh->prepare("select sum(issues) from items where biblionumber=?");
1589 foreach (split /;/,$biblionumbers) {
1590 my ($biblionumber,$title) = split /,/,$_;
1591 $result{$biblionumber}=GetMarcBiblio($biblionumber);
1592 $sth->execute($biblionumber);
1593 my $popularity= $sth->fetchrow ||0;
1594 # hint : the key is popularity.title because we can have
1595 # many results with the same popularity. In this cas, sub-ordering is done by title
1596 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1597 # (un-frequent, I agree, but we won't forget anything that way ;-)
1598 $popularity{sprintf("%10d",$popularity).$title.$biblionumber} = $biblionumber;
1600 # sort the hash and return the same structure as GetRecords (Zebra querying)
1603 if ($ordering eq 'popularity_dsc') { # sort popularity DESC
1604 foreach my $key (sort {$b cmp $a} (keys %popularity)) {
1605 $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc();
1607 } else { # sort popularity ASC
1608 foreach my $key (sort (keys %popularity)) {
1609 $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc();
1613 $result_hash->{'hits'} = $numbers;
1614 $finalresult->{'biblioserver'} = $result_hash;
1615 return $finalresult;
1619 } elsif ($ordering =~/author/){
1621 foreach (split /;/,$biblionumbers) {
1622 my ($biblionumber,$title) = split /,/,$_;
1623 my $record=GetMarcBiblio($biblionumber);
1625 if (C4::Context->preference('marcflavour') eq 'UNIMARC') {
1626 $author=$record->subfield('200','f');
1627 $author=$record->subfield('700','a') unless $author;
1629 $author=$record->subfield('100','a');
1631 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1632 # and we don't want to get only 1 result for each of them !!!
1633 $result{$author.$biblionumber}=$record;
1635 # sort the hash and return the same structure as GetRecords (Zebra querying)
1638 if ($ordering eq 'author_za') { # sort by author desc
1639 foreach my $key (sort { $b cmp $a } (keys %result)) {
1640 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1642 } else { # sort by author ASC
1643 foreach my $key (sort (keys %result)) {
1644 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1648 $result_hash->{'hits'} = $numbers;
1649 $finalresult->{'biblioserver'} = $result_hash;
1650 return $finalresult;
1652 # ORDER BY callnumber
1654 } elsif ($ordering =~/callnumber/){
1656 foreach (split /;/,$biblionumbers) {
1657 my ($biblionumber,$title) = split /,/,$_;
1658 my $record=GetMarcBiblio($biblionumber);
1660 my ($callnumber_tag,$callnumber_subfield)=GetMarcFromKohaField($dbh,'items.itemcallnumber');
1661 ($callnumber_tag,$callnumber_subfield)= GetMarcFromKohaField('biblioitems.callnumber') unless $callnumber_tag;
1662 if (C4::Context->preference('marcflavour') eq 'UNIMARC') {
1663 $callnumber=$record->subfield('200','f');
1665 $callnumber=$record->subfield('100','a');
1667 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1668 # and we don't want to get only 1 result for each of them !!!
1669 $result{$callnumber.$biblionumber}=$record;
1671 # sort the hash and return the same structure as GetRecords (Zebra querying)
1674 if ($ordering eq 'call_number_dsc') { # sort by title desc
1675 foreach my $key (sort { $b cmp $a } (keys %result)) {
1676 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1678 } else { # sort by title ASC
1679 foreach my $key (sort { $a cmp $b } (keys %result)) {
1680 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1684 $result_hash->{'hits'} = $numbers;
1685 $finalresult->{'biblioserver'} = $result_hash;
1686 return $finalresult;
1687 } elsif ($ordering =~ /pubdate/){ #pub year
1689 foreach (split /;/,$biblionumbers) {
1690 my ($biblionumber,$title) = split /,/,$_;
1691 my $record=GetMarcBiblio($biblionumber);
1692 my ($publicationyear_tag,$publicationyear_subfield)=GetMarcFromKohaField('biblioitems.publicationyear','');
1693 my $publicationyear=$record->subfield($publicationyear_tag,$publicationyear_subfield);
1694 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1695 # and we don't want to get only 1 result for each of them !!!
1696 $result{$publicationyear.$biblionumber}=$record;
1698 # sort the hash and return the same structure as GetRecords (Zebra querying)
1701 if ($ordering eq 'pubdate_dsc') { # sort by pubyear desc
1702 foreach my $key (sort { $b cmp $a } (keys %result)) {
1703 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1705 } else { # sort by pub year ASC
1706 foreach my $key (sort (keys %result)) {
1707 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1711 $result_hash->{'hits'} = $numbers;
1712 $finalresult->{'biblioserver'} = $result_hash;
1713 return $finalresult;
1717 } elsif ($ordering =~ /title/) {
1718 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
1720 foreach (split /;/,$biblionumbers) {
1721 my ($biblionumber,$title) = split /,/,$_;
1722 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1723 # and we don't want to get only 1 result for each of them !!!
1724 # hint & speed improvement : we can order without reading the record
1725 # so order, and read records only for the requested page !
1726 $result{$title.$biblionumber}=$biblionumber;
1728 # sort the hash and return the same structure as GetRecords (Zebra querying)
1731 if ($ordering eq 'title_az') { # sort by title desc
1732 foreach my $key (sort (keys %result)) {
1733 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
1735 } else { # sort by title ASC
1736 foreach my $key (sort { $b cmp $a } (keys %result)) {
1737 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
1740 # limit the $results_per_page to result size if it's more
1741 $results_per_page = $numbers-1 if $numbers < $results_per_page;
1742 # for the requested page, replace biblionumber by the complete record
1743 # speed improvement : avoid reading too much things
1744 for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) {
1745 $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc;
1748 $result_hash->{'hits'} = $numbers;
1749 $finalresult->{'biblioserver'} = $result_hash;
1750 return $finalresult;
1755 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
1758 foreach (split /;/,$biblionumbers) {
1759 my ($biblionumber,$title) = split /,/,$_;
1760 $title =~ /(.*)-(\d)/;
1763 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
1764 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
1765 # biblio N has ranking = 6
1766 $count_ranking{$biblionumber} += $ranking;
1768 # build the result by "inverting" the count_ranking hash
1769 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
1771 foreach (keys %count_ranking) {
1772 $result{sprintf("%10d",$count_ranking{$_}).'-'.$_} = $_;
1774 # sort the hash and return the same structure as GetRecords (Zebra querying)
1777 foreach my $key (sort {$b cmp $a} (keys %result)) {
1778 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
1780 # limit the $results_per_page to result size if it's more
1781 $results_per_page = $numbers-1 if $numbers < $results_per_page;
1782 # for the requested page, replace biblionumber by the complete record
1783 # speed improvement : avoid reading too much things
1784 for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) {
1785 $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc if $result_hash->{'RECORDS'}[$counter];
1788 $result_hash->{'hits'} = $numbers;
1789 $finalresult->{'biblioserver'} = $result_hash;
1790 return $finalresult;
1795 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
1797 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
1798 test parameter if set donot perform change to records in database.
1804 * $listbiblios is an array ref to marcrecords to be changed
1805 * $tagsubfield is the reference of the subfield to change.
1806 * $initvalue is the value to search the record for
1807 * $targetvalue is the value to set the subfield to
1808 * $test is to be set only not to perform changes in database.
1810 =item C<Output arg:>
1811 * $countchanged counts all the changes performed.
1812 * $listunchanged contains the list of all the biblionumbers of records unchanged.
1814 =item C<usage in the script:>
1818 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
1819 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
1820 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
1825 my ($listbiblios,$tagsubfield,$initvalue,$targetvalue,$test)=@_;
1828 my ($tag,$subfield)=($1,$2) if ($tagsubfield=~/^(\d{1,3})([a-z0-9A-Z@])?$/);
1829 if ((length($tag)<3)&& $subfield=~/0-9/){
1830 $tag=$tag.$subfield;
1833 my ($bntag,$bnsubf) = GetMarcFromKohaField('biblio.biblionumber');
1834 my ($itemtag,$itemsubf) = GetMarcFromKohaField('items.itemnumber');
1835 foreach my $usmarc (@$listbiblios){
1837 $record=eval{MARC::Record->new_from_usmarc($usmarc)};
1840 # usmarc is not a valid usmarc May be a biblionumber
1841 if ($tag eq $itemtag){
1842 my $bib=GetBiblioFromItemNumber($usmarc);
1843 $record=GetMarcItem($bib->{'biblionumber'},$usmarc) ;
1844 $biblionumber=$bib->{'biblionumber'};
1846 $record=GetMarcBiblio($usmarc);
1847 $biblionumber=$usmarc;
1851 $biblionumber = $record->subfield($bntag,$bnsubf);
1853 $biblionumber=$record->field($bntag)->data;
1856 #GetBiblionumber is to be written.
1857 #Could be replaced by TransformMarcToKoha (But Would be longer)
1858 if ($record->field($tag)){
1860 foreach my $field ($record->field($tag)){
1862 if ($field->delete_subfield('code' =>$subfield,'match'=>qr($initvalue))){
1865 $field->update($subfield,$targetvalue) if ($targetvalue);
1869 if ($field->delete_field($field)){
1874 $field->data=$targetvalue if ($field->data=~qr($initvalue));
1878 # warn $record->as_formatted;
1880 ModBiblio($record,$biblionumber,GetFrameworkCode($biblionumber)) unless ($test);
1882 push @unmatched, $biblionumber;
1885 push @unmatched, $biblionumber;
1888 return ($countmatched,\@unmatched);
1891 END { } # module clean-up code here (global destructor)
1898 Koha Developement team <info@koha.org>