3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
21 use C4::Biblio; # GetMarcFromKohaField
22 use C4::Koha; # getFacets
24 use C4::Search::PazPar2;
26 use C4::Dates qw(format_date);
29 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
31 # set the version for version checking
34 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
39 C4::Search - Functions for searching the Koha catalog.
43 See opac/opac-search.pl or catalogue/search.pl for example of usage
47 This module provides searching functions for Koha's bibliographic databases
65 # make all your functions, whether exported or not;
67 =head2 findseealso($dbh,$fields);
69 C<$dbh> is a link to the DB handler.
72 my $dbh =C4::Context->dbh;
74 C<$fields> is a reference to the fields array
76 This function modifies the @$fields array and adds related fields to search on.
78 FIXME: this function is probably deprecated in Koha 3
83 my ( $dbh, $fields ) = @_;
84 my $tagslib = GetMarcStructure(1);
85 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
86 my ($tag) = substr( @$fields[$i], 1, 3 );
87 my ($subfield) = substr( @$fields[$i], 4, 1 );
88 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
89 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
95 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
103 my $dbh = C4::Context->dbh;
104 my $result = TransformMarcToKoha( $dbh, $record, '' );
109 my ( $biblionumber, $title );
111 # search duplicate on ISBN, easy and fast..
112 # ... normalize first
113 if ( $result->{isbn} ) {
114 $result->{isbn} =~ s/\(.*$//;
115 $result->{isbn} =~ s/\s+$//;
116 $query = "isbn=$result->{isbn}";
119 $result->{title} =~ s /\\//g;
120 $result->{title} =~ s /\"//g;
121 $result->{title} =~ s /\(//g;
122 $result->{title} =~ s /\)//g;
124 # FIXME: instead of removing operators, could just do
125 # quotes around the value
126 $result->{title} =~ s/(and|or|not)//g;
127 $query = "ti,ext=$result->{title}";
128 $query .= " and itemtype=$result->{itemtype}"
129 if ( $result->{itemtype} );
130 if ( $result->{author} ) {
131 $result->{author} =~ s /\\//g;
132 $result->{author} =~ s /\"//g;
133 $result->{author} =~ s /\(//g;
134 $result->{author} =~ s /\)//g;
136 # remove valid operators
137 $result->{author} =~ s/(and|or|not)//g;
138 $query .= " and au,ext=$result->{author}";
142 # FIXME: add error handling
143 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
145 foreach my $possible_duplicate_record (@$searchresults) {
147 MARC::Record->new_from_usmarc($possible_duplicate_record);
148 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
150 # FIXME :: why 2 $biblionumber ?
152 push @results, $result->{'biblionumber'};
153 push @results, $result->{'title'};
161 ($error,$results) = SimpleSearch( $query, $offset, $max_results, [ @servers ] );
163 This function provides a simple search API on the bibliographic catalog
169 * $query can be a simple keyword or a complete CCL query
170 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
171 * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
172 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
176 * $error is a empty unless an error is detected
177 * \@results is an array of records.
179 =item C<usage in the script:>
183 my ($error, $marcresults) = SimpleSearch($query);
185 if (defined $error) {
186 $template->param(query_error => $error);
187 warn "error: ".$error;
188 output_html_with_http_headers $input, $cookie, $template->output;
192 my $hits = scalar @$marcresults;
195 for(my $i=0;$i<$hits;$i++) {
197 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
198 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
200 #build the hash for the template.
201 $resultsloop{highlight} = ($i % 2)?(1):(0);
202 $resultsloop{title} = $biblio->{'title'};
203 $resultsloop{subtitle} = $biblio->{'subtitle'};
204 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
205 $resultsloop{author} = $biblio->{'author'};
206 $resultsloop{publishercode} = $biblio->{'publishercode'};
207 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
209 push @results, \%resultsloop;
212 $template->param(result=>\@results);
217 my ( $query, $offset, $max_results, $servers ) = @_;
219 if ( C4::Context->preference('NoZebra') ) {
220 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
223 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
224 return ( undef, $search_result, scalar($search_result) );
227 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
228 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
234 return ( "No query entered", undef, undef ) unless $query;
236 # Initialize & Search Zebra
237 for ( my $i = 0 ; $i < @servers ; $i++ ) {
239 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
240 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
241 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
245 $zconns[$i]->errmsg() . " ("
246 . $zconns[$i]->errcode() . ") "
247 . $zconns[$i]->addinfo() . " "
248 . $zconns[$i]->diagset();
250 return ( $error, undef, undef ) if $zconns[$i]->errcode();
254 # caught a ZOOM::Exception
258 . $@->addinfo() . " "
261 return ( $error, undef, undef );
264 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
265 my $event = $zconns[ $i - 1 ]->last_event();
266 if ( $event == ZOOM::Event::ZEND ) {
268 my $first_record = defined( $offset ) ? $offset+1 : 1;
269 my $hits = $tmpresults[ $i - 1 ]->size();
270 $total_hits += $hits;
271 my $last_record = $hits;
272 if ( defined $max_results && $offset + $max_results < $hits ) {
273 $last_record = $offset + $max_results;
276 for my $j ( $first_record..$last_record ) {
277 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
278 push @results, $record;
283 foreach my $result (@tmpresults) {
286 foreach my $zoom_query (@zoom_queries) {
287 $zoom_query->destroy();
290 return ( undef, \@results, $total_hits );
296 ( undef, $results_hashref, \@facets_loop ) = getRecords (
298 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
299 $results_per_page, $offset, $expanded_facet, $branches,
303 The all singing, all dancing, multi-server, asynchronous, scanning,
304 searching, record nabbing, facet-building
306 See verbse embedded documentation.
312 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
313 $results_per_page, $offset, $expanded_facet, $branches,
317 my @servers = @$servers_ref;
318 my @sort_by = @$sort_by_ref;
320 # Initialize variables for the ZOOM connection and results object
324 my $results_hashref = ();
326 # Initialize variables for the faceted results objects
327 my $facets_counter = ();
328 my $facets_info = ();
329 my $facets = getFacets();
332 ; # stores the ref to array of hashes for template facets loop
334 ### LOOP THROUGH THE SERVERS
335 for ( my $i = 0 ; $i < @servers ; $i++ ) {
336 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
338 # perform the search, create the results objects
339 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
340 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
342 #$query_to_use = $simple_query if $scan;
343 warn $simple_query if ( $scan and $DEBUG );
345 # Check if we've got a query_type defined, if so, use it
349 if ( $query_type =~ /^ccl/ ) {
351 s/\:/\=/g; # change : to = last minute (FIXME)
354 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
357 elsif ( $query_type =~ /^cql/ ) {
360 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
362 elsif ( $query_type =~ /^pqf/ ) {
365 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
372 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
378 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
384 warn "WARNING: query problem with $query_to_use " . $@;
387 # Concatenate the sort_by limits and pass them to the results object
388 # Note: sort will override rank
390 foreach my $sort (@sort_by) {
391 if ( $sort eq "author_az" ) {
392 $sort_by .= "1=1003 <i ";
394 elsif ( $sort eq "author_za" ) {
395 $sort_by .= "1=1003 >i ";
397 elsif ( $sort eq "popularity_asc" ) {
398 $sort_by .= "1=9003 <i ";
400 elsif ( $sort eq "popularity_dsc" ) {
401 $sort_by .= "1=9003 >i ";
403 elsif ( $sort eq "call_number_asc" ) {
404 $sort_by .= "1=20 <i ";
406 elsif ( $sort eq "call_number_dsc" ) {
407 $sort_by .= "1=20 >i ";
409 elsif ( $sort eq "pubdate_asc" ) {
410 $sort_by .= "1=31 <i ";
412 elsif ( $sort eq "pubdate_dsc" ) {
413 $sort_by .= "1=31 >i ";
415 elsif ( $sort eq "acqdate_asc" ) {
416 $sort_by .= "1=32 <i ";
418 elsif ( $sort eq "acqdate_dsc" ) {
419 $sort_by .= "1=32 >i ";
421 elsif ( $sort eq "title_az" ) {
422 $sort_by .= "1=4 <i ";
424 elsif ( $sort eq "title_za" ) {
425 $sort_by .= "1=4 >i ";
429 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
430 warn "WARNING sort $sort_by failed";
433 } # finished looping through servers
435 # The big moment: asynchronously retrieve results from all servers
436 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
437 my $ev = $zconns[ $i - 1 ]->last_event();
438 if ( $ev == ZOOM::Event::ZEND ) {
439 next unless $results[ $i - 1 ];
440 my $size = $results[ $i - 1 ]->size();
444 # loop through the results
445 $results_hash->{'hits'} = $size;
447 if ( $offset + $results_per_page <= $size ) {
448 $times = $offset + $results_per_page;
453 for ( my $j = $offset ; $j < $times ; $j++ ) {
458 ## Check if it's an index scan
460 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
462 # here we create a minimal MARC record and hand it off to the
463 # template just like a normal result ... perhaps not ideal, but
465 my $tmprecord = MARC::Record->new();
466 $tmprecord->encoding('UTF-8');
470 # the minimal record in author/title (depending on MARC flavour)
471 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
472 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
473 $tmprecord->append_fields($tmptitle);
475 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
476 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
477 $tmprecord->append_fields($tmptitle);
478 $tmprecord->append_fields($tmpauthor);
480 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
485 $record = $results[ $i - 1 ]->record($j)->raw();
487 # warn "RECORD $j:".$record;
488 $results_hash->{'RECORDS'}[$j] = $record;
490 # Fill the facets while we're looping, but only for the biblioserver
491 $facet_record = MARC::Record->new_from_usmarc($record)
492 if $servers[ $i - 1 ] =~ /biblioserver/;
494 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
496 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
498 if ( $facets->[$k] ) {
500 for my $tag ( @{ $facets->[$k]->{'tags'} } )
503 $facet_record->field($tag);
505 for my $field (@fields) {
506 my @subfields = $field->subfields();
507 for my $subfield (@subfields) {
508 my ( $code, $data ) = @$subfield;
510 $facets->[$k]->{'subfield'} )
512 $facets_counter->{ $facets->[$k]
518 $facets_info->{ $facets->[$k]
519 ->{'link_value'} }->{'label_value'} =
520 $facets->[$k]->{'label_value'};
521 $facets_info->{ $facets->[$k]
522 ->{'link_value'} }->{'expanded'} =
523 $facets->[$k]->{'expanded'};
529 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
532 # warn "connection ", $i-1, ": $size hits";
533 # warn $results[$i-1]->record(0)->render() if $size > 0;
536 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
538 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
539 keys %$facets_counter )
542 my $number_of_facets;
543 my @this_facets_array;
546 $facets_counter->{$link_value}
547 ->{$b} <=> $facets_counter->{$link_value}->{$a}
548 } keys %{ $facets_counter->{$link_value} }
552 if ( ( $number_of_facets < 6 )
553 || ( $expanded_facet eq $link_value )
554 || ( $facets_info->{$link_value}->{'expanded'} ) )
557 # Sanitize the link value ), ( will cause errors with CCL,
558 my $facet_link_value = $one_facet;
559 $facet_link_value =~ s/(\(|\))/ /g;
561 # fix the length that will display in the label,
562 my $facet_label_value = $one_facet;
564 substr( $one_facet, 0, 20 ) . "..."
565 unless length($facet_label_value) <= 20;
567 # if it's a branch, label by the name, not the code,
568 if ( $link_value =~ /branch/ ) {
570 $branches->{$one_facet}->{'branchname'};
573 # but we're down with the whole label being in the link's title.
574 my $facet_title_value = $one_facet;
576 push @this_facets_array,
580 $facets_counter->{$link_value}
582 facet_label_value => $facet_label_value,
583 facet_title_value => $facet_title_value,
584 facet_link_value => $facet_link_value,
585 type_link_value => $link_value,
591 # handle expanded option
592 unless ( $facets_info->{$link_value}->{'expanded'} ) {
594 if ( ( $number_of_facets > 6 )
595 && ( $expanded_facet ne $link_value ) );
600 type_link_value => $link_value,
601 type_id => $link_value . "_id",
602 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
603 facets => \@this_facets_array,
604 expandable => $expandable,
605 expand => $link_value,
612 return ( undef, $results_hashref, \@facets_loop );
617 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
618 $results_per_page, $offset, $expanded_facet, $branches,
622 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
624 $paz->search($simple_query);
628 my $results_hashref = {};
629 my $stats = XMLin($paz->stat);
630 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
632 # for a grouped search result, the number of hits
633 # is the number of groups returned; 'bib_hits' will have
634 # the total number of bibs.
635 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
636 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
638 HIT: foreach my $hit (@{ $results->{'hit'} }) {
639 my $recid = $hit->{recid}->[0];
641 my $work_title = $hit->{'md-work-title'}->[0];
643 if (exists $hit->{'md-work-author'}) {
644 $work_author = $hit->{'md-work-author'}->[0];
646 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
648 my $result_group = {};
649 $result_group->{'group_label'} = $group_label;
650 $result_group->{'group_merge_key'} = $recid;
653 if (exists $hit->{count}) {
654 $count = $hit->{count}->[0];
656 $result_group->{'group_count'} = $count;
658 for (my $i = 0; $i < $count; $i++) {
659 # FIXME -- may need to worry about diacritics here
660 my $rec = $paz->record($recid, $i);
661 push @{ $result_group->{'RECORDS'} }, $rec;
664 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
667 # pass through facets
668 my $termlist_xml = $paz->termlist('author,subject');
669 my $terms = XMLin($termlist_xml, forcearray => 1);
670 my @facets_loop = ();
671 #die Dumper($results);
672 # foreach my $list (sort keys %{ $terms->{'list'} }) {
674 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
676 # facet_label_value => $facet->{'name'}->[0],
679 # push @facets_loop, ( {
680 # type_label => $list,
681 # facets => \@facets,
685 return ( undef, $results_hashref, \@facets_loop );
689 sub _remove_stopwords {
690 my ( $operand, $index ) = @_;
691 my @stopwords_removed;
693 # phrase and exact-qualified indexes shouldn't have stopwords removed
694 if ( $index !~ m/phr|ext/ ) {
696 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
697 # we use IsAlpha unicode definition, to deal correctly with diacritics.
698 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
699 # is a stopword, we'd get "çon" and wouldn't find anything...
700 foreach ( keys %{ C4::Context->stopwords } ) {
701 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
703 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
705 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
706 $operand =~ s/^$_\P{IsAlpha}/ /gi;
707 $operand =~ s/\P{IsAlpha}$_$/ /gi;
708 push @stopwords_removed, $_;
712 return ( $operand, \@stopwords_removed );
716 sub _detect_truncation {
717 my ( $operand, $index ) = @_;
718 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
721 my @wordlist = split( /\s/, $operand );
722 foreach my $word (@wordlist) {
723 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
724 push @rightlefttruncated, $word;
726 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
727 push @lefttruncated, $word;
729 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
730 push @righttruncated, $word;
732 elsif ( index( $word, "*" ) < 0 ) {
733 push @nontruncated, $word;
736 push @regexpr, $word;
740 \@nontruncated, \@righttruncated, \@lefttruncated,
741 \@rightlefttruncated, \@regexpr
746 sub _build_stemmed_operand {
750 # If operand contains a digit, it is almost certainly an identifier, and should
751 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
752 # can contain the letter "X" - for example, _build_stemmend_operand would reduce
753 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
754 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
755 return $operand if $operand =~ /\d/;
757 # FIXME: the locale should be set based on the user's language and/or search choice
758 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
760 # FIXME: these should be stored in the db so the librarian can modify the behavior
761 $stemmer->add_exceptions(
768 my @words = split( / /, $operand );
769 my $stems = $stemmer->stem(@words);
770 for my $stem (@$stems) {
771 $stemmed_operand .= "$stem";
772 $stemmed_operand .= "?"
773 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
774 $stemmed_operand .= " ";
776 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
777 return $stemmed_operand;
781 sub _build_weighted_query {
783 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
784 # pretty well but could work much better if we had a smarter query parser
785 my ( $operand, $stemmed_operand, $index ) = @_;
786 my $stemming = C4::Context->preference("QueryStemming") || 0;
787 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
788 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
790 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
792 # Keyword, or, no index specified
793 if ( ( $index eq 'kw' ) || ( !$index ) ) {
795 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
796 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
797 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
798 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
799 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
800 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
801 if $fuzzy_enabled; # add fuzzy, word list
802 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
803 if ( $stemming and $stemmed_operand )
804 ; # add stemming, right truncation
805 $weighted_query .= " or wrdl,r9=\"$operand\"";
807 # embedded sorting: 0 a-z; 1 z-a
808 # $weighted_query .= ") or (sort1,aut=1";
811 # Barcode searches should skip this process
812 elsif ( $index eq 'bc' ) {
813 $weighted_query .= "bc=\"$operand\"";
816 # Authority-number searches should skip this process
817 elsif ( $index eq 'an' ) {
818 $weighted_query .= "an=\"$operand\"";
821 # If the index already has more than one qualifier, wrap the operand
822 # in quotes and pass it back (assumption is that the user knows what they
823 # are doing and won't appreciate us mucking up their query
824 elsif ( $index =~ ',' ) {
825 $weighted_query .= " $index=\"$operand\"";
828 #TODO: build better cases based on specific search indexes
830 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
831 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
832 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
834 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
837 $weighted_query .= "))"; # close rank specification
838 return $weighted_query;
844 $simple_query, $query_cgi,
846 $limit_cgi, $limit_desc,
847 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
849 Build queries and limits in CCL, CGI, Human,
850 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
852 See verbose embedded documentation.
858 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
860 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
863 my @operators = @$operators if $operators;
864 my @indexes = @$indexes if $indexes;
865 my @operands = @$operands if $operands;
866 my @limits = @$limits if $limits;
867 my @sort_by = @$sort_by if $sort_by;
869 my $stemming = C4::Context->preference("QueryStemming") || 0;
870 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
871 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
872 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
873 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
875 # no stemming/weight/fuzzy in NoZebra
876 if ( C4::Context->preference("NoZebra") ) {
882 my $query = $operands[0];
883 my $simple_query = $operands[0];
885 # initialize the variables we're passing back
894 my $stopwords_removed; # flag to determine if stopwords have been removed
896 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
898 if ( $query =~ /^ccl=/ ) {
899 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
901 if ( $query =~ /^cql=/ ) {
902 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
904 if ( $query =~ /^pqf=/ ) {
905 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
908 # pass nested queries directly
909 # FIXME: need better handling of some of these variables in this case
910 if ( $query =~ /(\(|\))/ ) {
912 undef, $query, $simple_query, $query_cgi,
913 $query, $limit, $limit_cgi, $limit_desc,
914 $stopwords_removed, 'ccl'
918 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
919 # query operands and indexes and add stemming, truncation, field weighting, etc.
920 # Once we do so, we'll end up with a value in $query, just like if we had an
921 # incoming $query from the user
924 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
926 ; # a flag used to keep track if there was a previous query
927 # if there was, we can apply the current operator
929 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
931 # COMBINE OPERANDS, INDEXES AND OPERATORS
932 if ( $operands[$i] ) {
934 # A flag to determine whether or not to add the index to the query
937 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
938 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
941 $remove_stopwords = 0;
943 my $operand = $operands[$i];
944 my $index = $indexes[$i];
946 # Add index-specific attributes
947 # Date of Publication
948 if ( $index eq 'yr' ) {
949 $index .= ",st-numeric";
951 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
954 # Date of Acquisition
955 elsif ( $index eq 'acqdate' ) {
956 $index .= ",st-date-normalized";
958 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
960 # ISBN,ISSN,Standard Number, don't need special treatment
961 elsif ( $index eq 'nb' || $index eq 'ns' ) {
964 $stemming, $auto_truncation,
965 $weight_fields, $fuzzy_enabled,
967 ) = ( 0, 0, 0, 0, 0 );
970 # Set default structure attribute (word list)
972 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
973 $struct_attr = ",wrdl";
976 # Some helpful index variants
977 my $index_plus = $index . $struct_attr . ":" if $index;
978 my $index_plus_comma = $index . $struct_attr . "," if $index;
981 if ($remove_stopwords) {
982 ( $operand, $stopwords_removed ) =
983 _remove_stopwords( $operand, $index );
984 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
985 warn "REMOVED STOPWORDS: @$stopwords_removed"
986 if ( $stopwords_removed && $DEBUG );
990 my ( $nontruncated, $righttruncated, $lefttruncated,
991 $rightlefttruncated, $regexpr );
992 my $truncated_operand;
994 $nontruncated, $righttruncated, $lefttruncated,
995 $rightlefttruncated, $regexpr
996 ) = _detect_truncation( $operand, $index );
998 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1003 scalar(@$righttruncated) + scalar(@$lefttruncated) +
1004 scalar(@$rightlefttruncated) > 0 )
1007 # Don't field weight or add the index to the query, we do it here
1009 undef $weight_fields;
1010 my $previous_truncation_operand;
1011 if ( scalar(@$nontruncated) > 0 ) {
1012 $truncated_operand .= "$index_plus @$nontruncated ";
1013 $previous_truncation_operand = 1;
1015 if ( scalar(@$righttruncated) > 0 ) {
1016 $truncated_operand .= "and "
1017 if $previous_truncation_operand;
1018 $truncated_operand .=
1019 "$index_plus_comma" . "rtrn:@$righttruncated ";
1020 $previous_truncation_operand = 1;
1022 if ( scalar(@$lefttruncated) > 0 ) {
1023 $truncated_operand .= "and "
1024 if $previous_truncation_operand;
1025 $truncated_operand .=
1026 "$index_plus_comma" . "ltrn:@$lefttruncated ";
1027 $previous_truncation_operand = 1;
1029 if ( scalar(@$rightlefttruncated) > 0 ) {
1030 $truncated_operand .= "and "
1031 if $previous_truncation_operand;
1032 $truncated_operand .=
1033 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1034 $previous_truncation_operand = 1;
1037 $operand = $truncated_operand if $truncated_operand;
1038 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1041 my $stemmed_operand;
1042 $stemmed_operand = _build_stemmed_operand($operand)
1044 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1046 # Handle Field Weighting
1047 my $weighted_operand;
1049 _build_weighted_query( $operand, $stemmed_operand, $index )
1051 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1052 $operand = $weighted_operand if $weight_fields;
1053 $indexes_set = 1 if $weight_fields;
1055 # If there's a previous operand, we need to add an operator
1056 if ($previous_operand) {
1058 # User-specified operator
1059 if ( $operators[ $i - 1 ] ) {
1060 $query .= " $operators[$i-1] ";
1061 $query .= " $index_plus " unless $indexes_set;
1062 $query .= " $operand";
1063 $query_cgi .= "&op=$operators[$i-1]";
1064 $query_cgi .= "&idx=$index" if $index;
1065 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1067 " $operators[$i-1] $index_plus $operands[$i]";
1070 # Default operator is and
1073 $query .= "$index_plus " unless $indexes_set;
1074 $query .= "$operand";
1075 $query_cgi .= "&op=and&idx=$index" if $index;
1076 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1077 $query_desc .= " and $index_plus $operands[$i]";
1081 # There isn't a pervious operand, don't need an operator
1084 # Field-weighted queries already have indexes set
1085 $query .= " $index_plus " unless $indexes_set;
1087 $query_desc .= " $index_plus $operands[$i]";
1088 $query_cgi .= "&idx=$index" if $index;
1089 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1090 $previous_operand = 1;
1095 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1098 my $group_OR_limits;
1099 my $availability_limit;
1100 foreach my $this_limit (@limits) {
1101 if ( $this_limit =~ /available/ ) {
1103 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1105 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1106 $availability_limit .=
1107 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1108 $limit_cgi .= "&limit=available";
1112 # group_OR_limits, prefixed by mc-
1113 # OR every member of the group
1114 elsif ( $this_limit =~ /mc/ ) {
1115 $group_OR_limits .= " or " if $group_OR_limits;
1116 $limit_desc .= " or " if $group_OR_limits;
1117 $group_OR_limits .= "$this_limit";
1118 $limit_cgi .= "&limit=$this_limit";
1119 $limit_desc .= " $this_limit";
1122 # Regular old limits
1124 $limit .= " and " if $limit || $query;
1125 $limit .= "$this_limit";
1126 $limit_cgi .= "&limit=$this_limit";
1127 $limit_desc .= " $this_limit";
1130 if ($group_OR_limits) {
1131 $limit .= " and " if ( $query || $limit );
1132 $limit .= "($group_OR_limits)";
1134 if ($availability_limit) {
1135 $limit .= " and " if ( $query || $limit );
1136 $limit .= "($availability_limit)";
1139 # Normalize the query and limit strings
1142 for ( $query, $query_desc, $limit, $limit_desc ) {
1143 $_ =~ s/ / /g; # remove extra spaces
1144 $_ =~ s/^ //g; # remove any beginning spaces
1145 $_ =~ s/ $//g; # remove any ending spaces
1146 $_ =~ s/==/=/g; # remove double == from query
1148 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1150 for ($query_cgi,$simple_query) {
1153 # append the limit to the query
1154 $query .= " " . $limit;
1158 warn "QUERY:" . $query;
1159 warn "QUERY CGI:" . $query_cgi;
1160 warn "QUERY DESC:" . $query_desc;
1161 warn "LIMIT:" . $limit;
1162 warn "LIMIT CGI:" . $limit_cgi;
1163 warn "LIMIT DESC:" . $limit_desc;
1164 warn "---------\nLeave buildQuery\n---------";
1167 undef, $query, $simple_query, $query_cgi,
1168 $query_desc, $limit, $limit_cgi, $limit_desc,
1169 $stopwords_removed, $query_type
1173 =head2 searchResults
1175 Format results in a form suitable for passing to the template
1179 # IMO this subroutine is pretty messy still -- it's responsible for
1180 # building the HTML output for the template
1182 my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1183 my $dbh = C4::Context->dbh;
1187 # add search-term highlighting via <span>s on the search terms
1188 my $span_terms_hashref;
1189 for my $span_term ( split( / /, $searchdesc ) ) {
1190 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1191 $span_terms_hashref->{$span_term}++;
1194 #Build branchnames hash
1196 #get branch information.....
1199 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1200 ; # FIXME : use C4::Koha::GetBranches
1202 while ( my $bdata = $bsth->fetchrow_hashref ) {
1203 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1205 # FIXME - We build an authorised values hash here, using the default framework
1206 # though it is possible to have different authvals for different fws.
1208 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1210 # get notforloan authorised value list (see $shelflocations FIXME)
1211 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1213 #Build itemtype hash
1214 #find itemtype & itemtype image
1218 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1221 while ( my $bdata = $bsth->fetchrow_hashref ) {
1222 foreach (qw(description imageurl summary notforloan)) {
1223 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1227 #search item field code
1230 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1233 my ($itemtag) = $sth->fetchrow;
1235 ## find column names of items related to MARC
1236 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1238 my %subfieldstosearch;
1239 while ( ( my $column ) = $sth2->fetchrow ) {
1240 my ( $tagfield, $tagsubfield ) =
1241 &GetMarcFromKohaField( "items." . $column, "" );
1242 $subfieldstosearch{$column} = $tagsubfield;
1245 # handle which records to actually retrieve
1247 if ( $hits && $offset + $results_per_page <= $hits ) {
1248 $times = $offset + $results_per_page;
1251 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1254 # loop through all of the records we've retrieved
1255 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1256 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1257 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1258 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1259 $oldbiblio->{result_number} = $i + 1;
1261 # add imageurl to itemtype if there is one
1262 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1263 $oldbiblio->{imageurl} =
1264 $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1266 $oldbiblio->{imageurl} =
1267 getitemtypeimagesrc() . "/"
1268 . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1269 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1271 my $biblio_authorised_value_images = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{biblionumber} ) );
1272 $oldbiblio->{authorised_value_images} = $biblio_authorised_value_images;
1273 my $aisbn = $oldbiblio->{'isbn'};
1274 $aisbn =~ /(\d*[X]*)/;
1275 $oldbiblio->{amazonisbn} = $1;
1276 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1277 # Build summary if there is one (the summary is defined in the itemtypes table)
1278 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1279 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1280 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1281 my @fields = $marcrecord->fields();
1282 foreach my $field (@fields) {
1283 my $tag = $field->tag();
1284 my $tagvalue = $field->as_string();
1286 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1287 unless ( $tag < 10 ) {
1288 my @subf = $field->subfields;
1289 for my $i ( 0 .. $#subf ) {
1290 my $subfieldcode = $subf[$i][0];
1291 my $subfieldvalue = $subf[$i][1];
1292 my $tagsubf = $tag . $subfieldcode;
1294 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1299 $summary =~ s/\[(.*?)]//g;
1300 $summary =~ s/\n/<br\/>/g;
1301 $oldbiblio->{summary} = $summary;
1304 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1305 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1306 $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1307 # Add search-term highlighting to the whole record where they match using <span>s
1308 if (C4::Context->preference("OpacHighlightedWords")){
1309 my $searchhighlightblob;
1310 for my $highlight_field ( $marcrecord->fields ) {
1312 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1313 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1314 for my $subfield ($highlight_field->subfields()) {
1316 next if $subfield->[0] eq '9';
1317 my $field = $subfield->[1];
1318 for my $term ( keys %$span_terms_hashref ) {
1319 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1320 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1324 $searchhighlightblob .= $field . " ... " if $match;
1328 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1329 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1332 # Add search-term highlighting to the title, subtitle, etc. fields
1333 for my $term ( keys %$span_terms_hashref ) {
1334 my $old_term = $term;
1335 if ( length($term) > 3 ) {
1336 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1337 foreach(qw(title subtitle author publishercode place pages notes size)) {
1338 $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1343 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1345 # Pull out the items fields
1346 my @fields = $marcrecord->field($itemtag);
1348 # Setting item statuses for display
1349 my @available_items_loop;
1350 my @onloan_items_loop;
1351 my @other_items_loop;
1353 my $available_items;
1357 my $ordered_count = 0;
1358 my $available_count = 0;
1359 my $onloan_count = 0;
1360 my $longoverdue_count = 0;
1361 my $other_count = 0;
1362 my $wthdrawn_count = 0;
1363 my $itemlost_count = 0;
1364 my $itembinding_count = 0;
1365 my $itemdamaged_count = 0;
1366 my $can_place_holds = 0;
1367 my $items_count = scalar(@fields);
1370 ( C4::Context->preference('maxItemsinSearchResults') )
1371 ? C4::Context->preference('maxItemsinSearchResults') - 1
1374 # loop through every item
1375 foreach my $field (@fields) {
1379 # populate the items hash
1380 foreach my $code ( keys %subfieldstosearch ) {
1381 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1383 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1384 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1385 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1386 if ($item->{$hbranch}) {
1387 $item->{'branchname'} = $branches{$item->{$hbranch}};
1389 elsif ($item->{$otherbranch}) { # Last resort
1390 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1393 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1394 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1395 if ( $item->{onloan} ) {
1397 my $key = $prefix . $item->{due_date};
1398 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1399 $onloan_items->{$key}->{count}++ if $item->{homebranch};
1400 $onloan_items->{$key}->{branchname} = $item->{branchname};
1401 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1402 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1403 $onloan_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1404 # if something's checked out and lost, mark it as 'long overdue'
1405 if ( $item->{itemlost} ) {
1406 $onloan_items->{$prefix}->{longoverdue}++;
1407 $longoverdue_count++;
1408 } else { # can place holds as long as item isn't lost
1409 $can_place_holds = 1;
1413 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1417 if ( $item->{notforloan} == -1 ) {
1421 # item is withdrawn, lost or damaged
1422 if ( $item->{wthdrawn}
1423 || $item->{itemlost}
1425 || $item->{notforloan} )
1427 $wthdrawn_count++ if $item->{wthdrawn};
1428 $itemlost_count++ if $item->{itemlost};
1429 $itemdamaged_count++ if $item->{damaged};
1430 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1433 my $key = $prefix . $item->{status};
1434 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1435 $other_items->{$key}->{$_} = $item->{$_};
1437 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1438 $other_items->{$key}->{count}++ if $item->{homebranch};
1439 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1440 $other_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1444 $can_place_holds = 1;
1446 $available_items->{$prefix}->{count}++ if $item->{homebranch};
1447 foreach (qw(branchname itemcallnumber)) {
1448 $available_items->{$prefix}->{$_} = $item->{$_};
1450 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1451 $available_items->{$prefix}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1454 } # notforloan, item level and biblioitem level
1455 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1457 ( C4::Context->preference('maxItemsinSearchResults') )
1458 ? C4::Context->preference('maxItemsinSearchResults') - 1
1460 for my $key ( sort keys %$onloan_items ) {
1461 (++$onloanitemscount > $maxitems) and last;
1462 push @onloan_items_loop, $onloan_items->{$key};
1464 for my $key ( sort keys %$other_items ) {
1465 (++$otheritemscount > $maxitems) and last;
1466 push @other_items_loop, $other_items->{$key};
1468 for my $key ( sort keys %$available_items ) {
1469 (++$availableitemscount > $maxitems) and last;
1470 push @available_items_loop, $available_items->{$key}
1473 # XSLT processing of some stuff
1474 if (C4::Context->preference("XSLTResultsDisplay") ) {
1475 my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber},C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1476 $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1479 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1480 $can_place_holds = 0
1481 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1482 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1483 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1484 $oldbiblio->{items_count} = $items_count;
1485 $oldbiblio->{available_items_loop} = \@available_items_loop;
1486 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1487 $oldbiblio->{other_items_loop} = \@other_items_loop;
1488 $oldbiblio->{availablecount} = $available_count;
1489 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1490 $oldbiblio->{onloancount} = $onloan_count;
1491 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1492 $oldbiblio->{othercount} = $other_count;
1493 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1494 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1495 $oldbiblio->{itemlostcount} = $itemlost_count;
1496 $oldbiblio->{damagedcount} = $itemdamaged_count;
1497 $oldbiblio->{orderedcount} = $ordered_count;
1498 $oldbiblio->{isbn} =~
1499 s/-//g; # deleting - in isbn to enable amazon content
1500 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'} ) );
1501 push( @newresults, $oldbiblio );
1506 #----------------------------------------------------------------------
1508 # Non-Zebra GetRecords#
1509 #----------------------------------------------------------------------
1513 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1519 $query, $simple_query, $sort_by_ref, $servers_ref,
1520 $results_per_page, $offset, $expanded_facet, $branches,
1523 warn "query =$query" if $DEBUG;
1524 my $result = NZanalyse($query);
1525 warn "results =$result" if $DEBUG;
1527 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1533 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1534 the list is built from an inverted index in the nozebra SQL table
1535 note that title is here only for convenience : the sorting will be very fast when requested on title
1536 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1541 my ( $string, $server ) = @_;
1542 # warn "---------" if $DEBUG;
1543 warn " NZanalyse" if $DEBUG;
1544 # warn "---------" if $DEBUG;
1546 # $server contains biblioserver or authorities, depending on what we search on.
1547 #warn "querying : $string on $server";
1548 $server = 'biblioserver' unless $server;
1550 # if we have a ", replace the content to discard temporarily any and/or/not inside
1552 if ( $string =~ /"/ ) {
1553 $string =~ s/"(.*?)"/__X__/;
1555 warn "commacontent : $commacontent" if $DEBUG;
1558 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1559 # then, call again NZanalyse with $left and $right
1560 # (recursive until we find a leaf (=> something without and/or/not)
1561 # delete repeated operator... Would then go in infinite loop
1562 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1565 #process parenthesis before.
1566 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1569 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1571 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1573 my $leftresult = NZanalyse( $left, $server );
1575 my $rightresult = NZanalyse( $right, $server );
1577 # OK, we have the results for right and left part of the query
1578 # depending of operand, intersect, union or exclude both lists
1579 # to get a result list
1580 if ( $operator eq ' and ' ) {
1581 return NZoperatorAND($leftresult,$rightresult);
1583 elsif ( $operator eq ' or ' ) {
1585 # just merge the 2 strings
1586 return $leftresult . $rightresult;
1588 elsif ( $operator eq ' not ' ) {
1589 return NZoperatorNOT($leftresult,$rightresult);
1593 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1597 warn "string :" . $string if $DEBUG;
1601 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1604 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1606 warn "no parenthesis. left : $left operator: $operator right: $right"
1609 # it's not a leaf, we have a and/or/not
1612 # reintroduce comma content if needed
1613 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1614 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1615 warn "node : $left / $operator / $right\n" if $DEBUG;
1616 my $leftresult = NZanalyse( $left, $server );
1617 my $rightresult = NZanalyse( $right, $server );
1618 warn " leftresult : $leftresult" if $DEBUG;
1619 warn " rightresult : $rightresult" if $DEBUG;
1620 # OK, we have the results for right and left part of the query
1621 # depending of operand, intersect, union or exclude both lists
1622 # to get a result list
1623 if ( $operator eq ' and ' ) {
1625 return NZoperatorAND($leftresult,$rightresult);
1627 elsif ( $operator eq ' or ' ) {
1629 # just merge the 2 strings
1630 return $leftresult . $rightresult;
1632 elsif ( $operator eq ' not ' ) {
1633 return NZoperatorNOT($leftresult,$rightresult);
1637 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1638 die "error : operand unknown : $operator for $string";
1641 # it's a leaf, do the real SQL query and return the result
1644 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1645 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1646 #remove trailing blank at the beginning
1648 warn "leaf:$string" if $DEBUG;
1650 # parse the string in in operator/operand/value again
1654 if ($string =~ /(.*)(>=|<=)(.*)/) {
1661 # warn "handling leaf... left:$left operator:$operator right:$right"
1663 unless ($operator) {
1664 if ($string =~ /(.*)(>|<|=)(.*)/) {
1669 "handling unless (operator)... left:$left operator:$operator right:$right"
1677 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1680 # automatic replace for short operators
1681 $left = 'title' if $left =~ '^ti$';
1682 $left = 'author' if $left =~ '^au$';
1683 $left = 'publisher' if $left =~ '^pb$';
1684 $left = 'subject' if $left =~ '^su$';
1685 $left = 'koha-Auth-Number' if $left =~ '^an$';
1686 $left = 'keyword' if $left =~ '^kw$';
1687 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1688 if ( $operator && $left ne 'keyword' ) {
1690 #do a specific search
1691 my $dbh = C4::Context->dbh;
1692 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1695 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1697 warn "$left / $operator / $right\n" if $DEBUG;
1699 # split each word, query the DB and build the biblionumbers result
1700 #sanitizing leftpart
1701 $left =~ s/^\s+|\s+$//;
1702 foreach ( split / /, $right ) {
1704 $_ =~ s/^\s+|\s+$//;
1706 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1707 $sth->execute( $server, $left, $_ )
1708 or warn "execute failed: $!";
1709 while ( my ( $line, $value ) = $sth->fetchrow ) {
1711 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1712 # otherwise, fill the result
1713 $biblionumbers .= $line
1714 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1715 warn "result : $value "
1716 . ( $right =~ /\d/ ) . "=="
1717 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1720 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1722 warn "NZAND" if $DEBUG;
1723 $results = NZoperatorAND($biblionumbers,$results);
1726 $results = $biblionumbers;
1732 #do a complete search (all indexes), if index='kw' do complete search too.
1733 my $dbh = C4::Context->dbh;
1736 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1739 # split each word, query the DB and build the biblionumbers result
1740 foreach ( split / /, $string ) {
1741 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1742 warn "search on all indexes on $_" if $DEBUG;
1745 $sth->execute( $server, $_ );
1746 while ( my $line = $sth->fetchrow ) {
1747 $biblionumbers .= $line;
1750 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1752 $results = NZoperatorAND($biblionumbers,$results);
1755 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1756 $results = $biblionumbers;
1760 warn "return : $results for LEAF : $string" if $DEBUG;
1763 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1767 my ($rightresult, $leftresult)=@_;
1769 my @leftresult = split /;/, $leftresult;
1770 warn " @leftresult / $rightresult \n" if $DEBUG;
1772 # my @rightresult = split /;/,$leftresult;
1775 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1776 # the result is stored twice, to have the same weight for AND than OR.
1777 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1778 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1779 foreach (@leftresult) {
1782 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1783 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1784 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1786 "$value-$countvalue;$value-$countvalue;";
1789 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1790 return $finalresult;
1794 my ($rightresult, $leftresult)=@_;
1795 return $rightresult.$leftresult;
1799 my ($leftresult, $rightresult)=@_;
1801 my @leftresult = split /;/, $leftresult;
1803 # my @rightresult = split /;/,$leftresult;
1805 foreach (@leftresult) {
1807 $value=$1 if $value=~m/(.*)-\d+$/;
1808 unless ($rightresult =~ "$value-") {
1809 $finalresult .= "$_;";
1812 return $finalresult;
1817 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1824 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1825 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1827 # order title asc by default
1828 # $ordering = '1=36 <i' unless $ordering;
1829 $results_per_page = 20 unless $results_per_page;
1830 $offset = 0 unless $offset;
1831 my $dbh = C4::Context->dbh;
1834 # order by POPULARITY
1836 if ( $ordering =~ /popularity/ ) {
1840 # popularity is not in MARC record, it's builded from a specific query
1842 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1843 foreach ( split /;/, $biblionumbers ) {
1844 my ( $biblionumber, $title ) = split /,/, $_;
1845 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1846 $sth->execute($biblionumber);
1847 my $popularity = $sth->fetchrow || 0;
1849 # hint : the key is popularity.title because we can have
1850 # many results with the same popularity. In this cas, sub-ordering is done by title
1851 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1852 # (un-frequent, I agree, but we won't forget anything that way ;-)
1853 $popularity{ sprintf( "%10d", $popularity ) . $title
1854 . $biblionumber } = $biblionumber;
1857 # sort the hash and return the same structure as GetRecords (Zebra querying)
1860 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1861 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1862 $result_hash->{'RECORDS'}[ $numbers++ ] =
1863 $result{ $popularity{$key} }->as_usmarc();
1866 else { # sort popularity ASC
1867 foreach my $key ( sort ( keys %popularity ) ) {
1868 $result_hash->{'RECORDS'}[ $numbers++ ] =
1869 $result{ $popularity{$key} }->as_usmarc();
1872 my $finalresult = ();
1873 $result_hash->{'hits'} = $numbers;
1874 $finalresult->{'biblioserver'} = $result_hash;
1875 return $finalresult;
1881 elsif ( $ordering =~ /author/ ) {
1883 foreach ( split /;/, $biblionumbers ) {
1884 my ( $biblionumber, $title ) = split /,/, $_;
1885 my $record = GetMarcBiblio($biblionumber);
1887 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1888 $author = $record->subfield( '200', 'f' );
1889 $author = $record->subfield( '700', 'a' ) unless $author;
1892 $author = $record->subfield( '100', 'a' );
1895 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1896 # and we don't want to get only 1 result for each of them !!!
1897 $result{ $author . $biblionumber } = $record;
1900 # sort the hash and return the same structure as GetRecords (Zebra querying)
1903 if ( $ordering eq 'author_za' ) { # sort by author desc
1904 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1905 $result_hash->{'RECORDS'}[ $numbers++ ] =
1906 $result{$key}->as_usmarc();
1909 else { # sort by author ASC
1910 foreach my $key ( sort ( keys %result ) ) {
1911 $result_hash->{'RECORDS'}[ $numbers++ ] =
1912 $result{$key}->as_usmarc();
1915 my $finalresult = ();
1916 $result_hash->{'hits'} = $numbers;
1917 $finalresult->{'biblioserver'} = $result_hash;
1918 return $finalresult;
1921 # ORDER BY callnumber
1924 elsif ( $ordering =~ /callnumber/ ) {
1926 foreach ( split /;/, $biblionumbers ) {
1927 my ( $biblionumber, $title ) = split /,/, $_;
1928 my $record = GetMarcBiblio($biblionumber);
1930 my ( $callnumber_tag, $callnumber_subfield ) =
1931 GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1932 ( $callnumber_tag, $callnumber_subfield ) =
1933 GetMarcFromKohaField('biblioitems.callnumber')
1934 unless $callnumber_tag;
1935 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1936 $callnumber = $record->subfield( '200', 'f' );
1939 $callnumber = $record->subfield( '100', 'a' );
1942 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1943 # and we don't want to get only 1 result for each of them !!!
1944 $result{ $callnumber . $biblionumber } = $record;
1947 # sort the hash and return the same structure as GetRecords (Zebra querying)
1950 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1951 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1952 $result_hash->{'RECORDS'}[ $numbers++ ] =
1953 $result{$key}->as_usmarc();
1956 else { # sort by title ASC
1957 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1958 $result_hash->{'RECORDS'}[ $numbers++ ] =
1959 $result{$key}->as_usmarc();
1962 my $finalresult = ();
1963 $result_hash->{'hits'} = $numbers;
1964 $finalresult->{'biblioserver'} = $result_hash;
1965 return $finalresult;
1967 elsif ( $ordering =~ /pubdate/ ) { #pub year
1969 foreach ( split /;/, $biblionumbers ) {
1970 my ( $biblionumber, $title ) = split /,/, $_;
1971 my $record = GetMarcBiblio($biblionumber);
1972 my ( $publicationyear_tag, $publicationyear_subfield ) =
1973 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1974 my $publicationyear =
1975 $record->subfield( $publicationyear_tag,
1976 $publicationyear_subfield );
1978 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1979 # and we don't want to get only 1 result for each of them !!!
1980 $result{ $publicationyear . $biblionumber } = $record;
1983 # sort the hash and return the same structure as GetRecords (Zebra querying)
1986 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1987 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1988 $result_hash->{'RECORDS'}[ $numbers++ ] =
1989 $result{$key}->as_usmarc();
1992 else { # sort by pub year ASC
1993 foreach my $key ( sort ( keys %result ) ) {
1994 $result_hash->{'RECORDS'}[ $numbers++ ] =
1995 $result{$key}->as_usmarc();
1998 my $finalresult = ();
1999 $result_hash->{'hits'} = $numbers;
2000 $finalresult->{'biblioserver'} = $result_hash;
2001 return $finalresult;
2007 elsif ( $ordering =~ /title/ ) {
2009 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2011 foreach ( split /;/, $biblionumbers ) {
2012 my ( $biblionumber, $title ) = split /,/, $_;
2014 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2015 # and we don't want to get only 1 result for each of them !!!
2016 # hint & speed improvement : we can order without reading the record
2017 # so order, and read records only for the requested page !
2018 $result{ $title . $biblionumber } = $biblionumber;
2021 # sort the hash and return the same structure as GetRecords (Zebra querying)
2024 if ( $ordering eq 'title_az' ) { # sort by title desc
2025 foreach my $key ( sort ( keys %result ) ) {
2026 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2029 else { # sort by title ASC
2030 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2031 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2035 # limit the $results_per_page to result size if it's more
2036 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2038 # for the requested page, replace biblionumber by the complete record
2039 # speed improvement : avoid reading too much things
2041 my $counter = $offset ;
2042 $counter <= $offset + $results_per_page ;
2046 $result_hash->{'RECORDS'}[$counter] =
2047 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2049 my $finalresult = ();
2050 $result_hash->{'hits'} = $numbers;
2051 $finalresult->{'biblioserver'} = $result_hash;
2052 return $finalresult;
2059 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2062 foreach ( split /;/, $biblionumbers ) {
2063 my ( $biblionumber, $title ) = split /,/, $_;
2064 $title =~ /(.*)-(\d)/;
2069 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2070 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2071 # biblio N has ranking = 6
2072 $count_ranking{$biblionumber} += $ranking;
2075 # build the result by "inverting" the count_ranking hash
2076 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2078 foreach ( keys %count_ranking ) {
2079 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2082 # sort the hash and return the same structure as GetRecords (Zebra querying)
2085 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2086 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2089 # limit the $results_per_page to result size if it's more
2090 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2092 # for the requested page, replace biblionumber by the complete record
2093 # speed improvement : avoid reading too much things
2095 my $counter = $offset ;
2096 $counter <= $offset + $results_per_page ;
2100 $result_hash->{'RECORDS'}[$counter] =
2101 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2102 if $result_hash->{'RECORDS'}[$counter];
2104 my $finalresult = ();
2105 $result_hash->{'hits'} = $numbers;
2106 $finalresult->{'biblioserver'} = $result_hash;
2107 return $finalresult;
2113 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2115 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2116 test parameter if set donot perform change to records in database.
2122 * $listbiblios is an array ref to marcrecords to be changed
2123 * $tagsubfield is the reference of the subfield to change.
2124 * $initvalue is the value to search the record for
2125 * $targetvalue is the value to set the subfield to
2126 * $test is to be set only not to perform changes in database.
2128 =item C<Output arg:>
2129 * $countchanged counts all the changes performed.
2130 * $listunchanged contains the list of all the biblionumbers of records unchanged.
2132 =item C<usage in the script:>
2136 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2137 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2138 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2143 my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2146 my ( $tag, $subfield ) = ( $1, $2 )
2147 if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2148 if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2149 $tag = $tag . $subfield;
2152 my ( $bntag, $bnsubf ) = GetMarcFromKohaField('biblio.biblionumber');
2153 my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2154 if ($tag eq $itemtag) {
2155 # do not allow the embedded item tag to be
2157 warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2160 foreach my $usmarc (@$listbiblios) {
2162 $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2166 # usmarc is not a valid usmarc May be a biblionumber
2167 # FIXME - sorry, please let's figure out whether
2168 # this function is to be passed a list of
2169 # record numbers or a list of MARC::Record
2170 # objects. The former is probably better
2171 # because the MARC records supplied by Zebra
2172 # may be not current.
2173 $record = GetMarcBiblio($usmarc);
2174 $biblionumber = $usmarc;
2177 if ( $bntag >= 010 ) {
2178 $biblionumber = $record->subfield( $bntag, $bnsubf );
2181 $biblionumber = $record->field($bntag)->data;
2185 #GetBiblionumber is to be written.
2186 #Could be replaced by TransformMarcToKoha (But Would be longer)
2187 if ( $record->field($tag) ) {
2189 foreach my $field ( $record->field($tag) ) {
2192 $field->delete_subfield(
2193 'code' => $subfield,
2194 'match' => qr($initvalue)
2200 $field->update( $subfield, $targetvalue )
2205 if ( $tag >= 010 ) {
2206 if ( $field->delete_field($field) ) {
2212 $field->data = $targetvalue
2213 if ( $field->data =~ qr($initvalue) );
2218 # warn $record->as_formatted;
2220 ModBiblio( $record, $biblionumber,
2221 GetFrameworkCode($biblionumber) )
2225 push @unmatched, $biblionumber;
2229 push @unmatched, $biblionumber;
2232 return ( $countmatched, \@unmatched );
2235 END { } # module clean-up code here (global destructor)
2242 Koha Developement team <info@koha.org>