3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
21 use C4::Biblio; # GetMarcFromKohaField
22 use C4::Koha; # getFacets
24 use C4::Search::PazPar2;
26 use C4::Dates qw(format_date);
28 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
30 # set the version for version checking
33 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
38 C4::Search - Functions for searching the Koha catalog.
42 See opac/opac-search.pl or catalogue/search.pl for example of usage
46 This module provides searching functions for Koha's bibliographic databases
64 # make all your functions, whether exported or not;
66 =head2 findseealso($dbh,$fields);
68 C<$dbh> is a link to the DB handler.
71 my $dbh =C4::Context->dbh;
73 C<$fields> is a reference to the fields array
75 This function modifies the @$fields array and adds related fields to search on.
77 FIXME: this function is probably deprecated in Koha 3
82 my ( $dbh, $fields ) = @_;
83 my $tagslib = GetMarcStructure(1);
84 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
85 my ($tag) = substr( @$fields[$i], 1, 3 );
86 my ($subfield) = substr( @$fields[$i], 4, 1 );
87 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
88 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
94 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
96 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
102 my $dbh = C4::Context->dbh;
103 my $result = TransformMarcToKoha( $dbh, $record, '' );
108 my ( $biblionumber, $title );
110 # search duplicate on ISBN, easy and fast..
111 # ... normalize first
112 if ( $result->{isbn} ) {
113 $result->{isbn} =~ s/\(.*$//;
114 $result->{isbn} =~ s/\s+$//;
115 $query = "isbn=$result->{isbn}";
118 $result->{title} =~ s /\\//g;
119 $result->{title} =~ s /\"//g;
120 $result->{title} =~ s /\(//g;
121 $result->{title} =~ s /\)//g;
123 # FIXME: instead of removing operators, could just do
124 # quotes around the value
125 $result->{title} =~ s/(and|or|not)//g;
126 $query = "ti,ext=$result->{title}";
127 $query .= " and itemtype=$result->{itemtype}"
128 if ( $result->{itemtype} );
129 if ( $result->{author} ) {
130 $result->{author} =~ s /\\//g;
131 $result->{author} =~ s /\"//g;
132 $result->{author} =~ s /\(//g;
133 $result->{author} =~ s /\)//g;
135 # remove valid operators
136 $result->{author} =~ s/(and|or|not)//g;
137 $query .= " and au,ext=$result->{author}";
141 # FIXME: add error handling
142 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
144 foreach my $possible_duplicate_record (@$searchresults) {
146 MARC::Record->new_from_usmarc($possible_duplicate_record);
147 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
149 # FIXME :: why 2 $biblionumber ?
151 push @results, $result->{'biblionumber'};
152 push @results, $result->{'title'};
160 ($error,$results) = SimpleSearch($query,@servers);
162 This function provides a simple search API on the bibliographic catalog
168 * $query can be a simple keyword or a complete CCL query
169 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
172 * $error is a empty unless an error is detected
173 * \@results is an array of records.
175 =item C<usage in the script:>
179 my ($error, $marcresults) = SimpleSearch($query);
181 if (defined $error) {
182 $template->param(query_error => $error);
183 warn "error: ".$error;
184 output_html_with_http_headers $input, $cookie, $template->output;
188 my $hits = scalar @$marcresults;
191 for(my $i=0;$i<$hits;$i++) {
193 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
194 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
196 #build the hash for the template.
197 $resultsloop{highlight} = ($i % 2)?(1):(0);
198 $resultsloop{title} = $biblio->{'title'};
199 $resultsloop{subtitle} = $biblio->{'subtitle'};
200 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
201 $resultsloop{author} = $biblio->{'author'};
202 $resultsloop{publishercode} = $biblio->{'publishercode'};
203 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
205 push @results, \%resultsloop;
208 $template->param(result=>\@results);
214 if ( C4::Context->preference('NoZebra') ) {
215 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
218 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
219 return ( undef, $search_result );
226 return ( "No query entered", undef ) unless $query;
228 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
229 @servers = ("biblioserver") unless @servers;
231 # Initialize & Search Zebra
232 for ( my $i = 0 ; $i < @servers ; $i++ ) {
234 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
237 ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
241 $zconns[$i]->errmsg() . " ("
242 . $zconns[$i]->errcode() . ") "
243 . $zconns[$i]->addinfo() . " "
244 . $zconns[$i]->diagset();
246 return ( $error, undef ) if $zconns[$i]->errcode();
250 # caught a ZOOM::Exception
254 . $@->addinfo() . " "
257 return ( $error, undef );
262 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
263 $ev = $zconns[ $i - 1 ]->last_event();
264 if ( $ev == ZOOM::Event::ZEND ) {
265 $hits = $tmpresults[ $i - 1 ]->size();
268 for ( my $j = 0 ; $j < $hits ; $j++ ) {
269 my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
270 push @results, $record;
275 return ( undef, \@results );
281 ( undef, $results_hashref, \@facets_loop ) = getRecords (
283 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
284 $results_per_page, $offset, $expanded_facet, $branches,
288 The all singing, all dancing, multi-server, asynchronous, scanning,
289 searching, record nabbing, facet-building
291 See verbse embedded documentation.
297 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
298 $results_per_page, $offset, $expanded_facet, $branches,
302 my @servers = @$servers_ref;
303 my @sort_by = @$sort_by_ref;
305 # Initialize variables for the ZOOM connection and results object
309 my $results_hashref = ();
311 # Initialize variables for the faceted results objects
312 my $facets_counter = ();
313 my $facets_info = ();
314 my $facets = getFacets();
317 ; # stores the ref to array of hashes for template facets loop
319 ### LOOP THROUGH THE SERVERS
320 for ( my $i = 0 ; $i < @servers ; $i++ ) {
321 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
323 # perform the search, create the results objects
324 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
326 if ( $servers[$i] =~ /biblioserver/ ) {
327 $query_to_use = $koha_query;
330 $query_to_use = $simple_query;
333 #$query_to_use = $simple_query if $scan;
334 warn $simple_query if ( $scan and $DEBUG );
336 # Check if we've got a query_type defined, if so, use it
340 if ( $query_type =~ /^ccl/ ) {
342 s/\:/\=/g; # change : to = last minute (FIXME)
345 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
348 elsif ( $query_type =~ /^cql/ ) {
351 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
353 elsif ( $query_type =~ /^pqf/ ) {
356 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
363 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
369 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
375 warn "WARNING: query problem with $query_to_use " . $@;
378 # Concatenate the sort_by limits and pass them to the results object
379 # Note: sort will override rank
381 foreach my $sort (@sort_by) {
382 if ( $sort eq "author_az" ) {
383 $sort_by .= "1=1003 <i ";
385 elsif ( $sort eq "author_za" ) {
386 $sort_by .= "1=1003 >i ";
388 elsif ( $sort eq "popularity_asc" ) {
389 $sort_by .= "1=9003 <i ";
391 elsif ( $sort eq "popularity_dsc" ) {
392 $sort_by .= "1=9003 >i ";
394 elsif ( $sort eq "call_number_asc" ) {
395 $sort_by .= "1=20 <i ";
397 elsif ( $sort eq "call_number_dsc" ) {
398 $sort_by .= "1=20 >i ";
400 elsif ( $sort eq "pubdate_asc" ) {
401 $sort_by .= "1=31 <i ";
403 elsif ( $sort eq "pubdate_dsc" ) {
404 $sort_by .= "1=31 >i ";
406 elsif ( $sort eq "acqdate_asc" ) {
407 $sort_by .= "1=32 <i ";
409 elsif ( $sort eq "acqdate_dsc" ) {
410 $sort_by .= "1=32 >i ";
412 elsif ( $sort eq "title_az" ) {
413 $sort_by .= "1=4 <i ";
415 elsif ( $sort eq "title_za" ) {
416 $sort_by .= "1=4 >i ";
420 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
421 warn "WARNING sort $sort_by failed";
424 } # finished looping through servers
426 # The big moment: asynchronously retrieve results from all servers
427 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
428 my $ev = $zconns[ $i - 1 ]->last_event();
429 if ( $ev == ZOOM::Event::ZEND ) {
430 next unless $results[ $i - 1 ];
431 my $size = $results[ $i - 1 ]->size();
435 # loop through the results
436 $results_hash->{'hits'} = $size;
438 if ( $offset + $results_per_page <= $size ) {
439 $times = $offset + $results_per_page;
444 for ( my $j = $offset ; $j < $times ; $j++ ) {
449 ## Check if it's an index scan
451 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
453 # here we create a minimal MARC record and hand it off to the
454 # template just like a normal result ... perhaps not ideal, but
456 my $tmprecord = MARC::Record->new();
457 $tmprecord->encoding('UTF-8');
461 # the minimal record in author/title (depending on MARC flavour)
462 if ( C4::Context->preference("marcflavour") eq
465 $tmptitle = MARC::Field->new(
470 $tmprecord->append_fields($tmptitle);
474 MARC::Field->new( '245', ' ', ' ', a => $term, );
476 MARC::Field->new( '100', ' ', ' ', a => $occ, );
477 $tmprecord->append_fields($tmptitle);
478 $tmprecord->append_fields($tmpauthor);
480 $results_hash->{'RECORDS'}[$j] =
481 $tmprecord->as_usmarc();
486 $record = $results[ $i - 1 ]->record($j)->raw();
488 # warn "RECORD $j:".$record;
489 $results_hash->{'RECORDS'}[$j] = $record;
491 # Fill the facets while we're looping, but only for the biblioserver
492 $facet_record = MARC::Record->new_from_usmarc($record)
493 if $servers[ $i - 1 ] =~ /biblioserver/;
495 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
497 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
499 if ( $facets->[$k] ) {
501 for my $tag ( @{ $facets->[$k]->{'tags'} } )
504 $facet_record->field($tag);
506 for my $field (@fields) {
507 my @subfields = $field->subfields();
508 for my $subfield (@subfields) {
509 my ( $code, $data ) = @$subfield;
511 $facets->[$k]->{'subfield'} )
513 $facets_counter->{ $facets->[$k]
519 $facets_info->{ $facets->[$k]
520 ->{'link_value'} }->{'label_value'} =
521 $facets->[$k]->{'label_value'};
522 $facets_info->{ $facets->[$k]
523 ->{'link_value'} }->{'expanded'} =
524 $facets->[$k]->{'expanded'};
530 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
533 # warn "connection ", $i-1, ": $size hits";
534 # warn $results[$i-1]->record(0)->render() if $size > 0;
537 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
539 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
540 keys %$facets_counter )
543 my $number_of_facets;
544 my @this_facets_array;
547 $facets_counter->{$link_value}
548 ->{$b} <=> $facets_counter->{$link_value}->{$a}
549 } keys %{ $facets_counter->{$link_value} }
553 if ( ( $number_of_facets < 6 )
554 || ( $expanded_facet eq $link_value )
555 || ( $facets_info->{$link_value}->{'expanded'} ) )
558 # Sanitize the link value ), ( will cause errors with CCL,
559 my $facet_link_value = $one_facet;
560 $facet_link_value =~ s/(\(|\))/ /g;
562 # fix the length that will display in the label,
563 my $facet_label_value = $one_facet;
565 substr( $one_facet, 0, 20 ) . "..."
566 unless length($facet_label_value) <= 20;
568 # if it's a branch, label by the name, not the code,
569 if ( $link_value =~ /branch/ ) {
571 $branches->{$one_facet}->{'branchname'};
574 # but we're down with the whole label being in the link's title.
575 my $facet_title_value = $one_facet;
577 push @this_facets_array,
581 $facets_counter->{$link_value}
583 facet_label_value => $facet_label_value,
584 facet_title_value => $facet_title_value,
585 facet_link_value => $facet_link_value,
586 type_link_value => $link_value,
592 # handle expanded option
593 unless ( $facets_info->{$link_value}->{'expanded'} ) {
595 if ( ( $number_of_facets > 6 )
596 && ( $expanded_facet ne $link_value ) );
601 type_link_value => $link_value,
602 type_id => $link_value . "_id",
604 $facets_info->{$link_value}->{'label_value'},
605 facets => \@this_facets_array,
606 expandable => $expandable,
607 expand => $link_value,
614 return ( undef, $results_hashref, \@facets_loop );
619 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
620 $results_per_page, $offset, $expanded_facet, $branches,
624 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
626 $paz->search($simple_query);
630 my $results_hashref = {};
631 my $stats = XMLin($paz->stat);
632 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
634 # for a grouped search result, the number of hits
635 # is the number of groups returned; 'bib_hits' will have
636 # the total number of bibs.
637 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
638 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
640 HIT: foreach my $hit (@{ $results->{'hit'} }) {
641 my $recid = $hit->{recid}->[0];
643 my $work_title = $hit->{'md-work-title'}->[0];
645 if (exists $hit->{'md-work-author'}) {
646 $work_author = $hit->{'md-work-author'}->[0];
648 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
650 my $result_group = {};
651 $result_group->{'group_label'} = $group_label;
652 $result_group->{'group_merge_key'} = $recid;
655 if (exists $hit->{count}) {
656 $count = $hit->{count}->[0];
658 $result_group->{'group_count'} = $count;
660 for (my $i = 0; $i < $count; $i++) {
661 # FIXME -- may need to worry about diacritics here
662 my $rec = $paz->record($recid, $i);
663 push @{ $result_group->{'RECORDS'} }, $rec;
666 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
669 # pass through facets
670 my $termlist_xml = $paz->termlist('author,subject');
671 my $terms = XMLin($termlist_xml, forcearray => 1);
672 my @facets_loop = ();
673 #die Dumper($results);
674 # foreach my $list (sort keys %{ $terms->{'list'} }) {
676 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
678 # facet_label_value => $facet->{'name'}->[0],
681 # push @facets_loop, ( {
682 # type_label => $list,
683 # facets => \@facets,
687 return ( undef, $results_hashref, \@facets_loop );
691 sub _remove_stopwords {
692 my ( $operand, $index ) = @_;
693 my @stopwords_removed;
695 # phrase and exact-qualified indexes shouldn't have stopwords removed
696 if ( $index !~ m/phr|ext/ ) {
698 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
699 # we use IsAlpha unicode definition, to deal correctly with diacritics.
700 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
701 # is a stopword, we'd get "çon" and wouldn't find anything...
702 foreach ( keys %{ C4::Context->stopwords } ) {
703 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
705 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
707 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
708 $operand =~ s/^$_\P{IsAlpha}/ /gi;
709 $operand =~ s/\P{IsAlpha}$_$/ /gi;
710 push @stopwords_removed, $_;
714 return ( $operand, \@stopwords_removed );
718 sub _detect_truncation {
719 my ( $operand, $index ) = @_;
720 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
723 my @wordlist = split( /\s/, $operand );
724 foreach my $word (@wordlist) {
725 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
726 push @rightlefttruncated, $word;
728 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
729 push @lefttruncated, $word;
731 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
732 push @righttruncated, $word;
734 elsif ( index( $word, "*" ) < 0 ) {
735 push @nontruncated, $word;
738 push @regexpr, $word;
742 \@nontruncated, \@righttruncated, \@lefttruncated,
743 \@rightlefttruncated, \@regexpr
748 sub _build_stemmed_operand {
752 # FIXME: the locale should be set based on the user's language and/or search choice
753 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
755 # FIXME: these should be stored in the db so the librarian can modify the behavior
756 $stemmer->add_exceptions(
763 my @words = split( / /, $operand );
764 my $stems = $stemmer->stem(@words);
765 for my $stem (@$stems) {
766 $stemmed_operand .= "$stem";
767 $stemmed_operand .= "?"
768 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
769 $stemmed_operand .= " ";
771 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
772 return $stemmed_operand;
776 sub _build_weighted_query {
778 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
779 # pretty well but could work much better if we had a smarter query parser
780 my ( $operand, $stemmed_operand, $index ) = @_;
781 my $stemming = C4::Context->preference("QueryStemming") || 0;
782 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
783 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
785 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
787 # Keyword, or, no index specified
788 if ( ( $index eq 'kw' ) || ( !$index ) ) {
790 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
791 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
792 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
793 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
794 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
795 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
796 if $fuzzy_enabled; # add fuzzy, word list
797 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
798 if ( $stemming and $stemmed_operand )
799 ; # add stemming, right truncation
800 $weighted_query .= " or wrdl,r9=\"$operand\"";
802 # embedded sorting: 0 a-z; 1 z-a
803 # $weighted_query .= ") or (sort1,aut=1";
806 # Barcode searches should skip this process
807 elsif ( $index eq 'bc' ) {
808 $weighted_query .= "bc=\"$operand\"";
811 # Authority-number searches should skip this process
812 elsif ( $index eq 'an' ) {
813 $weighted_query .= "an=\"$operand\"";
816 # If the index already has more than one qualifier, wrap the operand
817 # in quotes and pass it back (assumption is that the user knows what they
818 # are doing and won't appreciate us mucking up their query
819 elsif ( $index =~ ',' ) {
820 $weighted_query .= " $index=\"$operand\"";
823 #TODO: build better cases based on specific search indexes
825 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
826 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
827 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
829 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
832 $weighted_query .= "))"; # close rank specification
833 return $weighted_query;
839 $simple_query, $query_cgi,
841 $limit_cgi, $limit_desc,
842 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
844 Build queries and limits in CCL, CGI, Human,
845 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
847 See verbose embedded documentation.
853 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
855 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
858 my @operators = @$operators if $operators;
859 my @indexes = @$indexes if $indexes;
860 my @operands = @$operands if $operands;
861 my @limits = @$limits if $limits;
862 my @sort_by = @$sort_by if $sort_by;
864 my $stemming = C4::Context->preference("QueryStemming") || 0;
865 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
866 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
867 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
868 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
870 # no stemming/weight/fuzzy in NoZebra
871 if ( C4::Context->preference("NoZebra") ) {
877 my $query = $operands[0];
878 my $simple_query = $operands[0];
880 # initialize the variables we're passing back
889 my $stopwords_removed; # flag to determine if stopwords have been removed
891 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
893 if ( $query =~ /^ccl=/ ) {
894 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
896 if ( $query =~ /^cql=/ ) {
897 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
899 if ( $query =~ /^pqf=/ ) {
900 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
903 # pass nested queries directly
904 # FIXME: need better handling of some of these variables in this case
905 if ( $query =~ /(\(|\))/ ) {
907 undef, $query, $simple_query, $query_cgi,
908 $query, $limit, $limit_cgi, $limit_desc,
909 $stopwords_removed, 'ccl'
913 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
914 # query operands and indexes and add stemming, truncation, field weighting, etc.
915 # Once we do so, we'll end up with a value in $query, just like if we had an
916 # incoming $query from the user
919 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
921 ; # a flag used to keep track if there was a previous query
922 # if there was, we can apply the current operator
924 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
926 # COMBINE OPERANDS, INDEXES AND OPERATORS
927 if ( $operands[$i] ) {
929 # A flag to determine whether or not to add the index to the query
932 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
933 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
936 $remove_stopwords = 0;
938 my $operand = $operands[$i];
939 my $index = $indexes[$i];
941 # Add index-specific attributes
942 # Date of Publication
943 if ( $index eq 'yr' ) {
944 $index .= ",st-numeric";
947 $stemming, $auto_truncation,
948 $weight_fields, $fuzzy_enabled,
950 ) = ( 0, 0, 0, 0, 0 );
953 # Date of Acquisition
954 elsif ( $index eq 'acqdate' ) {
955 $index .= ",st-date-normalized";
958 $stemming, $auto_truncation,
959 $weight_fields, $fuzzy_enabled,
961 ) = ( 0, 0, 0, 0, 0 );
964 # Set default structure attribute (word list)
966 unless ( !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
967 $struct_attr = ",wrdl";
970 # Some helpful index variants
971 my $index_plus = $index . $struct_attr . ":" if $index;
972 my $index_plus_comma = $index . $struct_attr . "," if $index;
975 if ($remove_stopwords) {
976 ( $operand, $stopwords_removed ) =
977 _remove_stopwords( $operand, $index );
978 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
979 warn "REMOVED STOPWORDS: @$stopwords_removed"
980 if ( $stopwords_removed && $DEBUG );
984 my ( $nontruncated, $righttruncated, $lefttruncated,
985 $rightlefttruncated, $regexpr );
986 my $truncated_operand;
988 $nontruncated, $righttruncated, $lefttruncated,
989 $rightlefttruncated, $regexpr
990 ) = _detect_truncation( $operand, $index );
992 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
997 scalar(@$righttruncated) + scalar(@$lefttruncated) +
998 scalar(@$rightlefttruncated) > 0 )
1001 # Don't field weight or add the index to the query, we do it here
1003 undef $weight_fields;
1004 my $previous_truncation_operand;
1005 if ( scalar(@$nontruncated) > 0 ) {
1006 $truncated_operand .= "$index_plus @$nontruncated ";
1007 $previous_truncation_operand = 1;
1009 if ( scalar(@$righttruncated) > 0 ) {
1010 $truncated_operand .= "and "
1011 if $previous_truncation_operand;
1012 $truncated_operand .=
1013 "$index_plus_comma" . "rtrn:@$righttruncated ";
1014 $previous_truncation_operand = 1;
1016 if ( scalar(@$lefttruncated) > 0 ) {
1017 $truncated_operand .= "and "
1018 if $previous_truncation_operand;
1019 $truncated_operand .=
1020 "$index_plus_comma" . "ltrn:@$lefttruncated ";
1021 $previous_truncation_operand = 1;
1023 if ( scalar(@$rightlefttruncated) > 0 ) {
1024 $truncated_operand .= "and "
1025 if $previous_truncation_operand;
1026 $truncated_operand .=
1027 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1028 $previous_truncation_operand = 1;
1031 $operand = $truncated_operand if $truncated_operand;
1032 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1035 my $stemmed_operand;
1036 $stemmed_operand = _build_stemmed_operand($operand)
1038 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1040 # Handle Field Weighting
1041 my $weighted_operand;
1043 _build_weighted_query( $operand, $stemmed_operand, $index )
1045 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1046 $operand = $weighted_operand if $weight_fields;
1047 $indexes_set = 1 if $weight_fields;
1049 # If there's a previous operand, we need to add an operator
1050 if ($previous_operand) {
1052 # User-specified operator
1053 if ( $operators[ $i - 1 ] ) {
1054 $query .= " $operators[$i-1] ";
1055 $query .= " $index_plus " unless $indexes_set;
1056 $query .= " $operand";
1057 $query_cgi .= "&op=$operators[$i-1]";
1058 $query_cgi .= "&idx=$index" if $index;
1059 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1061 " $operators[$i-1] $index_plus $operands[$i]";
1064 # Default operator is and
1067 $query .= "$index_plus " unless $indexes_set;
1068 $query .= "$operand";
1069 $query_cgi .= "&op=and&idx=$index" if $index;
1070 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1071 $query_desc .= " and $index_plus $operands[$i]";
1075 # There isn't a pervious operand, don't need an operator
1078 # Field-weighted queries already have indexes set
1079 $query .= " $index_plus " unless $indexes_set;
1081 $query_desc .= " $index_plus $operands[$i]";
1082 $query_cgi .= "&idx=$index" if $index;
1083 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1084 $previous_operand = 1;
1089 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1092 my $group_OR_limits;
1093 my $availability_limit;
1094 foreach my $this_limit (@limits) {
1095 if ( $this_limit =~ /available/ ) {
1097 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1099 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1100 $availability_limit .=
1101 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1102 $limit_cgi .= "&limit=available";
1106 # group_OR_limits, prefixed by mc-
1107 # OR every member of the group
1108 elsif ( $this_limit =~ /mc/ ) {
1109 $group_OR_limits .= " or " if $group_OR_limits;
1110 $limit_desc .= " or " if $group_OR_limits;
1111 $group_OR_limits .= "$this_limit";
1112 $limit_cgi .= "&limit=$this_limit";
1113 $limit_desc .= " $this_limit";
1116 # Regular old limits
1118 $limit .= " and " if $limit || $query;
1119 $limit .= "$this_limit";
1120 $limit_cgi .= "&limit=$this_limit";
1121 $limit_desc .= " $this_limit";
1124 if ($group_OR_limits) {
1125 $limit .= " and " if ( $query || $limit );
1126 $limit .= "($group_OR_limits)";
1128 if ($availability_limit) {
1129 $limit .= " and " if ( $query || $limit );
1130 $limit .= "($availability_limit)";
1133 # Normalize the query and limit strings
1136 for ( $query, $query_desc, $limit, $limit_desc ) {
1137 $_ =~ s/ / /g; # remove extra spaces
1138 $_ =~ s/^ //g; # remove any beginning spaces
1139 $_ =~ s/ $//g; # remove any ending spaces
1140 $_ =~ s/==/=/g; # remove double == from query
1142 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1144 for ($query_cgi,$simple_query) {
1147 # append the limit to the query
1148 $query .= " " . $limit;
1152 warn "QUERY:" . $query;
1153 warn "QUERY CGI:" . $query_cgi;
1154 warn "QUERY DESC:" . $query_desc;
1155 warn "LIMIT:" . $limit;
1156 warn "LIMIT CGI:" . $limit_cgi;
1157 warn "LIMIT DESC:" . $limit_desc;
1158 warn "---------\nLeave buildQuery\n---------";
1161 undef, $query, $simple_query, $query_cgi,
1162 $query_desc, $limit, $limit_cgi, $limit_desc,
1163 $stopwords_removed, $query_type
1167 =head2 searchResults
1169 Format results in a form suitable for passing to the template
1173 # IMO this subroutine is pretty messy still -- it's responsible for
1174 # building the HTML output for the template
1176 my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1177 my $dbh = C4::Context->dbh;
1181 # add search-term highlighting via <span>s on the search terms
1182 my $span_terms_hashref;
1183 for my $span_term ( split( / /, $searchdesc ) ) {
1184 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1185 $span_terms_hashref->{$span_term}++;
1188 #Build branchnames hash
1190 #get branch information.....
1193 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1194 ; # FIXME : use C4::Koha::GetBranches
1196 while ( my $bdata = $bsth->fetchrow_hashref ) {
1197 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1202 "SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'"
1205 while ( my $ldata = $lsch->fetchrow_hashref ) {
1206 $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1209 #Build itemtype hash
1210 #find itemtype & itemtype image
1214 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1217 while ( my $bdata = $bsth->fetchrow_hashref ) {
1218 $itemtypes{ $bdata->{'itemtype'} }->{description} =
1219 $bdata->{'description'};
1220 $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1221 $itemtypes{ $bdata->{'itemtype'} }->{summary} = $bdata->{'summary'};
1222 $itemtypes{ $bdata->{'itemtype'} }->{notforloan} =
1223 $bdata->{'notforloan'};
1226 #search item field code
1229 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1232 my ($itemtag) = $sth->fetchrow;
1234 # get notforloan authorised value list
1237 "SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''"
1240 my ($notforloan_authorised_value) = $sth->fetchrow;
1242 ## find column names of items related to MARC
1243 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1245 my %subfieldstosearch;
1246 while ( ( my $column ) = $sth2->fetchrow ) {
1247 my ( $tagfield, $tagsubfield ) =
1248 &GetMarcFromKohaField( "items." . $column, "" );
1249 $subfieldstosearch{$column} = $tagsubfield;
1252 # handle which records to actually retrieve
1254 if ( $hits && $offset + $results_per_page <= $hits ) {
1255 $times = $offset + $results_per_page;
1261 # loop through all of the records we've retrieved
1262 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1264 $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1265 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1266 $oldbiblio->{result_number} = $i + 1;
1268 # add imageurl to itemtype if there is one
1269 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1270 $oldbiblio->{imageurl} =
1271 $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1272 $oldbiblio->{description} =
1273 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1276 $oldbiblio->{imageurl} =
1277 getitemtypeimagesrc() . "/"
1278 . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1279 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1280 $oldbiblio->{description} =
1281 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1284 # Build summary if there is one (the summary is defined in the itemtypes table)
1285 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1286 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1287 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1288 my @fields = $marcrecord->fields();
1289 foreach my $field (@fields) {
1290 my $tag = $field->tag();
1291 my $tagvalue = $field->as_string();
1293 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1294 unless ( $tag < 10 ) {
1295 my @subf = $field->subfields;
1296 for my $i ( 0 .. $#subf ) {
1297 my $subfieldcode = $subf[$i][0];
1298 my $subfieldvalue = $subf[$i][1];
1299 my $tagsubf = $tag . $subfieldcode;
1301 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1306 $summary =~ s/\[(.*?)]//g;
1307 $summary =~ s/\n/<br\/>/g;
1308 $oldbiblio->{summary} = $summary;
1311 # Add search-term highlighting to the whole record where they match using <span>s
1312 if (C4::Context->preference("OpacHighlightedWords")){
1313 my $searchhighlightblob;
1314 for my $highlight_field ( $marcrecord->fields ) {
1316 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1317 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1318 for my $subfield ($highlight_field->subfields()) {
1320 next if $subfield->[0] eq '9';
1321 my $field = $subfield->[1];
1322 for my $term ( keys %$span_terms_hashref ) {
1323 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1324 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1328 $searchhighlightblob .= $field . " ... " if $match;
1332 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1333 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1335 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1336 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1338 # Add search-term highlighting to the title, subtitle, etc. fields
1339 for my $term ( keys %$span_terms_hashref ) {
1340 my $old_term = $term;
1341 if ( length($term) > 3 ) {
1342 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1343 $oldbiblio->{'title'} =~
1344 s/$term/<span class=\"term\">$&<\/span>/gi;
1345 $oldbiblio->{'subtitle'} =~
1346 s/$term/<span class=\"term\">$&<\/span>/gi;
1347 $oldbiblio->{'author'} =~
1348 s/$term/<span class=\"term\">$&<\/span>/gi;
1349 $oldbiblio->{'publishercode'} =~
1350 s/$term/<span class=\"term\">$&<\/span>/gi;
1351 $oldbiblio->{'place'} =~
1352 s/$term/<span class=\"term\">$&<\/span>/gi;
1353 $oldbiblio->{'pages'} =~
1354 s/$term/<span class=\"term\">$&<\/span>/gi;
1355 $oldbiblio->{'notes'} =~
1356 s/$term/<span class=\"term\">$&<\/span>/gi;
1357 $oldbiblio->{'size'} =~
1358 s/$term/<span class=\"term\">$&<\/span>/gi;
1362 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1364 # Pull out the items fields
1365 my @fields = $marcrecord->field($itemtag);
1367 # Setting item statuses for display
1368 my @available_items_loop;
1369 my @onloan_items_loop;
1370 my @other_items_loop;
1372 my $available_items;
1376 my $ordered_count = 0;
1377 my $available_count = 0;
1378 my $onloan_count = 0;
1379 my $longoverdue_count = 0;
1380 my $other_count = 0;
1381 my $wthdrawn_count = 0;
1382 my $itemlost_count = 0;
1383 my $itembinding_count = 0;
1384 my $itemdamaged_count = 0;
1385 my $can_place_holds = 0;
1386 my $items_count = scalar(@fields);
1389 ( C4::Context->preference('maxItemsinSearchResults') )
1390 ? C4::Context->preference('maxItemsinSearchResults') - 1
1393 # loop through every item
1394 foreach my $field (@fields) {
1398 # populate the items hash
1399 foreach my $code ( keys %subfieldstosearch ) {
1400 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1402 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1403 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1404 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1405 if ($item->{$hbranch}) {
1406 $item->{'branchname'} = $branches{$item->{$hbranch}};
1408 elsif ($item->{$otherbranch}) { # Last resort
1409 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1412 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1413 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1414 if ( $item->{onloan} ) {
1416 my $key = $prefix . $item->{due_date};
1417 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1418 $onloan_items->{$key}->{count}++ if $item->{homebranch};
1419 $onloan_items->{$key}->{branchname} = $item->{branchname};
1420 $onloan_items->{$key}->{location} = $locations{ $item->{location} };
1421 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1422 $onloan_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1423 # if something's checked out and lost, mark it as 'long overdue'
1424 if ( $item->{itemlost} ) {
1425 $onloan_items->{$prefix}->{longoverdue}++;
1426 $longoverdue_count++;
1427 } else { # can place holds as long as item isn't lost
1428 $can_place_holds = 1;
1432 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1436 if ( $item->{notforloan} == -1 ) {
1440 # item is withdrawn, lost or damaged
1441 if ( $item->{wthdrawn}
1442 || $item->{itemlost}
1444 || $item->{notforloan} )
1446 $wthdrawn_count++ if $item->{wthdrawn};
1447 $itemlost_count++ if $item->{itemlost};
1448 $itemdamaged_count++ if $item->{damaged};
1449 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1452 my $key = $prefix . $item->{status};
1453 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1454 $other_items->{$key}->{$_} = $item->{$_};
1456 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1457 $other_items->{$key}->{count}++ if $item->{homebranch};
1458 $other_items->{$key}->{location} = $locations{ $item->{location} };
1459 $other_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1463 $can_place_holds = 1;
1465 $available_items->{$prefix}->{count}++ if $item->{homebranch};
1466 foreach (qw(branchname itemcallnumber)) {
1467 $available_items->{$prefix}->{$_} = $item->{$_};
1469 $available_items->{$prefix}->{location} = $locations{ $item->{location} };
1470 $available_items->{$prefix}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1473 } # notforloan, item level and biblioitem level
1474 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1476 ( C4::Context->preference('maxItemsinSearchResults') )
1477 ? C4::Context->preference('maxItemsinSearchResults') - 1
1479 for my $key ( sort keys %$onloan_items ) {
1480 (++$onloanitemscount > $maxitems) and last;
1481 push @onloan_items_loop, $onloan_items->{$key};
1483 for my $key ( sort keys %$other_items ) {
1484 (++$otheritemscount > $maxitems) and last;
1485 push @other_items_loop, $other_items->{$key};
1487 for my $key ( sort keys %$available_items ) {
1488 (++$availableitemscount > $maxitems) and last;
1489 push @available_items_loop, $available_items->{$key}
1492 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1493 $can_place_holds = 0
1494 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1495 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1496 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1497 $oldbiblio->{items_count} = $items_count;
1498 $oldbiblio->{available_items_loop} = \@available_items_loop;
1499 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1500 $oldbiblio->{other_items_loop} = \@other_items_loop;
1501 $oldbiblio->{availablecount} = $available_count;
1502 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1503 $oldbiblio->{onloancount} = $onloan_count;
1504 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1505 $oldbiblio->{othercount} = $other_count;
1506 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1507 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1508 $oldbiblio->{itemlostcount} = $itemlost_count;
1509 $oldbiblio->{damagedcount} = $itemdamaged_count;
1510 $oldbiblio->{orderedcount} = $ordered_count;
1511 $oldbiblio->{isbn} =~
1512 s/-//g; # deleting - in isbn to enable amazon content
1513 push( @newresults, $oldbiblio );
1518 #----------------------------------------------------------------------
1520 # Non-Zebra GetRecords#
1521 #----------------------------------------------------------------------
1525 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1531 $query, $simple_query, $sort_by_ref, $servers_ref,
1532 $results_per_page, $offset, $expanded_facet, $branches,
1535 warn "query =$query" if $DEBUG;
1536 my $result = NZanalyse($query);
1537 warn "results =$result" if $DEBUG;
1539 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1545 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1546 the list is built from an inverted index in the nozebra SQL table
1547 note that title is here only for convenience : the sorting will be very fast when requested on title
1548 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1553 my ( $string, $server ) = @_;
1554 # warn "---------" if $DEBUG;
1555 warn " NZanalyse" if $DEBUG;
1556 # warn "---------" if $DEBUG;
1558 # $server contains biblioserver or authorities, depending on what we search on.
1559 #warn "querying : $string on $server";
1560 $server = 'biblioserver' unless $server;
1562 # if we have a ", replace the content to discard temporarily any and/or/not inside
1564 if ( $string =~ /"/ ) {
1565 $string =~ s/"(.*?)"/__X__/;
1567 warn "commacontent : $commacontent" if $DEBUG;
1570 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1571 # then, call again NZanalyse with $left and $right
1572 # (recursive until we find a leaf (=> something without and/or/not)
1573 # delete repeated operator... Would then go in infinite loop
1574 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1577 #process parenthesis before.
1578 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1581 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1583 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1585 my $leftresult = NZanalyse( $left, $server );
1587 my $rightresult = NZanalyse( $right, $server );
1589 # OK, we have the results for right and left part of the query
1590 # depending of operand, intersect, union or exclude both lists
1591 # to get a result list
1592 if ( $operator eq ' and ' ) {
1593 return NZoperatorAND($leftresult,$rightresult);
1595 elsif ( $operator eq ' or ' ) {
1597 # just merge the 2 strings
1598 return $leftresult . $rightresult;
1600 elsif ( $operator eq ' not ' ) {
1601 return NZoperatorNOT($leftresult,$rightresult);
1605 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1609 warn "string :" . $string if $DEBUG;
1610 $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1613 my $operator = lc($2); # FIXME: and/or/not are operators, not operands
1614 warn "no parenthesis. left : $left operator: $operator right: $right"
1617 # it's not a leaf, we have a and/or/not
1620 # reintroduce comma content if needed
1621 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1622 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1623 warn "node : $left / $operator / $right\n" if $DEBUG;
1624 my $leftresult = NZanalyse( $left, $server );
1625 my $rightresult = NZanalyse( $right, $server );
1626 warn " leftresult : $leftresult" if $DEBUG;
1627 warn " rightresult : $rightresult" if $DEBUG;
1628 # OK, we have the results for right and left part of the query
1629 # depending of operand, intersect, union or exclude both lists
1630 # to get a result list
1631 if ( $operator eq ' and ' ) {
1633 return NZoperatorAND($leftresult,$rightresult);
1635 elsif ( $operator eq ' or ' ) {
1637 # just merge the 2 strings
1638 return $leftresult . $rightresult;
1640 elsif ( $operator eq ' not ' ) {
1641 return NZoperatorNOT($leftresult,$rightresult);
1645 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1646 die "error : operand unknown : $operator for $string";
1649 # it's a leaf, do the real SQL query and return the result
1652 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1653 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1654 #remove trailing blank at the beginning
1656 warn "leaf:$string" if $DEBUG;
1658 # parse the string in in operator/operand/value again
1659 $string =~ /(.*)(>=|<=)(.*)/;
1663 # warn "handling leaf... left:$left operator:$operator right:$right"
1665 unless ($operator) {
1666 $string =~ /(.*)(>|<|=)(.*)/;
1671 "handling unless (operator)... left:$left operator:$operator right:$right"
1676 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1679 # automatic replace for short operators
1680 $left = 'title' if $left =~ '^ti$';
1681 $left = 'author' if $left =~ '^au$';
1682 $left = 'publisher' if $left =~ '^pb$';
1683 $left = 'subject' if $left =~ '^su$';
1684 $left = 'koha-Auth-Number' if $left =~ '^an$';
1685 $left = 'keyword' if $left =~ '^kw$';
1686 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1687 if ( $operator && $left ne 'keyword' ) {
1689 #do a specific search
1690 my $dbh = C4::Context->dbh;
1691 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1694 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1696 warn "$left / $operator / $right\n" if $DEBUG;
1698 # split each word, query the DB and build the biblionumbers result
1699 #sanitizing leftpart
1700 $left =~ s/^\s+|\s+$//;
1701 foreach ( split / /, $right ) {
1703 $_ =~ s/^\s+|\s+$//;
1705 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1706 $sth->execute( $server, $left, $_ )
1707 or warn "execute failed: $!";
1708 while ( my ( $line, $value ) = $sth->fetchrow ) {
1710 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1711 # otherwise, fill the result
1712 $biblionumbers .= $line
1713 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1714 warn "result : $value "
1715 . ( $right =~ /\d/ ) . "=="
1716 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1719 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1721 warn "NZAND" if $DEBUG;
1722 $results = NZoperatorAND($biblionumbers,$results);
1725 $results = $biblionumbers;
1731 #do a complete search (all indexes), if index='kw' do complete search too.
1732 my $dbh = C4::Context->dbh;
1735 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1738 # split each word, query the DB and build the biblionumbers result
1739 foreach ( split / /, $string ) {
1740 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1741 warn "search on all indexes on $_" if $DEBUG;
1744 $sth->execute( $server, $_ );
1745 while ( my $line = $sth->fetchrow ) {
1746 $biblionumbers .= $line;
1749 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1751 $results = NZoperatorAND($biblionumbers,$results);
1754 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1755 $results = $biblionumbers;
1759 warn "return : $results for LEAF : $string" if $DEBUG;
1762 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1766 my ($rightresult, $leftresult)=@_;
1768 my @leftresult = split /;/, $leftresult;
1769 warn " @leftresult / $rightresult \n" if $DEBUG;
1771 # my @rightresult = split /;/,$leftresult;
1774 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1775 # the result is stored twice, to have the same weight for AND than OR.
1776 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1777 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1778 foreach (@leftresult) {
1781 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1782 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1783 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1785 "$value-$countvalue;$value-$countvalue;";
1788 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1789 return $finalresult;
1793 my ($rightresult, $leftresult)=@_;
1794 return $rightresult.$leftresult;
1798 my ($rightresult, $leftresult)=@_;
1800 my @leftresult = split /;/, $leftresult;
1802 # my @rightresult = split /;/,$leftresult;
1804 foreach (@leftresult) {
1806 $value=$1 if $value=~m/(.*)-\d+$/;
1807 unless ($rightresult =~ "$value-") {
1808 $finalresult .= "$_;";
1811 return $finalresult;
1816 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1823 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1824 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1826 # order title asc by default
1827 # $ordering = '1=36 <i' unless $ordering;
1828 $results_per_page = 20 unless $results_per_page;
1829 $offset = 0 unless $offset;
1830 my $dbh = C4::Context->dbh;
1833 # order by POPULARITY
1835 if ( $ordering =~ /popularity/ ) {
1839 # popularity is not in MARC record, it's builded from a specific query
1841 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1842 foreach ( split /;/, $biblionumbers ) {
1843 my ( $biblionumber, $title ) = split /,/, $_;
1844 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1845 $sth->execute($biblionumber);
1846 my $popularity = $sth->fetchrow || 0;
1848 # hint : the key is popularity.title because we can have
1849 # many results with the same popularity. In this cas, sub-ordering is done by title
1850 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1851 # (un-frequent, I agree, but we won't forget anything that way ;-)
1852 $popularity{ sprintf( "%10d", $popularity ) . $title
1853 . $biblionumber } = $biblionumber;
1856 # sort the hash and return the same structure as GetRecords (Zebra querying)
1859 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1860 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1861 $result_hash->{'RECORDS'}[ $numbers++ ] =
1862 $result{ $popularity{$key} }->as_usmarc();
1865 else { # sort popularity ASC
1866 foreach my $key ( sort ( keys %popularity ) ) {
1867 $result_hash->{'RECORDS'}[ $numbers++ ] =
1868 $result{ $popularity{$key} }->as_usmarc();
1871 my $finalresult = ();
1872 $result_hash->{'hits'} = $numbers;
1873 $finalresult->{'biblioserver'} = $result_hash;
1874 return $finalresult;
1880 elsif ( $ordering =~ /author/ ) {
1882 foreach ( split /;/, $biblionumbers ) {
1883 my ( $biblionumber, $title ) = split /,/, $_;
1884 my $record = GetMarcBiblio($biblionumber);
1886 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1887 $author = $record->subfield( '200', 'f' );
1888 $author = $record->subfield( '700', 'a' ) unless $author;
1891 $author = $record->subfield( '100', 'a' );
1894 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1895 # and we don't want to get only 1 result for each of them !!!
1896 $result{ $author . $biblionumber } = $record;
1899 # sort the hash and return the same structure as GetRecords (Zebra querying)
1902 if ( $ordering eq 'author_za' ) { # sort by author desc
1903 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1904 $result_hash->{'RECORDS'}[ $numbers++ ] =
1905 $result{$key}->as_usmarc();
1908 else { # sort by author ASC
1909 foreach my $key ( sort ( keys %result ) ) {
1910 $result_hash->{'RECORDS'}[ $numbers++ ] =
1911 $result{$key}->as_usmarc();
1914 my $finalresult = ();
1915 $result_hash->{'hits'} = $numbers;
1916 $finalresult->{'biblioserver'} = $result_hash;
1917 return $finalresult;
1920 # ORDER BY callnumber
1923 elsif ( $ordering =~ /callnumber/ ) {
1925 foreach ( split /;/, $biblionumbers ) {
1926 my ( $biblionumber, $title ) = split /,/, $_;
1927 my $record = GetMarcBiblio($biblionumber);
1929 my ( $callnumber_tag, $callnumber_subfield ) =
1930 GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1931 ( $callnumber_tag, $callnumber_subfield ) =
1932 GetMarcFromKohaField('biblioitems.callnumber')
1933 unless $callnumber_tag;
1934 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1935 $callnumber = $record->subfield( '200', 'f' );
1938 $callnumber = $record->subfield( '100', 'a' );
1941 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1942 # and we don't want to get only 1 result for each of them !!!
1943 $result{ $callnumber . $biblionumber } = $record;
1946 # sort the hash and return the same structure as GetRecords (Zebra querying)
1949 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1950 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1951 $result_hash->{'RECORDS'}[ $numbers++ ] =
1952 $result{$key}->as_usmarc();
1955 else { # sort by title ASC
1956 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1957 $result_hash->{'RECORDS'}[ $numbers++ ] =
1958 $result{$key}->as_usmarc();
1961 my $finalresult = ();
1962 $result_hash->{'hits'} = $numbers;
1963 $finalresult->{'biblioserver'} = $result_hash;
1964 return $finalresult;
1966 elsif ( $ordering =~ /pubdate/ ) { #pub year
1968 foreach ( split /;/, $biblionumbers ) {
1969 my ( $biblionumber, $title ) = split /,/, $_;
1970 my $record = GetMarcBiblio($biblionumber);
1971 my ( $publicationyear_tag, $publicationyear_subfield ) =
1972 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1973 my $publicationyear =
1974 $record->subfield( $publicationyear_tag,
1975 $publicationyear_subfield );
1977 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1978 # and we don't want to get only 1 result for each of them !!!
1979 $result{ $publicationyear . $biblionumber } = $record;
1982 # sort the hash and return the same structure as GetRecords (Zebra querying)
1985 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1986 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1987 $result_hash->{'RECORDS'}[ $numbers++ ] =
1988 $result{$key}->as_usmarc();
1991 else { # sort by pub year ASC
1992 foreach my $key ( sort ( keys %result ) ) {
1993 $result_hash->{'RECORDS'}[ $numbers++ ] =
1994 $result{$key}->as_usmarc();
1997 my $finalresult = ();
1998 $result_hash->{'hits'} = $numbers;
1999 $finalresult->{'biblioserver'} = $result_hash;
2000 return $finalresult;
2006 elsif ( $ordering =~ /title/ ) {
2008 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2010 foreach ( split /;/, $biblionumbers ) {
2011 my ( $biblionumber, $title ) = split /,/, $_;
2013 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2014 # and we don't want to get only 1 result for each of them !!!
2015 # hint & speed improvement : we can order without reading the record
2016 # so order, and read records only for the requested page !
2017 $result{ $title . $biblionumber } = $biblionumber;
2020 # sort the hash and return the same structure as GetRecords (Zebra querying)
2023 if ( $ordering eq 'title_az' ) { # sort by title desc
2024 foreach my $key ( sort ( keys %result ) ) {
2025 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2028 else { # sort by title ASC
2029 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2030 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2034 # limit the $results_per_page to result size if it's more
2035 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2037 # for the requested page, replace biblionumber by the complete record
2038 # speed improvement : avoid reading too much things
2040 my $counter = $offset ;
2041 $counter <= $offset + $results_per_page ;
2045 $result_hash->{'RECORDS'}[$counter] =
2046 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2048 my $finalresult = ();
2049 $result_hash->{'hits'} = $numbers;
2050 $finalresult->{'biblioserver'} = $result_hash;
2051 return $finalresult;
2058 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2061 foreach ( split /;/, $biblionumbers ) {
2062 my ( $biblionumber, $title ) = split /,/, $_;
2063 $title =~ /(.*)-(\d)/;
2068 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2069 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2070 # biblio N has ranking = 6
2071 $count_ranking{$biblionumber} += $ranking;
2074 # build the result by "inverting" the count_ranking hash
2075 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2077 foreach ( keys %count_ranking ) {
2078 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2081 # sort the hash and return the same structure as GetRecords (Zebra querying)
2084 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2085 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2088 # limit the $results_per_page to result size if it's more
2089 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2091 # for the requested page, replace biblionumber by the complete record
2092 # speed improvement : avoid reading too much things
2094 my $counter = $offset ;
2095 $counter <= $offset + $results_per_page ;
2099 $result_hash->{'RECORDS'}[$counter] =
2100 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2101 if $result_hash->{'RECORDS'}[$counter];
2103 my $finalresult = ();
2104 $result_hash->{'hits'} = $numbers;
2105 $finalresult->{'biblioserver'} = $result_hash;
2106 return $finalresult;
2112 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2114 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2115 test parameter if set donot perform change to records in database.
2121 * $listbiblios is an array ref to marcrecords to be changed
2122 * $tagsubfield is the reference of the subfield to change.
2123 * $initvalue is the value to search the record for
2124 * $targetvalue is the value to set the subfield to
2125 * $test is to be set only not to perform changes in database.
2127 =item C<Output arg:>
2128 * $countchanged counts all the changes performed.
2129 * $listunchanged contains the list of all the biblionumbers of records unchanged.
2131 =item C<usage in the script:>
2135 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2136 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2137 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2142 my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2145 my ( $tag, $subfield ) = ( $1, $2 )
2146 if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2147 if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2148 $tag = $tag . $subfield;
2151 my ( $bntag, $bnsubf ) = GetMarcFromKohaField('biblio.biblionumber');
2152 my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2153 if ($tag eq $itemtag) {
2154 # do not allow the embedded item tag to be
2156 warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2159 foreach my $usmarc (@$listbiblios) {
2161 $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2165 # usmarc is not a valid usmarc May be a biblionumber
2166 # FIXME - sorry, please let's figure out whether
2167 # this function is to be passed a list of
2168 # record numbers or a list of MARC::Record
2169 # objects. The former is probably better
2170 # because the MARC records supplied by Zebra
2171 # may be not current.
2172 $record = GetMarcBiblio($usmarc);
2173 $biblionumber = $usmarc;
2176 if ( $bntag >= 010 ) {
2177 $biblionumber = $record->subfield( $bntag, $bnsubf );
2180 $biblionumber = $record->field($bntag)->data;
2184 #GetBiblionumber is to be written.
2185 #Could be replaced by TransformMarcToKoha (But Would be longer)
2186 if ( $record->field($tag) ) {
2188 foreach my $field ( $record->field($tag) ) {
2191 $field->delete_subfield(
2192 'code' => $subfield,
2193 'match' => qr($initvalue)
2199 $field->update( $subfield, $targetvalue )
2204 if ( $tag >= 010 ) {
2205 if ( $field->delete_field($field) ) {
2211 $field->data = $targetvalue
2212 if ( $field->data =~ qr($initvalue) );
2217 # warn $record->as_formatted;
2219 ModBiblio( $record, $biblionumber,
2220 GetFrameworkCode($biblionumber) )
2224 push @unmatched, $biblionumber;
2228 push @unmatched, $biblionumber;
2231 return ( $countmatched, \@unmatched );
2234 END { } # module clean-up code here (global destructor)
2241 Koha Developement team <info@koha.org>