# This file is part of Koha.
#
-# Koha is free software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any later
-# version.
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
#
-# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License along with
-# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
-# Suite 330, Boston, MA 02111-1307 USA
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
use strict;
#use warnings; FIXME - Bug 2505
use Business::ISBN;
use MARC::Record;
use MARC::Field;
-use utf8;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
# set the version for version checking
my $facets_counter = {};
my $facets_info = {};
my $facets = getFacets();
- my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets')||20;
my @facets_loop; # stores the ref to array of hashes for template facets loop
# Fill the facets while we're looping, but only for the
# biblioserver and not for a scan
if ( !$scan && $servers[ $i - 1 ] =~ /biblioserver/ ) {
-
- my $jmax = $size > $facets_maxrecs
- ? $facets_maxrecs
- : $size;
-
- for ( my $j = 0 ; $j < $jmax ; $j++ ) {
-
- my $marc_record = new_record_from_zebra (
- 'biblioserver',
- $results[ $i - 1 ]->record($j)->raw()
- );
-
- if ( ! defined $marc_record ) {
- warn "ERROR DECODING RECORD - $@: " .
- $results[ $i - 1 ]->record($j)->raw();
- next;
- }
-
- _get_facets_data_from_record( $marc_record, $facets, $facets_counter );
- $facets_info = _get_facets_info( $facets );
- }
+ $facets_counter = GetFacets( $results[ $i - 1 ] );
+ $facets_info = _get_facets_info( $facets );
}
- # warn "connection ", $i-1, ": $size hits";
- # warn $results[$i-1]->record(0)->render() if $size > 0;
-
# BUILD FACETS
if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
for my $link_value (
return ( undef, $results_hashref, \@facets_loop );
}
+=head2 GetFacets
+
+    my $facets = C4::Search::GetFacets( $result_set );
+
+Returns the facets hashref for C<$result_set>, picking the extraction strategy
+from the Koha configuration:
+
+=over 4
+
+=item * when C<zebra_bib_index_mode> is C<dom> (the value assumed when the
+entry is undefined) and C<use_zebra_facets> is true, the facets come from
+C<_get_facets_from_zebra>;
+
+=item * in every other case they come from C<_get_facets_from_records>.
+
+=back
+
+=cut
+
+sub GetFacets {
+
+    my ($result_set) = @_;
+
+    my $bib_index_mode     = C4::Context->config('zebra_bib_index_mode') // 'dom';
+    my $zebra_facets_wanted = C4::Context->config('use_zebra_facets') // 0;
+
+    # Both helpers are evaluated in scalar context and hand back a hashref.
+    my $facets = ( $bib_index_mode eq 'dom' && $zebra_facets_wanted )
+        ? _get_facets_from_zebra( $result_set )
+        : _get_facets_from_records( $result_set );
+
+    return $facets;
+}
+
+=head2 _get_facets_from_records
+
+    my $facets = _get_facets_from_records( $result_set );
+
+Builds the facets hashref by decoding records of C<$result_set> one by one
+with C<new_record_from_zebra> and passing each decoded record to
+C<_get_facets_data_from_record>. At most C<maxRecordsForFacets> records are
+inspected (20 when the preference is undefined), and never more than the
+result set contains. A record that cannot be decoded is reported with C<warn>
+and skipped.
+
+=cut
+
+sub _get_facets_from_records {
+
+    my ($result_set) = @_;
+
+    my $max_records   = C4::Context->preference('maxRecordsForFacets') // 20;
+    my $facets_config = getFacets();
+    my $facets        = {};
+
+    # Inspect the smaller of "records available" and "records allowed".
+    my $limit = $result_set->size();
+    $limit = $max_records if $limit > $max_records;
+
+    my $j = 0;
+    while ( $j < $limit ) {
+
+        my $marc_record = new_record_from_zebra (
+            'biblioserver',
+            $result_set->record( $j )->raw()
+        );
+
+        if ( defined $marc_record ) {
+            # Accumulate this record's facet values into $facets.
+            _get_facets_data_from_record( $marc_record, $facets_config, $facets );
+        }
+        else {
+            # Decoding failed: log the raw payload and move on.
+            warn "ERROR DECODING RECORD - $@: " .
+                $result_set->record( $j )->raw();
+        }
+
+        $j++;
+    }
+
+    return $facets;
+}
+
=head2 _get_facets_data_from_record
C4::Search::_get_facets_data_from_record( $marc_record, $facets, $facets_counter );
}
}
+=head2 _get_facets_from_zebra
+
+    my $facets = _get_facets_from_zebra( $result_set )
+
+Collects the facets of a result set. Every facet definition returned by
+C4::Koha::getFacets is looked up through C<_get_facet_from_result_set>;
+definitions that yield nothing are left out. The returned hashref is shaped
+like this:
+
+    {   facet_idx => {
+            facet_value => count
+        },
+        ...
+    }
+
+The result set's C<elementSetName> option is read before the lookups and
+written back afterwards, because the per-facet helper overwrites it.
+
+=cut
+
+sub _get_facets_from_zebra {
+
+    my ($result_set) = @_;
+
+    # Remember the element set in use so it can be put back at the end.
+    my $saved_element_set = $result_set->option( 'elementSetName' );
+
+    my $facet_definitions = getFacets();
+    my %values_for;
+
+    for my $definition ( @{ $facet_definitions } ) {
+
+        my $idx    = $definition->{ idx };
+        my $sep    = $definition->{ sep };
+        my $values = _get_facet_from_result_set( $idx, $result_set, $sep );
+
+        # Only keep indexes for which something came back.
+        next if !$values;
+
+        $values_for{ $idx } = $values;
+    }
+
+    # Undo the helper's change to elementSetName.
+    $result_set->option( elementSetName => $saved_element_set );
+
+    return \%values_for;
+}
+
+=head2 _get_facet_from_result_set
+
+    my $facet_values =
+        C4::Search::_get_facet_from_result_set( $facet_idx, $result_set, $sep )
+
+Internal function that extracts facet information for a specific index ($facet_idx) and
+returns a hash containing facet values and count:
+
+    {
+        $facet_value => $count ,
+        ...
+    }
+
+When $sep is defined, every occurrence of the internal separator (C<< <*> >>) found in
+a facet value is replaced by $sep.
+
+Returns nothing (undef in scalar context) when $facet_idx or $result_set is undefined,
+when the result set yields no record or no raw data for the facet element, or when the
+facet document contains no C<term> elements.
+
+Warning: this function has the side effect of changing the elementSetName for the result
+set. It is a helper function for the main loop, which takes care of backing it up for
+restoring.
+
+=cut
+
+sub _get_facet_from_result_set {
+
+    my ( $facet_idx, $rs, $sep ) = @_;
+
+    return if ( ! defined $facet_idx || ! defined $rs );
+
+    my $internal_sep  = '<*>';
+    my $facetMaxCount = C4::Context->preference('FacetMaxCount') // 20;
+
+    # zebra's facet element, untokenized index
+    my $facet_element = 'zebra::facet::' . $facet_idx . ':0:' . $facetMaxCount;
+    # configure zebra results for retrieving the desired facet
+    $rs->option( elementSetName => $facet_element );
+    # get the facet record from result set; record() may hand back nothing
+    # (presumably when the result set has no record 0 - confirm against the
+    # ZOOM API), so bail out instead of dying on ->raw
+    my $facet_record = $rs->record( 0 );
+    return if !defined $facet_record;
+    my $facet = $facet_record->raw;
+    # if the facet has no results...
+    return if !defined $facet;
+    # TODO: benchmark DOM vs. SAX performance
+    my $facet_dom = XML::LibXML->load_xml(
+        string => ($facet)
+    );
+    my @terms = $facet_dom->getElementsByTagName('term');
+    return if ! @terms;
+
+    my $facets = {};
+    foreach my $term ( @terms ) {
+        my $facet_value = $term->textContent;
+        # /g: a value may hold more than one internal separator
+        $facet_value =~ s/\Q$internal_sep\E/$sep/g if defined $sep;
+        $facets->{ $facet_value } = $term->getAttribute( 'occur' );
+    }
+
+    return $facets;
+}
+
=head2 _get_facets_info
my $facets_info = C4::Search::_get_facets_info( $facets )
if ( @limits ) {
$q .= ' and '.join(' and ', @limits);
}
- return ( undef, $q, $q, "q=ccl=".uri_escape($q), $q, '', '', '', '', 'ccl' );
+ return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $q, '', '', '', '', 'ccl' );
}
if ( $query =~ /^cql=/ ) {
- return ( undef, $', $', "q=cql=".uri_escape($'), $', '', '', '', '', 'cql' );
+ return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', '', 'cql' );
}
if ( $query =~ /^pqf=/ ) {
if ($query_desc) {
- $query_cgi = "q=".uri_escape($query_desc);
+ $query_cgi = "q=".uri_escape_utf8($query_desc);
} else {
$query_desc = $';
- $query_cgi = "q=pqf=".uri_escape($');
+ $query_cgi = "q=pqf=".uri_escape_utf8($');
}
return ( undef, $', $', $query_cgi, $query_desc, '', '', '', '', 'pqf' );
}
my $index = $indexes[$i];
# Add index-specific attributes
+
+ #Afaik, this 'yr' condition will only ever be met in the staff client advanced search
+ #for "Publication date", since typing 'yr:YYYY' into the search box produces a CCL query,
+ #which is processed higher up in this sub. Other than that, year searches are typically
+ #handled as limits which are not processed here either.
+
# Date of Publication
- if ( $index eq 'yr' ) {
- $index .= ",st-numeric";
- $indexes_set++;
+ if ( $index =~ /yr/ ) {
+ #weight_fields/relevance search causes errors with date ranges
+ #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range)
+ #In the case of YYYY-YYYY, it will return no results
$stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
}
# Date of Acquisition
- elsif ( $index eq 'acqdate' ) {
- $index .= ",st-date-normalized";
- $indexes_set++;
+ elsif ( $index =~ /acqdate/ ) {
+ #stemming and auto_truncation would have zero impact since it already is YYYY-MM-DD format
+ #Weight_fields probably SHOULD be turned OFF, otherwise you'll get records floating to the
+ #top of the results just because they have lots of item records matching that date.
+ #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so
+ #irrelevant here
+ #remove_stopwords doesn't function anymore so is irrelevant
$stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
}
# ISBN,ISSN,Standard Number, don't need special treatment
$remove_stopwords
) = ( 0, 0, 0, 0, 0 );
+ if ( $index eq 'nb' ) {
+ if ( C4::Context->preference("SearchWithISBNVariations") ) {
+ my @isbns = C4::Koha::GetVariationsOfISBN( $operand );
+ $operands[$i] = $operand = '(nb=' . join(' OR nb=', @isbns) . ')';
+ $indexes[$i] = $index = '';
+ }
+ }
}
if(not $index){
if ( $k !~ /mc-i(tem)?type/ ) {
# in case the mc-ccode value has complicating chars like ()'s inside it we wrap in quotes
$this_limit =~ tr/"//d;
- $this_limit = $k.":\"".$v."\"";
+ $this_limit = $k.":'".$v."'";
}
$group_OR_limits{$k} .= " or " if $group_OR_limits{$k};
$limit_desc .= " or " if $group_OR_limits{$k};
$group_OR_limits{$k} .= "$this_limit";
- $limit_cgi .= "&limit=" . uri_escape($this_limit);
+ $limit_cgi .= "&limit=" . uri_escape_utf8($this_limit);
$limit_desc .= " $this_limit";
}
else {
$limit .= " and " if $limit || $query;
$limit .= "$this_limit";
- $limit_cgi .= "&limit=" . uri_escape($this_limit);
+ $limit_cgi .= "&limit=" . uri_escape_utf8($this_limit);
if ($this_limit =~ /^branch:(.+)/) {
my $branchcode = $1;
my $branchname = GetBranchName($branchcode);
# This is flawed , means we can't search anything with : in it
# if user wants to do ccl or cql, start the query with that
# $query =~ s/:/=/g;
+ #NOTE: We use several different regexps here as you can't have variable length lookbehind assertions
$query =~ s/(?<=(ti|au|pb|su|an|kw|mc|nb|ns)):/=/g;
$query =~ s/(?<=(wrdl)):/=/g;
$query =~ s/(?<=(trn|phr)):/=/g;
+ $query =~ s/(?<=(st-numeric)):/=/g;
+ $query =~ s/(?<=(st-year)):/=/g;
+ $query =~ s/(?<=(st-date-normalized)):/=/g;
$limit =~ s/:/=/g;
for ( $query, $query_desc, $limit, $limit_desc ) {
s/ +/ /g; # remove extra spaces
#e.g. " and kw,wrdl:test"
$params->{query} .= $operator . $operand;
- $params->{query_cgi} .= "&op=".uri_escape($operator) if $operator;
- $params->{query_cgi} .= "&idx=".uri_escape($params->{index}) if $params->{index};
- $params->{query_cgi} .= "&q=".uri_escape($params->{original_operand}) if $params->{original_operand};
+ $params->{query_cgi} .= "&op=".uri_escape_utf8($operator) if $operator;
+ $params->{query_cgi} .= "&idx=".uri_escape_utf8($params->{index}) if $params->{index};
+ $params->{query_cgi} .= "&q=".uri_escape_utf8($params->{original_operand}) if $params->{original_operand};
#e.g. " and kw,wrdl: test"
$params->{query_desc} .= $operator . $params->{index_plus} . " " . $params->{original_operand};
: $bibliotag < 10
? GetFrameworkCode($marcrecord->field($bibliotag)->data)
: GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf));
+
+ SetUTF8Flag($marcrecord);
my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, $fw );
$oldbiblio->{subtitle} = GetRecordValue('subtitle', $marcrecord, $fw);
$oldbiblio->{result_number} = $i + 1;
if($marcrecord->field($1)){
my @repl = $marcrecord->field($1)->subfield($2);
my $subfieldvalue = $repl[$i];
-
- if (! utf8::is_utf8($subfieldvalue)) {
- utf8::decode($subfieldvalue);
- }
-
- $newline =~ s/\[$tag\]/$subfieldvalue/g;
+ $newline =~ s/\[$tag\]/$subfieldvalue/g;
}
}
$newsummary .= "$newline\n";
# should map transit status to record indexed in Zebra.
#
($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
- $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber}, $oldbiblio->{biblionumber} );
+ $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber} );
}
# item is withdrawn, lost, damaged, not for loan, reserved or in transit
}
# XSLT processing of some stuff
- SetUTF8Flag($marcrecord);
- warn $marcrecord->as_formatted if $DEBUG;
my $interface = $search_context eq 'opac' ? 'OPAC' : '';
if (!$scan && C4::Context->preference($interface . "XSLTResultsDisplay")) {
$oldbiblio->{XSLTResultsRecord} = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, $interface."XSLTResultsDisplay", 1, \@hiddenitems);
my $raw_data = shift;
# Set the default indexing modes
my $index_mode = ( $server eq 'biblioserver' )
- ? C4::Context->config('zebra_bib_index_mode') // 'grs1'
+ ? C4::Context->config('zebra_bib_index_mode') // 'dom'
: C4::Context->config('zebra_auth_index_mode') // 'dom';
my $marc_record = eval {