use Koha::Database;
use Koha::Exceptions::Config;
use Koha::Exceptions::Elasticsearch;
+use Koha::Filter::MARC::EmbedSeeFromHeadings;
use Koha::SearchFields;
use Koha::SearchMarcMaps;
use Koha::Caches;
use C4::Heading;
+use C4::AuthoritiesMarc qw( GuessAuthTypeCode );
+use C4::Biblio;
-use Carp;
-use Clone qw(clone);
-use JSON;
+use Carp qw( carp croak );
+use Clone qw( clone );
use Modern::Perl;
-use Readonly;
+use Readonly qw( Readonly );
use Search::Elasticsearch;
-use Try::Tiny;
-use YAML::Syck;
+use Try::Tiny qw( catch try );
+use YAML::XS;
-use List::Util qw( sum0 reduce );
+use List::Util qw( sum0 );
use MARC::File::XML;
-use MIME::Base64;
-use Encode qw(encode);
+use MIME::Base64 qw( encode_base64 );
+use Encode qw( encode );
use Business::ISBN;
-use Scalar::Util qw(looks_like_number);
+use Scalar::Util qw( looks_like_number );
-__PACKAGE__->mk_ro_accessors(qw( index ));
+__PACKAGE__->mk_ro_accessors(qw( index index_name ));
__PACKAGE__->mk_accessors(qw( sort_fields ));
# Constants to refer to the standard index names
The name of the index to use, generally 'biblios' or 'authorities'.
+=item index_name
+
+The Elasticsearch index name with Koha instance prefix.
+
=back
+
=head1 FUNCTIONS
=cut
sub new {
    my $class = shift @_;
-    my $self = $class->SUPER::new(@_);
+    # Constructor. Expects a hashref with at least an 'index' entry and
+    # throws Koha::Exceptions::MissingParameter when it is absent.
+    my ( $params, @extra_args ) = @_;
+
+    # Work on a shallow copy so the index_name computed below is never
+    # written into the hashref owned by the caller.
+    my %args = %{ $params // {} };
+
    # Check for a valid index
-    Koha::Exceptions::MissingParameter->throw('No index name provided') unless $self->index;
+    Koha::Exceptions::MissingParameter->throw('No index name provided') unless $args{index};
+
+    # index_name is the configured <index_name> value joined to this index
+    # with an underscore.
+    my $config = _read_configuration();
+    $args{index_name} = $config->{index_name} . '_' . $args{index};
+
+    my $self = $class->SUPER::new( \%args, @extra_args );
    return $self;
}
sub get_elasticsearch {
    my $self = shift @_;
-    unless (defined $self->{elasticsearch}) {
-        my $conf = $self->get_elasticsearch_params();
-        $self->{elasticsearch} = Search::Elasticsearch->new($conf);
-    }
+    # Build the Search::Elasticsearch client on first use only; every later
+    # call returns the instance stored on the object.
+    $self->{elasticsearch} //= Search::Elasticsearch->new(
+        $self->get_elasticsearch_params()
+    );
    return $self->{elasticsearch};
}
sub get_elasticsearch_params {
    my ($self) = @_;
-    # Copy the hash so that we're not modifying the original
-    my $conf = C4::Context->config('elasticsearch');
-    die "No 'elasticsearch' block is defined in koha-conf.xml.\n" if ( !$conf );
-    my $es = { %{ $conf } };
-
-    # Helpfully, the multiple server lines end up in an array for us anyway
-    # if there are multiple ones, but not if there's only one.
-    my $server = $es->{server};
-    delete $es->{server};
-    if ( ref($server) eq 'ARRAY' ) {
-
-        # store it called 'nodes' (which is used by newer Search::Elasticsearch)
-        $es->{nodes} = $server;
-    }
-    elsif ($server) {
-        $es->{nodes} = [$server];
-    }
-    else {
-        die "No elasticsearch servers were specified in koha-conf.xml.\n";
-    }
-    die "No elasticsearch index_name was specified in koha-conf.xml.\n"
-      if ( !$es->{index_name} );
-    # Append the name of this particular index to our namespace
-    $es->{index_name} .= '_' . $self->index;
-
-    $es->{key_prefix} = 'es_';
-    $es->{cxn_pool} //= 'Static';
-    $es->{request_timeout} //= 60;
+    # Returns the hashref produced by _read_configuration().
+    # A Koha::Exceptions::Config::MissingEntry (or a subclass of it) is
+    # reported from the caller's perspective with croak. Any other error is
+    # re-thrown unchanged rather than being silently discarded, which would
+    # otherwise make this sub return undef.
+    my $conf;
+    try {
+        $conf = _read_configuration();
+    } catch {
+        if ( Scalar::Util::blessed($_) && $_->isa('Koha::Exceptions::Config::MissingEntry') ) {
+            croak($_->message);
+        }
+        die $_;
+    };
-    return $es;
+    return $conf;
}
=head2 get_elasticsearch_settings
if (!defined $settings) {
my $config_file = C4::Context->config('elasticsearch_index_config');
$config_file ||= C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/index_config.yaml';
- $settings = LoadFile( $config_file );
+ $settings = YAML::XS::LoadFile( $config_file );
}
return $settings;
if (!defined $all_mappings{$self->index}) {
$sort_fields{$self->index} = {};
# Clone the general mapping to break ties with the original hash
- my $mappings = {
- data => clone(_get_elasticsearch_field_config('general', ''))
- };
+ my $mappings = clone(_get_elasticsearch_field_config('general', ''));
my $marcflavour = lc C4::Context->preference('marcflavour');
$self->_foreach_mapping(
sub {
$es_type = 'integer';
} elsif ($type eq 'isbn' || $type eq 'stdno') {
$es_type = 'stdno';
+ } elsif ($type eq 'year') {
+ $es_type = 'year';
+ } elsif ($type eq 'callnumber') {
+ $es_type = 'cn_sort';
}
if ($search) {
- $mappings->{data}{properties}{$name} = _get_elasticsearch_field_config('search', $es_type);
+ $mappings->{properties}{$name} = _get_elasticsearch_field_config('search', $es_type);
}
if ($facet) {
- $mappings->{data}{properties}{ $name . '__facet' } = _get_elasticsearch_field_config('facet', $es_type);
+ $mappings->{properties}{ $name . '__facet' } = _get_elasticsearch_field_config('facet', $es_type);
}
if ($suggestible) {
- $mappings->{data}{properties}{ $name . '__suggestion' } = _get_elasticsearch_field_config('suggestible', $es_type);
+ $mappings->{properties}{ $name . '__suggestion' } = _get_elasticsearch_field_config('suggestible', $es_type);
}
# Sort is a bit special as it can be true, false, undef.
# We care about "true" or "undef",
# "undef" means to do the default thing, which is make it sortable.
if (!defined $sort || $sort) {
- $mappings->{data}{properties}{ $name . '__sort' } = _get_elasticsearch_field_config('sort', $es_type);
+ $mappings->{properties}{ $name . '__sort' } = _get_elasticsearch_field_config('sort', $es_type);
$sort_fields{$self->index}{$name} = 1;
}
}
);
+ if( $self->index eq 'authorities' ){
+ $mappings->{properties}{ 'match-heading' } = _get_elasticsearch_field_config('search', 'text');
+ $mappings->{properties}{ 'subject-heading-thesaurus' } = _get_elasticsearch_field_config('search', 'text');
+ }
$all_mappings{$self->index} = $mappings;
}
$self->sort_fields(\%{$sort_fields{$self->index}});
-
return $all_mappings{$self->index};
}
=head2 raw_elasticsearch_mappings
Return elasticsearch mapping as it is in database.
-marc_type: marc21|unimarc|normarc
+marc_type: marc21|unimarc
$raw_mappings = raw_elasticsearch_mappings( $marc_type )
$mappings->{ $marc_map->index_name }{ $search_field->name }{label} = $search_field->label;
$mappings->{ $marc_map->index_name }{ $search_field->name }{type} = $search_field->type;
+ $mappings->{ $marc_map->index_name }{ $search_field->name }{mandatory} = $search_field->mandatory;
$mappings->{ $marc_map->index_name }{ $search_field->name }{facet_order} = $search_field->facet_order if defined $search_field->facet_order;
$mappings->{ $marc_map->index_name }{ $search_field->name }{weight} = $search_field->weight if defined $search_field->weight;
+ $mappings->{ $marc_map->index_name }{ $search_field->name }{opac} = $search_field->opac if defined $search_field->opac;
+ $mappings->{ $marc_map->index_name }{ $search_field->name }{staff_client} = $search_field->staff_client if defined $search_field->staff_client;
push (@{ $mappings->{ $marc_map->index_name }{ $search_field->name }{mappings} },
{
if (!defined $settings) {
my $config_file = C4::Context->config('elasticsearch_field_config');
$config_file ||= C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/field_config.yaml';
- $settings = LoadFile( $config_file );
+ local $YAML::XS::Boolean = 'JSON::PP';
+ $settings = YAML::XS::LoadFile( $config_file );
}
if (!defined $settings->{$purpose}) {
sub _load_elasticsearch_mappings {
-    my $mappings_yaml = C4::Context->config('elasticsearch_index_mappings');
-    $mappings_yaml ||= C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
-    return LoadFile( $mappings_yaml );
+    # Use the 'elasticsearch_index_mappings' config entry when it is set,
+    # otherwise fall back to mappings.yaml below the 'intranetdir' directory.
+    my $mappings_file = C4::Context->config('elasticsearch_index_mappings')
+        || C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
+    return YAML::XS::LoadFile( $mappings_file );
}
sub reset_elasticsearch_mappings {
while ( my ( $index_name, $fields ) = each %$indexes ) {
while ( my ( $field_name, $data ) = each %$fields ) {
- my %sf_params = map { $_ => $data->{$_} } grep { exists $data->{$_} } qw/ type label weight staff_client opac facet_order /;
+ my %sf_params = map { $_ => $data->{$_} } grep { exists $data->{$_} } qw/ type label weight staff_client opac facet_order mandatory/;
# Set default values
$sf_params{staff_client} //= 1;
$search_field->add_to_search_marc_maps($marc_field, {
facet => $mapping->{facet} || 0,
suggestible => $mapping->{suggestible} || 0,
- sort => $mapping->{sort},
+ sort => $mapping->{sort} // 1,
search => $mapping->{search} // 1
});
}
}
}
- my $cache = Koha::Caches->get_instance();
- $cache->clear_from_cache('elasticsearch_search_fields_staff_client');
- $cache->clear_from_cache('elasticsearch_search_fields_opac');
+ $self->clear_search_fields_cache();
# FIXME return the mappings?
}
# Copy (scalar) data since can have multiple targets
# with differing options for (possibly) mutating data
# so need a different copy for each
- my $_data = $data;
- $record_document->{$target} //= [];
+ my $data_copy = $data;
if (defined $options->{substr}) {
my ($start, $length) = @{$options->{substr}};
- $_data = length($data) > $start ? substr $data, $start, $length : '';
+ $data_copy = length($data) > $start ? substr $data_copy, $start, $length : '';
}
+
+ # Add data to values array for callbacks processing
+ my $values = [$data_copy];
+
+ # Each value callback takes subfield data (or the values returned by the
+ # previous callback) as its argument and returns a possibly different list
+ # of values. Note that the returned list may also be empty.
if (defined $options->{value_callbacks}) {
- $_data = reduce { $b->($a) } ($_data, @{$options->{value_callbacks}});
+ foreach my $callback (@{$options->{value_callbacks}}) {
+ # Pass each value to current callback which returns a list
+ # (scalar is fine too) resulting either in a list or
+ # a list of lists that will be flattened by perl.
+ # The next callback will receive the possibly expanded list of values.
+ $values = [ map { $callback->($_) } @{$values} ];
+ }
}
+
+ # Skip mapping if all values have been removed
+ next unless @{$values};
+
if (defined $options->{property}) {
- $_data = {
- $options->{property} => $_data
- }
+ $values = [ map { { $options->{property} => $_ } if $_} @{$values} ];
}
if (defined $options->{nonfiling_characters_indicator}) {
my $nonfiling_chars = $meta->{field}->indicator($options->{nonfiling_characters_indicator});
$nonfiling_chars = looks_like_number($nonfiling_chars) ? int($nonfiling_chars) : 0;
- if ($nonfiling_chars) {
- $_data = substr $_data, $nonfiling_chars;
- }
+ # Nonfiling characters do not make sense for multiple values;
+ # only apply them to the first element
+ $values->[0] = substr $values->[0], $nonfiling_chars;
}
- push @{$record_document->{$target}}, $_data;
+
+ $values = [ grep(!/^$/, @{$values}) ];
+
+ $record_document->{$target} //= [];
+ push @{$record_document->{$target}}, @{$values};
}
}
my @record_documents;
+ my %auth_match_headings;
+ if( $self->index eq 'authorities' ){
+ my @auth_types = Koha::Authority::Types->search->as_list;
+ %auth_match_headings = map { $_->authtypecode => $_->auth_tag_to_report } @auth_types;
+ }
+
foreach my $record (@{$records}) {
my $record_document = {};
+
+ if ( $self->index eq 'authorities' ){
+ my $authtypecode = GuessAuthTypeCode( $record );
+ if( $authtypecode ){
+ if( $authtypecode !~ m/_SUBD/ ){ #Subdivision records will not be used for linking and so don't require match-heading to be built
+ my $field = $record->field( $auth_match_headings{ $authtypecode } );
+ my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
+ push @{$record_document->{'match-heading'}}, $heading->search_form if $heading;
+ }
+ } else {
+ warn "Cannot determine authority type for record: " . $record->field('001')->as_string;
+ }
+ }
+
my $mappings = $rules->{leader};
if ($mappings) {
$self->_process_mappings($mappings, $record->leader(), $record_document, {
}
);
}
- if ( @{$mappings} && grep { $_->[0] eq 'match-heading'} @{$mappings} ){
- # Used by the authority linker the match-heading field requires a specific syntax
- # that is specified in C4/Heading
- my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
- next unless $heading;
- push @{$record_document->{'match-heading'}}, $heading->search_form;
- }
}
my $subfields_join_mappings = $data_field_rules->{subfields_join};
if ($subfields_join_mappings) {
foreach my $subfields_group (keys %{$subfields_join_mappings}) {
- # Map each subfield to values, remove empty values, join with space
- my $data = join(
- ' ',
- grep(
- $_,
- map { join(' ', $field->subfield($_)) } split(//, $subfields_group)
- )
- );
+ my $data_field = $field->clone; #copy field to preserve for alt scripts
+ $data_field->delete_subfield(match => qr/^$/); #remove empty subfields, otherwise they are printed as a space
+ my $data = $data_field->as_string( $subfields_group ); #get values for subfields as a combined string, preserving record order
if ($data) {
$self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, {
altscript => $altscript,
}
);
}
- if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){
- # Used by the authority linker the match-heading field requires a specific syntax
- # that is specified in C4/Heading
- my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
- next unless $heading;
- push @{$record_document->{'match-heading'}}, $heading->search_form;
+ }
+ }
+ }
+ }
+ }
+
+ if (C4::Context->preference('IncludeSeeFromInSearches') and $self->index eq 'biblios') {
+ foreach my $field (Koha::Filter::MARC::EmbedSeeFromHeadings->new->fields($record)) {
+ my $data_field_rules = $data_fields_rules->{$field->tag()};
+ if ($data_field_rules) {
+ my $subfields_mappings = $data_field_rules->{subfields};
+ my $wildcard_mappings = $subfields_mappings->{'*'};
+ foreach my $subfield ($field->subfields()) {
+ my ($code, $data) = @{$subfield};
+ my @mappings;
+ push @mappings, @{ $subfields_mappings->{$code} } if $subfields_mappings->{$code};
+ push @mappings, @$wildcard_mappings if $wildcard_mappings;
+ # Do not include "see from" headings in these kinds of fields
+ @mappings = grep { $_->[0] !~ /__(sort|facet|suggestion)$/ } @mappings;
+ if (@mappings) {
+ $self->_process_mappings(\@mappings, $data, $record_document, {
+ data_source => 'subfield',
+ code => $code,
+ field => $field
+ }
+ );
+ }
+ }
+
+ my $subfields_join_mappings = $data_field_rules->{subfields_join};
+ if ($subfields_join_mappings) {
+ foreach my $subfields_group (keys %{$subfields_join_mappings}) {
+ my $data_field = $field->clone;
+ # remove empty subfields, otherwise they are printed as a space
+ $data_field->delete_subfield(match => qr/^$/);
+ my $data = $data_field->as_string( $subfields_group );
+ if ($data) {
+ my @mappings = @{ $subfields_join_mappings->{$subfields_group} };
+ # Do not include "see from" headings in these kinds of fields
+ @mappings = grep { $_->[0] !~ /__(sort|facet|suggestion)$/ } @mappings;
+ $self->_process_mappings(\@mappings, $data, $record_document, {
+ data_source => 'subfields_group',
+ codes => $subfields_group,
+ field => $field
+ }
+ );
}
}
}
}
}
}
+
foreach my $field (keys %{$rules->{defaults}}) {
unless (defined $record_document->{$field}) {
$record_document->{$field} = $rules->{defaults}->{$field};
$record_document->{'marc_format'} = 'base64ISO2709';
}
}
+
+ # Check if there is at least one available item
+ if ($self->index eq $BIBLIOS_INDEX) {
+ my ($tag, $code) = C4::Biblio::GetMarcFromKohaField('biblio.biblionumber');
+ my $field = $record->field($tag);
+ if ($field) {
+ my $biblionumber = $field->is_control_field ? $field->data : $field->subfield($code);
+ my $avail_items = Koha::Items->search({
+ biblionumber => $biblionumber,
+ onloan => undef,
+ itemlost => 0,
+ })->count;
+
+ $record_document->{available} = $avail_items ? \1 : \0;
+ }
+ }
+
push @record_documents, $record_document;
}
return \@record_documents;
return $value ? 'true' : 'false';
};
}
+ elsif ($target_type eq 'year') {
+ $default_options->{value_callbacks} //= [];
+ # Only accept four-character years made of digits, "u" or whitespace
+ push @{$default_options->{value_callbacks}}, sub {
+ my ($value) = @_;
+ # Replace "u" and whitespace with "0" for sorting
+ return map { s/[u\s]/0/gr } ( $value =~ /[0-9u\s]{4}/g );
+ };
+ }
if ($search) {
my $mapping = [$target_name, $default_options];
}
}
+ if( $self->index eq 'authorities' ){
+ push @{$rules->{control_fields}->{'008'}}, ['subject-heading-thesaurus', { 'substr' => [ 11, 1 ] } ];
+ push @{$rules->{data_fields}->{'040'}->{subfields}->{f}}, ['subject-heading-thesaurus', { } ];
+ }
+
return $rules;
}
=item C<$marc_type>
A string that indicates the MARC type that this mapping is for, e.g. 'marc21',
-'unimarc', 'normarc'.
+'unimarc'.
=item C<$marc_field>
A string that describes the MARC field that contains the data to extract.
-These are of a form suited to Catmandu's MARC fixers.
=back
warn $msg; # simple logging
# This is super-primitive
- return "Unable to understand your search query, please rephrase and try again.\n" if $msg =~ /ParseException/;
+ return "Unable to understand your search query, please rephrase and try again.\n" if $msg =~ /ParseException|parse_exception/;
return "Unable to perform your search. Please try again.\n";
}
my $configuration;
my $conf = C4::Context->config('elasticsearch');
- Koha::Exceptions::Config::MissingEntry->throw(
- "Missing 'elasticsearch' block in config file")
- unless defined $conf;
-
- if ( $conf && $conf->{server} ) {
- my $nodes = $conf->{server};
- if ( ref($nodes) eq 'ARRAY' ) {
- $configuration->{nodes} = $nodes;
- }
- else {
- $configuration->{nodes} = [$nodes];
- }
- }
- else {
+ unless ( defined $conf ) {
Koha::Exceptions::Config::MissingEntry->throw(
- "Missing 'server' entry in config file for elasticsearch");
+ "Missing <elasticsearch> entry in koha-conf.xml"
+ );
}
- if ( defined $conf->{index_name} ) {
- $configuration->{index_name} = $conf->{index_name};
+ unless ( exists $conf->{server} ) {
+ Koha::Exceptions::Config::MissingEntry->throw(
+ "Missing <elasticsearch>/<server> entry in koha-conf.xml"
+ );
}
- else {
+
+ unless ( exists $conf->{index_name} ) {
Koha::Exceptions::Config::MissingEntry->throw(
- "Missing 'index_name' entry in config file for elasticsearch");
+ "Missing <elasticsearch>/<index_name> entry in koha-conf.xml",
+ );
+ }
+
+ while ( my ( $var, $val ) = each %$conf ) {
+ if ( $var eq 'server' ) {
+ if ( ref($val) eq 'ARRAY' ) {
+ $configuration->{nodes} = $val;
+ }
+ else {
+ $configuration->{nodes} = [$val];
+ }
+ } else {
+ $configuration->{$var} = $val;
+ }
}
+ $configuration->{cxn_pool} //= 'Static';
+
return $configuration;
}
my @search_field_names = qw( author itype location su-geo title-series subject ccode holdingbranch homebranch ln );
my @faceted_fields = Koha::SearchFields->search(
{ name => { -in => \@search_field_names }, facet_order => { '!=' => undef } }, { order_by => ['facet_order'] }
- );
+ )->as_list;
my @not_faceted_fields = Koha::SearchFields->search(
{ name => { -in => \@search_field_names }, facet_order => undef }, { order_by => ['facet_order'] }
- );
+ )->as_list;
# This could certainly be improved
return ( @faceted_fields, @not_faceted_fields );
}
+=head2 clear_search_fields_cache
+
+Koha::SearchEngine::Elasticsearch->clear_search_fields_cache();
+
+Clear cached values for ES search fields
+
+=cut
+
+sub clear_search_fields_cache {
+
+    # Cache keys holding the search field lists, one per interface
+    # (staff client, OPAC) and index (biblios, authorities).
+    my @cache_keys = (
+        'elasticsearch_search_fields_staff_client_biblios',
+        'elasticsearch_search_fields_opac_biblios',
+        'elasticsearch_search_fields_staff_client_authorities',
+        'elasticsearch_search_fields_opac_authorities',
+    );
+
+    my $cache = Koha::Caches->get_instance();
+    for my $cache_key (@cache_keys) {
+        $cache->clear_from_cache($cache_key);
+    }
+
+    # Nothing meaningful to report; avoid leaking the cache call's result.
+    return;
+}
+
1;
__END__