#
# This file is part of Koha.
#
-# Koha is free software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 3 of the License, or (at your option) any later
-# version.
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
#
-# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License along
-# with Koha; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
use base qw(Class::Accessor);
use Koha::Database;
use Koha::Exceptions::Config;
+use Koha::Exceptions::Elasticsearch;
use Koha::SearchFields;
use Koha::SearchMarcMaps;
+use C4::Heading;
use Carp;
+use Clone qw(clone);
use JSON;
use Modern::Perl;
use Readonly;
$es->{key_prefix} = 'es_';
$es->{client} //= '5_0::Direct';
- $es->{cxn_pool} //= 'Sniff';
+ $es->{cxn_pool} //= 'Static';
$es->{request_timeout} //= 60;
return $es;
if (!defined $all_mappings{$self->index}) {
$sort_fields{$self->index} = {};
+ # Clone the general mapping to break ties with the original hash
my $mappings = {
- data => scalar _get_elasticsearch_mapping('general', '')
+ data => clone(_get_elasticsearch_field_config('general', ''))
};
my $marcflavour = lc C4::Context->preference('marcflavour');
$self->_foreach_mapping(
sub {
- my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
+ my ( $name, $type, $facet, $suggestible, $sort, $search, $marc_type ) = @_;
return if $marc_type ne $marcflavour;
# TODO if this gets any sort of complexity to it, it should
# be broken out into its own function.
$es_type = 'stdno';
}
- $mappings->{data}{properties}{$name} = _get_elasticsearch_mapping('search', $es_type);
+ if ($search) {
+ $mappings->{data}{properties}{$name} = _get_elasticsearch_field_config('search', $es_type);
+ }
if ($facet) {
- $mappings->{data}{properties}{ $name . '__facet' } = _get_elasticsearch_mapping('facet', $es_type);
+ $mappings->{data}{properties}{ $name . '__facet' } = _get_elasticsearch_field_config('facet', $es_type);
}
if ($suggestible) {
- $mappings->{data}{properties}{ $name . '__suggestion' } = _get_elasticsearch_mapping('suggestible', $es_type);
+ $mappings->{data}{properties}{ $name . '__suggestion' } = _get_elasticsearch_field_config('suggestible', $es_type);
}
# Sort is a bit special as it can be true, false, undef.
# We care about "true" or "undef",
# "undef" means to do the default thing, which is make it sortable.
if (!defined $sort || $sort) {
- $mappings->{data}{properties}{ $name . '__sort' } = _get_elasticsearch_mapping('sort', $es_type);
+ $mappings->{data}{properties}{ $name . '__sort' } = _get_elasticsearch_field_config('sort', $es_type);
$sort_fields{$self->index}{$name} = 1;
}
}
return $all_mappings{$self->index};
}
-=head2 _get_elasticsearch_mapping
+=head2 _get_elasticsearch_field_config
-Get the Elasticsearch mappings for the given purpose and data type.
+Get the Elasticsearch field config for the given purpose and data type.
-$mapping = _get_elasticsearch_mapping('search', 'text');
+$mapping = _get_elasticsearch_field_config('search', 'text');
=cut
-sub _get_elasticsearch_mapping {
+sub _get_elasticsearch_field_config {
my ( $purpose, $type ) = @_;
return;
}
-sub reset_elasticsearch_mappings {
- my ( $reset_fields ) = @_;
+=head2 _load_elasticsearch_mappings
+
+Load Elasticsearch mappings in the format of mappings.yaml.
+
+$indexes = _load_elasticsearch_mappings();
+
+=cut
+
+sub _load_elasticsearch_mappings { # Resolve the mappings file path and return what LoadFile yields for it.
my $mappings_yaml = C4::Context->config('elasticsearch_index_mappings');
$mappings_yaml ||= C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
- my $indexes = LoadFile( $mappings_yaml );
+ return LoadFile( $mappings_yaml ); # Path: the 'elasticsearch_index_mappings' config value, else mappings.yaml under 'intranetdir'.
+}
+
+sub reset_elasticsearch_mappings {
+ my ( $self ) = @_;
+ my $indexes = $self->_load_elasticsearch_mappings();
+
+ Koha::SearchMarcMaps->delete;
+ Koha::SearchFields->delete;
while ( my ( $index_name, $fields ) = each %$indexes ) {
while ( my ( $field_name, $data ) = each %$fields ) {
- my $field_type = $data->{type};
- my $field_label = $data->{label};
+
+ my %sf_params = map { $_ => $data->{$_} } grep { exists $data->{$_} } qw/ type label weight staff_client opac facet_order /;
+
+ # Set default values
+ $sf_params{staff_client} //= 1;
+ $sf_params{opac} //= 1;
+
+ $sf_params{name} = $field_name;
+
+ my $search_field = Koha::SearchFields->find_or_create( \%sf_params, { key => 'name' } );
+
my $mappings = $data->{mappings};
- my $facet_order = $data->{facet_order};
- my $search_field = Koha::SearchFields->find_or_create({
- name => $field_name,
- label => $field_label,
- type => $field_type,
- },
- {
- key => 'name'
- });
- $search_field->update(
- {
- facet_order => $facet_order
- }
- );
for my $mapping ( @$mappings ) {
- my $marc_field = Koha::SearchMarcMaps->find_or_create({ index_name => $index_name, marc_type => $mapping->{marc_type}, marc_field => $mapping->{marc_field} });
- $search_field->add_to_search_marc_maps($marc_field, { facet => $mapping->{facet} || 0, suggestible => $mapping->{suggestible} || 0, sort => $mapping->{sort} } );
+ my $marc_field = Koha::SearchMarcMaps->find_or_create({
+ index_name => $index_name,
+ marc_type => $mapping->{marc_type},
+ marc_field => $mapping->{marc_field}
+ });
+ $search_field->add_to_search_marc_maps($marc_field, {
+ facet => $mapping->{facet} || 0,
+ suggestible => $mapping->{suggestible} || 0,
+ sort => $mapping->{sort},
+ search => $mapping->{search} // 1
+ });
}
}
}
=head2 marc_records_to_documents($marc_records)
- my @record_documents = $self->marc_records_to_documents($marc_records);
+ my $record_documents = $self->marc_records_to_documents($marc_records);
Using mappings stored in database convert C<$marc_records> to Elasticsearch documents.
my $control_fields_rules = $rules->{control_fields};
my $data_fields_rules = $rules->{data_fields};
my $marcflavour = lc C4::Context->preference('marcflavour');
+ my $use_array = C4::Context->preference('ElasticsearchMARCFormat') eq 'ARRAY';
my @record_documents;
}
my $data_field_rules = $data_fields_rules->{$tag};
-
if ($data_field_rules) {
my $subfields_mappings = $data_field_rules->{subfields};
my $wildcard_mappings = $subfields_mappings->{'*'};
if (@{$mappings}) {
$self->_process_mappings($mappings, $data, $record_document, $altscript);
}
+ if ( defined @{$mappings}[0] && grep /match-heading/, @{@{$mappings}[0]} ){
+ # Used by the authority linker: the match-heading field requires a specific syntax
+ # that is specified in C4/Heading
+ my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
+ next unless $heading;
+ push @{$record_document->{'match-heading'}}, $heading->search_form;
+ }
}
my $subfields_join_mappings = $data_field_rules->{subfields_join};
if ($data) {
$self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, $altscript);
}
+ if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){
+ # Used by the authority linker: the match-heading field requires a specific syntax
+ # that is specified in C4/Heading
+ my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
+ next unless $heading;
+ push @{$record_document->{'match-heading'}}, $heading->search_form;
+ }
}
}
}
# TODO: Perhaps should check if $records_document non empty, but really should never be the case
$record->encoding('UTF-8');
- my @warnings;
- {
- # Temporarily intercept all warn signals (MARC::Record carps when record length > 99999)
- local $SIG{__WARN__} = sub {
- push @warnings, $_[0];
- };
- $record_document->{'marc_data'} = encode_base64(encode('UTF-8', $record->as_usmarc()));
- }
- if (@warnings) {
- # Suppress warnings if record length exceeded
- unless (substr($record->leader(), 0, 5) eq '99999') {
- foreach my $warning (@warnings) {
- carp $warning;
+ if ($use_array) {
+ $record_document->{'marc_data_array'} = $self->_marc_to_array($record);
+ $record_document->{'marc_format'} = 'ARRAY';
+ } else {
+ my @warnings;
+ {
+ # Temporarily intercept all warn signals (MARC::Record carps when record length > 99999)
+ local $SIG{__WARN__} = sub {
+ push @warnings, $_[0];
+ };
+ $record_document->{'marc_data'} = encode_base64(encode('UTF-8', $record->as_usmarc()));
+ }
+ if (@warnings) {
+ # Suppress warnings if record length exceeded
+ unless (substr($record->leader(), 0, 5) eq '99999') {
+ foreach my $warning (@warnings) {
+ carp $warning;
+ }
}
+ $record_document->{'marc_data'} = $record->as_xml_record($marcflavour);
+ $record_document->{'marc_format'} = 'MARCXML';
+ }
+ else {
+ $record_document->{'marc_format'} = 'base64ISO2709';
}
- $record_document->{'marc_data'} = $record->as_xml_record($marcflavour);
- $record_document->{'marc_format'} = 'MARCXML';
- }
- else {
- $record_document->{'marc_format'} = 'base64ISO2709';
}
- my $id = $record->subfield('999', 'c');
- push @record_documents, [$id, $record_document];
+ push @record_documents, $record_document;
}
return \@record_documents;
}
-=head2 _field_mappings($facet, $suggestible, $sort, $target_name, $target_type, $range)
+=head2 _marc_to_array($record)
+
+ my $data = $self->_marc_to_array($record)
+
+Convert a MARC::Record to a hash reference (with C<leader> and C<fields> keys)
+modeled after MARC-in-JSON
+(see https://github.com/marc4j/marc4j/wiki/MARC-in-JSON-Description)
+
+=over 4
+
+=item C<$record>
+
+A MARC::Record object
+
+=back
+
+=cut
+
+sub _marc_to_array {
+    my ($self, $record) = @_;
+
+    # Serialise $record into a hash reference shaped like MARC-in-JSON:
+    #   { leader => '...', fields => [ { TAG => ... }, ... ] }
+    # Control fields map the tag to the field data; data fields map the tag
+    # to a hash holding ind1, ind2 and a list of single-pair
+    # { code => value } subfield hashes, in record order.
+    my $data = {
+        leader => $record->leader(),
+        fields => []
+    };
+    for my $field ($record->fields()) {
+        my $tag = $field->tag();
+        if ($field->is_control_field()) {
+            push @{$data->{fields}}, {$tag => $field->data()};
+        } else {
+            # Start from a real (empty) array reference so that "subfields"
+            # is always a list, even when the field yields no subfields.
+            my $subfields = [];
+            foreach my $subfield ($field->subfields()) {
+                my ($code, $contents) = @{$subfield};
+                push @{$subfields}, {$code => $contents};
+            }
+            push @{$data->{fields}}, {
+                $tag => {
+                    ind1 => $field->indicator(1),
+                    ind2 => $field->indicator(2),
+                    subfields => $subfields
+                }
+            };
+        }
+    }
+    return $data;
+}
+
+=head2 _array_to_marc($data)
+
+ my $record = $self->_array_to_marc($data)
+
+Convert a hash reference modeled after MARC-in-JSON to a MARC::Record
+
+=over 4
+
+=item C<$data>
- my @mappings = _field_mappings($facet, $suggestible, $sort, $target_name, $target_type, $range)
+A hash reference (with C<leader> and C<fields> keys) modeled after MARC-in-JSON
+(see https://github.com/marc4j/marc4j/wiki/MARC-in-JSON-Description)
+
+=back
+
+=cut
+
+sub _array_to_marc {
+    my ($self, $data) = @_;
+
+    # Rebuild a MARC::Record from a MARC-in-JSON style hash reference:
+    # a leader plus a list of single-key { TAG => payload } hashes.
+    # $data itself is left unmodified.
+    my $record = MARC::Record->new();
+
+    $record->leader($data->{leader});
+    for my $field (@{$data->{fields}}) {
+        my $tag = (keys %{$field})[0];
+        # Keep the payload in its own variable: $field aliases the element of
+        # $data->{fields}, so assigning to it would rewrite the caller's data.
+        my $content = $field->{$tag};
+        my $marc_field;
+        if (ref($content) eq 'HASH') {
+            # Data field: flatten the { code => value } hashes into the
+            # alternating code/value list passed to MARC::Field->new.
+            my @subfields;
+            foreach my $subfield (@{ $content->{subfields} // [] }) {
+                my $code = (keys %{$subfield})[0];
+                push @subfields, $code, $subfield->{$code};
+            }
+            $marc_field = MARC::Field->new($tag, $content->{ind1}, $content->{ind2}, @subfields);
+        } else {
+            # Control field: the payload is the field data itself.
+            $marc_field = MARC::Field->new($tag, $content);
+        }
+        $record->append_fields($marc_field);
+    }
+    return $record;
+}
+
+=head2 _field_mappings($facet, $suggestible, $sort, $search, $target_name, $target_type, $range)
+
+ my @mappings = _field_mappings($facet, $suggestible, $sort, $search, $target_name, $target_type, $range)
Get mappings, an internal data structure later used by
L<_process_mappings($mappings, $data, $record_document, $altscript)> to process MARC target
Boolean indicating whether to create a sort field for this mapping.
+=item C<$search>
+
+Boolean indicating whether to create a search field for this mapping.
+
=item C<$target_name>
Elasticsearch document target field name.
where "<START>" and "<END>" are integers specifying a range that will be used
for extracting a substring from MARC data as Elasticsearch field target value.
-The first character position is "1", and the range is inclusive,
-so "1-3" means the first three characters of MARC data.
+The first character position is "0", and the range is inclusive,
+so "0-2" means the first three characters of MARC data.
If only "<START>" is provided only one character at position "<START>" will
be extracted.
=cut
sub _field_mappings {
- my ($_self, $facet, $suggestible, $sort, $target_name, $target_type, $range) = @_;
+ my ($_self, $facet, $suggestible, $sort, $search, $target_name, $target_type, $range) = @_;
my %mapping_defaults = ();
my @mappings;
my $substr_args = undef;
- if ($range) {
+ if (defined $range) {
# TODO: use value_callback instead?
my ($start, $end) = map(int, split /-/, $range, 2);
$substr_args = [$start];
};
}
- my $mapping = [$target_name, $default_options];
- push @mappings, $mapping;
+ if ($search) {
+ my $mapping = [$target_name, $default_options];
+ push @mappings, $mapping;
+ }
my @suffixes = ();
push @suffixes, 'facet' if $facet;
sub _get_marc_mapping_rules {
my ($self) = @_;
my $marcflavour = lc C4::Context->preference('marcflavour');
- my $field_spec_regexp = qr/^([0-9]{3})([()0-9a-z]+)?(?:_\/(\d+(?:-\d+)?))?$/;
+ my $field_spec_regexp = qr/^([0-9]{3})([()0-9a-zA-Z]+)?(?:_\/(\d+(?:-\d+)?))?$/;
my $leader_regexp = qr/^leader(?:_\/(\d+(?:-\d+)?))?$/;
my $rules = {
'leader' => [],
};
$self->_foreach_mapping(sub {
- my ($name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field) = @_;
+ my ($name, $type, $facet, $suggestible, $sort, $search, $marc_type, $marc_field) = @_;
return if $marc_type ne $marcflavour;
if ($type eq 'sum') {
push @{$rules->{sum}}, $name;
+ push @{$rules->{sum}}, $name."__sort" if $sort;
}
elsif ($type eq 'isbn') {
push @{$rules->{isbn}}, $name;
}
my $range = defined $3 ? $3 : undef;
- my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $name, $type, $range);
-
+ my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $search, $name, $type, $range);
if ($field_tag < 10) {
$rules->{control_fields}->{$field_tag} //= [];
push @{$rules->{control_fields}->{$field_tag}}, @mappings;
}
elsif ($marc_field =~ $leader_regexp) {
my $range = defined $1 ? $1 : undef;
- my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $name, $type, $range);
+ my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $search, $name, $type, $range);
push @{$rules->{leader}}, @mappings;
}
else {
'search_marc_to_fields.facet',
'search_marc_to_fields.suggestible',
'search_marc_to_fields.sort',
+ 'search_marc_to_fields.search',
'search_marc_map.marc_type',
'search_marc_map.marc_field',
],
'facet',
'suggestible',
'sort',
+ 'search',
'marc_type',
'marc_field',
],
$search_field->get_column('facet'),
$search_field->get_column('suggestible'),
$search_field->get_column('sort'),
+ $search_field->get_column('search'),
$search_field->get_column('marc_type'),
$search_field->get_column('marc_field'),
);
# These should correspond to the ES field names, as opposed to the CCL
# things that zebra uses.
- my @search_field_names = qw( author itype location su-geo title-series subject ccode holdingbranch homebranch );
+ my @search_field_names = qw( author itype location su-geo title-series subject ccode holdingbranch homebranch ln );
my @faceted_fields = Koha::SearchFields->search(
{ name => { -in => \@search_field_names }, facet_order => { '!=' => undef } }, { order_by => ['facet_order'] }
);