1 package Koha::SearchEngine::Elasticsearch::Indexer;
3 # Copyright 2013 Catalyst IT
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 use Carp qw( carp croak );
22 use Try::Tiny qw( catch try );
23 use List::Util qw( any );
24 use base qw(Koha::SearchEngine::Elasticsearch);
27 use Koha::Exceptions::Elasticsearch;
28 use Koha::SearchEngine::Zebra::Indexer;
29 use Koha::BackgroundJob::UpdateElasticIndex;
30 use C4::AuthoritiesMarc qw//;
36 Koha::SearchEngine::Elasticsearch::Indexer - handles adding new records to the index
40 my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new(
41 { index => Koha::SearchEngine::BIBLIOS_INDEX } );
42 $indexer->drop_index();
43 $indexer->update_index(\@biblionumbers, \@records);
50 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_OK>
52 Represents an index state where index is created and in a working state.
54 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_REINDEX_REQUIRED>
56 Not currently used, but could be useful later, for example if we can detect when a new field or mapping has been added.
58 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_RECREATE_REQUIRED>
60 Represents an index state where the index needs to be recreated and is not in a working state.
68 INDEX_STATUS_REINDEX_REQUIRED => 1,
69 INDEX_STATUS_RECREATE_REQUIRED => 2,
74 =head2 update_index($biblionums, $records)
77 $self->update_index($biblionums, $records);
79 die("Something went wrong trying to update index:" . $_[0]);
82 Converts C<MARC::Records> C<$records> to Elasticsearch documents and performs
83 an update request for these records on the Elasticsearch index.
89 Arrayref of biblio numbers for the C<$records>. The order must be the same as
90 the order of C<$records>.
94 Arrayref of C<MARC::Record>s.
101 my ($self, $record_ids, $records) = @_;
103 my $index_record_ids = [];
104 unless ( $records && @$records ) {
105 for my $record_id ( sort { $a <=> $b } @$record_ids ) {
107 next unless $record_id;
109 my $record = $self->_get_record( $record_id );
111 push @$records, $record;
112 push @$index_record_ids, $record_id;
116 $index_record_ids = $record_ids;
119 my $documents = $self->marc_records_to_documents($records);
121 for (my $i = 0; $i < scalar @$index_record_ids; $i++) {
122 my $id = $index_record_ids->[$i];
123 my $document = $documents->[$i];
129 push @body, $document;
134 my $elasticsearch = $self->get_elasticsearch();
135 $response = $elasticsearch->bulk(
136 index => $self->index_name,
137 type => 'data', # is just hard coded in Indexer.pm?
140 if ($response->{errors}) {
141 carp "One or more ElasticSearch errors occurred when indexing documents";
144 Koha::Exceptions::Elasticsearch::BadResponse->throw(
146 details => $_->{text},
153 =head2 set_index_status_ok
155 Convenience method for setting index status to C<INDEX_STATUS_OK>.
159 sub set_index_status_ok {
161 $self->index_status(INDEX_STATUS_OK);
164 =head2 is_index_status_ok
166 Convenience method for checking if index status is C<INDEX_STATUS_OK>.
170 sub is_index_status_ok {
172 return $self->index_status == INDEX_STATUS_OK;
175 =head2 set_index_status_reindex_required
177 Convenience method for setting index status to C<INDEX_STATUS_REINDEX_REQUIRED>.
181 sub set_index_status_reindex_required {
183 $self->index_status(INDEX_STATUS_REINDEX_REQUIRED);
186 =head2 is_index_status_reindex_required
188 Convenience method for checking if index status is C<INDEX_STATUS_REINDEX_REQUIRED>.
192 sub is_index_status_reindex_required {
194 return $self->index_status == INDEX_STATUS_REINDEX_REQUIRED;
197 =head2 set_index_status_recreate_required
199 Convenience method for setting index status to C<INDEX_STATUS_RECREATE_REQUIRED>.
203 sub set_index_status_recreate_required {
205 $self->index_status(INDEX_STATUS_RECREATE_REQUIRED);
208 =head2 is_index_status_recreate_required
210 Convenience method for checking if index status is C<INDEX_STATUS_RECREATE_REQUIRED>.
214 sub is_index_status_recreate_required {
216 return $self->index_status == INDEX_STATUS_RECREATE_REQUIRED;
219 =head2 index_status($status)
221 Will either set the current index status to C<$status> and return C<$status>,
222 or return the current index status if called with no arguments.
228 Optional argument. If passed will set current index status to C<$status> if C<$status> is
229 a valid status. See L</CONSTANTS>.
236 my ($self, $status) = @_;
237 my $key = 'ElasticsearchIndexStatus_' . $self->index;
239 if (defined $status) {
240 unless (any { $status == $_ } (
242 INDEX_STATUS_REINDEX_REQUIRED,
243 INDEX_STATUS_RECREATE_REQUIRED,
246 Koha::Exception->throw("Invalid index status: $status");
248 C4::Context->set_preference($key, $status);
252 return C4::Context->preference($key);
256 =head2 update_mappings
258 Generate Elasticsearch mappings from mappings stored in database and
259 perform a request to update Elasticsearch index mappings. Will throw an
260 error and set index status to C<INDEX_STATUS_RECREATE_REQUIRED> if update
265 sub update_mappings {
267 my $elasticsearch = $self->get_elasticsearch();
268 my $mappings = $self->get_elasticsearch_mappings();
271 my $response = $elasticsearch->indices->put_mapping(
272 index => $self->index_name,
274 include_type_name => JSON::true(),
280 $self->set_index_status_recreate_required();
281 my $reason = $_[0]->{vars}->{body}->{error}->{reason};
282 my $index_name = $self->index_name;
283 Koha::Exception->throw(
284 error => "Unable to update mappings for index \"$index_name\". Reason was: \"$reason\". Index needs to be recreated and reindexed",
287 $self->set_index_status_ok();
290 =head2 update_index_background($record_numbers, $server)
292 Unlike C<update_index>, this takes C<$record_numbers> and C<$server> and returns
293 immediately. It only enqueues a C<Koha::BackgroundJob::UpdateElasticIndex> job that does the adding.
295 If it fails to add to Elasticsearch then it'll add to a queue that will cause
296 it to be updated by a regular index cron job in the future.
300 sub update_index_background {
301 my ( $self, $record_numbers, $server ) = @_;
303 Koha::BackgroundJob::UpdateElasticIndex->new->enqueue({ record_ids => $record_numbers, record_server => $server });
308 This function takes an array of record numbers and fetches the records to send to update_index
311 If the $records parameter is provided, the records will be used as-is; this is only utilized for authorities
314 The other variables are used for parity with Zebra indexing calls. Currently the calls are passed through
320 my ( $self, $record_numbers, $op, $server, $records ) = @_;
321 $record_numbers = [$record_numbers] if ref $record_numbers ne 'ARRAY' && defined $record_numbers;
322 $records = [$records] if ref $records ne 'ARRAY' && defined $records;
323 if ( $op eq 'specialUpdate' ) {
325 $self->update_index( $record_numbers, $records );
327 $self->update_index_background( $record_numbers, $server );
330 elsif ( $op eq 'recordDelete' ) {
331 $self->delete_index_background( $record_numbers );
333 #FIXME Current behaviour is to index Zebra when using ES, at some point we should stop
334 Koha::SearchEngine::Zebra::Indexer::index_records( $self, $record_numbers, $op, $server, undef );
338 my ( $self, $record_id ) = @_;
339 return $self->index eq $Koha::SearchEngine::BIBLIOS_INDEX
340 ? C4::Biblio::GetMarcBiblio({ biblionumber => $record_id, embed_items => 1 })
341 : C4::AuthoritiesMarc::GetAuthority($record_id);
344 =head2 delete_index($biblionums)
346 C<$biblionums> is an arrayref of biblionumbers to delete from the index.
351 my ($self, $biblionums) = @_;
353 my $elasticsearch = $self->get_elasticsearch();
354 my @body = map { { delete => { _id => "$_" } } } @{$biblionums};
355 my $result = $elasticsearch->bulk(
356 index => $self->index_name,
358 include_type_name => JSON::true(),
361 if ($result->{errors}) {
362 croak "An Elasticsearch error occurred during bulk delete";
366 =head2 delete_index_background($biblionums)
368 Identical to L</delete_index($biblionums)>
372 # TODO: Should be made async
373 sub delete_index_background {
375 $self->delete_index(@_);
380 Drops the index from the Elasticsearch server.
386 if ($self->index_exists) {
387 my $elasticsearch = $self->get_elasticsearch();
388 $elasticsearch->indices->delete(index => $self->index_name);
389 $self->set_index_status_recreate_required();
395 Creates the index (including mappings) on the Elasticsearch server.
401 my $settings = $self->get_elasticsearch_settings();
402 my $elasticsearch = $self->get_elasticsearch();
403 $elasticsearch->indices->create(
404 index => $self->index_name,
406 settings => $settings
409 $self->update_mappings();
414 Checks if index has been created on the Elasticsearch server. Returns C<1> or the
415 empty string to indicate whether index exists or not.
421 my $elasticsearch = $self->get_elasticsearch();
422 return $elasticsearch->indices->exists(
423 index => $self->index_name,
435 =item Chris Cormack C<< <chrisc@catalyst.net.nz> >>
437 =item Robin Sheat C<< <robin@catalyst.net.nz> >>