1 package Koha::SearchEngine::Elasticsearch::Indexer;
3 # Copyright 2013 Catalyst IT
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
23 use List::Util qw(any);
24 use base qw(Koha::SearchEngine::Elasticsearch);
28 use Koha::SearchEngine::Zebra::Indexer;
34 Koha::SearchEngine::Elasticsearch::Indexer - handles adding new records to the index
38 my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new(
39 { index => Koha::SearchEngine::BIBLIOS_INDEX } );
40 $indexer->drop_index();
41 $indexer->update_index(\@biblionumbers, \@records);
48 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_OK>
50 Represents an index state where index is created and in a working state.
52 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_REINDEX_REQUIRED>
54 Not currently used, but could be useful later, for example if we can detect when a new field or mapping has been added.
56 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_RECREATE_REQUIRED>
58 Represents an index state where the index needs to be recreated and is not in a working state.
# Non-OK states. index_status() only accepts values matching these status constants.
# RECREATE_REQUIRED is set by drop_index and on the failure path of update_mappings.
66 INDEX_STATUS_REINDEX_REQUIRED => 1,
67 INDEX_STATUS_RECREATE_REQUIRED => 2,
72 =head2 update_index($biblionums, $records)
75 $self->update_index($biblionums, $records);
77 die("Something went wrong trying to update index:" . $_[0]);
80 Converts C<MARC::Records> C<$records> to Elasticsearch documents and performs
81 an update request for these records on the Elasticsearch index.
87 Arrayref of biblio numbers for the C<$records>, the order must be the same as
88 and match up with C<$records>.
92 Arrayref of C<MARC::Record>s.
# $biblionums and $records are parallel arrayrefs (see POD above: same order required).
99 my ($self, $biblionums, $records) = @_;
# Convert the MARC records to Elasticsearch documents; the result is read by index below.
101 my $documents = $self->marc_records_to_documents($records);
# The loop is driven by the number of biblio numbers, so $documents->[$i] is
# undef if fewer documents than biblio numbers were produced.
104 for (my $i = 0; $i < scalar @$biblionums; $i++) {
# NOTE(review): $id is presumably used for a bulk action entry that precedes
# each document in @body -- confirm against the full sub.
105 my $id = $biblionums->[$i];
106 my $document = $documents->[$i];
112 push @body, $document;
# Send everything to this indexer's index in a single bulk request.
116 my $elasticsearch = $self->get_elasticsearch();
117 $response = $elasticsearch->bulk(
118 index => $self->index_name,
119 type => 'data', # is just hard coded in Indexer.pm?
# Bulk errors are only reported as a warning (carp); no exception is raised here.
122 if ($response->{errors}) {
123 carp "One or more ElasticSearch errors occurred when indexing documents";
129 =head2 set_index_status_ok
131 Convenience method for setting index status to C<INDEX_STATUS_OK>.
# Setter wrapper: hands INDEX_STATUS_OK to index_status(), which validates the
# value and stores it with C4::Context->set_preference.
135 sub set_index_status_ok {
137 $self->index_status(INDEX_STATUS_OK);
140 =head2 is_index_status_ok
142 Convenience method for checking if index status is C<INDEX_STATUS_OK>.
# Predicate: numeric (==) comparison of the stored status, as returned by
# index_status() with no argument, against INDEX_STATUS_OK.
# NOTE(review): if the preference has never been set, the comparison may see
# undef (numerically 0) -- confirm that this is the intended default.
146 sub is_index_status_ok {
148 return $self->index_status == INDEX_STATUS_OK;
151 =head2 set_index_status_reindex_required
153 Convenience method for setting index status to C<INDEX_STATUS_REINDEX_REQUIRED>.
# Setter wrapper: hands INDEX_STATUS_REINDEX_REQUIRED to index_status(), which
# validates the value and stores it with C4::Context->set_preference.
157 sub set_index_status_reindex_required {
159 $self->index_status(INDEX_STATUS_REINDEX_REQUIRED);
162 =head2 is_index_status_reindex_required
164 Convenience method for checking if index status is C<INDEX_STATUS_REINDEX_REQUIRED>.
# Predicate: numeric (==) comparison of the stored status, as returned by
# index_status() with no argument, against INDEX_STATUS_REINDEX_REQUIRED.
168 sub is_index_status_reindex_required {
170 return $self->index_status == INDEX_STATUS_REINDEX_REQUIRED;
173 =head2 set_index_status_recreate_required
175 Convenience method for setting index status to C<INDEX_STATUS_RECREATE_REQUIRED>.
# Setter wrapper: hands INDEX_STATUS_RECREATE_REQUIRED to index_status(), which
# validates the value and stores it with C4::Context->set_preference.
# Called by drop_index and on the failure path of update_mappings.
179 sub set_index_status_recreate_required {
181 $self->index_status(INDEX_STATUS_RECREATE_REQUIRED);
184 =head2 is_index_status_recreate_required
186 Convenience method for checking if index status is C<INDEX_STATUS_RECREATE_REQUIRED>.
# Predicate: numeric (==) comparison of the stored status, as returned by
# index_status() with no argument, against INDEX_STATUS_RECREATE_REQUIRED.
190 sub is_index_status_recreate_required {
192 return $self->index_status == INDEX_STATUS_RECREATE_REQUIRED;
195 =head2 index_status($status)
197 Will either set the current index status to C<$status> and return C<$status>,
198 or return the current index status if called with no arguments.
204 Optional argument. If passed will set current index status to C<$status> if C<$status> is
205 a valid status. See L</CONSTANTS>.
212 my ($self, $status) = @_;
# The status is kept per index, in a preference whose name ends in $self->index.
213 my $key = 'ElasticsearchIndexStatus_' . $self->index;
# Setter path: taken only when a status argument was supplied.
215 if (defined $status) {
# Reject anything that is not numerically equal to one of the known status
# constants; a non-numeric $status would also trigger a numeric warning here.
216 unless (any { $status == $_ } (
218 INDEX_STATUS_REINDEX_REQUIRED,
219 INDEX_STATUS_RECREATE_REQUIRED,
222 Koha::Exceptions::Exception->throw("Invalid index status: $status");
# Store the validated status under the per-index key.
224 C4::Context->set_preference($key, $status);
# Read the current status back from the preference store.
228 return C4::Context->preference($key);
232 =head2 update_mappings
234 Generate Elasticsearch mappings from mappings stored in database and
235 perform a request to update Elasticsearch index mappings. Will throw an
236 error and set index status to C<INDEX_STATUS_RECREATE_REQUIRED> if update
# Pushes the generated mappings to the index and records the outcome in the
# index status (recreate-required on failure, OK otherwise).
241 sub update_mappings {
243 my $elasticsearch = $self->get_elasticsearch();
244 my $mappings = $self->get_elasticsearch_mappings();
# One put_mapping request is issued per top-level key of the mappings hash.
246 foreach my $type (keys %{$mappings}) {
# NOTE(review): $response does not appear to be used in the lines visible here.
248 my $response = $elasticsearch->indices->put_mapping(
249 index => $self->index_name,
252 $type => $mappings->{$type}
# Failure handling: flag the index as needing recreation before raising.
256 $self->set_index_status_recreate_required();
# NOTE(review): $_[0] is presumably the caught Elasticsearch error object
# (Try::Tiny-style catch) -- confirm; the reason is read from its nested body.
257 my $reason = $_[0]->{vars}->{body}->{error}->{reason};
258 my $index_name = $self->index_name;
259 Koha::Exceptions::Exception->throw(
260 error => "Unable to update mappings for index \"$index_name\". Reason was: \"$reason\". Index needs to be recreated and reindexed",
# Mark the index as OK (the failure path above throws an exception).
264 $self->set_index_status_ok();
267 =head2 update_index_background($biblionums, $records)
269 This has exactly the same API as C<update_index>; however, it is intended to
270 return immediately and start a background process that does the adding. Backgrounding is not implemented yet: it currently calls C<update_index> directly.
272 If it fails to add to Elasticsearch then it'll add to a queue that will cause
273 it to be updated by a regular index cron job in the future.
277 # TODO implement in the future - I don't know the best way of doing this yet.
278 # If fork: make sure process group is changed so apache doesn't wait for us.
# Until that is implemented this is a plain pass-through: the arguments in @_
# are forwarded unchanged to update_index, and nothing is backgrounded.
280 sub update_index_background {
282 $self->update_index(@_);
287 This function takes an array of biblionumbers and fetches the records to send to update_index
290 If $records parameter is provided the records will be used as-is, this is only utilized for authorities
293 The other variables are used for parity with Zebra indexing calls. Currently the calls are passed through
299 my ( $self, $biblionumbers, $op, $server, $records ) = @_;
# Normalise defined non-array arguments to single-element arrayrefs; undef is left as undef.
300 $biblionumbers = [$biblionumbers] if ref $biblionumbers ne 'ARRAY' && defined $biblionumbers;
301 $records = [$records] if ref $records ne 'ARRAY' && defined $records;
# 'specialUpdate': (re)index the given records.
302 if ( $op eq 'specialUpdate' ) {
303 my $index_biblionumbers;
# Fetch the MARC record for each biblionumber. A record and its biblionumber
# are pushed together, so the two lists stay aligned.
305 foreach my $biblionumber ( @$biblionumbers ){
306 my $record = C4::Biblio::GetMarcBiblio({
307 biblionumber => $biblionumber,
310 push @$records, $record;
311 push @$index_biblionumbers, $biblionumber;
# Index only when both arrayrefs exist. A reference is true even when the
# array is empty, so this is a defined-ness check, not a non-empty check.
315 $self->update_index_background( $index_biblionumbers, $records ) if $index_biblionumbers && $records;
# 'recordDelete': remove the given biblionumbers from the index.
317 elsif ( $op eq 'recordDelete' ) {
318 $self->delete_index_background( $biblionumbers );
320 #FIXME Current behaviour is to index Zebra when using ES, at some point we should stop
# The Zebra indexer is called as a plain function with $self passed explicitly;
# undef is passed in the final position, so $records is not forwarded.
321 Koha::SearchEngine::Zebra::Indexer::index_records( $self, $biblionumbers, $op, $server, undef );
324 =head2 delete_index($biblionums)
326 C<$biblionums> is an arrayref of biblionumbers to delete from the index.
331 my ($self, $biblionums) = @_;
333 my $elasticsearch = $self->get_elasticsearch();
# Build one bulk 'delete' action per biblionumber; the id is stringified.
334 my @body = map { { delete => { _id => "$_" } } } @{$biblionums};
335 my $result = $elasticsearch->bulk(
336 index => $self->index_name,
# Unlike update_index, which only carps, a bulk error here is fatal (croak).
340 if ($result->{errors}) {
341 croak "An Elasticsearch error occurred during bulk delete";
345 =head2 delete_index_background($biblionums)
347 Identical to L</delete_index($biblionums)>
351 # TODO: Should be made async
# Currently a plain pass-through: the arguments in @_ are forwarded unchanged
# to delete_index.
352 sub delete_index_background {
354 $self->delete_index(@_);
359 Drops the index from the Elasticsearch server.
# Issue the delete only when index_exists reports the index as present.
365 if ($self->index_exists) {
366 my $elasticsearch = $self->get_elasticsearch();
367 $elasticsearch->indices->delete(index => $self->index_name);
# Record in the stored status that the index now has to be recreated.
368 $self->set_index_status_recreate_required();
374 Creates the index (including mappings) on the Elasticsearch server.
# Create the index on the server using the generated settings.
380 my $settings = $self->get_elasticsearch_settings();
381 my $elasticsearch = $self->get_elasticsearch();
382 $elasticsearch->indices->create(
383 index => $self->index_name,
385 settings => $settings
# Then push the mappings. update_mappings also sets the stored index status
# (OK normally, recreate-required on its failure path).
388 $self->update_mappings();
393 Checks if index has been created on the Elasticsearch server. Returns C<1> or the
394 empty string to indicate whether index exists or not.
# Delegates to the client's indices->exists call for this indexer's index name
# and returns its result directly.
400 my $elasticsearch = $self->get_elasticsearch();
401 return $elasticsearch->indices->exists(
402 index => $self->index_name,
414 =item Chris Cormack C<< <chrisc@catalyst.net.nz> >>
416 =item Robin Sheat C<< <robin@catalyst.net.nz> >>