1 package Koha::SearchEngine::Elasticsearch::Indexer;
3 # Copyright 2013 Catalyst IT
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
23 use List::Util qw(any);
24 use base qw(Koha::SearchEngine::Elasticsearch);
32 Koha::SearchEngine::Elasticsearch::Indexer - handles adding new records to the index
36 my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new(
37 { index => Koha::SearchEngine::BIBLIOS_INDEX } );
38 $indexer->drop_index();
39 $indexer->update_index(\@biblionumbers, \@records);
46 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_OK>
48 Represents an index state where index is created and in a working state.
50 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_REINDEX_REQUIRED>
52 Not currently used, but could be useful later, for example if we can detect when a new field or mapping has been added.
54 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_RECREATE_REQUIRED>
56 Represents an index state where index needs to be recreated and is not in a working state.
# Index exists but would need a reindex; per the POD this state is not
# currently used.
64 INDEX_STATUS_REINDEX_REQUIRED => 1,
# Index is not in a working state and has to be recreated.
65 INDEX_STATUS_RECREATE_REQUIRED => 2,
70 =head2 update_index($biblionums, $records)
73 $self->update_index($biblionums, $records);
75 die("Something went wrong trying to update index:" . $_[0]);
78 Converts C<MARC::Records> C<$records> to Elasticsearch documents and performs
79 an update request for these records on the Elasticsearch index.
85 Arrayref of biblio numbers for the C<$records>, the order must be the same as
86 and match up with C<$records>.
90 Arrayref of C<MARC::Record>s.
# Body of update_index: indexes $records under the ids given in $biblionums.
# NOTE(review): the enclosing `sub` line, the declaration of @body and the
# closing braces/parentheses are not visible in this excerpt.
97 my ($self, $biblionums, $records) = @_;
98 my $conf = $self->get_elasticsearch_params();
99 my $elasticsearch = $self->get_elasticsearch();
# Convert all MARC records into Elasticsearch documents in one call.
100 my $documents = $self->marc_records_to_documents($records);
# Walk both arrayrefs by position: the biblionumber at index $i is paired
# with the document at index $i, so the two must be in the same order.
103 for (my $i = 0; $i < scalar @$biblionums; $i++) {
104 my $id = $biblionums->[$i];
105 my $document = $documents->[$i];
111 push @body, $document;
# Issue a bulk request against the configured index.
115 $response = $elasticsearch->bulk(
116 index => $conf->{index_name},
117 type => 'data', # is just hard coded in Indexer.pm?
124 =head2 set_index_status_ok
126 Convenience method for setting index status to C<INDEX_STATUS_OK>.
sub set_index_status_ok {
    my $self = shift;

    # Route through index_status() so the value is validated and stored there.
    return $self->index_status(INDEX_STATUS_OK);
}
135 =head2 is_index_status_ok
137 Convenience method for checking if index status is C<INDEX_STATUS_OK>.
sub is_index_status_ok {
    my $self = shift;

    # Numeric comparison of the stored status against the OK constant.
    my $current_status = $self->index_status;
    return $current_status == INDEX_STATUS_OK;
}
146 =head2 set_index_status_reindex_required
148 Convenience method for setting index status to C<INDEX_STATUS_REINDEX_REQUIRED>.
sub set_index_status_reindex_required {
    my $self = shift;

    # Route through index_status() so the value is validated and stored there.
    return $self->index_status(INDEX_STATUS_REINDEX_REQUIRED);
}
157 =head2 is_index_status_reindex_required
159 Convenience method for checking if index status is C<INDEX_STATUS_REINDEX_REQUIRED>.
sub is_index_status_reindex_required {
    my $self = shift;

    # Numeric comparison of the stored status against the REINDEX_REQUIRED constant.
    my $current_status = $self->index_status;
    return $current_status == INDEX_STATUS_REINDEX_REQUIRED;
}
168 =head2 set_index_status_recreate_required
170 Convenience method for setting index status to C<INDEX_STATUS_RECREATE_REQUIRED>.
sub set_index_status_recreate_required {
    my $self = shift;

    # Route through index_status() so the value is validated and stored there.
    return $self->index_status(INDEX_STATUS_RECREATE_REQUIRED);
}
179 =head2 is_index_status_recreate_required
181 Convenience method for checking if index status is C<INDEX_STATUS_RECREATE_REQUIRED>.
sub is_index_status_recreate_required {
    my $self = shift;

    # Numeric comparison of the stored status against the RECREATE_REQUIRED constant.
    my $current_status = $self->index_status;
    return $current_status == INDEX_STATUS_RECREATE_REQUIRED;
}
190 =head2 index_status($status)
192 Will either set the current index status to C<$status> and return C<$status>,
193 or return the current index status if called with no arguments.
199 Optional argument. If passed will set current index status to C<$status> if C<$status> is
200 a valid status. See L</CONSTANTS>.
# Body of index_status: combined setter/getter for this index's status.
# NOTE(review): the enclosing `sub` line, part of the list of accepted
# statuses and the closing braces are not visible in this excerpt.
207 my ($self, $status) = @_;
# The status is kept in a preference whose name is derived from the index.
208 my $key = 'ElasticsearchIndexStatus_' . $self->index;
# Setter path: only entered when a status argument was supplied.
210 if (defined $status) {
# Accept the value only if it is numerically equal to one of the listed
# status constants; otherwise an exception is thrown.
211 unless (any { $status == $_ } (
213 INDEX_STATUS_REINDEX_REQUIRED,
214 INDEX_STATUS_RECREATE_REQUIRED,
217 Koha::Exceptions::Exception->throw("Invalid index status: $status");
# Store the new status under the per-index key.
219 C4::Context->set_preference($key, $status);
# Read the stored status back from the preference.
223 return C4::Context->preference($key);
227 =head2 update_mappings
229 Generate Elasticsearch mappings from mappings stored in database and
230 perform a request to update Elasticsearch index mappings. Will throw an
231 error and set index status to C<INDEX_STATUS_RECREATE_REQUIRED> if update
# Sends the generated mappings to the Elasticsearch index, one mapping type
# per put_mapping request, and records the outcome in the index status.
# NOTE(review): the error-handling scaffolding and some call arguments are
# not visible in this excerpt.
236 sub update_mappings {
238 my $conf = $self->get_elasticsearch_params();
239 my $elasticsearch = $self->get_elasticsearch();
240 my $mappings = $self->get_elasticsearch_mappings();
# One put_mapping request per mapping type.
242 foreach my $type (keys %{$mappings}) {
244 my $response = $elasticsearch->indices->put_mapping(
245 index => $conf->{index_name},
248 $type => $mappings->{$type}
# Presumably the failure path (handler syntax not visible; confirm): mark the
# index as needing recreation, then raise an exception carrying the reason
# extracted from the error passed in $_[0].
252 $self->set_index_status_recreate_required();
253 my $reason = $_[0]->{vars}->{body}->{error}->{reason};
254 Koha::Exceptions::Exception->throw(
255 error => "Unable to update mappings for index \"$conf->{index_name}\". Reason was: \"$reason\". Index needs to be recreated and reindexed",
# Mark the index as OK.
259 $self->set_index_status_ok();
262 =head2 update_index_background($biblionums, $records)
264 This has exactly the same API as C<update_index> however it'll
265 return immediately. It'll start a background process that does the adding.
267 If it fails to add to Elasticsearch then it'll add to a queue that will cause
268 it to be updated by a regular index cron job in the future.
272 # TODO implement in the future - I don't know the best way of doing this yet.
273 # If fork: make sure process group is changed so apache doesn't wait for us.
sub update_index_background {
    my ($self, @update_args) = @_;

    # Not asynchronous yet: simply forwards its arguments to update_index.
    return $self->update_index(@update_args);
}
280 =head2 delete_index($biblionums)
282 C<$biblionums> is an arrayref of biblionumbers to delete from the index.
# Body of delete_index: removes the given biblionumbers from the index.
# NOTE(review): the enclosing `sub` line and the closing parts of the bulk
# call are not visible in this excerpt.
287 my ($self, $biblionums) = @_;
289 my $elasticsearch = $self->get_elasticsearch();
290 my $conf = $self->get_elasticsearch_params();
# Build one bulk "delete" action per biblionumber, keyed by _id.
292 my @body = map { { delete => { _id => $_ } } } @{$biblionums};
293 my $result = $elasticsearch->bulk(
294 index => $conf->{index_name},
# A true `errors` entry in the bulk result aborts with croak.
298 if ($result->{errors}) {
299 croak "An Elasticsearch error occurred during bulk delete";
303 =head2 delete_index_background($biblionums)
305 Identical to L</delete_index($biblionums)>
309 # TODO: Should be made async
sub delete_index_background {
    my ($self, @delete_args) = @_;

    # Currently a plain synchronous pass-through to delete_index.
    return $self->delete_index(@delete_args);
}
317 Drops the index from the Elasticsearch server.
# Body of drop_index.
# NOTE(review): the enclosing `sub` line and closing braces are not visible
# in this excerpt.
# Only act when the index is actually present on the server.
323 if ($self->index_exists) {
324 my $conf = $self->get_elasticsearch_params();
325 my $elasticsearch = $self->get_elasticsearch();
326 $elasticsearch->indices->delete(index => $conf->{index_name});
# With the index deleted, flag it as needing to be recreated.
327 $self->set_index_status_recreate_required();
333 Creates the index (including mappings) on the Elasticsearch server.
# Body of create_index.
# NOTE(review): the enclosing `sub` line and the closing parts of the create
# call are not visible in this excerpt.
339 my $conf = $self->get_elasticsearch_params();
340 my $settings = $self->get_elasticsearch_settings();
341 my $elasticsearch = $self->get_elasticsearch();
# Create the index under the configured name, passing the generated settings.
342 $elasticsearch->indices->create(
343 index => $conf->{index_name},
345 settings => $settings
# Install the mappings afterwards; update_mappings also sets the index status.
348 $self->update_mappings();
353 Checks if index has been created on the Elasticsearch server. Returns C<1> or the
354 empty string to indicate whether index exists or not.
# Body of index_exists.
# NOTE(review): the enclosing `sub` line and the end of the call are not
# visible in this excerpt.
360 my $conf = $self->get_elasticsearch_params();
361 my $elasticsearch = $self->get_elasticsearch();
# Ask the server whether the configured index exists and return its answer.
362 return $elasticsearch->indices->exists(
363 index => $conf->{index_name},
375 =item Chris Cormack C<< <chrisc@catalyst.net.nz> >>
377 =item Robin Sheat C<< <robin@catalyst.net.nz> >>