3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
22 use Getopt::Long qw( GetOptions );
23 use Fcntl qw( LOCK_EX LOCK_NB LOCK_UN );
24 use File::Temp qw( tempdir );
25 use File::Path qw( mkpath rmtree );
26 use C4::Biblio qw( GetXmlBiblio );
27 use C4::AuthoritiesMarc qw( GetAuthority GetAuthorityXML );
28 use C4::Items qw( Item2Marc );
29 use Koha::RecordProcessor;
33 use constant LOCK_FILENAME => 'rebuild..LCK';
35 # script that checks zebradir structure & creates directories & mandatory files if needed
39 $|=1; # flushes output
40 # If the cron job starts us in an unreadable dir, we will break without
42 chdir $ENV{HOME} if (!(-r '.'));
56 my $process_zebraqueue;
57 my $process_zebraqueue_skip_deletes;
58 my $do_not_clear_zebraqueue;
63 my $run_user = (getpwuid($<))[0];
64 my $wait_for_lock = 0;
66 my $table = 'biblioitems';
67 my $is_memcached = Koha::Caches->get_instance->memcached_cache;
69 my $verbose_logging = 0;
70 my $zebraidx_log_opt = " -v none,fatal,warn ";
71 my $result = GetOptions(
72 'daemon' => \$daemon_mode,
73 'sleep:i' => \$daemon_sleep,
78 'I|skip-index' => \$skip_index,
79 'nosanitize' => \$nosanitize,
83 'h|help' => \$want_help,
85 'y' => \$do_not_clear_zebraqueue,
86 'z' => \$process_zebraqueue,
87 'skip-deletes' => \$process_zebraqueue_skip_deletes,
89 'length:i' => \$length,
90 'offset:i' => \$offset,
91 'v+' => \$verbose_logging,
92 'run-as-root' => \$run_as_root,
93 'wait-for-lock' => \$wait_for_lock,
94 't|table:s' => \$table,
97 if (not $result or $want_help) {
103 warn "Warning: You passed -x which is already the default and is now deprecated\n";
104 undef $as_xml; # Should not be used later
107 if( not defined $run_as_root and $run_user eq 'root') {
108 my $msg = "Warning: You are running this script as the user 'root'.\n";
109 $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
110 $msg .= "Please do '$0 --help' to see usage.\n";
114 if ($process_zebraqueue and ($skip_export or $reset)) {
115 my $msg = "Cannot specify -r or -s if -z is specified\n";
116 $msg .= "Please do '$0 --help' to see usage.\n";
120 if ($process_zebraqueue and $do_not_clear_zebraqueue) {
121 my $msg = "Cannot specify both -y and -z\n";
122 $msg .= "Please do '$0 --help' to see usage.\n";
127 # incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
128 if ($skip_export or $keep_export or $skip_index or
129 $where or $length or $offset) {
130 my $msg = "Cannot specify -s, -k, -I, -where, -length, or -offset with -daemon.\n";
131 $msg .= "Please do '$0 --help' to see usage.\n";
134 unless ($is_memcached) {
135 warn "Warning: script running in daemon mode, without recommended caching system (memcached).\n";
139 $process_zebraqueue = 1;
142 if (not $biblios and not $authorities) {
143 my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
144 $msg .= "Please do '$0 --help' to see usage.\n";
148 our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio', 'biblio_metadata' );
149 unless ( grep { $_ eq $table } @tables_allowed_for_select ) {
150 die "Cannot specify -t|--table with value '$table'. Only "
151 . ( join ', ', @tables_allowed_for_select )
156 # -v is for verbose, which seems backwards here because of how logging is set
157 # on the CLI of zebraidx. It works this way. The default is to not log much
158 if ($verbose_logging >= 2) {
159 $zebraidx_log_opt = '-v none,fatal,warn,all';
163 unless ($directory) {
165 $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
169 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
170 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
172 my $kohadir = C4::Context->config('intranetdir');
174 my ($biblionumbertagfield,$biblionumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblio.biblionumber" );
175 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblioitems.biblioitemnumber" );
177 my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
178 <collection xmlns="http://www.loc.gov/MARC21/slim">
181 my $marcxml_close = q{
185 # Protect against simultaneous update of the zebra index by using a lock file.
186 # Create our own lock directory if it is missing. This should be created
187 # by koha-zebra-ctl.sh or at system installation. If the desired directory
188 # does not exist and cannot be created, we fall back on /tmp - which will
191 my ($lockfile, $LockFH);
193 C4::Context->config("zebra_lockdir"),
194 '/var/lock/zebra_' . C4::Context->config('database'),
195 '/tmp/zebra_' . C4::Context->config('database')
197 #we try three possibilities (we really want to lock :)
199 ($LockFH, $lockfile) = _create_lockfile($_.'/rebuild');
200 last if defined $LockFH;
202 if( !defined $LockFH ) {
203 print "WARNING: Could not create lock file $lockfile: $!\n";
204 print "Please check your koha-conf.xml for ZEBRA_LOCKDIR.\n";
205 print "Verify file permissions for it too.\n";
206 $use_flock = 0; # we disable file locking now and will continue
208 # note that this mimics old behavior (before we used
212 my $start_time = time();
213 if ( $verbose_logging ) {
214 my $pretty_time = POSIX::strftime("%H:%M:%S",localtime($start_time));
215 print "Zebra configuration information\n";
216 print "================================\n";
217 print "Zebra biblio directory = $biblioserverdir\n";
218 print "Zebra authorities directory = $authorityserverdir\n";
219 print "Koha directory = $kohadir\n";
220 print "Lockfile = $lockfile\n" if $lockfile;
221 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
222 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
223 print "================================\n";
224 print "Job started: $pretty_time\n";
227 my $tester = XML::LibXML->new();
230 # The main work is done here by calling do_one_pass(). We have added locking
231 # to avoid race conditions between full rebuilds and incremental updates either from
232 # daemon mode or periodic invocation from cron. The race can lead to an updated
233 # record being overwritten by a rebuild if the update is applied after the export
234 # by the rebuild and before the rebuild finishes (more likely to affect large
237 # We have chosen to exit immediately by default if we cannot obtain the lock
238 # to prevent the potential for an infinite backlog from cron invocations, but an
239 # option (wait-for-lock) is provided to let the program wait for the lock.
240 # See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
243 # For incremental updates, skip the update if the updates are locked
244 if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
246 $dbh = C4::Context->dbh;
247 if( zebraqueue_not_empty() ) {
248 Koha::Caches->flush_L1_caches() if $is_memcached;
252 if ($@ && $verbose_logging) {
253 warn "Warning : $@\n";
255 _flock($LockFH, LOCK_UN);
260 # all one-off invocations
261 my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
262 if (_flock($LockFH, $lock_mode)) {
263 $dbh = C4::Context->dbh;
265 _flock($LockFH, LOCK_UN);
267 print "Skipping rebuild/update because flock failed on $lockfile: $!\n";
272 if ( $verbose_logging ) {
273 print "====================\n";
274 print "Indexing complete: ". pretty_time() . "\n";
275 print "====================\n";
277 print "====================\n";
280 print "NOTHING cleaned : the export $directory has been kept.\n";
281 print "You can re-run this script with the -s ";
283 print " and -d $directory parameters";
288 print "if you just want to rebuild zebra after changing zebra config files\n";
290 unless ($use_tempdir) {
291 # if we're using a temporary directory
292 # created by File::Temp, it will be removed
294 rmtree($directory, 0, 1);
295 print "directory $directory deleted\n";
301 index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
303 print "skipping authorities\n" if ( $verbose_logging );
307 index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
309 print "skipping biblios\n" if ( $verbose_logging );
313 # Check the zebra update queue and return true if there are records to process
314 # This routine will handle each of -ab, -a, or -b, but in practice we force
315 # -ab when in daemon mode.
316 sub zebraqueue_not_empty {
319 if ($authorities && $biblios) {
320 $where_str = 'done = 0;';
322 $where_str = 'server = "biblioserver" AND done = 0;';
324 $where_str = 'server = "authorityserver" AND done = 0;';
327 $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
330 my $count = $query->fetchrow_arrayref->[0];
331 print "queued records: $count\n" if $verbose_logging > 0;
335 # This checks to see if the zebra directories exist under the provided path.
336 # If they don't, then zebra is likely to spit the dummy. This returns true
337 # if the directories had to be created, false otherwise.
# Make sure the Zebra working directories exist beneath a server directory,
# creating any that are missing.
#
# Parameters:
#   $base - path of the Zebra server directory ('/' is appended to it)
#
# Returns 1 if at least one directory had to be created, 0 otherwise.
# Dies with "Unable to create ..." if a missing directory cannot be created.
sub check_zebra_dirs {
    my ($base) = shift() . '/';
    my $needed_repairing = 0;
    # '' stands for the base directory itself; it is listed first so that
    # the subdirectories can be created inside it.
    my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' );
    foreach my $dir (@dirs) {
        my $bdir = $base . $dir;
        next if -d $bdir;
        $needed_repairing = 1;
        # Low-precedence 'or' is required here: with '||' the die would be
        # attached to $bdir (always true) and a failed mkdir went unnoticed.
        mkdir $bdir or die "Unable to create '$bdir': $!\n";
        print "$0: needed to create '$bdir'\n";
    }
    return $needed_repairing;
}   # ----------  end of subroutine check_zebra_dirs  ----------
354 my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
356 my $num_records_exported = 0;
357 my $records_deleted = {};
358 my $need_reset = check_zebra_dirs($server_dir);
360 print "$0: found broken zebra server directories: forcing a rebuild\n";
363 if ($skip_export && $verbose_logging) {
364 print "====================\n";
365 print "SKIPPING $record_type export\n";
366 print "====================\n";
368 if ( $verbose_logging ) {
369 print "====================\n";
370 print "exporting $record_type " . pretty_time() . "\n";
371 print "====================\n";
373 mkdir "$directory" unless (-d $directory);
374 mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
375 if ($process_zebraqueue) {
378 unless ( $process_zebraqueue_skip_deletes ) {
379 $entries = select_zebraqueue_records($record_type, 'deleted');
380 mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
381 $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type");
382 mark_zebraqueue_batch_done($entries);
385 $entries = select_zebraqueue_records($record_type, 'updated');
386 mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
387 $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $records_deleted);
388 mark_zebraqueue_batch_done($entries);
391 my $sth = select_all_records($record_type);
392 $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $nosanitize);
393 unless ($do_not_clear_zebraqueue) {
394 mark_all_zebraqueue_done($record_type);
400 # and reindexing everything
403 if ($verbose_logging) {
404 print "====================\n";
405 print "SKIPPING $record_type indexing\n";
406 print "====================\n";
409 if ( $verbose_logging ) {
410 print "====================\n";
411 print "REINDEXING zebra " . pretty_time() . "\n";
412 print "====================\n";
414 my $record_fmt = 'marcxml';
415 if ($process_zebraqueue) {
416 do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
417 if %$records_deleted;
418 do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
419 if $num_records_exported;
421 do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
422 if ($num_records_exported or $skip_export);
428 sub select_zebraqueue_records {
429 my ($record_type, $update_type) = @_;
431 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
432 my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
434 my $sth = $dbh->prepare("SELECT id, biblio_auth_number
440 $sth->execute($server, $op);
441 my $entries = $sth->fetchall_arrayref({});
444 sub mark_all_zebraqueue_done {
445 my ($record_type) = @_;
447 my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
449 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
452 $sth->execute($server);
455 sub mark_zebraqueue_batch_done {
458 $dbh->{AutoCommit} = 0;
459 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
461 foreach my $id (map { $_->{id} } @$entries) {
464 $dbh->{AutoCommit} = 1;
# Pick the selector matching the record type: 'biblio' goes to
# select_all_biblios(), anything else to select_all_authorities().
# Returns whatever the chosen selector returns.
sub select_all_records {
    my ($record_type) = @_;
    if ( $record_type eq 'biblio' ) {
        return select_all_biblios();
    }
    return select_all_authorities();
}
# Build and run the query listing the authority ids to export.
# Reads the file-level $where, $length and $offset options and the shared
# $dbh handle. An offset is only applied together with a length.
# Returns the executed statement handle; callers fetch rows from it.
sub select_all_authorities {
    my $sql = qq{SELECT authid FROM auth_header};
    if ($where) {
        $sql .= qq{ WHERE $where };
    }
    if ($length) {
        $sql .= $offset
            ? qq{ LIMIT $offset,$length }
            : qq{ LIMIT $length };
    }
    my $sth = $dbh->prepare($sql);
    $sth->execute();
    return $sth;
}
# Build and run the query listing the biblionumbers to export.
# Reads the file-level $table, $where, $length and $offset options and the
# shared $dbh handle.
# Returns the executed statement handle; callers fetch rows from it.
sub select_all_biblios {
    # Fall back to the default table if $table is not in the allow-list,
    # since its value is interpolated straight into the SQL below.
    $table = 'biblioitems'
      unless grep { $_ eq $table } @tables_allowed_for_select;
    my $strsth = qq{ SELECT DISTINCT biblionumber FROM $table };
    $strsth .= qq{ WHERE $where } if ($where);
    $strsth .= qq{ LIMIT $length } if ( $length && !$offset );
    # An offset is only meaningful together with a length (same rule as
    # select_all_authorities). Guarding on $offset alone produced the
    # invalid clause "LIMIT <offset>," when no length was given.
    $strsth .= qq{ LIMIT $offset,$length } if ( $length && $offset );
    my $sth = $dbh->prepare($strsth);
    $sth->execute();
    return $sth;
}
494 sub export_marc_records_from_sth {
495 my ($record_type, $sth, $directory, $nosanitize) = @_;
497 my $num_exported = 0;
498 open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
500 print {$fh} $marcxml_open;
503 my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField( "items.itemnumber" );
504 while (my ($record_number) = $sth->fetchrow_array) {
505 print "." if ( $verbose_logging );
506 print "\r$i" unless ($i++ %100 or !$verbose_logging);
508 my $marcxml = $record_type eq 'biblio'
509 ? GetXmlBiblio( $record_number )
510 : GetAuthorityXML( $record_number );
511 if ($record_type eq 'biblio'){
512 my $biblio = Koha::Biblios->find($record_number);
514 my $items = $biblio->items;
516 my $record = MARC::Record->new;
517 $record->encoding('UTF-8');
519 for my $item ( @{$items->unblessed} ) {
520 my $record = Item2Marc($item, $record_number);
521 push @itemsrecord, $record->field($itemtag);
523 $record->insert_fields_ordered(@itemsrecord);
524 my $itemsxml = $record->as_xml_record();
526 substr($marcxml, 0, length($marcxml)-10) .
527 substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
530 # extra test to ensure that result is valid XML; otherwise
531 # Zebra won't parse it in DOM mode
533 my $doc = $tester->parse_string($marcxml);
536 warn "Error exporting record $record_number ($record_type): $@\n";
540 $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
541 print {$fh} $marcxml;
546 my ($marc) = get_corrected_marc_record($record_type, $record_number);
549 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
551 my $doc = $tester->parse_string($rec);
554 die "invalid XML: $@";
556 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
561 warn "Error exporting record $record_number ($record_type) XML";
562 warn "... specific error is $@" if $verbose_logging;
566 print "\nRecords exported: $num_exported " . pretty_time() . "\n" if ( $verbose_logging );
567 print {$fh} $marcxml_close;
570 return $num_exported;
573 sub export_marc_records_from_list {
574 my ($record_type, $entries, $directory, $records_deleted) = @_;
576 my $num_exported = 0;
577 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
579 print {$fh} $marcxml_open;
583 # Skip any deleted records. We check for this anyway, but this reduces error spam
584 my %found = %$records_deleted;
585 foreach my $record_number ( map { $_->{biblio_auth_number} }
586 grep { !$found{ $_->{biblio_auth_number} }++ }
588 print "." if ( $verbose_logging );
589 print "\r$i" unless ($i++ %100 or !$verbose_logging);
590 my ($marc) = get_corrected_marc_record($record_type, $record_number);
593 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
594 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
599 warn "Error exporting record $record_number ($record_type) XML";
603 print "\nRecords exported: $num_exported " . pretty_time() . "\n" if ( $verbose_logging );
605 print {$fh} $marcxml_close;
608 return $num_exported;
611 sub generate_deleted_marc_records {
613 my ($record_type, $entries, $directory) = @_;
615 my $records_deleted = {};
616 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
618 print {$fh} $marcxml_open;
621 foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
622 print "\r$i" unless ($i++ %100 or !$verbose_logging);
623 print "." if ( $verbose_logging );
625 my $marc = MARC::Record->new();
626 if ($record_type eq 'biblio') {
627 fix_biblio_ids($marc, $record_number, $record_number);
629 fix_authority_id($marc, $record_number);
631 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
632 fix_unimarc_100($marc);
635 my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
636 # Remove the record's XML header
637 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
640 $records_deleted->{$record_number} = 1;
642 print "\nRecords exported: $i " . pretty_time() . "\n" if ( $verbose_logging );
644 print {$fh} $marcxml_close;
647 return $records_deleted;
650 sub get_corrected_marc_record {
651 my ( $record_type, $record_number ) = @_;
653 my $marc = get_raw_marc_record( $record_type, $record_number );
655 if ( defined $marc ) {
657 if ( $record_type eq 'authority' ) {
658 fix_authority_id( $marc, $record_number );
660 elsif ( $record_type eq 'biblio' ) {
663 push @filters, 'EmbedItemsAvailability';
664 push @filters, 'EmbedSeeFromHeadings'
665 if C4::Context->preference('IncludeSeeFromInSearches');
667 my $normalizer = Koha::RecordProcessor->new( { filters => \@filters } );
668 $marc = $normalizer->process($marc);
670 if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
671 fix_unimarc_100($marc);
678 sub get_raw_marc_record {
679 my ($record_type, $record_number) = @_;
682 if ($record_type eq 'biblio') {
684 my $biblio = Koha::Biblios->find($record_number);
685 $marc = $biblio->metadata->record({ embed_items => 1 });
688 # here we do warn since catching an exception
689 # means that the bib was found but failed
691 warn "error retrieving biblio $record_number";
695 eval { $marc = GetAuthority($record_number); };
697 warn "error retrieving authority $record_number";
705 # FIXME - this routine is suspect
706 # It blanks the Leader/00-05 and Leader/12-16 to
707 # force them to be recalculated correctly when
708 # the $marc->as_usmarc() or $marc->as_xml() is called.
709 # But why is this necessary? It would be a serious bug
710 # in MARC::Record (definitely) and MARC::File::XML (arguably)
711 # if they are emitting incorrect leader values.
714 my $leader = $marc->leader;
715 substr($leader, 0, 5) = ' ';
716 substr($leader, 10, 7) = '22 ';
717 $marc->leader(substr($leader, 0, 24));
721 # FIXME - it is essential to ensure that the biblionumber is present,
722 # otherwise, Zebra will choke on the record. However, this
723 # logic belongs in the relevant C4::Biblio APIs.
725 my $biblionumber = shift;
726 my $biblioitemnumber;
728 $biblioitemnumber = shift;
730 my $sth = $dbh->prepare(
731 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
732 $sth->execute($biblionumber);
733 ($biblioitemnumber) = $sth->fetchrow_array;
735 unless ($biblioitemnumber) {
736 warn "failed to get biblioitemnumber for biblio $biblionumber";
741 # FIXME - this is cheating on two levels
742 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
743 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
744 # present in the MARC::Record object ought to be part of GetMarcBiblio.
746 # On the other hand, this is better for now than what rebuild_zebra.pl used to
747 # do, which was duplicate the code for inserting the biblionumber
748 # and biblioitemnumber
749 C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
# Make sure the record's 001 control field carries the given authid.
# If the field is absent or holds a different value, any existing 001 is
# removed and a fresh one containing $authid is inserted in field order.
#
# FIXME - as with fix_biblio_ids, the authid must be present
#         for Zebra's sake.  However, this really belongs
#         in C4::AuthoritiesMarc.
sub fix_authority_id {
    my ($marc, $authid) = @_;
    my $control_field = $marc->field('001');
    if ( !$control_field or $control_field->data() ne $authid ) {
        $marc->delete_field( $marc->field('001') );
        $marc->insert_fields_ordered( MARC::Field->new( '001', $authid ) );
    }
}
765 sub fix_unimarc_100 {
766 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
770 my $length_100a = length($marc->subfield( 100, "a" ));
771 if ( $length_100a and $length_100a == 36 ) {
772 $string = $marc->subfield( 100, "a" );
773 my $f100 = $marc->field(100);
774 $marc->delete_field($f100);
777 $string = POSIX::strftime( "%Y%m%d", localtime );
779 $string = sprintf( "%-*s", 35, $string );
781 substr( $string, 22, 6, "frey50" );
782 $length_100a = length($marc->subfield( 100, "a" ));
783 unless ( $length_100a and $length_100a == 36 ) {
784 $marc->delete_field($marc->field(100));
785 $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
790 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;
792 my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
793 my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
794 my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
795 my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
799 if ($noshadow or $reset_index) {
803 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
804 system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
805 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
809 # test if flock is present; if so, use it; if not, return true
810 # op refers to the official flock operations including LOCK_EX,
812 # combining LOCK_EX with LOCK_NB returns immediately
814 if( !defined($use_flock) ) {
815 #check if flock is present; if not, you will have a fatal error
816 my $lock_acquired = eval { flock($fh, $op) };
817 # assuming that $fh and $op are fine(..), an undef $lock_acquired
819 $use_flock = defined($lock_acquired) ? 1 : 0;
820 print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
821 return 1 if !$use_flock;
822 return $lock_acquired;
824 return 1 if !$use_flock;
825 return flock($fh, $op);
829 sub _create_lockfile { #returns undef on failure
832 eval { mkpath($dir, 0, oct(755)) };
835 return if !open my $fh, q{>}, $dir.'/'.LOCK_FILENAME;
836 return ( $fh, $dir.'/'.LOCK_FILENAME );
842 my $elapsed = $now - $start_time;
851 my $now_pretty = POSIX::strftime("%H:%M:%S",localtime($now));
852 my $elapsed_pretty = sprintf "[%02d:%02d:%02d]",$h,$m,$s;
854 return "$now_pretty $elapsed_pretty";
859 $0: reindex MARC bibs and/or authorities in Zebra.
861 Use this batch job to reindex all biblio or authority
862 records in your Koha database.
866 -b index bibliographic records
868 -a index authority records
870 -daemon Run in daemon mode. The program will loop checking
871 for entries on the zebraqueue table, processing
872 them incrementally if present, and then sleep
873 for a few seconds before repeating the process
874 Checking the zebraqueue table is done with a cheap
875 SQL query. This allows for near realtime update of
876 the zebra search index with low system overhead.
877 Use -sleep to control the checking interval.
879 Daemon mode implies -z, -a, -b. The program will
880 refuse to start if options are present that do not
881 make sense while running as an incremental update
882 daemon (e.g. -r or -offset).
884 -sleep 10 Seconds to sleep between checks of the zebraqueue
885 table in daemon mode. The default is 5 seconds.
887 -z select only updated and deleted
888 records marked in the zebraqueue
889 table. Cannot be used with -r
892 --skip-deletes only select record updates, not record
893 deletions, to avoid potential excessive
894 I/O when zebraidx processes deletions.
895 If this option is used for normal indexing,
896 a cronjob should be set up to run
897 rebuild_zebra.pl -z without --skip-deletes
899 Only effective with -z.
901 -r clear Zebra index before
902 adding records to index. Implies -w.
904 -d Temporary directory for indexing.
905 If not specified, one is automatically
906 created. The export directory
907 is automatically deleted unless
908 you supply the -k switch.
910 -k Do not delete export directory.
912 -s Skip export. Used if you have
913 already exported the records
916 -nosanitize export biblio/authority records directly from DB marcxml
917 field without sanitizing records. It speed up
918 dump process but could fail if DB contains badly
919 encoded records. Works only with -x,
921 -w skip shadow indexing for this batch
923 -y do NOT clear zebraqueue after indexing; normally,
924 after doing batch indexing, zebraqueue should be
925 marked done for the affected record type(s) so that
926 a running zebraqueue_daemon doesn't try to reindex
927 the same records - specify -y to override this.
928 Cannot be used with -z.
930 -v increase the amount of logging. Normally only
931 warnings and errors from the indexing are shown.
932 Use log level 2 (-v -v) to include all Zebra logs.
934 --length 1234 how many biblio you want to export
935 --offset 1243 offset you want to start to
936 example: --offset 500 --length=500 will result in a LIMIT 500,500 (exporting 500 records, starting after the 500th one)
937 note that the numbers are NOT related to biblionumber, that's the intended behaviour.
938 --where let you specify a WHERE query, like itemtype='BOOK'
939 or something like that
941 --run-as-root explicitly allow script to run as 'root' user
943 --wait-for-lock when not running in daemon mode, the default
944 behavior is to abort a rebuild if the rebuild
945 lock is busy. This option will cause the program
946 to wait for the lock to free and then continue
947 processing the rebuild request,
949 --table specify a table (can be items, biblioitems, biblio, biblio_metadata) to retrieve biblionumber to index.
950 biblioitems is the default value.
952 --help or -h show this message.