#!/usr/bin/perl
-use strict;
-#use warnings; FIXME - Bug 2505
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+use Modern::Perl;
use C4::Context;
use Getopt::Long;
# Script-wide state. Most of these are targets of the command-line option
# table that follows (e.g. -b, -a, -w, -y, -z, --where, --length).
my $reset;
my $biblios;
my $authorities;
-my $noxml;
# Set by --noxml: export and index records as ISO 2709 (USMARC) instead of
# MARCXML, which is otherwise the format used.
+my $as_usmarc;
# Set by -x. The switch is deprecated; the flag is cleared again right after
# option parsing so later code cannot rely on it.
+my $as_xml;
my $noshadow;
my $want_help;
-my $as_xml;
my $process_zebraqueue;
# Set by --skip-deletes: when processing the queue (-z), the 'deleted'
# entries are left alone and only 'updated' entries are handled.
+my $process_zebraqueue_skip_deletes;
my $do_not_clear_zebraqueue;
my $length;
my $where;
# Login name for the real user id of this process; compared against 'root'
# after option parsing.
my $run_user = (getpwuid($<))[0];
my $wait_for_lock = 0;
my $use_flock;
# Table that biblionumbers are selected from (-t|--table).
+my $table = 'biblioitems';
my $verbose_logging = 0;
# Log-level arguments placed on the zebraidx command line.
my $zebraidx_log_opt = " -v none,fatal,warn ";
'I|skip-index' => \$skip_index,
'nosanitize' => \$nosanitize,
'b' => \$biblios,
- 'noxml' => \$noxml,
+ 'noxml' => \$as_usmarc,
'w' => \$noshadow,
'a' => \$authorities,
'h|help' => \$want_help,
'x' => \$as_xml,
'y' => \$do_not_clear_zebraqueue,
'z' => \$process_zebraqueue,
+ 'skip-deletes' => \$process_zebraqueue_skip_deletes,
'where:s' => \$where,
'length:i' => \$length,
'offset:i' => \$offset,
'v+' => \$verbose_logging,
'run-as-root' => \$run_as_root,
'wait-for-lock' => \$wait_for_lock,
+ 't|table:s' => \$table,
);
if (not $result or $want_help) {
exit 0;
}
# -x used to request XML export; that is now the default, so the switch only
# triggers a deprecation warning and the flag is reset to undef.
+if ( $as_xml ) {
+ warn "Warning: You passed -x which is already the default and is now deprecated\n";
+ undef $as_xml; # Should not be used later
+}
+
if( not defined $run_as_root and $run_user eq 'root') {
my $msg = "Warning: You are running this script as the user 'root'.\n";
$msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
die $msg;
}
-if ( !$as_xml and $nosanitize ) {
- my $msg = "Cannot specify both -no_xml and -nosanitize\n";
+if ( $as_usmarc and $nosanitize ) {
+ my $msg = "Cannot specify both -noxml and -nosanitize\n";
$msg .= "Please do '$0 --help' to see usage.\n";
die $msg;
}
die $msg;
}
-if ($reset) {
- $noshadow = 1;
-}
-
-if ($noshadow) {
- $noshadow = ' -n ';
-}
-
if ($daemon_mode) {
# incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
if ($skip_export or $keep_export or $skip_index or
die $msg;
}
+# Tables that may be named with -t|--table. The value is later interpolated
+# into the SELECT that lists biblionumbers, so it has to be exactly one of
+# these names.
+our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio' );
+# Compare literally with eq. A pattern built from $table would treat the
+# user's value as a regex, so input such as 'biblio|x' or '.*' would match an
+# allowed name without being one and slip through into the SQL text.
+unless ( grep { $_ eq $table } @tables_allowed_for_select ) {
+ die "Cannot specify -t|--table with value '$table'. Only "
+ . ( join ', ', @tables_allowed_for_select )
+ . " are allowed.";
+}
+
# -v is for verbose, which seems backwards here because of how logging is set
# on the CLI of zebraidx. It works this way. The default is to not log much
my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
my $kohadir = C4::Context->config('intranetdir');
-my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') || 'grs1';
-my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') || 'dom';
+my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') // 'dom';
+my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') // 'dom';
-my $dbh = C4::Context->dbh;
my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
# Opening of the MARCXML <collection> wrapper: XML declaration plus the root
# element. Printed at the start of each exported_records file unless records
# are being exported as USMARC.
+my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+};
+
# Matching close of the wrapper, printed just before the export file handle
# is closed (again only when not exporting as USMARC).
+my $marcxml_close = q{
+</collection>
+};
+
# Protect against simultaneous updates of the zebra index by using a lock file.
# Create our own lock directory if it's missing. This should be created
# by koha-zebra-ctl.sh or at system installation. If the desired directory
}
my $tester = XML::LibXML->new();
+my $dbh;
# The main work is done here by calling do_one_pass(). We have added locking
# to avoid race conditions between full rebuilds and incremental updates either from
while (1) {
# For incremental updates, skip the update if the updates are locked
if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
- do_one_pass() if ( zebraqueue_not_empty() );
+ eval {
+ $dbh = C4::Context->dbh;
+ do_one_pass() if ( zebraqueue_not_empty() );
+ };
+ if ($@ && $verbose_logging) {
+ warn "Warning : $@\n";
+ }
_flock($LockFH, LOCK_UN);
}
sleep $daemon_sleep;
# all one-off invocations
my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
if (_flock($LockFH, $lock_mode)) {
+ $dbh = C4::Context->dbh;
do_one_pass();
_flock($LockFH, LOCK_UN);
} else {
sub do_one_pass {
if ($authorities) {
- index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+ index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
} else {
print "skipping authorities\n" if ( $verbose_logging );
}
if ($biblios) {
- index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+ index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
} else {
print "skipping biblios\n" if ( $verbose_logging );
}
} # ---------- end of subroutine check_zebra_dirs ----------
sub index_records {
- my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
+ my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
my $num_records_exported = 0;
- my $records_deleted;
+ my $records_deleted = {};
my $need_reset = check_zebra_dirs($server_dir);
if ($need_reset) {
print "$0: found broken zebra server directories: forcing a rebuild\n";
mkdir "$directory" unless (-d $directory);
mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
if ($process_zebraqueue) {
- my $entries = select_zebraqueue_records($record_type, 'deleted');
- mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
- $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml);
- mark_zebraqueue_batch_done($entries);
+ my $entries;
+
+ unless ( $process_zebraqueue_skip_deletes ) {
+ $entries = select_zebraqueue_records($record_type, 'deleted');
+ mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
+ $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_usmarc);
+ mark_zebraqueue_batch_done($entries);
+ }
+
$entries = select_zebraqueue_records($record_type, 'updated');
mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
- $num_records_exported = export_marc_records_from_list($record_type,
- $entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted);
+ $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_usmarc, $records_deleted);
mark_zebraqueue_batch_done($entries);
+
} else {
my $sth = select_all_records($record_type);
- $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
+ $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_usmarc, $nosanitize);
unless ($do_not_clear_zebraqueue) {
mark_all_zebraqueue_done($record_type);
}
print "REINDEXING zebra\n";
print "====================\n";
}
- my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
+ my $record_fmt = ($as_usmarc) ? 'iso2709' : 'marcxml' ;
if ($process_zebraqueue) {
do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
if %$records_deleted;
}
sub select_all_biblios {
- my $strsth = qq{ SELECT biblionumber FROM biblioitems };
+ # Builds the query listing the biblionumbers to export: rows come from
+ # $table, optionally filtered by $where and limited by $length/$offset.
+ #
+ # $table is interpolated into the SQL text, so only an exact allow-list
+ # name may be used; anything else falls back to 'biblioitems'. The check
+ # uses eq rather than a pattern built from $table, because /^$table$/
+ # treats the value as a regex and input like 'biblio|x' would match an
+ # allowed name without being one.
+ $table = 'biblioitems'
+ unless grep { $_ eq $table } @tables_allowed_for_select;
+ my $strsth = qq{ SELECT biblionumber FROM $table };
$strsth.=qq{ WHERE $where } if ($where);
$strsth.=qq{ LIMIT $length } if ($length && !$offset);
$strsth.=qq{ LIMIT $offset,$length } if ($offset);
return $sth;
}
-sub include_xml_wrapper {
- my $as_xml = shift;
- my $record_type = shift;
-
- return 0 unless $as_xml;
- return 1 if $record_type eq 'biblio' and $bib_index_mode eq 'dom';
- return 1 if $record_type eq 'authority' and $auth_index_mode eq 'dom';
- return 0;
-
-}
-
sub export_marc_records_from_sth {
- my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;
+ my ($record_type, $sth, $directory, $as_usmarc, $nosanitize) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open
+ unless $as_usmarc;
+
my $i = 0;
my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
while (my ($record_number) = $sth->fetchrow_array) {
}
next;
}
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc);
if (defined $marc) {
eval {
my $rec;
- if ($as_xml) {
+ if ($as_usmarc) {
+ $rec = $marc->as_usmarc();
+ } else {
$rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
eval {
my $doc = $tester->parse_string($rec);
die "invalid XML: $@";
}
$rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
}
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML");
warn "... specific error is $@" if $verbose_logging;
}
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+ print {$fh} $marcxml_close
+ unless $as_usmarc;
+
close $fh;
return $num_exported;
}
sub export_marc_records_from_list {
- my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;
+ my ($record_type, $entries, $directory, $as_usmarc, $records_deleted) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open
+ unless $as_usmarc;
+
my $i = 0;
# Skip any deleted records. We check for this anyway, but this reduces error spam
@$entries ) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc);
if (defined $marc) {
eval {
my $rec;
- if ($as_xml) {
+ if ( $as_usmarc ) {
+ $rec = $marc->as_usmarc();
+ } else {
$rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
$rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
}
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML");
}
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+
+ print {$fh} $marcxml_close
+ unless $as_usmarc;
+
close $fh;
return $num_exported;
}
sub generate_deleted_marc_records {
- my ($record_type, $entries, $directory, $as_xml) = @_;
+
+ my ($record_type, $entries, $directory, $as_usmarc) = @_;
my $records_deleted = {};
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open
+ unless $as_usmarc;
+
my $i = 0;
foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
print "\r$i" unless ($i++ %100 or !$verbose_logging);
}
my $rec;
- if ($as_xml) {
+ if ( $as_usmarc ) {
+ $rec = $marc->as_usmarc();
+ } else {
$rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ # Remove the record's XML header
$rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
}
print {$fh} $rec;
$records_deleted->{$record_number} = 1;
}
print "\nRecords exported: $i\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
- close $fh;
- return $records_deleted;
+ print {$fh} $marcxml_close
+ unless $as_usmarc;
+ close $fh;
+ return $records_deleted;
}
sub get_corrected_marc_record {
- my ($record_type, $record_number, $noxml) = @_;
+ my ($record_type, $record_number, $as_usmarc) = @_;
- my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
+ my $marc = get_raw_marc_record($record_type, $record_number, $as_usmarc);
if (defined $marc) {
fix_leader($marc);
}
sub get_raw_marc_record {
- my ($record_type, $record_number, $noxml) = @_;
+ my ($record_type, $record_number, $as_usmarc) = @_;
my $marc;
if ($record_type eq 'biblio') {
- if ($noxml) {
+ if ($as_usmarc) {
my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
$fetch_sth->execute($record_number);
if (my ($blob) = $fetch_sth->fetchrow_array) {
my $marc = shift;
my $string;
- if ( length($marc->subfield( 100, "a" )) == 36 ) {
+ my $length_100a = length($marc->subfield( 100, "a" ));
+ if ( $length_100a and $length_100a == 36 ) {
$string = $marc->subfield( 100, "a" );
my $f100 = $marc->field(100);
$marc->delete_field($f100);
$string = sprintf( "%-*s", 35, $string );
}
substr( $string, 22, 6, "frey50" );
- unless ( length($marc->subfield( 100, "a" )) == 36 ) {
+ $length_100a = length($marc->subfield( 100, "a" ));
+ unless ( $length_100a and $length_100a == 36 ) {
$marc->delete_field($marc->field(100));
$marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
}
my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
+ $noshadow //= '';
+
+ if ($noshadow or $reset_index) {
+ $noshadow = '-n';
+ }
+
system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
-
}
sub _flock {
table. Cannot be used with -r
or -s.
+ --skip-deletes only select record updates, not record
+ deletions, to avoid potential excessive
+ I/O when zebraidx processes deletions.
+ If this option is used for normal indexing,
+ a cronjob should be set up to run
+ rebuild_zebra.pl -z without --skip-deletes
+ during off hours.
+ Only effective with -z.
+
-r clear Zebra index before
adding records to index. Implies -w.
option is recommended only
for advanced user.
- -x export and index as xml instead of is02709 (biblios only).
- use this if you might have records > 99,999 chars,
-
-nosanitize export biblio/authority records directly from DB marcxml
field without sanitizing records. It speed up
dump process but could fail if DB contains badly
to wait for the lock to free and then continue
processing the rebuild request,
+ --table specify a table (can be items, biblioitems or biblio) to retrieve biblionumber to index.
+ biblioitems is the default value.
+
--help or -h show this message.
_USAGE_
}