#!/usr/bin/perl
-use strict;
-#use warnings; FIXME - Bug 2505
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+use Modern::Perl;
use C4::Context;
use Getopt::Long;
my $reset;
my $biblios;
my $authorities;
-my $noxml;
+my $as_usmarc;
+my $as_xml;
my $noshadow;
my $want_help;
-my $as_xml;
my $process_zebraqueue;
my $process_zebraqueue_skip_deletes;
my $do_not_clear_zebraqueue;
'I|skip-index' => \$skip_index,
'nosanitize' => \$nosanitize,
'b' => \$biblios,
- 'noxml' => \$noxml,
+ 'noxml' => \$as_usmarc,
'w' => \$noshadow,
'a' => \$authorities,
'h|help' => \$want_help,
exit 0;
}
+if ( $as_xml ) {
+ warn "Warning: You passed -x which is already the default and is now deprecated\n";
+ undef $as_xml; # Should not be used later
+}
+
if( not defined $run_as_root and $run_user eq 'root') {
my $msg = "Warning: You are running this script as the user 'root'.\n";
$msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
die $msg;
}
-if ( !$as_xml and $nosanitize ) {
- my $msg = "Cannot specify both -no_xml and -nosanitize\n";
+if ( $as_usmarc and $nosanitize ) {
+ my $msg = "Cannot specify both -noxml and -nosanitize\n";
$msg .= "Please do '$0 --help' to see usage.\n";
die $msg;
}
die $msg;
}
-if ($reset) {
- $noshadow = 1;
-}
-
-if ($noshadow) {
- $noshadow = ' -n ';
-}
-
if ($daemon_mode) {
# incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
if ($skip_export or $keep_export or $skip_index or
my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') // 'dom';
my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') // 'dom';
-my $dbh = C4::Context->dbh;
my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
+my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+};
+
+my $marcxml_close = q{
+</collection>
+};
+
# Protect against simultaneous updates of the zebra index by using a lock file.
# Create our own lock directory if it's missing. This should be created
# by koha-zebra-ctl.sh or at system installation. If the desired directory
}
my $tester = XML::LibXML->new();
+my $dbh;
# The main work is done here by calling do_one_pass(). We have added locking to
# avoid race conditions between full rebuilds and incremental updates either from
while (1) {
# For incremental updates, skip the update if the updates are locked
if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
- do_one_pass() if ( zebraqueue_not_empty() );
+ eval {
+ $dbh = C4::Context->dbh;
+ do_one_pass() if ( zebraqueue_not_empty() );
+ };
+ if ($@ && $verbose_logging) {
+ warn "Warning : $@\n";
+ }
_flock($LockFH, LOCK_UN);
}
sleep $daemon_sleep;
# all one-off invocations
my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
if (_flock($LockFH, $lock_mode)) {
+ $dbh = C4::Context->dbh;
do_one_pass();
_flock($LockFH, LOCK_UN);
} else {
sub do_one_pass {
if ($authorities) {
- index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+ index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
} else {
print "skipping authorities\n" if ( $verbose_logging );
}
if ($biblios) {
- index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+ index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
} else {
print "skipping biblios\n" if ( $verbose_logging );
}
} # ---------- end of subroutine check_zebra_dirs ----------
sub index_records {
- my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
+ my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
my $num_records_exported = 0;
my $records_deleted = {};
unless ( $process_zebraqueue_skip_deletes ) {
$entries = select_zebraqueue_records($record_type, 'deleted');
mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
- $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml);
+ $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_usmarc);
mark_zebraqueue_batch_done($entries);
}
$entries = select_zebraqueue_records($record_type, 'updated');
mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
- $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted);
+ $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_usmarc, $records_deleted);
mark_zebraqueue_batch_done($entries);
} else {
my $sth = select_all_records($record_type);
- $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
+ $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_usmarc, $nosanitize);
unless ($do_not_clear_zebraqueue) {
mark_all_zebraqueue_done($record_type);
}
print "REINDEXING zebra\n";
print "====================\n";
}
- my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
+ my $record_fmt = ($as_usmarc) ? 'iso2709' : 'marcxml' ;
if ($process_zebraqueue) {
do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
if %$records_deleted;
sub select_all_biblios {
$table = 'biblioitems'
- if grep { /^$table$/ } @tables_allowed_for_select;
+ unless grep { /^$table$/ } @tables_allowed_for_select;
my $strsth = qq{ SELECT biblionumber FROM $table };
$strsth.=qq{ WHERE $where } if ($where);
$strsth.=qq{ LIMIT $length } if ($length && !$offset);
return $sth;
}
-sub include_xml_wrapper {
- my $as_xml = shift;
- my $record_type = shift;
-
- return 0 unless $as_xml;
- return 1 if $record_type eq 'biblio' and $bib_index_mode eq 'dom';
- return 1 if $record_type eq 'authority' and $auth_index_mode eq 'dom';
- return 0;
-
-}
-
sub export_marc_records_from_sth {
- my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;
+ my ($record_type, $sth, $directory, $as_usmarc, $nosanitize) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open
+ unless $as_usmarc;
+
my $i = 0;
my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
while (my ($record_number) = $sth->fetchrow_array) {
}
next;
}
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc);
if (defined $marc) {
eval {
my $rec;
- if ($as_xml) {
+ if ($as_usmarc) {
+ $rec = $marc->as_usmarc();
+ } else {
$rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
eval {
my $doc = $tester->parse_string($rec);
die "invalid XML: $@";
}
$rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
}
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML");
warn "... specific error is $@" if $verbose_logging;
}
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+ print {$fh} $marcxml_close
+ unless $as_usmarc;
+
close $fh;
return $num_exported;
}
sub export_marc_records_from_list {
- my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;
+ my ($record_type, $entries, $directory, $as_usmarc, $records_deleted) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open
+ unless $as_usmarc;
+
my $i = 0;
# Skip any deleted records. We check for this anyway, but this reduces error spam
@$entries ) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc);
if (defined $marc) {
eval {
my $rec;
- if ($as_xml) {
+ if ( $as_usmarc ) {
+ $rec = $marc->as_usmarc();
+ } else {
$rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
$rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
}
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML");
}
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+
+ print {$fh} $marcxml_close
+ unless $as_usmarc;
+
close $fh;
return $num_exported;
}
sub generate_deleted_marc_records {
- my ($record_type, $entries, $directory, $as_xml) = @_;
+
+ my ($record_type, $entries, $directory, $as_usmarc) = @_;
my $records_deleted = {};
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open
+ unless $as_usmarc;
+
my $i = 0;
foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
print "\r$i" unless ($i++ %100 or !$verbose_logging);
}
my $rec;
- if ($as_xml) {
+ if ( $as_usmarc ) {
+ $rec = $marc->as_usmarc();
+ } else {
$rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ # Remove the record's XML header
$rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
}
print {$fh} $rec;
$records_deleted->{$record_number} = 1;
}
print "\nRecords exported: $i\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
- close $fh;
- return $records_deleted;
+ print {$fh} $marcxml_close
+ unless $as_usmarc;
+ close $fh;
+ return $records_deleted;
}
sub get_corrected_marc_record {
- my ($record_type, $record_number, $noxml) = @_;
+ my ($record_type, $record_number, $as_usmarc) = @_;
- my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
+ my $marc = get_raw_marc_record($record_type, $record_number, $as_usmarc);
if (defined $marc) {
fix_leader($marc);
}
sub get_raw_marc_record {
- my ($record_type, $record_number, $noxml) = @_;
+ my ($record_type, $record_number, $as_usmarc) = @_;
my $marc;
if ($record_type eq 'biblio') {
- if ($noxml) {
+ if ($as_usmarc) {
my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
$fetch_sth->execute($record_number);
if (my ($blob) = $fetch_sth->fetchrow_array) {
my $marc = shift;
my $string;
- if ( length($marc->subfield( 100, "a" )) == 36 ) {
+ my $length_100a = length($marc->subfield( 100, "a" ));
+ if ( $length_100a and $length_100a == 36 ) {
$string = $marc->subfield( 100, "a" );
my $f100 = $marc->field(100);
$marc->delete_field($f100);
$string = sprintf( "%-*s", 35, $string );
}
substr( $string, 22, 6, "frey50" );
- unless ( length($marc->subfield( 100, "a" )) == 36 ) {
+ $length_100a = length($marc->subfield( 100, "a" ));
+ unless ( $length_100a and $length_100a == 36 ) {
$marc->delete_field($marc->field(100));
$marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
}
my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
+ $noshadow //= '';
+
+ if ($noshadow or $reset_index) {
+ $noshadow = '-n';
+ }
+
system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
-
}
sub _flock {
option is recommended only
for advanced user.
- -x export and index as xml instead of is02709 (biblios only).
- use this if you might have records > 99,999 chars,
-
-nosanitize export biblio/authority records directly from DB marcxml
field without sanitizing records. It speed up
dump process but could fail if DB contains badly