use Modern::Perl;
+use Koha::Script;
use C4::Context;
-use Getopt::Long;
-use Fcntl qw(:flock);
-use File::Temp qw/ tempdir /;
-use File::Path;
-use C4::Biblio;
+use Getopt::Long qw( GetOptions );
+use Fcntl qw( LOCK_EX LOCK_NB LOCK_UN );
+use File::Temp qw( tempdir );
+use File::Path qw( mkpath rmtree );
+use C4::Biblio qw( GetXmlBiblio );
use C4::AuthoritiesMarc;
-use C4::Items;
+use C4::Items qw( GetItemsInfo Item2Marc );
use Koha::RecordProcessor;
+use Koha::Caches;
use XML::LibXML;
use constant LOCK_FILENAME => 'rebuild..LCK';
my $reset;
my $biblios;
my $authorities;
-my $as_usmarc;
my $as_xml;
my $noshadow;
my $want_help;
my $wait_for_lock = 0;
my $use_flock;
my $table = 'biblioitems';
+my $is_memcached = Koha::Caches->get_instance->memcached_cache;
my $verbose_logging = 0;
my $zebraidx_log_opt = " -v none,fatal,warn ";
'I|skip-index' => \$skip_index,
'nosanitize' => \$nosanitize,
'b' => \$biblios,
- 'noxml' => \$as_usmarc,
'w' => \$noshadow,
'a' => \$authorities,
'h|help' => \$want_help,
}
if ( $as_xml ) {
- warn "Warning: You passed -x which is already the default and is now deprecated·\n";
+ warn "Warning: You passed -x which is already the default and is now deprecated\n";
+ undef $as_xml; # Should not be used later
}
if( not defined $run_as_root and $run_user eq 'root') {
die $msg;
}
-if ( $as_usmarc and $nosanitize ) {
- my $msg = "Cannot specify both -no_xml and -nosanitize\n";
- $msg .= "Please do '$0 --help' to see usage.\n";
- die $msg;
-}
-
if ($process_zebraqueue and ($skip_export or $reset)) {
my $msg = "Cannot specify -r or -s if -z is specified\n";
$msg .= "Please do '$0 --help' to see usage.\n";
$msg .= "Please do '$0 --help' to see usage.\n";
die $msg;
}
+ unless ($is_memcached) {
+ warn "Warning: script running in daemon mode, without recommended caching system (memcached).\n";
+ }
$authorities = 1;
$biblios = 1;
$process_zebraqueue = 1;
die $msg;
}
-our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio' );
-unless ( grep { /^$table$/ } @tables_allowed_for_select ) {
+our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio', 'biblio_metadata' );
+unless ( grep { $_ eq $table } @tables_allowed_for_select ) {
die "Cannot specify -t|--table with value '$table'. Only "
. ( join ', ', @tables_allowed_for_select )
. " are allowed.";
my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
my $kohadir = C4::Context->config('intranetdir');
-my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') // 'dom';
-my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') // 'dom';
-my $dbh = C4::Context->dbh;
-my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
-my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
+my ($biblionumbertagfield,$biblionumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblio.biblionumber" );
+my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = C4::Biblio::GetMarcFromKohaField( "biblioitems.biblioitemnumber" );
my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
};
# Protect against simultaneous updates of the zebra index by using a lock file.
-# Create our own lock directory if its missing. This shouild be created
-# by koha-zebra-ctl.sh or at system installation. If the desired directory
+# Create our own lock directory if it is missing. This should be created
+# by koha-zebra-ctl.sh or at system installation. If the desired directory
# does not exist and cannot be created, we fall back on /tmp - which will
# always work.
# the lockfile)
};
+my $start_time = time();
if ( $verbose_logging ) {
+ my $pretty_time = POSIX::strftime("%H:%M:%S",localtime($start_time));
print "Zebra configuration information\n";
print "================================\n";
print "Zebra biblio directory = $biblioserverdir\n";
print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
print "================================\n";
+ print "Job started: $pretty_time\n";
}
my $tester = XML::LibXML->new();
+my $dbh;
# The main work is done here by calling do_one_pass(). We have added locking to
# avoid race conditions between full rebuilds and incremental updates either from
while (1) {
# For incremental updates, skip the update if the updates are locked
if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
- do_one_pass() if ( zebraqueue_not_empty() );
+ eval {
+ $dbh = C4::Context->dbh;
+ if( zebraqueue_not_empty() ) {
+ Koha::Caches->flush_L1_caches() if $is_memcached;
+ do_one_pass();
+ }
+ };
+ if ($@ && $verbose_logging) {
+ warn "Warning : $@\n";
+ }
_flock($LockFH, LOCK_UN);
}
sleep $daemon_sleep;
# all one-off invocations
my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
if (_flock($LockFH, $lock_mode)) {
+ $dbh = C4::Context->dbh;
do_one_pass();
_flock($LockFH, LOCK_UN);
} else {
if ( $verbose_logging ) {
print "====================\n";
+ print "Indexing complete: ". pretty_time() . "\n";
+ print "====================\n";
print "CLEANING\n";
print "====================\n";
}
print "parameter";
}
print "\n";
- print "if you just want to rebuild zebra after changing the record.abs\n";
- print "or another zebra config file\n";
+ print "if you just want to rebuild zebra after changing zebra config files\n";
} else {
unless ($use_tempdir) {
# if we're using a temporary directory
sub do_one_pass {
if ($authorities) {
- index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+ index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
} else {
print "skipping authorities\n" if ( $verbose_logging );
}
if ($biblios) {
- index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+ index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
} else {
print "skipping biblios\n" if ( $verbose_logging );
}
} # ---------- end of subroutine check_zebra_dirs ----------
sub index_records {
- my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
+ my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
my $num_records_exported = 0;
my $records_deleted = {};
} else {
if ( $verbose_logging ) {
print "====================\n";
- print "exporting $record_type\n";
+ print "exporting $record_type " . pretty_time() . "\n";
print "====================\n";
}
mkdir "$directory" unless (-d $directory);
unless ( $process_zebraqueue_skip_deletes ) {
$entries = select_zebraqueue_records($record_type, 'deleted');
mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
- $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_usmarc);
+ $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type");
mark_zebraqueue_batch_done($entries);
}
$entries = select_zebraqueue_records($record_type, 'updated');
mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
- $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_usmarc, $records_deleted);
+ $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $records_deleted);
mark_zebraqueue_batch_done($entries);
} else {
my $sth = select_all_records($record_type);
- $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_usmarc, $nosanitize);
+ $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $nosanitize);
unless ($do_not_clear_zebraqueue) {
mark_all_zebraqueue_done($record_type);
}
} else {
if ( $verbose_logging ) {
print "====================\n";
- print "REINDEXING zebra\n";
+ print "REINDEXING zebra " . pretty_time() . "\n";
print "====================\n";
}
- my $record_fmt = ($as_usmarc) ? 'iso2709' : 'marcxml' ;
+ my $record_fmt = 'marcxml';
if ($process_zebraqueue) {
do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
if %$records_deleted;
sub select_all_biblios {
$table = 'biblioitems'
- unless grep { /^$table$/ } @tables_allowed_for_select;
- my $strsth = qq{ SELECT biblionumber FROM $table };
+ unless grep { $_ eq $table } @tables_allowed_for_select;
+ my $strsth = qq{ SELECT DISTINCT biblionumber FROM $table };
$strsth.=qq{ WHERE $where } if ($where);
$strsth.=qq{ LIMIT $length } if ($length && !$offset);
$strsth.=qq{ LIMIT $offset,$length } if ($offset);
}
sub export_marc_records_from_sth {
- my ($record_type, $sth, $directory, $as_usmarc, $nosanitize) = @_;
+ my ($record_type, $sth, $directory, $nosanitize) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
- print {$fh} $marcxml_open
- unless $as_usmarc;
+ print {$fh} $marcxml_open;
my $i = 0;
- my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
+ my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField( "items.itemnumber" );
while (my ($record_number) = $sth->fetchrow_array) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
}
next;
}
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number);
if (defined $marc) {
eval {
- my $rec;
- if ($as_usmarc) {
- $rec = $marc->as_usmarc();
- } else {
- $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
- eval {
- my $doc = $tester->parse_string($rec);
- };
- if ($@) {
- die "invalid XML: $@";
- }
- $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
+ my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ eval {
+ my $doc = $tester->parse_string($rec);
+ };
+ if ($@) {
+ die "invalid XML: $@";
}
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) XML";
warn "... specific error is $@" if $verbose_logging;
}
}
}
- print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- print {$fh} $marcxml_close
- unless $as_usmarc;
+ print "\nRecords exported: $num_exported " . pretty_time() . "\n" if ( $verbose_logging );
+ print {$fh} $marcxml_close;
close $fh;
return $num_exported;
}
sub export_marc_records_from_list {
- my ($record_type, $entries, $directory, $as_usmarc, $records_deleted) = @_;
+ my ($record_type, $entries, $directory, $records_deleted) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- print {$fh} $marcxml_open
- unless $as_usmarc;
+ print {$fh} $marcxml_open;
my $i = 0;
@$entries ) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $as_usmarc);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number);
if (defined $marc) {
eval {
- my $rec;
- if ( $as_usmarc ) {
- $rec = $marc->as_usmarc();
- } else {
- $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
- $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- }
+ my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($as_usmarc ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) XML";
}
}
}
- print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
+ print "\nRecords exported: $num_exported " . pretty_time() . "\n" if ( $verbose_logging );
- print {$fh} $marcxml_close
- unless $as_usmarc;
+ print {$fh} $marcxml_close;
close $fh;
return $num_exported;
sub generate_deleted_marc_records {
- my ($record_type, $entries, $directory, $as_usmarc) = @_;
+ my ($record_type, $entries, $directory) = @_;
my $records_deleted = {};
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- print {$fh} $marcxml_open
- unless $as_usmarc;
+ print {$fh} $marcxml_open;
my $i = 0;
foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
fix_unimarc_100($marc);
}
- my $rec;
- if ( $as_usmarc ) {
- $rec = $marc->as_usmarc();
- } else {
- $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
- # Remove the record's XML header
- $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- }
+ my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ # Remove the record's XML header
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
print {$fh} $rec;
$records_deleted->{$record_number} = 1;
}
- print "\nRecords exported: $i\n" if ( $verbose_logging );
+ print "\nRecords exported: $i " . pretty_time() . "\n" if ( $verbose_logging );
- print {$fh} $marcxml_close
- unless $as_usmarc;
+ print {$fh} $marcxml_close;
close $fh;
return $records_deleted;
}
sub get_corrected_marc_record {
- my ($record_type, $record_number, $as_usmarc) = @_;
+ my ( $record_type, $record_number ) = @_;
- my $marc = get_raw_marc_record($record_type, $record_number, $as_usmarc);
+ my $marc = get_raw_marc_record( $record_type, $record_number );
- if (defined $marc) {
+ if ( defined $marc ) {
fix_leader($marc);
- if ($record_type eq 'authority') {
- fix_authority_id($marc, $record_number);
- } elsif ($record_type eq 'biblio' && C4::Context->preference('IncludeSeeFromInSearches')) {
- my $normalizer = Koha::RecordProcessor->new( { filters => 'EmbedSeeFromHeadings' } );
+ if ( $record_type eq 'authority' ) {
+ fix_authority_id( $marc, $record_number );
+ }
+ elsif ( $record_type eq 'biblio' ) {
+
+ my @filters;
+ push @filters, 'EmbedItemsAvailability';
+ push @filters, 'EmbedSeeFromHeadings'
+ if C4::Context->preference('IncludeSeeFromInSearches');
+
+ my $normalizer = Koha::RecordProcessor->new( { filters => \@filters } );
$marc = $normalizer->process($marc);
}
- if (C4::Context->preference("marcflavour") eq "UNIMARC") {
+ if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
fix_unimarc_100($marc);
}
}
}
sub get_raw_marc_record {
- my ($record_type, $record_number, $as_usmarc) = @_;
+ my ($record_type, $record_number) = @_;
my $marc;
if ($record_type eq 'biblio') {
- if ($as_usmarc) {
- my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
- $fetch_sth->execute($record_number);
- if (my ($blob) = $fetch_sth->fetchrow_array) {
- $marc = MARC::Record->new_from_usmarc($blob);
- unless ($marc) {
- warn "error creating MARC::Record from $blob";
- }
- }
- # failure to find a bib is not a problem -
- # a delete could have been done before
- # trying to process a record update
-
- $fetch_sth->finish();
- return unless $marc;
- } else {
- eval { $marc = GetMarcBiblio($record_number, 1); };
- if ($@ || !$marc) {
- # here we do warn since catching an exception
- # means that the bib was found but failed
- # to be parsed
- warn "error retrieving biblio $record_number";
- return;
- }
+ eval { $marc = C4::Biblio::GetMarcBiblio({ biblionumber => $record_number, embed_items => 1 }); };
+ if ($@ || !$marc) {
+ # here we do warn since catching an exception
+ # means that the bib was found but failed
+ # to be parsed
+ warn "error retrieving biblio $record_number";
+ return;
}
} else {
eval { $marc = GetAuthority($record_number); };
my $marc = shift;
my $string;
- if ( length($marc->subfield( 100, "a" )) == 36 ) {
+ my $length_100a = length($marc->subfield( 100, "a" ));
+ if ( $length_100a and $length_100a == 36 ) {
$string = $marc->subfield( 100, "a" );
my $f100 = $marc->field(100);
$marc->delete_field($f100);
$string = sprintf( "%-*s", 35, $string );
}
substr( $string, 22, 6, "frey50" );
- unless ( length($marc->subfield( 100, "a" )) == 36 ) {
+ $length_100a = length($marc->subfield( 100, "a" ));
+ unless ( $length_100a and $length_100a == 36 ) {
$marc->delete_field($marc->field(100));
$marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
}
return ( $fh, $dir.'/'.LOCK_FILENAME );
}
+# Return a progress stamp for verbose logging: the current local wall-clock
+# time followed by the time elapsed since $start_time, formatted like
+# "14:03:27 [00:12:05]".
+#
+# Takes no arguments. Reads the file-level $start_time (epoch seconds).
+# Elapsed hours are deliberately NOT wrapped at 24, so a run longer than a
+# day is reported as e.g. [26:10:00] rather than a misleading [02:10:00].
+sub pretty_time {
+    require POSIX;    # make sure POSIX::strftime is loaded before calling it
+
+    my $now     = time;
+    my $elapsed = $now - $start_time;
+
+    # If the system clock stepped backwards, avoid printing negative fields.
+    $elapsed = 0 if $elapsed < 0;
+
+    my $seconds = $elapsed % 60;
+    my $minutes = int( $elapsed / 60 ) % 60;
+    my $hours   = int( $elapsed / 3600 );
+
+    my $now_pretty     = POSIX::strftime( "%H:%M:%S", localtime($now) );
+    my $elapsed_pretty = sprintf "[%02d:%02d:%02d]", $hours, $minutes, $seconds;
+
+    return "$now_pretty $elapsed_pretty";
+}
+
sub print_usage {
print <<_USAGE_;
$0: reindex MARC bibs and/or authorities in Zebra.
already exported the records
in a previous run.
- -noxml index from ISO MARC blob
- instead of MARC XML. This
- option is recommended only
- for advanced user.
-
-nosanitize export biblio/authority records directly from DB marcxml
field without sanitizing records. It speed up
dump process but could fail if DB contains badly
to wait for the lock to free and then continue
processing the rebuild request,
- --table specify a table (can be items, biblioitems or biblio) to retrieve biblionumber to index.
+ --table specify a table (can be items, biblioitems, biblio, biblio_metadata) to retrieve biblionumber to index.
biblioitems is the default value.
--help or -h show this message.