#!/usr/bin/perl
use strict;
+#use warnings; FIXME - Bug 2505
use C4::Context;
use Getopt::Long;
#
$|=1; # flushes output
-
+# If the cron job starts us in an unreadable dir, we will break without
+# this.
+chdir $ENV{HOME} if (!(-r '.'));
my $directory;
+my $nosanitize;
my $skip_export;
my $keep_export;
my $reset;
'reset' => \$reset,
's' => \$skip_export,
'k' => \$keep_export,
+ 'nosanitize' => \$nosanitize,
'b' => \$biblios,
'noxml' => \$noxml,
'w' => \$noshadow,
die $msg;
}
+# -nosanitize writes the MARCXML fetched for each record straight to the
+# export file, so it is only usable together with XML export (-x).
+# Reject the combination early with a message naming the real constraint.
+if ( !$as_xml and $nosanitize ) {
+    my $msg = "Cannot specify -nosanitize without -x\n";
+    $msg .= "Please do '$0 --help' to see usage.\n";
+    die $msg;
+}
+
if ($process_zebraqueue and ($skip_export or $reset)) {
my $msg = "Cannot specify -r or -s if -z is specified\n";
$msg .= "Please do '$0 --help' to see usage.\n";
}
if ($authorities) {
- index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
+ index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
} else {
- print "skipping authorities\n";
+ print "skipping authorities\n" if ( $verbose_logging );
}
if ($biblios) {
- index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt);
+ index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
} else {
- print "skipping biblios\n";
+ print "skipping biblios\n" if ( $verbose_logging );
}
}
}
+# Make sure the zebra directory layout exists under the provided path.
+#
+# Parameters:
+#   $base - directory under which the zebra directories are expected.
+#
+# Checks $base itself plus its 'key', 'register' and 'shadow'
+# subdirectories. Any that are missing are created, and each creation is
+# reported on STDOUT. Dies if a missing directory cannot be created.
+#
+# Returns 1 if at least one directory had to be created, 0 otherwise.
+sub check_zebra_dirs {
+    my ($base) = shift() . '/';
+    my $needed_repairing = 0;
+    # The empty entry stands for $base itself; it comes first so that the
+    # parent exists before its subdirectories are created.
+    my @dirs = ( '', 'key', 'register', 'shadow' );
+    foreach my $dir (@dirs) {
+        my $bdir = $base . $dir;
+        if (! -d $bdir) {
+            $needed_repairing = 1;
+            # Low-precedence 'or' is required here: with '||' the die would
+            # bind to $bdir (always true) and a failed mkdir would pass
+            # silently.
+            mkdir $bdir or die "Unable to create '$bdir': $!\n";
+            print "$0: needed to create '$bdir'\n";
+        }
+    }
+    return $needed_repairing;
+} # ---------- end of subroutine check_zebra_dirs ----------
+
sub index_records {
- my ($record_type, $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt) = @_;
+ my ($record_type, $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
my $num_records_exported = 0;
my $num_records_deleted = 0;
+ my $need_reset = check_zebra_dirs($server_dir);
+ if ($need_reset) {
+ print "$0: found broken zebra server directories: forcing a rebuild\n";
+ $reset = 1;
+ }
if ($skip_export && $verbose_logging) {
print "====================\n";
print "SKIPPING $record_type export\n";
mark_zebraqueue_batch_done($entries);
} else {
my $sth = select_all_records($record_type);
- $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml);
+ $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
unless ($do_not_clear_zebraqueue) {
mark_all_zebraqueue_done($record_type);
}
if $num_records_exported;
} else {
do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
- if $num_records_exported;
+ if ($num_records_exported or $skip_export);
}
}
+
sub select_zebraqueue_records {
my ($record_type, $update_type) = @_;
}
sub export_marc_records_from_sth {
- my ($record_type, $sth, $directory, $as_xml, $noxml) = @_;
+ my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;
my $num_exported = 0;
open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
while (my ($record_number) = $sth->fetchrow_array) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
+ if ( $nosanitize ) {
+ my $marcxml = $record_type eq 'biblio'
+ ? GetXmlBiblio( $record_number )
+ : GetAuthorityXML( $record_number );
+ if ( $marcxml ) {
+ print OUT $marcxml if $marcxml;
+ $num_exported++;
+ }
+ next;
+ }
my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
if (defined $marc) {
# FIXME - when more than one record is exported and $as_xml is true,
-x export and index as xml instead of is02709 (biblios only).
use this if you might have records > 99,999 chars,
+ -nosanitize export biblio/authority records directly from DB marcxml
+ field without sanitizing records. It speed up
+ dump process but could fail if DB contains badly
+ encoded records. Works only with -x,
+
-w skip shadow indexing for this batch
-y do NOT clear zebraqueue after indexing; normally,