$|=1; # flushes output
-# limit for database dumping
my $directory;
my $skip_export;
my $keep_export;
my $do_munge;
my $want_help;
my $as_xml;
+my $process_zebraqueue;
my $result = GetOptions(
'd:s' => \$directory,
'reset' => \$reset,
'a' => \$authorities,
'h|help' => \$want_help,
'x' => \$as_xml,
+ 'z' => \$process_zebraqueue,
);
die $msg;
}
+# Processing the zebraqueue cannot be combined with skipping the export
+# or with resetting the index, so refuse that combination up front.
+if ($process_zebraqueue and ($skip_export or $reset)) {
+    die "Cannot specify -r or -s if -z is specified\n"
+      . "Please do '$0 --help' to see usage.\n";
+}
+
if ($noshadow) {
$noshadow = ' -n ';
}
munge_config();
}
+$dbh->{AutoCommit} = 0; # don't autocommit - want a consistent view of the zebraqueue table
+
if ($authorities) {
- #
- # exporting authorities
- #
- if ($skip_export) {
- print "====================\n";
- print "SKIPPING authorities export\n";
- print "====================\n";
- } else {
- print "====================\n";
- print "exporting authorities\n";
- print "====================\n";
- mkdir "$directory" unless (-d $directory);
- mkdir "$directory/authorities" unless (-d "$directory/authorities");
- my $dbh=C4::Context->dbh;
- my $sth;
- $sth=$dbh->prepare("select authid,marc from auth_header");
- $sth->execute();
- export_marc_records('authority', $sth, "$directory/authorities", $as_xml, $noxml);
- }
-
- #
- # and reindexing everything
- #
- print "====================\n";
- print "REINDEXING zebra\n";
- print "====================\n";
- my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
- do_indexing('authority', 'update', "$directory/authorities", $reset, $noshadow, $record_fmt);
+ index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml);
+ $dbh->commit(); # commit changes to zebraqueue, if any
} else {
print "skipping authorities\n";
}
-#################################################################################################################
-# BIBLIOS
-#################################################################################################################
if ($biblios) {
- #
- # exporting biblios
- #
- if ($skip_export) {
- print "====================\n";
- print "SKIPPING biblio export\n";
- print "====================\n";
- } else {
- print "====================\n";
- print "exporting biblios\n";
- print "====================\n";
- mkdir "$directory" unless (-d $directory);
- mkdir "$directory/biblios" unless (-d "$directory/biblios");
- my $dbh=C4::Context->dbh;
- my $sth = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
- $sth->execute();
- export_marc_records('biblio', $sth, "$directory/biblios", $as_xml, $noxml);
- }
-
- #
- # and reindexing everything
- #
- print "====================\n";
- print "REINDEXING zebra\n";
- print "====================\n";
- my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
- do_indexing('biblio', 'update', "$directory/biblios", $reset, $noshadow, $record_fmt);
+ index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml);
+ $dbh->commit(); # commit changes to zebraqueue, if any
} else {
print "skipping biblios\n";
}
+
print "====================\n";
print "CLEANING\n";
print "====================\n";
}
}
+# index_records($record_type, $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml)
+#
+# Export the records of one type to files below $directory and pass those
+# files to do_indexing().
+#
+#   $record_type        - 'biblio' or 'authority' (the values the callers pass)
+#   $directory          - base directory that receives the export files
+#   $skip_export        - true: do not export, only index what is already
+#                         present in "$directory/$record_type"
+#   $process_zebraqueue - true: export only the records flagged in the
+#                         zebraqueue table (deletions to "del_$record_type",
+#                         updates to "upd_$record_type") and mark the queue
+#                         entries as done
+#   $as_xml, $noxml     - passed through to the export routines; $as_xml also
+#                         selects the record format given to do_indexing()
+#
+# Also reads the file-level $reset and $noshadow settings.  Does not commit
+# the zebraqueue changes; that is left to the caller.  Returns nothing.
+sub index_records {
+    my ($record_type, $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml) = @_;
+
+    my $num_records_exported = 0;
+    my $num_records_deleted = 0;
+    if ($skip_export) {
+        print "====================\n";
+        print "SKIPPING $record_type export\n";
+        print "====================\n";
+    } else {
+        print "====================\n";
+        print "exporting $record_type\n";
+        print "====================\n";
+        mkdir "$directory" unless (-d $directory);
+        mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
+        if ($process_zebraqueue) {
+            # Deletions are handled first; marking them done happens before
+            # the pending updates are selected.
+            my $sth = select_zebraqueue_records($record_type, 'deleted');
+            mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
+            $num_records_deleted = generate_deleted_marc_records($record_type, $sth, "$directory/del_$record_type", $as_xml);
+            mark_zebraqueue_done($record_type, 'deleted');
+            $sth = select_zebraqueue_records($record_type, 'updated');
+            mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
+            $num_records_exported = export_marc_records($record_type, $sth, "$directory/upd_$record_type", $as_xml, $noxml);
+            mark_zebraqueue_done($record_type, 'updated');
+        } else {
+            my $sth = select_all_records($record_type);
+            $num_records_exported = export_marc_records($record_type, $sth, "$directory/$record_type", $as_xml, $noxml);
+        }
+    }
+
+    #
+    # and reindexing everything
+    #
+    print "====================\n";
+    print "REINDEXING zebra\n";
+    print "====================\n";
+    my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
+    if ($process_zebraqueue) {
+        # Only call the indexer for batches that actually contain records.
+        do_indexing($record_type, 'delete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt)
+            if $num_records_deleted;
+        do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt)
+            if $num_records_exported;
+    } else {
+        # When the export is skipped the counter is still 0, but the files
+        # left by an earlier export must nevertheless be indexed.
+        do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt)
+            if ($num_records_exported or $skip_export);
+    }
+    return;
+}
+
+# select_zebraqueue_records($record_type, $update_type)
+#
+# Run a query for the distinct record numbers that are still pending
+# (done = 0) in zebraqueue for the given record type ('biblio' or anything
+# else for authorities) and update type ('deleted' or anything else for
+# updates).  Returns the executed statement handle.
+sub select_zebraqueue_records {
+    my ($record_type, $update_type) = @_;
+
+    my $zebra_server = 'authorityserver';
+    $zebra_server = 'biblioserver' if $record_type eq 'biblio';
+
+    my $queue_operation = 'specialUpdate';
+    $queue_operation = 'recordDelete' if $update_type eq 'deleted';
+
+    my $pending = $dbh->prepare("SELECT DISTINCT biblio_auth_number
+                                 FROM zebraqueue
+                                 WHERE server = ?
+                                 AND operation = ?
+                                 AND done = 0");
+    $pending->execute($zebra_server, $queue_operation);
+    return $pending;
+}
+
+# mark_zebraqueue_done($record_type, $update_type)
+#
+# Flag pending zebraqueue rows as done for one server ('biblio' maps to
+# 'biblioserver', anything else to 'authorityserver').
+#
+#   $update_type 'deleted' - marks every pending row of each record that has
+#                            a pending recordDelete, i.e. the delete itself
+#                            and any specialUpdate queued for that record
+#   anything else          - marks all pending specialUpdate rows
+#
+# The statements run on the file-level $dbh; nothing is committed here.
+sub mark_zebraqueue_done {
+    my ($record_type, $update_type) = @_;
+
+    my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
+    my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
+
+    if ($op eq 'recordDelete') {
+        # z1 is the pending delete, z2 is every pending row for the same
+        # record.  Selecting z2.id (rather than z1.id, which would only ever
+        # return the delete rows and make the join pointless) is what voids
+        # the queued specialUpdates.  The extra derived table (d2) keeps the
+        # UPDATE from selecting directly from the table it modifies.
+        my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
+                                 WHERE id IN (
+                                     SELECT id FROM (
+                                         SELECT z2.id
+                                         FROM zebraqueue z1
+                                         JOIN zebraqueue z2 ON z2.biblio_auth_number = z1.biblio_auth_number
+                                         WHERE z1.done = 0
+                                         AND z1.server = ?
+                                         AND z2.done = 0
+                                         AND z2.server = ?
+                                         AND z1.operation = ?
+                                     ) d2
+                                 )
+                                ");
+        $sth->execute($server, $server, $op); # if we've deleted a record, any prior specialUpdates are void
+    } else {
+        my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
+                                 WHERE server = ?
+                                 AND operation = ?
+                                 AND done = 0");
+        $sth->execute($server, $op);
+    }
+    return;
+}
+
+# select_all_records($record_type)
+#
+# Dispatch to the full-table query for the requested record type: 'biblio'
+# selects all biblios, any other value selects all authorities.  Returns the
+# statement handle produced by the chosen routine.
+sub select_all_records {
+    my ($record_type) = @_;
+
+    if ($record_type eq 'biblio') {
+        return select_all_biblios();
+    }
+    return select_all_authorities();
+}
+
+# select_all_authorities()
+#
+# Query every authid in auth_header and return the executed statement
+# handle, ready for fetching.
+sub select_all_authorities {
+    my $all_authids = $dbh->prepare("SELECT authid FROM auth_header");
+    $all_authids->execute();
+    return $all_authids;
+}
+
+# select_all_biblios()
+#
+# Query the biblionumber of every biblioitems row, in ascending
+# biblionumber order, and return the executed statement handle.
+sub select_all_biblios {
+    my $all_biblionumbers = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
+    $all_biblionumbers->execute();
+    return $all_biblionumbers;
+}
+
sub export_marc_records {
my ($record_type, $sth, $directory, $as_xml, $noxml) = @_;
+ my $num_exported = 0;
open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
my $i = 0;
while (my ($record_number) = $sth->fetchrow_array) {
# to care, though, at least if you're using the GRS-1 filter. It does
# care if you're using the DOM filter, which requires valid XML file(s).
print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc();
+ $num_exported++;
}
}
- print "\nRecords exported: $i\n";
+ print "\nRecords exported: $num_exported\n";
close OUT;
+ return $num_exported;
+}
+
+# generate_deleted_marc_records($record_type, $sth, $directory, $as_xml)
+#
+# For every record number fetched from $sth, write a freshly created MARC
+# record that has only been run through the id-fixing helpers to
+# "$directory/exported_records".
+#
+#   $record_type - 'biblio' uses fix_biblio_ids(), anything else uses
+#                  fix_authority_id()
+#   $sth         - executed statement handle whose first column is the
+#                  record number
+#   $directory   - existing directory that receives the output file
+#   $as_xml      - true: write MARCXML records, false: ISO 2709 (usmarc)
+#
+# Returns the number of records written.  Dies if the output file cannot be
+# opened or closed.
+sub generate_deleted_marc_records {
+    my ($record_type, $sth, $directory, $as_xml) = @_;
+
+    my $export_file = "$directory/exported_records";
+    open(my $out, '>:utf8', $export_file)
+        or die "Cannot open $export_file for writing: $!";
+
+    # The flavour does not change while the loop runs, so look it up once.
+    my $is_unimarc = C4::Context->preference("marcflavour") eq "UNIMARC";
+
+    my $num_exported = 0;
+    while (my ($record_number) = $sth->fetchrow_array) {
+        # Progress display: the running count every 100 records, a dot for each.
+        print "\r$num_exported" unless ($num_exported % 100);
+        print ".";
+
+        my $marc = MARC::Record->new();
+        if ($record_type eq 'biblio') {
+            # The record number is passed for both id arguments; presumably
+            # this avoids looking up the biblioitemnumber of a record that
+            # no longer exists -- TODO confirm against fix_biblio_ids().
+            fix_biblio_ids($marc, $record_number, $record_number);
+        } else {
+            fix_authority_id($marc, $record_number);
+        }
+        fix_unimarc_100($marc) if $is_unimarc;
+
+        print {$out} ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc();
+        $num_exported++;
+    }
+    print "\nRecords exported: $num_exported\n";
+
+    # Buffered write errors only surface at close, so check it.
+    close($out) or die "Cannot close $export_file: $!";
+    return $num_exported;
}
sub get_corrected_marc_record {
# otherwise, Zebra will choke on the record. However, this
# logic belongs in the relevant C4::Biblio APIs.
my ($marc, $biblionumber) = @_;
-
- my $sth = $dbh->prepare(
- "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
- $sth->execute($biblionumber);
- my ($biblioitemnumber) = $sth->fetchrow_array;
- $sth->finish;
- unless ($biblioitemnumber) {
- warn "failed to get biblioitemnumber for biblio $biblionumber";
- return 0;
+ my $biblioitemnumber;
+ if (@_) {
+ $biblioitemnumber = shift;
+ } else {
+ my $sth = $dbh->prepare(
+ "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
+ $sth->execute($biblionumber);
+ ($biblioitemnumber) = $sth->fetchrow_array;
+ $sth->finish;
+ unless ($biblioitemnumber) {
+ warn "failed to get biblioitemnumber for biblio $biblionumber";
+ return 0;
+ }
}
# FIXME - this is cheating on two levels
# for Zebra's sake. However, this really belongs
# in C4::AuthoritiesMarc.
my ($marc, $authid) = @_;
- unless ($marc->field('001')->data() eq $authid){
- print "$authid don't exist for this authority :".$marc->as_formatted;
+ unless ($marc->field('001') and $marc->field('001')->data() eq $authid){
$marc->delete_field($marc->field('001'));
$marc->insert_fields_ordered(MARC::Field->new('001',$authid));
}
-a index authority records
+ -z select only updated and deleted
+ records marked in the zebraqueue
+ table. Cannot be used with -r
+ or -s.
+
-r clear Zebra index before
adding records to index