X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=misc%2Fmigration_tools%2Frebuild_zebra.pl;h=dadf43f0b545a83a30421501681420a9f896db55;hb=6be9d2b27dd47c859eb6ab0cfd873b2059206986;hp=f5d503e9ac21c43a3301088eebdb0e98491d6c05;hpb=a3999812e691110f2e16fffd91ce0ed82d69178c;p=koha_fer diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index f5d503e9ac..dadf43f0b5 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -1,36 +1,110 @@ #!/usr/bin/perl +use strict; +#use warnings; FIXME - Bug 2505 + use C4::Context; use Getopt::Long; +use File::Temp qw/ tempdir /; +use File::Path; use C4::Biblio; use C4::AuthoritiesMarc; +use C4::Items; -use strict; # # script that checks zebradir structure & create directories & mandatory files if needed # # $|=1; # flushes output - -# limit for database dumping -my $limit;# = "LIMIT 500"; +# If the cron job starts us in an unreadable dir, we will break without +# this. 
chdir $ENV{HOME} if (!(-r '.'));

# ---------------------------------------------------------------------
# Command-line options
# ---------------------------------------------------------------------
my $directory;
my $nosanitize;
my $skip_export;
my $keep_export;
my $reset;
my $biblios;
my $authorities;
my $noxml;
my $noshadow;
my $do_munge;
my $want_help;
my $as_xml;
my $process_zebraqueue;
my $do_not_clear_zebraqueue;
my $verbose_logging;
my $zebraidx_log_opt = " -v none,fatal,warn ";
my $result = GetOptions(
    'd:s'           => \$directory,
    'r|reset'       => \$reset,
    's'             => \$skip_export,
    'k'             => \$keep_export,
    'nosanitize'    => \$nosanitize,
    'b'             => \$biblios,
    'noxml'         => \$noxml,
    'w'             => \$noshadow,
    'munge-config'  => \$do_munge,
    'a'             => \$authorities,
    'h|help'        => \$want_help,
    'x'             => \$as_xml,
    'y'             => \$do_not_clear_zebraqueue,
    'z'             => \$process_zebraqueue,
    'v'             => \$verbose_logging,
);

if (not $result or $want_help) {
    print_usage();
    # An explicit --help is a success; a failed option parse is not.
    exit( $result ? 0 : 1 );
}

if (not $biblios and not $authorities) {
    my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

if ($authorities and $as_xml) {
    my $msg = "Cannot specify both -a and -x\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

if ( !$as_xml and $nosanitize ) {
    # The condition tests for -nosanitize without -x, so say exactly that.
    my $msg = "Cannot specify -nosanitize without -x\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

if ($process_zebraqueue and ($skip_export or $reset)) {
    my $msg = "Cannot specify -r or -s if -z is specified\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

if ($process_zebraqueue and $do_not_clear_zebraqueue) {
    my $msg = "Cannot specify both -y and -z\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

if ($noshadow) {
    $noshadow = ' -n ';
}

# -v is for verbose, which seems backwards here because of how logging is set
# on the CLI of zebraidx.  It works this way.  The default is to not log much
if ($verbose_logging) {
    $zebraidx_log_opt = '';
}

my $use_tempdir = 0;
unless ($directory) {
    $use_tempdir = 1;
    $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
}

my $biblioserverdir    = C4::Context->zebraconfig('biblioserver')->{directory};
my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};

my $kohadir = C4::Context->config('intranetdir');
my $dbh     = C4::Context->dbh;
my ($biblionumbertagfield, $biblionumbertagsubfield)
    = GetMarcFromKohaField("biblio.biblionumber", "");
my ($biblioitemnumbertagfield, $biblioitemnumbertagsubfield)
    = GetMarcFromKohaField("biblioitems.biblioitemnumber", "");

if ( $verbose_logging ) {
    print "Zebra configuration information\n";
    print "================================\n";
    print "Zebra biblio directory      = $biblioserverdir\n";
    print "Zebra authorities directory = $authorityserverdir\n";
    print "Koha directory              = $kohadir\n";
    print "BIBLIONUMBER in :     $biblionumbertagfield\$$biblionumbertagsubfield\n";
    print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
    print "================================\n";
}

if ($do_munge) {
    munge_config();
}

if ($authorities) {
    index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
} else {
    print "skipping authorities\n" if ( $verbose_logging );
}

if ($biblios) {
    index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
} else {
    print "skipping biblios\n" if ( $verbose_logging );
}


if ( $verbose_logging ) {
    print "====================\n";
    print "CLEANING\n";
    print "====================\n";
}
if ($keep_export) {
    print "NOTHING cleaned : the export $directory has been kept.\n";
    print "You can re-run this script with the -s ";
    if ($use_tempdir) {
        print " and -d $directory parameters";
    } else {
        print "parameter";
    }
    print "\n";
    print "if you just want to rebuild zebra after changing the record.abs\n";
    print "or another zebra config file\n";
} else {
    unless ($use_tempdir) {
        # if we're using a temporary directory
        # created by File::Temp, it will be removed
        # automatically.
        rmtree($directory, 0, 1);
        print "directory $directory deleted\n";
    }
}

# This checks to see if the zebra directories exist under the provided path.
# If they don't, then zebra is likely to spit the dummy.  This returns true
# if the directories had to be created, false otherwise.
# Dies if a missing directory cannot be created.
sub check_zebra_dirs {
    my $base = shift() . '/';
    my $needed_repairing = 0;
    foreach my $dir ( '', 'key', 'register', 'shadow' ) {
        my $bdir = $base . $dir;
        next if -d $bdir;
        $needed_repairing = 1;
        # 'or', not '||': with '||' the die attaches to $bdir (always true)
        # and a failed mkdir would go unnoticed.
        mkdir $bdir or die "Unable to create '$bdir': $!\n";
        print "$0: needed to create '$bdir'\n";
    }
    return $needed_repairing;
}   # ----------  end of subroutine check_zebra_dirs  ----------

# Export (unless skipped) and then index all records of one type.
#   $record_type  'biblio' or 'authority'
#   $directory    export directory; sub-directories are created below it
#   $server_dir   Zebra data directory of the matching server
# The remaining parameters mirror the command-line switches.
# Reads and may set the file-level $reset; reads the file-level $noshadow.
sub index_records {
    my ($record_type, $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;

    my $num_records_exported = 0;
    my $records_deleted;
    my $need_reset = check_zebra_dirs($server_dir);
    if ($need_reset) {
        print "$0: found broken zebra server directories: forcing a rebuild\n";
        $reset = 1;
    }
    if ($skip_export) {
        # Skipping must not depend on -v: only the banner does.
        if ( $verbose_logging ) {
            print "====================\n";
            print "SKIPPING $record_type export\n";
            print "====================\n";
        }
    } else {
        if ( $verbose_logging ) {
            print "====================\n";
            print "exporting $record_type\n";
            print "====================\n";
        }
        mkdir "$directory" unless (-d $directory);
        mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
        if ($process_zebraqueue) {
            my $entries = select_zebraqueue_records($record_type, 'deleted');
            mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
            $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml);
            mark_zebraqueue_batch_done($entries);
            $entries = select_zebraqueue_records($record_type, 'updated');
            mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
            $num_records_exported = export_marc_records_from_list($record_type,
                                                                  $entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted);
            mark_zebraqueue_batch_done($entries);
        } else {
            my $sth = select_all_records($record_type);
            $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
            unless ($do_not_clear_zebraqueue) {
                mark_all_zebraqueue_done($record_type);
            }
        }
    }

    #
    # and reindexing everything
    #
    if ( $verbose_logging ) {
        print "====================\n";
        print "REINDEXING zebra\n";
        print "====================\n";
    }
    my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
    if ($process_zebraqueue) {
        do_indexing($record_type, 'delete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
            if $records_deleted and %$records_deleted;
        do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
            if $num_records_exported;
    } else {
        do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
            if ($num_records_exported or $skip_export);
    }
    return;
}


# Return an arrayref of hashrefs (id, biblio_auth_number) for the pending
# zebraqueue rows of the given record type.  $update_type 'deleted' selects
# recordDelete operations; anything else selects specialUpdate.
sub select_zebraqueue_records {
    my ($record_type, $update_type) = @_;

    my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
    my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';

    my $sth = $dbh->prepare("SELECT id, biblio_auth_number
                             FROM zebraqueue
                             WHERE server = ?
                             AND   operation = ?
                             AND   done = 0
                             ORDER BY id DESC");
    $sth->execute($server, $op);
    my $entries = $sth->fetchall_arrayref({});
    return $entries;
}

# Flag every pending zebraqueue row of the given record type as done.
sub mark_all_zebraqueue_done {
    my ($record_type) = @_;

    my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';

    my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
                             WHERE server = ?
                             AND done = 0");
    $sth->execute($server);
    return;
}

# Flag the given zebraqueue entries (arrayref of hashrefs with an 'id' key)
# as done, inside a single transaction.
sub mark_zebraqueue_batch_done {
    my ($entries) = @_;

    $dbh->{AutoCommit} = 0;
    my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
    foreach my $id (map { $_->{id} } @$entries) {
        $sth->execute($id);
    }
    # Commit once the whole batch has been written (it used to be issued
    # before the first UPDATE, where it had nothing to commit).
    $dbh->commit();
    $dbh->{AutoCommit} = 1;
    return;
}

# Return an executed statement handle yielding every record number of the
# given type.
sub select_all_records {
    my $record_type = shift;
    return ($record_type eq 'biblio') ? select_all_biblios() : select_all_authorities();
}

sub select_all_authorities {
    my $sth = $dbh->prepare("SELECT authid FROM auth_header");
    $sth->execute();
    return $sth;
}

sub select_all_biblios {
    my $sth = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
    $sth->execute();
    return $sth;
}

# Helper for the -nosanitize export: append the item fields of a biblio to
# its stored MARCXML and return the merged XML.  Returns $marcxml unchanged
# when there is nothing to merge or the XML does not look as expected.
sub _append_items_to_marcxml {
    my ($marcxml, $record_number, $itemtag) = @_;

    return $marcxml unless $marcxml;
    my @items = GetItemsInfo($record_number);
    return $marcxml unless @items;

    my $items_record = MARC::Record->new;
    my @item_fields;
    foreach my $item (@items) {
        my $item_marc = Item2Marc($item, $record_number);
        push @item_fields, $item_marc->field($itemtag);
    }
    $items_record->insert_fields_ordered(@item_fields);
    my $itemsxml = $items_record->as_xml_record();

    # NOTE(review): the search strings of the original splice were damaged
    # in the copy under review (an empty string and a bare '\n').  This
    # assumes the intent was: keep the biblio XML up to its closing
    # </record>, then append the item fields together with the closing tag
    # of the items document -- confirm against the upstream file.
    my $close_at = rindex($marcxml, '</record>');
    if ( $close_at >= 0 and $itemsxml =~ s{\A.*?<record\b[^>]*>\s*}{}s ) {
        $marcxml = substr($marcxml, 0, $close_at) . $itemsxml;
    }
    return $marcxml;
}

# Write every record delivered by $sth to "$directory/exported_records" and
# return the number of records written.  With $nosanitize the stored MARCXML
# is dumped as-is (plus item fields for biblios); otherwise each record goes
# through get_corrected_marc_record().  Reads the file-level $verbose_logging.
sub export_marc_records_from_sth {
    my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;

    my $num_exported = 0;
    open (my $out, '>:utf8', "$directory/exported_records") or die $!;
    my $i = 0;
    my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
    while (my ($record_number) = $sth->fetchrow_array) {
        print "." if ( $verbose_logging );
        print "\r$i" unless ($i++ %100 or !$verbose_logging);
        if ( $nosanitize ) {
            my $marcxml = $record_type eq 'biblio'
                        ? GetXmlBiblio( $record_number )
                        : GetAuthorityXML( $record_number );
            if ($record_type eq 'biblio') {
                $marcxml = _append_items_to_marcxml($marcxml, $record_number, $itemtag);
            }
            if ( $marcxml ) {
                print {$out} $marcxml;
                $num_exported++;
            }
            next;
        }
        my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
        if (defined $marc) {
            # FIXME - when more than one record is exported and $as_xml is true,
            # the output file is not valid XML - it's just multiple <record> elements
            # strung together with no single root element.  zebraidx doesn't seem
            # to care, though, at least if you're using the GRS-1 filter.  It does
            # care if you're using the DOM filter, which requires valid XML file(s).
            eval {
                my $serialized = $as_xml
                    ? $marc->as_xml_record(C4::Context->preference('marcflavour'))
                    : $marc->as_usmarc();
                print {$out} $serialized;
                $num_exported++;
            };
            if ($@) {
                warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
            }
        }
    }
    print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
    close $out or die "Unable to close $directory/exported_records: $!\n";
    return $num_exported;
}

# Write the records named in $entries (zebraqueue rows) to
# "$directory/exported_records", skipping duplicates and anything listed in
# $records_deleted.  Returns the number of records written.
sub export_marc_records_from_list {
    my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;

    my $num_exported = 0;
    open (my $out, '>:utf8', "$directory/exported_records") or die $!;
    my $i = 0;

    # Skip any deleted records.  We check for this anyway, but this reduces error spam
    my %found = %{ $records_deleted || {} };
    foreach my $record_number ( map { $_->{biblio_auth_number} }
                                grep { !$found{ $_->{biblio_auth_number} }++ }
                                @$entries ) {
        print "." if ( $verbose_logging );
        print "\r$i" unless ($i++ %100 or !$verbose_logging);
        my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
        if (defined $marc) {
            # FIXME - when more than one record is exported and $as_xml is true,
            # the output file is not valid XML - it's just multiple <record> elements
            # strung together with no single root element.  zebraidx doesn't seem
            # to care, though, at least if you're using the GRS-1 filter.  It does
            # care if you're using the DOM filter, which requires valid XML file(s).
            #
            # Same per-record protection as export_marc_records_from_sth: one
            # unserialisable record must not abort the whole batch.
            eval {
                my $serialized = $as_xml
                    ? $marc->as_xml_record(C4::Context->preference('marcflavour'))
                    : $marc->as_usmarc();
                print {$out} $serialized;
                $num_exported++;
            };
            if ($@) {
                warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
            }
        }
    }
    print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
    close $out or die "Unable to close $directory/exported_records: $!\n";
    return $num_exported;
}

# Write one stub MARC record (ids only) per zebraqueue delete entry to
# "$directory/exported_records".  Returns a hashref whose keys are the
# record numbers written.
sub generate_deleted_marc_records {
    my ($record_type, $entries, $directory, $as_xml) = @_;

    my $records_deleted = {};
    open (my $out, '>:utf8', "$directory/exported_records") or die $!;
    my $i = 0;
    foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
        print "\r$i" unless ($i++ %100 or !$verbose_logging);
        print "." if ( $verbose_logging );

        my $marc = MARC::Record->new();
        if ($record_type eq 'biblio') {
            fix_biblio_ids($marc, $record_number, $record_number);
        } else {
            fix_authority_id($marc, $record_number);
        }
        if (C4::Context->preference("marcflavour") eq "UNIMARC") {
            fix_unimarc_100($marc);
        }

        my $serialized = $as_xml
            ? $marc->as_xml_record(C4::Context->preference("marcflavour"))
            : $marc->as_usmarc();
        print {$out} $serialized;

        $records_deleted->{$record_number} = 1;
    }
    print "\nRecords exported: $i\n" if ( $verbose_logging );
    close $out or die "Unable to close $directory/exported_records: $!\n";
    return $records_deleted;
}

# Fetch a record and make sure leader and identifiers are fit for Zebra.
# Returns the MARC::Record, or nothing when the record could not be
# retrieved or (biblios) its biblioitemnumber could not be found.
sub get_corrected_marc_record {
    my ($record_type, $record_number, $noxml) = @_;

    my $marc = get_raw_marc_record($record_type, $record_number, $noxml);

    if (defined $marc) {
        fix_leader($marc);
        if ($record_type eq 'biblio') {
            my $succeeded = fix_biblio_ids($marc, $record_number);
            return unless $succeeded;
        } else {
            fix_authority_id($marc, $record_number);
        }
        if (C4::Context->preference("marcflavour") eq "UNIMARC") {
            fix_unimarc_100($marc);
        }
    }

    return $marc;
}

# Load one record from the database.  Biblios come either from the ISO 2709
# blob ($noxml) or through GetMarcBiblio, and get their items embedded;
# authorities come through GetAuthority.  Returns nothing on failure.
sub get_raw_marc_record {
    my ($record_type, $record_number, $noxml) = @_;

    my $marc;
    if ($record_type eq 'biblio') {
        if ($noxml) {
            my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
            $fetch_sth->execute($record_number);
            if (my ($blob) = $fetch_sth->fetchrow_array) {
                # A corrupt blob must not take the whole run down.
                $marc = eval { MARC::Record->new_from_usmarc($blob) };
                unless ($marc) {
                    warn "error creating MARC::Record from $blob";
                }
            }
            # failure to find a bib is not a problem -
            # a delete could have been done before
            # trying to process a record update

            $fetch_sth->finish();
            return unless $marc;
        } else {
            eval { $marc = GetMarcBiblio($record_number); };
            if ($@ || !$marc) {
                # here we do warn since catching an exception
                # means that the bib was found but failed
                # to be parsed
                warn "error retrieving biblio $record_number";
                return;
            }
        }
        # ITEM
        C4::Biblio::EmbedItemsInMarcBiblio($marc, $record_number);
    } else {
        eval { $marc = GetAuthority($record_number); };
        if ($@) {
            warn "error retrieving authority $record_number";
            return;
        }
    }
    return $marc;
}

sub fix_leader {
    # FIXME - this routine is suspect
    # It blanks the Leader/00-05 and Leader/12-16 to
    # force them to be recalculated correct when
    # the $marc->as_usmarc() or $marc->as_xml() is called.
    # But why is this necessary?  It would be a serious bug
    # in MARC::Record (definitely) and MARC::File::XML (arguably)
    # if they are emitting incorrect leader values.
    my $marc = shift;

    my $leader = $marc->leader;
    # Replacement strings are exactly as long as the spans they overwrite,
    # so the leader keeps its layout.
    substr($leader,  0, 5, ' ' x 5);
    substr($leader, 10, 7, '22' . ' ' x 5);
    $marc->leader(substr($leader, 0, 24));
    return;
}

sub fix_biblio_ids {
    # FIXME - it is essential to ensure that the biblionumber is present,
    #         otherwise, Zebra will choke on the record.  However, this
    #         logic belongs in the relevant C4::Biblio APIs.
    #
    # Arguments: $marc, $biblionumber and optionally $biblioitemnumber (looked
    # up in biblioitems when omitted).  Returns 1 on success, 0 when the
    # biblioitemnumber cannot be found.
    my $marc = shift;
    my $biblionumber = shift;
    my $biblioitemnumber;
    if (@_) {
        $biblioitemnumber = shift;
    } else {
        my $sth = $dbh->prepare(
            "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
        $sth->execute($biblionumber);
        ($biblioitemnumber) = $sth->fetchrow_array;
        $sth->finish;
        unless ($biblioitemnumber) {
            warn "failed to get biblioitemnumber for biblio $biblionumber";
            return 0;
        }
    }

    # FIXME - this is cheating on two levels
    # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
    # 2. Making sure that the biblionumber and biblioitemnumber are correct and
    #    present in the MARC::Record object ought to be part of GetMarcBiblio.
    #
    # On the other hand, this better for now than what rebuild_zebra.pl used to
    # do, which was duplicate the code for inserting the biblionumber
    # and biblioitemnumber
    C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);

    return 1;
}

sub fix_authority_id {
    # FIXME - as with fix_biblio_ids, the authid must be present
    #         for Zebra's sake.  However, this really belongs
    #         in C4::AuthoritiesMarc.
    my ($marc, $authid) = @_;
    my $f001 = $marc->field('001');
    unless ($f001 and $f001->data() eq $authid) {
        # Only delete what actually exists; never hand undef to delete_field.
        $marc->delete_field($f001) if $f001;
        $marc->insert_fields_ordered(MARC::Field->new('001',$authid));
    }
    return;
}

sub fix_unimarc_100 {
    # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
    #
    # Makes sure the record carries a 35-character 100$a whose positions
    # 22-27 read "frey50".  An existing 35-character value is reused;
    # anything else is rebuilt from today's date.
    my $marc = shift;

    my $f100     = $marc->field('100');
    my $existing = $f100 ? $f100->subfield('a') : undef;

    my $string;
    if ( defined $existing and length($existing) == 35 ) {
        $string = $existing;
    }
    else {
        require POSIX;    # strftime; not loaded anywhere else in this script
        $string = POSIX::strftime( "%Y%m%d", localtime );
        $string =~ s/\-//g;
        $string = sprintf( "%-*s", 35, $string );
    }
    substr( $string, 22, 6, "frey50" );

    # Replace the field.  The delete is guarded so that a record without a
    # 100 field does not pass undef to delete_field.
    $marc->delete_field($f100) if $f100;
    $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
    return;
}

# Run zebraidx with the given argument list, bypassing the shell.
# Warns when the command cannot be started or exits non-zero; returns the
# raw status from system().
sub run_zebraidx {
    my @args = @_;

    my $status = system('zebraidx', @args);
    if ($status != 0) {
        warn "zebraidx @args failed (status $status)\n";
    }
    return $status;
}

# Index the exported records in $record_dir.
#   $op           zebraidx operation: 'update' or 'delete'
#   $reset_index  true: run 'init' first
#   $noshadow     ' -n ' to bypass the shadow register (no commit is run then)
sub do_indexing {
    my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;

    my $zebra_server  = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
    my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
    my $zebra_config  = C4::Context->zebraconfig($zebra_server)->{'config'};

    # The option strings are space-padded fragments; split them into words
    # so that everything can be passed to system() in list form.
    my @log_opts    = split ' ', ( defined $zebraidx_log_opt ? $zebraidx_log_opt : '' );
    my @shadow_opts = split ' ', ( defined $noshadow         ? $noshadow         : '' );
    my @common      = ( '-c', $zebra_config, @log_opts );
    my @target      = ( '-g', $record_format, '-d', $zebra_db_name );

    run_zebraidx(@common, @target, 'init') if $reset_index;
    run_zebraidx(@common, @shadow_opts, @target, $op, $record_dir);
    run_zebraidx(@common, @target, 'commit') unless $noshadow;
    return;
}

# Print the command-line help to STDOUT.
sub print_usage {
    print <<_USAGE_;
$0: reindex MARC bibs and/or authorities in Zebra.

Use this batch job to reindex all biblio or authority
records in your Koha database.  This job is useful
only if you are using Zebra; if you are using the 'NoZebra'
mode, this job should not be used.

Parameters:
    -b                      index bibliographic records

    -a                      index authority records

    -z                      select only updated and deleted
                            records marked in the zebraqueue
                            table.  Cannot be used with -r
                            or -s.

    -r                      clear Zebra index before
                            adding records to index

    -d                      Temporary directory for indexing.
                            If not specified, one is automatically
                            created.  The export directory
                            is automatically deleted unless
                            you supply the -k switch.

    -k                      Do not delete export directory.

    -s                      Skip export.  Used if you have
                            already exported the records
                            in a previous run.

    -noxml                  index from ISO MARC blob
                            instead of MARC XML.  This
                            option is recommended only
                            for advanced users.

    -x                      export and index as xml instead of iso2709 (biblios only).
                            use this if you might have records > 99,999 chars.

    -nosanitize             export biblio/authority records directly from DB marcxml
                            field without sanitizing records. It speeds up the
                            dump process but could fail if DB contains badly
                            encoded records. Works only with -x.

    -w                      skip shadow indexing for this batch

    -y                      do NOT clear zebraqueue after indexing; normally,
                            after doing batch indexing, zebraqueue should be
                            marked done for the affected record type(s) so that
                            a running zebraqueue_daemon doesn't try to reindex
                            the same records - specify -y to override this.
                            Cannot be used with -z.

    -v                      increase the amount of logging.  Normally only
                            warnings and errors from the indexing are shown.

    -munge-config           Deprecated option to try
                            to fix Zebra config files.
    --help or -h            show this message.
_USAGE_
}

# FIXME: the following routines are deprecated and
# will be removed once it is determined whether
# a script to fix Zebra configuration files is
# actually needed.
+sub munge_config { # # creating zebra-biblios.cfg depending on system # @@ -129,9 +736,11 @@ print "Info: tab dir : $tabdir\n"; # my $created_dir_or_file = 0; if ($authorities) { - print "====================\n"; - print "checking directories & files for authorities\n"; - print "====================\n"; + if ( $verbose_logging ) { + print "====================\n"; + print "checking directories & files for authorities\n"; + print "====================\n"; + } unless (-d "$authorityserverdir") { system("mkdir -p $authorityserverdir"); print "Info: created $authorityserverdir\n"; @@ -175,43 +784,44 @@ if ($authorities) { # the record model, depending on marc flavour unless (-f "$authorityserverdir/tab/record.abs") { if (C4::Context->preference("marcflavour") eq "UNIMARC") { - system("cp -f $kohadir/misc/zebra/record_authorities_unimarc.abs $authorityserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/authorities/record.abs $authorityserverdir/tab/record.abs"); print "Info: copied record.abs for UNIMARC\n"; } else { - system("cp -f $kohadir/misc/zebra/record_authorities_usmarc.abs $authorityserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/authorities/record.abs $authorityserverdir/tab/record.abs"); print "Info: copied record.abs for USMARC\n"; } $created_dir_or_file++; } - unless (-f "$authorityserverdir/tab/sort-string-utf_french.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/sort-string-utf.chr"); + unless (-f "$authorityserverdir/tab/sort-string-utf.chr") { + system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/sort-string-utf.chr"); print "Info: copied sort-string-utf.chr\n"; $created_dir_or_file++; } unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/word-phrase-utf.chr"); + system("cp -f 
$kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/word-phrase-utf.chr"); print "Info: copied word-phase-utf.chr\n"; $created_dir_or_file++; } unless (-f "$authorityserverdir/tab/auth1.att") { - system("cp -f $kohadir/misc/zebra/bib1_authorities.att $authorityserverdir/tab/auth1.att"); + system("cp -f $kohadir/etc/zebradb/authorities/etc/bib1.att $authorityserverdir/tab/auth1.att"); print "Info: copied auth1.att\n"; $created_dir_or_file++; } unless (-f "$authorityserverdir/tab/default.idx") { - system("cp -f $kohadir/misc/zebra/default.idx $authorityserverdir/tab/default.idx"); + system("cp -f $kohadir/etc/zebradb/etc/default.idx $authorityserverdir/tab/default.idx"); print "Info: copied default.idx\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraoptions('authorityserver')->{ccl2rpn}) { - system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraoptions('authorityserver')->{ccl2rpn}); - # system("cp -f $kohadir/misc/zebra/ccl.properties $authorityserverdir/etc/ccl.properties"); + unless (-f "$authorityserverdir/etc/ccl.properties") { +# system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/ccl.properties $authorityserverdir/etc/ccl.properties"); print "Info: copied ccl.properties\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraconfig('authorityserver')->{cql2rpn}) { - system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{cql2rpn}); + unless (-f "$authorityserverdir/etc/pqf.properties") { +# system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/pqf.properties $authorityserverdir/etc/pqf.properties"); print "Info: copied pqf.properties\n"; $created_dir_or_file++; } @@ -222,42 +832,40 @@ if ($authorities) { unless (-f C4::Context->zebraconfig('authorityserver')->{config}) { open 
ZD,">:utf8 ",C4::Context->zebraconfig('authorityserver')->{config}; print ZD " - # generated by KOHA/misc/migration_tools/rebuild_zebra.pl - profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/ - - encoding: UTF-8 - # Files that describe the attribute sets supported. - attset: auth1.att - attset: explain.att - attset: gils.att - - modulePath:$modulesdir/modules/ - # Specify record type - iso2709.recordType:grs.marcxml.record - recordType:grs.xml - recordId: (auth1,Local-Number) - storeKeys:1 - storeData:1 - - - # Lock File Area - lockDir: $authorityserverdir/lock - perm.anonymous:r - perm.kohaadmin:rw - passw.kohalis - shadow - register: $authorityserverdir/register:4G - shadow: $authorityserverdir/shadow:4G - - # Temp File area for result sets - setTmpDir: $authorityserverdir/tmp - - # Temp File area for index program - keyTmpDir: $authorityserverdir/key - - # Approx. Memory usage during indexing - memMax: 40M - rank:rank-1 +# generated by KOHA/misc/migration_tools/rebuild_zebra.pl +profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/ + +encoding: UTF-8 +# Files that describe the attribute sets supported. +attset: auth1.att +attset: explain.att +attset: gils.att + +modulePath:$modulesdir/modules/ +# Specify record type +iso2709.recordType:grs.marcxml.record +recordType:grs.xml +recordId: (auth1,Local-Number) +storeKeys:1 +storeData:1 + + +# Lock File Area +lockDir: $authorityserverdir/lock +perm.anonymous:r +perm.kohaadmin:rw +register: $authorityserverdir/register:4G +shadow: $authorityserverdir/shadow:4G + +# Temp File area for result sets +setTmpDir: $authorityserverdir/tmp + +# Temp File area for index program +keyTmpDir: $authorityserverdir/key + +# Approx. 
Memory usage during indexing +memMax: 40M +rank:rank-1 "; print "Info: creating zebra-authorities.cfg\n"; $created_dir_or_file++; @@ -269,55 +877,14 @@ if ($authorities) { print "Info: file & directories OK\n"; } - # - # exporting authorities - # - if ($skip_export) { - print "====================\n"; - print "SKIPPING authorities export\n"; - print "====================\n"; - } else { +} +if ($biblios) { + if ( $verbose_logging ) { print "====================\n"; - print "exporting authorities\n"; + print "checking directories & files for biblios\n"; print "====================\n"; - mkdir "$directory" unless (-d $directory); - mkdir "$directory/authorities" unless (-d "$directory/authorities"); - open(OUT,">:utf8","$directory/authorities/authorities.iso2709") or die $!; - my $dbh=C4::Context->dbh; - my $sth; - $sth=$dbh->prepare("select authid from auth_header $limit"); - $sth->execute(); - my $i=0; - while (my ($authid) = $sth->fetchrow) { - my $record = AUTHgetauthority($dbh,$authid); - print "."; - print "\r$i" unless ($i++ %100); - print OUT $record->as_usmarc(); - } - close(OUT); } - - # - # and reindexing everything - # - print "====================\n"; - print "REINDEXING zebra\n"; - print "====================\n"; - system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('authorityserver')->{config}." -d authorities init") if ($reset); - system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('authorityserver')->{config}." -d authorities update $directory/authorities"); - system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('authorityserver')->{config}." 
-d authorities commit"); -} else { - print "skipping authorities\n"; -} -################################################################################################################# -# BIBLIOS -################################################################################################################# -if ($biblios) { - print "====================\n"; - print "checking directories & files for biblios\n"; - print "====================\n"; - # # BIBLIOS : creating directory structure # @@ -363,42 +930,43 @@ if ($biblios) { # the record model, depending on marc flavour unless (-f "$biblioserverdir/tab/record.abs") { if (C4::Context->preference("marcflavour") eq "UNIMARC") { - system("cp -f $kohadir/misc/zebra/record_biblios_unimarc.abs $biblioserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/biblios/record.abs $biblioserverdir/tab/record.abs"); print "Info: copied record.abs for UNIMARC\n"; } else { - system("cp -f $kohadir/misc/zebra/record_biblios_usmarc.abs $biblioserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/biblios/record.abs $biblioserverdir/tab/record.abs"); print "Info: copied record.abs for USMARC\n"; } $created_dir_or_file++; } - unless (-f "$biblioserverdir/tab/sort-string-utf_french.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/sort-string-utf.chr"); + unless (-f "$biblioserverdir/tab/sort-string-utf.chr") { + system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/sort-string-utf.chr"); print "Info: copied sort-string-utf.chr\n"; $created_dir_or_file++; } unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/word-phrase-utf.chr"); + system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/word-phrase-utf.chr"); print "Info: copied word-phase-utf.chr\n"; $created_dir_or_file++; } 
unless (-f "$biblioserverdir/tab/bib1.att") { - system("cp -f $kohadir/misc/zebra/bib1_biblios.att $biblioserverdir/tab/bib1.att"); + system("cp -f $kohadir/etc/zebradb/biblios/etc/bib1.att $biblioserverdir/tab/bib1.att"); print "Info: copied bib1.att\n"; $created_dir_or_file++; } unless (-f "$biblioserverdir/tab/default.idx") { - system("cp -f $kohadir/misc/zebra/default.idx $biblioserverdir/tab/default.idx"); + system("cp -f $kohadir/etc/zebradb/etc/default.idx $biblioserverdir/tab/default.idx"); print "Info: copied default.idx\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraoptions('biblioserver')->{ccl2rpn}) { - system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraoptions('biblioserver')->{ccl2rpn}); - # system("cp -f $kohadir/misc/zebra/ccl.properties $biblioserverdir/etc/ccl.properties"); + unless (-f "$biblioserverdir/etc/ccl.properties") { +# system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/ccl.properties $biblioserverdir/etc/ccl.properties"); print "Info: copied ccl.properties\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraconfig('biblioserver')->{cql2rpn}) { - system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{cql2rpn}); + unless (-f "$biblioserverdir/etc/pqf.properties") { +# system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/pqf.properties $biblioserverdir/etc/pqf.properties"); print "Info: copied pqf.properties\n"; $created_dir_or_file++; } @@ -409,42 +977,40 @@ if ($biblios) { unless (-f C4::Context->zebraconfig('biblioserver')->{config}) { open ZD,">:utf8 ",C4::Context->zebraconfig('biblioserver')->{config}; print ZD " - # generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl - profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/ - - encoding: UTF-8 - # 
Files that describe the attribute sets supported. - attset:bib1.att - attset:explain.att - attset:gils.att - - modulePath:$modulesdir/modules/ - # Specify record type - iso2709.recordType:grs.marcxml.record - recordType:grs.xml - recordId: (bib1,Local-Number) - storeKeys:1 - storeData:1 - - - # Lock File Area - lockDir: $biblioserverdir/lock - perm.anonymous:r - perm.kohaadmin:rw - passw.kohalis - shadow - register: $biblioserverdir/register:4G - shadow: $biblioserverdir/shadow:4G - - # Temp File area for result sets - setTmpDir: $biblioserverdir/tmp - - # Temp File area for index program - keyTmpDir: $biblioserverdir/key - - # Approx. Memory usage during indexing - memMax: 40M - rank:rank-1 +# generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl +profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/ + +encoding: UTF-8 +# Files that describe the attribute sets supported. +attset:bib1.att +attset:explain.att +attset:gils.att + +modulePath:$modulesdir/modules/ +# Specify record type +iso2709.recordType:grs.marcxml.record +recordType:grs.xml +recordId: (bib1,Local-Number) +storeKeys:1 +storeData:1 + + +# Lock File Area +lockDir: $biblioserverdir/lock +perm.anonymous:r +perm.kohaadmin:rw +register: $biblioserverdir/register:4G +shadow: $biblioserverdir/shadow:4G + +# Temp File area for result sets +setTmpDir: $biblioserverdir/tmp + +# Temp File area for index program +keyTmpDir: $biblioserverdir/key + +# Approx. 
Memory usage during indexing +memMax: 40M +rank:rank-1 "; print "Info: creating zebra-biblios.cfg\n"; $created_dir_or_file++; @@ -456,117 +1022,5 @@ if ($biblios) { print "Info: file & directories OK\n"; } - # die; - # - # exporting biblios - # - if ($skip_export) { - print "====================\n"; - print "SKIPPING biblio export\n"; - print "====================\n"; - } else { - print "====================\n"; - print "exporting biblios\n"; - print "====================\n"; - mkdir "$directory" unless (-d $directory); - mkdir "$directory/biblios" unless (-d "$directory/biblios"); - open(OUT,">:utf8 ","$directory/biblios/export") or die $!; - my $dbh=C4::Context->dbh; - my $sth; - $sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit"); - $sth->execute(); - my $i=0; - while (my ($biblionumber) = $sth->fetchrow) { - my $record = GetMarcBiblio($biblionumber); - # warn $record->as_formatted; - # print $record; - # check that biblionumber & biblioitemnumber are stored in the MARC record, otherwise, add them & update the biblioitems.marcxml data. 
- my $record_correct=1; - next unless $record->field($biblionumbertagfield); - if ($biblionumbertagfield eq '001') { - unless ($record->field($biblionumbertagfield)->data()) { - $record_correct=0; - my $field; - # if the field where biblionumber is already exist, just update it, otherwise create it - if ($record->field($biblionumbertagfield)) { - $field = $record->field($biblionumbertagfield); - $field->update($biblionumber); - } else { - my $newfield; - $newfield = MARC::Field->new( $biblionumbertagfield, $biblionumber); - $record->append_fields($newfield); - } - } - } else { - unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) { - $record_correct=0; - my $field; - # if the field where biblionumber is already exist, just update it, otherwise create it - if ($record->field($biblionumbertagfield)) { - $field = $record->field($biblionumbertagfield); - $field->add_subfields($biblionumbertagsubfield => $biblionumber); - } else { - my $newfield; - $newfield = MARC::Field->new( $biblionumbertagfield,'','', $biblionumbertagsubfield => $biblionumber); - $record->append_fields($newfield); - } - } - # warn "FIXED BIBLIONUMBER".$record->as_formatted; - } - unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) { - $record_correct=0; - # warn "INCORRECT BIBLIOITEMNUMBER :".$record->as_formatted; - my $field; - # if the field where biblionumber is already exist, just update it, otherwise create it - if ($record->field($biblioitemnumbertagfield)) { - $field = $record->field($biblioitemnumbertagfield); - if ($biblioitemnumbertagfield <10) { - $field->update($biblionumber); - } else { - $field->add_subfields($biblioitemnumbertagsubfield => $biblionumber); - } - } else { - my $newfield; - if ($biblioitemnumbertagfield <10) { - $newfield = MARC::Field->new( $biblioitemnumbertagfield, $biblionumber); - } else { - $newfield = MARC::Field->new( $biblioitemnumbertagfield,'','', $biblioitemnumbertagsubfield => $biblionumber); - } - 
$record->insert_grouped_field($newfield); - } - # warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted; - } - unless ($record_correct) { - my $update_xml = $dbh->prepare("update biblioitems set marcxml=? where biblionumber=?"); - warn "UPDATING $biblionumber (missing biblionumber or biblioitemnumber in MARC record : ".$record->as_xml; - $update_xml->execute($record->as_xml,$biblionumber); - } - print "."; - print "\r$i" unless ($i++ %100); - print OUT $record->as_usmarc(); - } - close(OUT); - } - - # - # and reindexing everything - # - print "====================\n"; - print "REINDEXING zebra\n"; - print "====================\n"; - system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios init") if ($reset); - system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios update $directory/biblios"); - system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios commit"); -} else { - print "skipping biblios\n"; } - -print "====================\n"; -print "CLEANING\n"; -print "====================\n"; -if ($keep_export) { - print "NOTHING cleaned : the $directory has been kept. You can re-run this script with the -s parameter if you just want to rebuild zebra after changing the record.abs or another zebra config file\n"; -} else { - system("rm -rf $directory"); - print "directory $directory deleted\n"; }