7 use File::Temp qw/ tempdir /;
10 use C4::AuthoritiesMarc;
13 # script that checks the zebradir structure and creates directories & mandatory files if needed
17 $|=1; # flushes output
19 # limit for database dumping
31 my $result = GetOptions(
39 'munge-config' => \$do_munge,
41 'h|help' => \$want_help,
46 if (not $result or $want_help) {
51 if (not $biblios and not $authorities) {
52 my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
53 $msg .= "Please do '$0 --help' to see usage.\n";
57 if ($authorities and $as_xml) {
58 my $msg = "Cannot specify both -a and -x\n";
59 $msg .= "Please do '$0 --help' to see usage.\n";
69 $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
73 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
74 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
76 my $kohadir = C4::Context->config('intranetdir');
77 my $dbh = C4::Context->dbh;
78 my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
79 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
81 print "Zebra configuration information\n";
82 print "================================\n";
83 print "Zebra biblio directory = $biblioserverdir\n";
84 print "Zebra authorities directory = $authorityserverdir\n";
85 print "Koha directory = $kohadir\n";
86 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
87 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
88 print "================================\n";
96 # exporting authorities
99 print "====================\n";
100 print "SKIPPING authorities export\n";
101 print "====================\n";
103 print "====================\n";
104 print "exporting authorities\n";
105 print "====================\n";
106 mkdir "$directory" unless (-d $directory);
107 mkdir "$directory/authorities" unless (-d "$directory/authorities");
108 my $dbh=C4::Context->dbh;
110 $sth=$dbh->prepare("select authid,marc from auth_header");
112 export_marc_records('authority', $sth, "$directory/authorities", $as_xml, $noxml);
116 # and reindexing everything
118 print "====================\n";
119 print "REINDEXING zebra\n";
120 print "====================\n";
121 my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
122 do_indexing('authority', 'update', "$directory/authorities", $reset, $noshadow, $record_fmt);
124 print "skipping authorities\n";
126 #################################################################################################################
128 #################################################################################################################
135 print "====================\n";
136 print "SKIPPING biblio export\n";
137 print "====================\n";
139 print "====================\n";
140 print "exporting biblios\n";
141 print "====================\n";
142 mkdir "$directory" unless (-d $directory);
143 mkdir "$directory/biblios" unless (-d "$directory/biblios");
144 my $dbh=C4::Context->dbh;
145 my $sth = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
147 export_marc_records('biblio', $sth, "$directory/biblios", $as_xml, $noxml);
151 # and reindexing everything
153 print "====================\n";
154 print "REINDEXING zebra\n";
155 print "====================\n";
156 my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
157 do_indexing('biblio', 'update', "$directory/biblios", $reset, $noshadow, $record_fmt);
159 print "skipping biblios\n";
162 print "====================\n";
164 print "====================\n";
166 print "NOTHING cleaned : the export $directory has been kept.\n";
167 print "You can re-run this script with the -s ";
169 print " and -d $directory parameters";
174 print "if you just want to rebuild zebra after changing the record.abs\n";
175 print "or another zebra config file\n";
177 unless ($use_tempdir) {
178 # if we're using a temporary directory
179 # created by File::Temp, it will be removed
181 rmtree($directory, 0, 1);
182 print "directory $directory deleted\n";
186 sub export_marc_records {
187 my ($record_type, $sth, $directory, $as_xml, $noxml) = @_;
189 open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
191 while (my ($record_number) = $sth->fetchrow_array) {
193 print "\r$i" unless ($i++ %100);
194 my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
196 # FIXME - when more than one record is exported and $as_xml is true,
197 # the output file is not valid XML - it's just multiple <record> elements
198 # strung together with no single root element. zebraidx doesn't seem
199 # to care, though, at least if you're using the GRS-1 filter. It does
200 # care if you're using the DOM filter, which requires valid XML file(s).
201 print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc();
204 print "\nRecords exported: $i\n";
208 sub get_corrected_marc_record {
209 my ($record_type, $record_number, $noxml) = @_;
211 my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
215 if ($record_type eq 'biblio') {
216 my $succeeded = fix_biblio_ids($marc, $record_number);
217 return unless $succeeded;
219 fix_authority_id($marc, $record_number);
221 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
222 fix_unimarc_100($marc);
229 sub get_raw_marc_record {
230 my ($record_type, $record_number, $noxml) = @_;
233 if ($record_type eq 'biblio') {
235 my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
236 $fetch_sth->execute($record_number);
237 if (my ($blob) = $fetch_sth->fetchrow_array) {
238 $marc = MARC::Record->new_from_usmarc($blob);
240 warn "failed to retrieve biblio $record_number";
242 $fetch_sth->finish();
244 eval { $marc = GetMarcBiblio($record_number); };
246 warn "failed to retrieve biblio $record_number";
251 eval { $marc = GetAuthority($record_number); };
253 warn "failed to retrieve authority $record_number";
261 # FIXME - this routine is suspect
262 # It blanks the Leader/00-05 and Leader/12-16 to
263 # force them to be recalculated correctly when
264 # the $marc->as_usmarc() or $marc->as_xml() is called.
265 # But why is this necessary? It would be a serious bug
266 # in MARC::Record (definitely) and MARC::File::XML (arguably)
267 # if they are emitting incorrect leader values.
270 my $leader = $marc->leader;
271 substr($leader, 0, 5) = ' ';
272 substr($leader, 10, 7) = '22 ';
273 $marc->leader(substr($leader, 0, 24));
277 # FIXME - it is essential to ensure that the biblionumber is present,
278 # otherwise, Zebra will choke on the record. However, this
279 # logic belongs in the relevant C4::Biblio APIs.
280 my ($marc, $biblionumber) = @_;
282 my $sth = $dbh->prepare(
283 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
284 $sth->execute($biblionumber);
285 my ($biblioitemnumber) = $sth->fetchrow_array;
287 unless ($biblioitemnumber) {
288 warn "failed to get biblioitemnumber for biblio $biblionumber";
292 # FIXME - this is cheating on two levels
293 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
294 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
295 # present in the MARC::Record object ought to be part of GetMarcBiblio.
297 # On the other hand, this is better for now than what rebuild_zebra.pl used to
298 # do, which was duplicate the code for inserting the biblionumber
299 # and biblioitemnumber
300 C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
305 sub fix_authority_id {
306 # FIXME - as with fix_biblio_ids, the authid must be present
307 # for Zebra's sake. However, this really belongs
308 # in C4::AuthoritiesMarc.
309 my ($marc, $authid) = @_;
310 unless ($marc->field('001')->data() eq $authid){
311 print "$authid don't exist for this authority :".$marc->as_formatted;
312 $marc->delete_field($marc->field('001'));
313 $marc->insert_fields_ordered(MARC::Field->new('001',$authid));
317 sub fix_unimarc_100 {
318 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
322 if ( length($marc->subfield( 100, "a" )) == 35 ) {
323 $string = $marc->subfield( 100, "a" );
324 my $f100 = $marc->field(100);
325 $marc->delete_field($f100);
328 $string = POSIX::strftime( "%Y%m%d", localtime );
330 $string = sprintf( "%-*s", 35, $string );
332 substr( $string, 22, 6, "frey50" );
333 unless ( length($marc->subfield( 100, "a" )) == 35 ) {
334 $marc->delete_field($marc->field(100));
335 $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
340 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format) = @_;
342 my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
343 my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
344 my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
345 my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
347 system("zebraidx -c $zebra_config -g $record_format -d $zebra_db_name init") if $reset_index;
348 system("zebraidx -c $zebra_config $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
349 system("zebraidx -c $zebra_config -g $record_format -d $zebra_db_name commit") unless $noshadow;
355 $0: reindex MARC bibs and/or authorities in Zebra.
357 Use this batch job to reindex all biblio or authority
358 records in your Koha database. This job is useful
359 only if you are using Zebra; if you are using the 'NoZebra'
360 mode, this job should not be used.
363 -b index bibliographic records
365 -a index authority records
367 -r clear Zebra index before
368 adding records to index
370 -d Temporary directory for indexing.
371 If not specified, one is automatically
372 created. The export directory
373 is automatically deleted unless
374 you supply the -k switch.
376 -k Do not delete export directory.
378 -s Skip export. Used if you have
379 already exported the records
382 -noxml index from ISO MARC blob
383 instead of MARC XML. This
384 option is recommended only
387 -x export and index as xml instead of is02709 (biblios only).
388 use this if you might have records > 99,999 chars,
390 -w skip shadow indexing for this batch
392 -munge-config Deprecated option to try
393 to fix Zebra config files.
394 --help or -h show this message.
398 # FIXME: the following routines are deprecated and
399 # will be removed once it is determined whether
400 # a script to fix Zebra configuration files is
404 # creating zebra-biblios.cfg depending on system
407 # getting zebraidx directory
409 foreach (qw(/usr/local/bin/zebraidx
418 unless ($zebraidxdir) {
420 ERROR: could not find zebraidx directory
421 ERROR: Either zebra is not installed,
422 ERROR: or it's in a directory I don't checked.
423 ERROR: do a which zebraidx and edit this file to add the result you get
427 $zebraidxdir =~ s/\/bin\/.*//;
428 print "Info : zebra is in $zebraidxdir \n";
430 # getting modules directory
432 foreach (qw(/usr/local/lib/idzebra-2.0/modules/mod-grs-xml.so
433 /usr/local/lib/idzebra/modules/mod-grs-xml.so
434 /usr/lib/idzebra/modules/mod-grs-xml.so
435 /usr/lib/idzebra-2.0/modules/mod-grs-xml.so
442 unless ($modulesdir) {
444 ERROR: could not find mod-grs-xml.so directory
445 ERROR: Either zebra is not properly compiled (libxml2 is not setup and you don t have mod-grs-xml.so,
446 ERROR: or it's in a directory I don't checked.
447 ERROR: find where mod-grs-xml.so is and edit this file to add the result you get
451 $modulesdir =~ s/\/modules\/.*//;
452 print "Info: zebra modules dir : $modulesdir\n";
454 # getting tab directory
456 foreach (qw(/usr/local/share/idzebra/tab/explain.att
457 /usr/local/share/idzebra-2.0/tab/explain.att
458 /usr/share/idzebra/tab/explain.att
459 /usr/share/idzebra-2.0/tab/explain.att
468 ERROR: could not find explain.att directory
469 ERROR: Either zebra is not properly compiled,
470 ERROR: or it's in a directory I don't checked.
471 ERROR: find where explain.att is and edit this file to add the result you get
475 $tabdir =~ s/\/tab\/.*//;
476 print "Info: tab dir : $tabdir\n";
479 # AUTHORITIES creating directory structure
481 my $created_dir_or_file = 0;
483 print "====================\n";
484 print "checking directories & files for authorities\n";
485 print "====================\n";
486 unless (-d "$authorityserverdir") {
487 system("mkdir -p $authorityserverdir");
488 print "Info: created $authorityserverdir\n";
489 $created_dir_or_file++;
491 unless (-d "$authorityserverdir/lock") {
492 mkdir "$authorityserverdir/lock";
493 print "Info: created $authorityserverdir/lock\n";
494 $created_dir_or_file++;
496 unless (-d "$authorityserverdir/register") {
497 mkdir "$authorityserverdir/register";
498 print "Info: created $authorityserverdir/register\n";
499 $created_dir_or_file++;
501 unless (-d "$authorityserverdir/shadow") {
502 mkdir "$authorityserverdir/shadow";
503 print "Info: created $authorityserverdir/shadow\n";
504 $created_dir_or_file++;
506 unless (-d "$authorityserverdir/tab") {
507 mkdir "$authorityserverdir/tab";
508 print "Info: created $authorityserverdir/tab\n";
509 $created_dir_or_file++;
511 unless (-d "$authorityserverdir/key") {
512 mkdir "$authorityserverdir/key";
513 print "Info: created $authorityserverdir/key\n";
514 $created_dir_or_file++;
517 unless (-d "$authorityserverdir/etc") {
518 mkdir "$authorityserverdir/etc";
519 print "Info: created $authorityserverdir/etc\n";
520 $created_dir_or_file++;
524 # AUTHORITIES : copying mandatory files
526 # the record model, depending on marc flavour
527 unless (-f "$authorityserverdir/tab/record.abs") {
528 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
529 system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/authorities/record.abs $authorityserverdir/tab/record.abs");
530 print "Info: copied record.abs for UNIMARC\n";
532 system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/authorities/record.abs $authorityserverdir/tab/record.abs");
533 print "Info: copied record.abs for USMARC\n";
535 $created_dir_or_file++;
537 unless (-f "$authorityserverdir/tab/sort-string-utf.chr") {
538 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/sort-string-utf.chr");
539 print "Info: copied sort-string-utf.chr\n";
540 $created_dir_or_file++;
542 unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") {
543 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/word-phrase-utf.chr");
544 print "Info: copied word-phase-utf.chr\n";
545 $created_dir_or_file++;
547 unless (-f "$authorityserverdir/tab/auth1.att") {
548 system("cp -f $kohadir/etc/zebradb/authorities/etc/bib1.att $authorityserverdir/tab/auth1.att");
549 print "Info: copied auth1.att\n";
550 $created_dir_or_file++;
552 unless (-f "$authorityserverdir/tab/default.idx") {
553 system("cp -f $kohadir/etc/zebradb/etc/default.idx $authorityserverdir/tab/default.idx");
554 print "Info: copied default.idx\n";
555 $created_dir_or_file++;
558 unless (-f "$authorityserverdir/etc/ccl.properties") {
559 # system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
560 system("cp -f $kohadir/etc/zebradb/ccl.properties $authorityserverdir/etc/ccl.properties");
561 print "Info: copied ccl.properties\n";
562 $created_dir_or_file++;
564 unless (-f "$authorityserverdir/etc/pqf.properties") {
565 # system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
566 system("cp -f $kohadir/etc/zebradb/pqf.properties $authorityserverdir/etc/pqf.properties");
567 print "Info: copied pqf.properties\n";
568 $created_dir_or_file++;
572 # AUTHORITIES : creating the zebra-authorities.cfg configuration file if it is missing
574 unless (-f C4::Context->zebraconfig('authorityserver')->{config}) {
575 open ZD,">:utf8 ",C4::Context->zebraconfig('authorityserver')->{config};
577 # generated by KOHA/misc/migration_tools/rebuild_zebra.pl
578 profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
581 # Files that describe the attribute sets supported.
586 modulePath:$modulesdir/modules/
587 # Specify record type
588 iso2709.recordType:grs.marcxml.record
590 recordId: (auth1,Local-Number)
596 lockDir: $authorityserverdir/lock
599 register: $authorityserverdir/register:4G
600 shadow: $authorityserverdir/shadow:4G
602 # Temp File area for result sets
603 setTmpDir: $authorityserverdir/tmp
605 # Temp File area for index program
606 keyTmpDir: $authorityserverdir/key
608 # Approx. Memory usage during indexing
612 print "Info: creating zebra-authorities.cfg\n";
613 $created_dir_or_file++;
616 if ($created_dir_or_file) {
617 print "Info: created : $created_dir_or_file directories & files\n";
619 print "Info: file & directories OK\n";
624 print "====================\n";
625 print "checking directories & files for biblios\n";
626 print "====================\n";
629 # BIBLIOS : creating directory structure
631 unless (-d "$biblioserverdir") {
632 system("mkdir -p $biblioserverdir");
633 print "Info: created $biblioserverdir\n";
634 $created_dir_or_file++;
636 unless (-d "$biblioserverdir/lock") {
637 mkdir "$biblioserverdir/lock";
638 print "Info: created $biblioserverdir/lock\n";
639 $created_dir_or_file++;
641 unless (-d "$biblioserverdir/register") {
642 mkdir "$biblioserverdir/register";
643 print "Info: created $biblioserverdir/register\n";
644 $created_dir_or_file++;
646 unless (-d "$biblioserverdir/shadow") {
647 mkdir "$biblioserverdir/shadow";
648 print "Info: created $biblioserverdir/shadow\n";
649 $created_dir_or_file++;
651 unless (-d "$biblioserverdir/tab") {
652 mkdir "$biblioserverdir/tab";
653 print "Info: created $biblioserverdir/tab\n";
654 $created_dir_or_file++;
656 unless (-d "$biblioserverdir/key") {
657 mkdir "$biblioserverdir/key";
658 print "Info: created $biblioserverdir/key\n";
659 $created_dir_or_file++;
661 unless (-d "$biblioserverdir/etc") {
662 mkdir "$biblioserverdir/etc";
663 print "Info: created $biblioserverdir/etc\n";
664 $created_dir_or_file++;
668 # BIBLIOS : copying mandatory files
670 # the record model, depending on marc flavour
671 unless (-f "$biblioserverdir/tab/record.abs") {
672 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
673 system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/biblios/record.abs $biblioserverdir/tab/record.abs");
674 print "Info: copied record.abs for UNIMARC\n";
676 system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/biblios/record.abs $biblioserverdir/tab/record.abs");
677 print "Info: copied record.abs for USMARC\n";
679 $created_dir_or_file++;
681 unless (-f "$biblioserverdir/tab/sort-string-utf.chr") {
682 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/sort-string-utf.chr");
683 print "Info: copied sort-string-utf.chr\n";
684 $created_dir_or_file++;
686 unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") {
687 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/word-phrase-utf.chr");
688 print "Info: copied word-phase-utf.chr\n";
689 $created_dir_or_file++;
691 unless (-f "$biblioserverdir/tab/bib1.att") {
692 system("cp -f $kohadir/etc/zebradb/biblios/etc/bib1.att $biblioserverdir/tab/bib1.att");
693 print "Info: copied bib1.att\n";
694 $created_dir_or_file++;
696 unless (-f "$biblioserverdir/tab/default.idx") {
697 system("cp -f $kohadir/etc/zebradb/etc/default.idx $biblioserverdir/tab/default.idx");
698 print "Info: copied default.idx\n";
699 $created_dir_or_file++;
701 unless (-f "$biblioserverdir/etc/ccl.properties") {
702 # system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
703 system("cp -f $kohadir/etc/zebradb/ccl.properties $biblioserverdir/etc/ccl.properties");
704 print "Info: copied ccl.properties\n";
705 $created_dir_or_file++;
707 unless (-f "$biblioserverdir/etc/pqf.properties") {
708 # system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
709 system("cp -f $kohadir/etc/zebradb/pqf.properties $biblioserverdir/etc/pqf.properties");
710 print "Info: copied pqf.properties\n";
711 $created_dir_or_file++;
715 # BIBLIOS : creating the zebra-biblios.cfg configuration file if it is missing
717 unless (-f C4::Context->zebraconfig('biblioserver')->{config}) {
718 open ZD,">:utf8 ",C4::Context->zebraconfig('biblioserver')->{config};
720 # generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl
721 profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
724 # Files that describe the attribute sets supported.
729 modulePath:$modulesdir/modules/
730 # Specify record type
731 iso2709.recordType:grs.marcxml.record
733 recordId: (bib1,Local-Number)
739 lockDir: $biblioserverdir/lock
742 register: $biblioserverdir/register:4G
743 shadow: $biblioserverdir/shadow:4G
745 # Temp File area for result sets
746 setTmpDir: $biblioserverdir/tmp
748 # Temp File area for index program
749 keyTmpDir: $biblioserverdir/key
751 # Approx. Memory usage during indexing
755 print "Info: creating zebra-biblios.cfg\n";
756 $created_dir_or_file++;
759 if ($created_dir_or_file) {
760 print "Info: created : $created_dir_or_file directories & files\n";
762 print "Info: file & directories OK\n";