6 use C4::AuthoritiesMarc;
10 # Script that checks the zebradir structure and creates the directories & mandatory files if needed.
# NOTE(review): this listing is a sparse excerpt of the original script -- the number
# at the start of each line is the original line number and many lines (closing
# braces, else branches, heredoc delimiters, ...) are not present here. The added
# comments describe only what the visible statements do.
14 $|=1; # flushes output
16 # limit for database dumping
# $limit is interpolated verbatim at the end of the SELECT statements further down;
# it is left undefined here (the "LIMIT 1" value is commented out).
17 my $limit;# = "LIMIT 1";
# Fall back to the "export" directory when $directory has no true value.
35 $directory = "export" unless $directory;
# Zebra data directories of the biblio and authority servers, read from the Koha configuration.
38 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
39 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
41 my $kohadir = C4::Context->config('intranetdir');
42 my $dbh = C4::Context->dbh;
# MARC tag/subfield pairs mapped to biblio.biblionumber and biblioitems.biblioitemnumber.
43 my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
44 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
# Print a summary of the settings resolved above.
46 print "some informations\n";
47 print "=================\n";
48 print "Zebra biblio directory =>$biblioserverdir\n";
49 print "Zebra authorities directory =>$authorityserverdir\n";
50 print "Koha directory =>$kohadir\n";
51 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
52 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
53 print "=================\n";
55 # creating zebra-biblios.cfg depending on system
58 # getting zebraidx directory
# Candidate locations of the zebraidx binary are listed below (the loop body is not
# visible in this excerpt; presumably the first existing path ends up in $zebraidxdir).
# When $zebraidxdir is still empty the ERROR text below is presumably printed; otherwise
# everything from "/bin/" onwards is stripped so that only the installation prefix remains.
60 foreach (qw(/usr/local/bin/zebraidx
69 unless ($zebraidxdir) {
71 ERROR: could not find zebraidx directory
72 ERROR: Either zebra is not installed,
73 ERROR: or it's in a directory I don't checked.
74 ERROR: do a which zebraidx and edit this file to add the result you get
78 $zebraidxdir =~ s/\/bin\/.*//;
79 print "Info : zebra is in $zebraidxdir \n";
81 # getting modules directory
# Same approach with the candidate paths of mod-grs-xml.so. Everything from "/modules/"
# onwards is stripped from $modulesdir; the generated zebra configuration later appends
# "/modules/" again for its modulePath entry.
83 foreach (qw(/usr/local/lib/idzebra-2.0/modules/mod-grs-xml.so
84 /usr/local/lib/idzebra/modules/mod-grs-xml.so
85 /usr/lib/idzebra/modules/mod-grs-xml.so
86 /usr/lib/idzebra-2.0/modules/mod-grs-xml.so
93 unless ($modulesdir) {
95 ERROR: could not find mod-grs-xml.so directory
96 ERROR: Either zebra is not properly compiled (libxml2 is not setup and you don t have mod-grs-xml.so,
97 ERROR: or it's in a directory I don't checked.
98 ERROR: find where mod-grs-xml.so is and edit this file to add the result you get
102 $modulesdir =~ s/\/modules\/.*//;
103 print "Info: zebra modules dir : $modulesdir\n";
105 # getting tab directory
# Same approach with the candidate paths of explain.att. Everything from "/tab/" onwards
# is stripped from $tabdir; the generated zebra configuration later appends "/tab/" again
# inside its profilePath entry.
107 foreach (qw(/usr/local/share/idzebra/tab/explain.att
108 /usr/local/share/idzebra-2.0/tab/explain.att
109 /usr/share/idzebra/tab/explain.att
110 /usr/share/idzebra-2.0/tab/explain.att
119 ERROR: could not find explain.att directory
120 ERROR: Either zebra is not properly compiled,
121 ERROR: or it's in a directory I don't checked.
122 ERROR: find where explain.att is and edit this file to add the result you get
126 $tabdir =~ s/\/tab\/.*//;
127 print "Info: tab dir : $tabdir\n";
130 # AUTHORITIES creating directory structure
# The authority server directory itself is created through the shell ("mkdir -p"), then
# the lock, register, shadow, tab, key and etc sub-directories are created with Perl's
# mkdir when they do not exist yet. $created_dir_or_file counts every creation.
# NOTE(review): none of the visible mkdir/system calls has its result checked, so the
# "Info: created" message is printed even when the creation failed.
# NOTE(review): no "tmp" sub-directory is created in the visible lines, although the
# generated configuration points setTmpDir at $authorityserverdir/tmp -- confirm.
132 my $created_dir_or_file = 0;
134 print "====================\n";
135 print "checking directories & files for authorities\n";
136 print "====================\n";
137 unless (-d "$authorityserverdir") {
138 system("mkdir -p $authorityserverdir");
139 print "Info: created $authorityserverdir\n";
140 $created_dir_or_file++;
142 unless (-d "$authorityserverdir/lock") {
143 mkdir "$authorityserverdir/lock";
144 print "Info: created $authorityserverdir/lock\n";
145 $created_dir_or_file++;
147 unless (-d "$authorityserverdir/register") {
148 mkdir "$authorityserverdir/register";
149 print "Info: created $authorityserverdir/register\n";
150 $created_dir_or_file++;
152 unless (-d "$authorityserverdir/shadow") {
153 mkdir "$authorityserverdir/shadow";
154 print "Info: created $authorityserverdir/shadow\n";
155 $created_dir_or_file++;
157 unless (-d "$authorityserverdir/tab") {
158 mkdir "$authorityserverdir/tab";
159 print "Info: created $authorityserverdir/tab\n";
160 $created_dir_or_file++;
162 unless (-d "$authorityserverdir/key") {
163 mkdir "$authorityserverdir/key";
164 print "Info: created $authorityserverdir/key\n";
165 $created_dir_or_file++;
168 unless (-d "$authorityserverdir/etc") {
169 mkdir "$authorityserverdir/etc";
170 print "Info: created $authorityserverdir/etc\n";
171 $created_dir_or_file++;
175 # AUTHORITIES : copying mandatory files
# Each file is copied from $kohadir/misc/zebra with "cp -f" through the shell, and only
# when the destination does not exist yet. The exit status of cp is not checked in the
# visible lines, so the "Info: copied" message does not prove the copy succeeded.
177 # the record model, depending on marc flavour
# The UNIMARC model is used when the "marcflavour" preference is "UNIMARC"; the USMARC
# copy below presumably sits in the else branch (the else line is not visible here).
178 unless (-f "$authorityserverdir/tab/record.abs") {
179 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
180 system("cp -f $kohadir/misc/zebra/record_authorities_unimarc.abs $authorityserverdir/tab/record.abs");
181 print "Info: copied record.abs for UNIMARC\n";
183 system("cp -f $kohadir/misc/zebra/record_authorities_usmarc.abs $authorityserverdir/tab/record.abs");
184 print "Info: copied record.abs for USMARC\n";
186 $created_dir_or_file++;
188 unless (-f "$authorityserverdir/tab/sort-string-utf.chr") {
189 system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/sort-string-utf.chr");
190 print "Info: copied sort-string-utf.chr\n";
191 $created_dir_or_file++;
# NOTE(review): word-phrase-utf.chr is populated from sort-string-utf_french.chr, the
# same source as sort-string-utf.chr above -- confirm this is intended and not a
# copy/paste slip. The message below also spells the file name "word-phase".
193 unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") {
194 system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/word-phrase-utf.chr");
195 print "Info: copied word-phase-utf.chr\n";
196 $created_dir_or_file++;
# The authorities attribute set is installed under the name auth1.att.
198 unless (-f "$authorityserverdir/tab/auth1.att") {
199 system("cp -f $kohadir/misc/zebra/bib1_authorities.att $authorityserverdir/tab/auth1.att");
200 print "Info: copied auth1.att\n";
201 $created_dir_or_file++;
203 unless (-f "$authorityserverdir/tab/default.idx") {
204 system("cp -f $kohadir/misc/zebra/default.idx $authorityserverdir/tab/default.idx");
205 print "Info: copied default.idx\n";
206 $created_dir_or_file++;
# ccl.properties and pqf.properties go to the etc sub-directory; the commented-out
# variants would have used the ccl2rpn path from the zebra configuration instead.
209 unless (-f "$authorityserverdir/etc/ccl.properties") {
210 # system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
211 system("cp -f $kohadir/misc/zebra/ccl.properties $authorityserverdir/etc/ccl.properties");
212 print "Info: copied ccl.properties\n";
213 $created_dir_or_file++;
215 unless (-f "$authorityserverdir/etc/pqf.properties") {
216 # system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
217 system("cp -f $kohadir/misc/zebra/pqf.properties $authorityserverdir/etc/pqf.properties");
218 print "Info: copied pqf.properties\n";
219 $created_dir_or_file++;
223 # AUTHORITIES : creating the zebra configuration file
# When the configuration file named by the authorityserver "config" setting does not
# exist, it is opened for writing on the ZD handle. The unnumbered-looking lines that
# follow the open (profilePath, modulePath, recordId, lockDir, register, shadow, ...)
# are presumably the text written to that file; the statement that prints them and
# the end of that text are not visible in this excerpt.
# NOTE(review): the open has no "or die"/error check, so a failure to create the file
# would go unnoticed here.
# NOTE(review): setTmpDir points at $authorityserverdir/tmp, a directory that the
# visible directory-creation code never creates -- confirm.
225 unless (-f C4::Context->zebraconfig('authorityserver')->{config}) {
226 open ZD,">:utf8 ",C4::Context->zebraconfig('authorityserver')->{config};
228 # generated by KOHA/misc/migration_tools/rebuild_zebra.pl
229 profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
232 # Files that describe the attribute sets supported.
237 modulePath:$modulesdir/modules/
238 # Specify record type
239 iso2709.recordType:grs.marcxml.record
241 recordId: (auth1,Local-Number)
247 lockDir: $authorityserverdir/lock
250 register: $authorityserverdir/register:4G
251 shadow: $authorityserverdir/shadow:4G
253 # Temp File area for result sets
254 setTmpDir: $authorityserverdir/tmp
256 # Temp File area for index program
257 keyTmpDir: $authorityserverdir/key
259 # Approx. Memory usage during indexing
263 print "Info: creating zebra-authorities.cfg\n";
264 $created_dir_or_file++;
# Report how many directories/files were created; the "OK" message is presumably the
# else branch (the else line is not visible here).
267 if ($created_dir_or_file) {
268 print "Info: created : $created_dir_or_file directories & files\n";
270 print "Info: file & directories OK\n";
274 # exporting authorities
# Either the "SKIPPING" banner or the export itself runs; the condition that selects
# between the two is not visible in this excerpt.
# The export writes every authority, in ISO 2709 form, to
# $directory/authorities/authorities.iso2709.
277 print "====================\n";
278 print "SKIPPING authorities export\n";
279 print "====================\n";
281 print "====================\n";
282 print "exporting authorities\n";
283 print "====================\n";
284 mkdir "$directory" unless (-d $directory);
285 mkdir "$directory/authorities" unless (-d "$directory/authorities");
286 open(OUT,">:utf8","$directory/authorities/authorities.iso2709") or die $!;
# NOTE(review): $dbh is declared a second time here; a $dbh was already declared near
# the top of the script.
287 my $dbh=C4::Context->dbh;
# $limit (possibly undefined) is interpolated at the end of the statement.
289 $sth=$dbh->prepare("select authid,marc from auth_header $limit");
292 while (my ($authid,$record) = $sth->fetchrow) {
293 # FIXME : we retrieve the iso2709 record. If GetAuthority (which uses the XML) fails
294 # due to some MARC::File::XML failure, then try the iso2709
295 # (add authid & authtype if needed).
# The iso2709 value fetched into $record is replaced by the result of GetAuthority.
298 $record = GetAuthority($authid);
301 # force authid in case it's not here, otherwise, zebra will die on this authority
# NOTE(review): field('001') is dereferenced without a visible existence check; confirm
# that the surrounding lines missing from this excerpt protect against records that
# have no 001 field at all.
302 unless ($record->field('001')->data() eq $authid){
303 print "$authid don't exist for this authority :".$record->as_formatted;
304 $record->delete_field($record->field('001'));
305 $record->insert_fields_ordered(MARC::Field->new('001',$authid));
308 print " There was some pb getting authority : ".$authid."\n";
# Progress indicator: the counter is shown each time it is a multiple of 100.
313 print "\r$i" unless ($i++ %100);
314 # # remove the leader length, which could be wrong; it will be calculated automatically by as_usmarc
315 # # otherwise, if it's wrong, zebra will fail miserably (and never index what is after the failing record)
# The first 5 characters of the leader and the 7 characters starting at offset 10 are
# overwritten, then the leader is cut to 24 characters. NOTE(review): the replacement
# strings shown here are shorter than the spans they replace; whitespace inside the
# quotes may have been collapsed in this excerpt -- check against the real file.
316 my $leader=$record->leader;
317 substr($leader,0,5)=' ';
318 substr($leader,10,7)='22 ';
319 $record->leader(substr($leader,0,24));
320 print OUT $record->as_usmarc;
326 # and reindexing everything
# zebraidx is run through the shell against the authorities database: "init" only when
# $reset is true, then "update" on the export directory, then "commit".
# The command name is given without a path (the $zebraidxdir found earlier is not used
# here), and the exit status of the three commands is not checked in the visible lines.
328 print "====================\n";
329 print "REINDEXING zebra\n";
330 print "====================\n";
331 system("zebraidx -c ".C4::Context->zebraconfig('authorityserver')->{config}." -g iso2709 -d authorities init") if ($reset);
332 system("zebraidx -c ".C4::Context->zebraconfig('authorityserver')->{config}." -g iso2709 -d authorities update $directory/authorities");
333 system("zebraidx -c ".C4::Context->zebraconfig('authorityserver')->{config}." -g iso2709 -d authorities commit");
# Presumably printed by an else branch whose condition is not visible in this excerpt.
335 print "skipping authorities\n";
337 #################################################################################################################
339 #################################################################################################################
342 print "====================\n";
343 print "checking directories & files for biblios\n";
344 print "====================\n";
347 # BIBLIOS : creating directory structure
# Mirrors the authorities section: the biblio server directory is created through the
# shell ("mkdir -p"), then the lock, register, shadow, tab, key and etc sub-directories
# are created with Perl's mkdir when missing. Each creation increments
# $created_dir_or_file.
# NOTE(review): none of the visible mkdir/system calls has its result checked, and no
# "tmp" sub-directory is created here although the generated configuration points
# setTmpDir at $biblioserverdir/tmp -- confirm.
349 unless (-d "$biblioserverdir") {
350 system("mkdir -p $biblioserverdir");
351 print "Info: created $biblioserverdir\n";
352 $created_dir_or_file++;
354 unless (-d "$biblioserverdir/lock") {
355 mkdir "$biblioserverdir/lock";
356 print "Info: created $biblioserverdir/lock\n";
357 $created_dir_or_file++;
359 unless (-d "$biblioserverdir/register") {
360 mkdir "$biblioserverdir/register";
361 print "Info: created $biblioserverdir/register\n";
362 $created_dir_or_file++;
364 unless (-d "$biblioserverdir/shadow") {
365 mkdir "$biblioserverdir/shadow";
366 print "Info: created $biblioserverdir/shadow\n";
367 $created_dir_or_file++;
369 unless (-d "$biblioserverdir/tab") {
370 mkdir "$biblioserverdir/tab";
371 print "Info: created $biblioserverdir/tab\n";
372 $created_dir_or_file++;
374 unless (-d "$biblioserverdir/key") {
375 mkdir "$biblioserverdir/key";
376 print "Info: created $biblioserverdir/key\n";
377 $created_dir_or_file++;
379 unless (-d "$biblioserverdir/etc") {
380 mkdir "$biblioserverdir/etc";
381 print "Info: created $biblioserverdir/etc\n";
382 $created_dir_or_file++;
386 # BIBLIOS : copying mandatory files
# Each file is copied from $kohadir/misc/zebra with "cp -f" through the shell, and only
# when the destination does not exist yet. The exit status of cp is not checked in the
# visible lines, so the "Info: copied" message does not prove the copy succeeded.
388 # the record model, depending on marc flavour
# The UNIMARC model is used when the "marcflavour" preference is "UNIMARC"; the USMARC
# copy below presumably sits in the else branch (the else line is not visible here).
389 unless (-f "$biblioserverdir/tab/record.abs") {
390 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
391 system("cp -f $kohadir/misc/zebra/record_biblios_unimarc.abs $biblioserverdir/tab/record.abs");
392 print "Info: copied record.abs for UNIMARC\n";
394 system("cp -f $kohadir/misc/zebra/record_biblios_usmarc.abs $biblioserverdir/tab/record.abs");
395 print "Info: copied record.abs for USMARC\n";
397 $created_dir_or_file++;
399 unless (-f "$biblioserverdir/tab/sort-string-utf.chr") {
400 system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/sort-string-utf.chr");
401 print "Info: copied sort-string-utf.chr\n";
402 $created_dir_or_file++;
# NOTE(review): word-phrase-utf.chr is populated from sort-string-utf_french.chr, the
# same source as sort-string-utf.chr above -- confirm this is intended and not a
# copy/paste slip. The message below also spells the file name "word-phase".
404 unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") {
405 system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/word-phrase-utf.chr");
406 print "Info: copied word-phase-utf.chr\n";
407 $created_dir_or_file++;
# The biblios attribute set is installed under the name bib1.att.
409 unless (-f "$biblioserverdir/tab/bib1.att") {
410 system("cp -f $kohadir/misc/zebra/bib1_biblios.att $biblioserverdir/tab/bib1.att");
411 print "Info: copied bib1.att\n";
412 $created_dir_or_file++;
414 unless (-f "$biblioserverdir/tab/default.idx") {
415 system("cp -f $kohadir/misc/zebra/default.idx $biblioserverdir/tab/default.idx");
416 print "Info: copied default.idx\n";
417 $created_dir_or_file++;
# ccl.properties and pqf.properties go to the etc sub-directory; the commented-out
# variants would have used the ccl2rpn path from the zebra configuration instead.
419 unless (-f "$biblioserverdir/etc/ccl.properties") {
420 # system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
421 system("cp -f $kohadir/misc/zebra/ccl.properties $biblioserverdir/etc/ccl.properties");
422 print "Info: copied ccl.properties\n";
423 $created_dir_or_file++;
425 unless (-f "$biblioserverdir/etc/pqf.properties") {
426 # system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
427 system("cp -f $kohadir/misc/zebra/pqf.properties $biblioserverdir/etc/pqf.properties");
428 print "Info: copied pqf.properties\n";
429 $created_dir_or_file++;
433 # BIBLIOS : creating the zebra configuration file
# When the configuration file named by the biblioserver "config" setting does not
# exist, it is opened for writing on the ZD handle. The lines that follow the open
# (profilePath, modulePath, recordId, lockDir, register, shadow, ...) are presumably
# the text written to that file; the statement that prints them and the end of that
# text are not visible in this excerpt.
# NOTE(review): the open has no "or die"/error check, so a failure to create the file
# would go unnoticed here.
# NOTE(review): setTmpDir points at $biblioserverdir/tmp, a directory that the visible
# directory-creation code never creates -- confirm.
435 unless (-f C4::Context->zebraconfig('biblioserver')->{config}) {
436 open ZD,">:utf8 ",C4::Context->zebraconfig('biblioserver')->{config};
438 # generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl
439 profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
442 # Files that describe the attribute sets supported.
447 modulePath:$modulesdir/modules/
448 # Specify record type
449 iso2709.recordType:grs.marcxml.record
451 recordId: (bib1,Local-Number)
457 lockDir: $biblioserverdir/lock
460 register: $biblioserverdir/register:4G
461 shadow: $biblioserverdir/shadow:4G
463 # Temp File area for result sets
464 setTmpDir: $biblioserverdir/tmp
466 # Temp File area for index program
467 keyTmpDir: $biblioserverdir/key
469 # Approx. Memory usage during indexing
473 print "Info: creating zebra-biblios.cfg\n";
474 $created_dir_or_file++;
# Report how many directories/files were created; the "OK" message is presumably the
# else branch (the else line is not visible here).
477 if ($created_dir_or_file) {
478 print "Info: created : $created_dir_or_file directories & files\n";
480 print "Info: file & directories OK\n";
488 print "====================\n";
489 print "SKIPPING biblio export\n";
490 print "====================\n";
492 print "====================\n";
493 print "exporting biblios\n";
494 print "====================\n";
495 mkdir "$directory" unless (-d $directory);
496 mkdir "$directory/biblios" unless (-d "$directory/biblios");
497 open(OUT,">:utf8 ","$directory/biblios/export") or die $!;
# NOTE(review): $dbh is declared a second time here; a $dbh was already declared near
# the top of the script.
498 my $dbh=C4::Context->dbh;
# First export variant: records are rebuilt from the ISO 2709 blob stored in
# biblioitems.marc. $limit (possibly undefined) is interpolated at the end.
501 $sth=$dbh->prepare("select biblionumber,marc from biblioitems order by biblionumber $limit");
504 while (my ($biblionumber,$marc) = $sth->fetchrow) {
506 $record=MARC::Record->new_from_usmarc($marc);
507 my $record_correct=1;
508 # skip incorrect records : isn't this bogus, as just after we reintroduce biblionumber if it's missing ?
509 # FIXME next unless $record->field($biblionumbertagfield);
510 # check if biblionumber is present, otherwise, add it on the fly
# NOTE(review): ->data() is called on the result of field() without checking that the
# field exists; the GetMarcBiblio-based loop further down tests the field first.
511 if ($biblionumbertagfield eq '001') {
512 unless ($record->field($biblionumbertagfield)->data()) {
515 # if the field holding the biblionumber already exists, just update it, otherwise create it
516 if ($record->field($biblionumbertagfield)) {
517 $field = $record->field($biblionumbertagfield);
518 $field->update($biblionumber);
521 $newfield = MARC::Field->new( $biblionumbertagfield, $biblionumber);
522 $record->append_fields($newfield);
# Same repair when the biblionumber lives in a subfield rather than in field 001.
526 unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) {
529 # if the field holding the biblionumber already exists, just update it, otherwise create it
530 if ($record->field($biblionumbertagfield)) {
531 $field = $record->field($biblionumbertagfield);
532 $field->add_subfields($biblionumbertagsubfield => $biblionumber);
535 $newfield = MARC::Field->new( $biblionumbertagfield,'','', $biblionumbertagsubfield => $biblionumber);
536 $record->append_fields($newfield);
539 # warn "FIXED BIBLIONUMBER".$record->as_formatted;
# Repair of a missing biblioitemnumber. Tags below 10 are handled as control fields
# (data set directly, no subfields); other tags get the value in a subfield.
# NOTE(review): the value stored is $biblionumber -- the query above does not select
# a biblioitemnumber -- confirm that the two numbers are expected to be equal.
541 unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) {
543 # warn "INCORRECT BIBLIOITEMNUMBER :".$record->as_formatted;
545 # if the field holding the biblioitemnumber already exists, just update it, otherwise create it
546 if ($record->field($biblioitemnumbertagfield)) {
547 $field = $record->field($biblioitemnumbertagfield);
548 if ($biblioitemnumbertagfield <10) {
549 $field->update($biblionumber);
551 $field->add_subfields($biblioitemnumbertagsubfield => $biblionumber);
555 if ($biblioitemnumbertagfield <10) {
556 $newfield = MARC::Field->new( $biblioitemnumbertagfield, $biblionumber);
558 $newfield = MARC::Field->new( $biblioitemnumbertagfield,'','', $biblioitemnumbertagsubfield => $biblionumber);
560 $record->insert_grouped_field($newfield);
562 # warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted;
# The first 5 characters of the leader and the 7 characters starting at offset 10 are
# overwritten, then the leader is cut to 24 characters before the record is written.
# NOTE(review): the replacement strings shown here are shorter than the spans they
# replace; whitespace inside the quotes may have been collapsed in this excerpt.
564 my $leader=$record->leader;
565 substr($leader,0,5)=' ';
566 substr($leader,10,7)='22 ';
567 $record->leader(substr($leader,0,24));
568 print OUT $record->as_usmarc();
# Second export variant: only the biblionumbers are selected and each record is
# loaded with GetMarcBiblio. $limit (possibly undefined) is interpolated at the end.
572 $sth=$dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber $limit");
575 while (my ($biblionumber) = $sth->fetchrow) {
# Progress indicator: the counter is shown each time it is a multiple of 100.
577 print "\r$i" unless ($i++ %100);
580 $record = GetMarcBiblio($biblionumber);
583 print " There was some pb getting biblio : #".$biblionumber."\n";
587 # warn $record->as_formatted;
588 # die if $record->subfield('090','9') eq 11;
590 # check that biblionumber & biblioitemnumber are stored in the MARC record, otherwise, add them & update the biblioitems.marcxml data.
# $record_correct starts true; the statements that clear it are not visible in this
# excerpt (presumably inside the repair branches below).
591 my $record_correct=1;
592 # skip incorrect records : isn't this bogus, as just after we reintroduce biblionumber if it's missing ?
593 # FIXME next unless $record->field($biblionumbertagfield);
594 # check if biblionumber is present, otherwise, add it on the fly
595 if ($biblionumbertagfield eq '001') {
596 unless ($record->field($biblionumbertagfield) && $record->field($biblionumbertagfield)->data()) {
599 # if the field holding the biblionumber already exists, just update it, otherwise create it
600 if ($record->field($biblionumbertagfield)) {
601 $field = $record->field($biblionumbertagfield);
602 $field->update($biblionumber);
605 $newfield = MARC::Field->new( $biblionumbertagfield, $biblionumber);
606 $record->append_fields($newfield);
# Same repair when the biblionumber lives in a subfield rather than in field 001.
610 unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) {
611 # warn "fixing biblionumber for $biblionumbertagfield,$biblionumbertagsubfield = $biblionumber";
614 # if the field holding the biblionumber already exists, just update it, otherwise create it
615 if ($record->field($biblionumbertagfield)) {
616 $field = $record->field($biblionumbertagfield);
617 $field->add_subfields($biblionumbertagsubfield => $biblionumber);
620 $newfield = MARC::Field->new( $biblionumbertagfield,'','', $biblionumbertagsubfield => $biblionumber);
621 $record->append_fields($newfield);
624 # warn "FIXED BIBLIONUMBER".$record->as_formatted;
# Repair of a missing biblioitemnumber. Tags below 10 are handled as control fields
# (data set directly, no subfields); other tags get the value in a subfield.
# NOTE(review): the value stored is $biblionumber -- the query above does not select
# a biblioitemnumber -- confirm that the two numbers are expected to be equal.
626 unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) {
627 # warn "fixing biblioitemnumber for $biblioitemnumbertagfield,$biblioitemnumbertagsubfield = $biblionumber";
630 # if the field holding the biblioitemnumber already exists, just update it, otherwise create it
631 if ($record->field($biblioitemnumbertagfield)) {
632 $field = $record->field($biblioitemnumbertagfield);
633 if ($biblioitemnumbertagfield <10) {
634 $field->update($biblionumber);
636 $field->add_subfields($biblioitemnumbertagsubfield => $biblionumber);
640 if ($biblioitemnumbertagfield <10) {
641 $newfield = MARC::Field->new( $biblioitemnumbertagfield, $biblionumber);
643 $newfield = MARC::Field->new( $biblioitemnumbertagfield,'','', $biblioitemnumbertagsubfield => $biblionumber);
645 $record->insert_grouped_field($newfield);
647 # warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted;
# Despite its name, $encoding holds the value of the "marcflavour" preference.
649 my $encoding = C4::Context->preference("marcflavour");
650 # deal with UNIMARC field 100 (encoding) : create it if needed & set encoding to unicode
# When 100$a is exactly 35 characters long its value is reused and the existing field
# 100 is deleted; the strftime line presumably belongs to the branch taken otherwise
# (the else line is not visible here). The string is then padded to 35 characters,
# the 6 characters at offset 22 are overwritten with "frey50", and a new field 100 is
# inserted when the record has no 100$a.
651 if ( $encoding eq "UNIMARC" ) {
653 if ( length($record->subfield( 100, "a" )) == 35 ) {
654 $string = $record->subfield( 100, "a" );
655 my $f100 = $record->field(100);
656 $record->delete_field($f100);
659 $string = POSIX::strftime( "%Y%m%d", localtime );
661 $string = sprintf( "%-*s", 35, $string );
663 substr( $string, 22, 6, "frey50" );
664 unless ( $record->subfield( 100, "a" ) ) {
665 $record->insert_grouped_field(
666 MARC::Field->new( 100, "", "", "a" => $string ) );
# A record flagged as incorrect is written back to biblioitems.marcxml (and the whole
# XML is emitted through warn).
669 unless ($record_correct) {
670 my $update_xml = $dbh->prepare("update biblioitems set marcxml=? where biblionumber=?");
671 warn "UPDATING $biblionumber (missing biblionumber or biblioitemnumber in MARC record : ".$record->as_xml;
672 $update_xml->execute($record->as_xml,$biblionumber);
674 # remove the leader length, which could be wrong; it will be calculated automatically by as_usmarc
675 # otherwise, if it's wrong, zebra will fail miserably (and never index what is after the failing record)
# NOTE(review): the replacement strings shown here are shorter than the spans they
# replace; whitespace inside the quotes may have been collapsed in this excerpt.
676 my $leader=$record->leader;
677 substr($leader,0,5)=' ';
678 substr($leader,10,7)='22 ';
679 $record->leader(substr($leader,0,24));
680 print OUT $record->as_usmarc();
687 # and reindexing everything
# zebraidx is run through the shell against the biblios database: "init" only when
# $reset is true, then "update" on the export directory, then "commit".
# The command name is given without a path, and the exit status of the three commands
# is not checked in the visible lines.
689 print "====================\n";
690 print "REINDEXING zebra\n";
691 print "====================\n";
692 system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios init") if ($reset);
693 system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios update $directory/biblios");
694 system("zebraidx -g iso2709 -c ".C4::Context->zebraconfig('biblioserver')->{config}." -d biblios commit");
# Presumably printed by an else branch whose condition is not visible in this excerpt.
696 print "skipping biblios\n";
699 print "====================\n";
701 print "====================\n";
# Final step: either the export directory is kept (first message) or it is removed
# with "rm -rf"; the condition choosing between the two is not visible here.
# NOTE(review): $directory is interpolated unquoted into a shell command -- confirm it
# can never be empty or contain whitespace/shell metacharacters before it is deleted.
703 print "NOTHING cleaned : the $directory has been kept. You can re-run this script with the -s parameter if you just want to rebuild zebra after changing the record.abs or another zebra config file\n";
705 system("rm -rf $directory");
706 print "directory $directory deleted\n";