X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=misc%2Fmigration_tools%2Frebuild_zebra.pl;h=02f50b7b30f0571e857681b7f6b8845609d07388;hb=262a6e2a9a16b404a42c34961e0d15140ead6347;hp=0a2d1220df905c4e96410bf938a302898ef9b102;hpb=5ff7fcffa4b1e34c06cdb171503516b11c2350d4;p=koha_fer diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 0a2d1220df..02f50b7b30 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -14,19 +14,21 @@ use strict; $|=1; # flushes output # limit for database dumping -my $limit;# = "LIMIT 1000"; +my $limit;# = "LIMIT 1"; my $directory; my $skip_export; my $keep_export; my $reset; my $biblios; my $authorities; +my $noxml; GetOptions( 'd:s' => \$directory, 'reset' => \$reset, 's' => \$skip_export, 'k' => \$keep_export, 'b' => \$biblios, + 'noxml' => \$noxml, 'a' => \$authorities, ); @@ -175,44 +177,44 @@ if ($authorities) { # the record model, depending on marc flavour unless (-f "$authorityserverdir/tab/record.abs") { if (C4::Context->preference("marcflavour") eq "UNIMARC") { - system("cp -f $kohadir/misc/zebra/record_authorities_unimarc.abs $authorityserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/authorities/etc/record_unimarc.abs $authorityserverdir/tab/record.abs"); print "Info: copied record.abs for UNIMARC\n"; } else { - system("cp -f $kohadir/misc/zebra/record_authorities_usmarc.abs $authorityserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/authorities/etc/record.abs $authorityserverdir/tab/record.abs"); print "Info: copied record.abs for USMARC\n"; } $created_dir_or_file++; } unless (-f "$authorityserverdir/tab/sort-string-utf.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/sort-string-utf.chr"); + system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $authorityserverdir/tab/sort-string-utf.chr"); print "Info: copied sort-string-utf.chr\n"; $created_dir_or_file++; } unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/word-phrase-utf.chr"); + system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $authorityserverdir/tab/word-phrase-utf.chr"); print "Info: copied word-phase-utf.chr\n"; $created_dir_or_file++; } unless (-f "$authorityserverdir/tab/auth1.att") { - system("cp -f $kohadir/misc/zebra/bib1_authorities.att $authorityserverdir/tab/auth1.att"); + system("cp -f $kohadir/etc/zebradb/authorities/etc/bib1.att $authorityserverdir/tab/auth1.att"); print "Info: copied auth1.att\n"; $created_dir_or_file++; } unless (-f "$authorityserverdir/tab/default.idx") { - system("cp -f $kohadir/misc/zebra/default.idx $authorityserverdir/tab/default.idx"); + system("cp -f $kohadir/etc/zebradb/etc/default.idx $authorityserverdir/tab/default.idx"); print "Info: copied default.idx\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraconfig('authorityserver')->{ccl2rpn}) { -# system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); - system("cp -f $kohadir/misc/zebra/ccl.properties $authorityserverdir/etc/ccl.properties"); + unless (-f "$authorityserverdir/etc/ccl.properties") { +# system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/ccl.properties $authorityserverdir/etc/ccl.properties"); print "Info: copied ccl.properties\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraconfig('authorityserver')->{cql2rpn}) { -# system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); - system("cp -f $kohadir/misc/zebra/pqf.properties $authorityserverdir/etc/pqf.properties"); + unless (-f "$authorityserverdir/etc/pqf.properties") { +# system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/pqf.properties $authorityserverdir/etc/pqf.properties"); print "Info: copied pqf.properties\n"; $created_dir_or_file++; } @@ -245,8 +247,6 @@ storeData:1 lockDir: $authorityserverdir/lock perm.anonymous:r perm.kohaadmin:rw -passw.kohalis -shadow register: $authorityserverdir/register:4G shadow: $authorityserverdir/shadow:4G @@ -286,14 +286,38 @@ rank:rank-1 open(OUT,">:utf8","$directory/authorities/authorities.iso2709") or die $!; my $dbh=C4::Context->dbh; my $sth; - $sth=$dbh->prepare("select authid from auth_header $limit"); + $sth=$dbh->prepare("select authid,marc from auth_header $limit"); $sth->execute(); my $i=0; - while (my ($authid) = $sth->fetchrow) { - my $record = GetAuthority($authid); + while (my ($authid,$record) = $sth->fetchrow) { + # FIXME : we retrieve the iso2709 record. if the GetAuthority (that uses the XML) fails + # due to some MARC::File::XML failure, then try the iso2709, + # (add authid & authtype if needed) + my $record; + eval { + $record = GetAuthority($authid); + }; + next unless $record; + # force authid in case it's not here, otherwise, zebra will die on this authority + unless ($record->field('001')->data() eq $authid){ + print "$authid don't exist for this authority :".$record->as_formatted; + $record->delete_field($record->field('001')); + $record->insert_fields_ordered(MARC::Field->new('001',$authid)); + } + if($@){ + print " There was some pb getting authority : ".$authid."\n"; + next; + } + print "."; print "\r$i" unless ($i++ %100); - print OUT $record->as_usmarc(); +# # remove leader length, that could be wrong, it will be calculated automatically by as_usmarc +# # otherwise, if it's wron, zebra will fail miserabily (and never index what is after the failing record) + my $leader=$record->leader; + substr($leader,0,5)=' '; + substr($leader,10,7)='22 '; + $record->leader(substr($leader,0,24)); + print OUT $record->as_usmarc; } close(OUT); } @@ -364,43 +388,43 @@ if ($biblios) { # the record model, depending on marc flavour unless (-f "$biblioserverdir/tab/record.abs") { if (C4::Context->preference("marcflavour") eq "UNIMARC") { - system("cp -f $kohadir/misc/zebra/record_biblios_unimarc.abs $biblioserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/biblios/etc/record_unimarc.abs $biblioserverdir/tab/record.abs"); print "Info: copied record.abs for UNIMARC\n"; } else { - system("cp -f $kohadir/misc/zebra/record_biblios_usmarc.abs $biblioserverdir/tab/record.abs"); + system("cp -f $kohadir/etc/zebradb/biblios/etc/record.abs $biblioserverdir/tab/record.abs"); print "Info: copied record.abs for USMARC\n"; } $created_dir_or_file++; } unless (-f "$biblioserverdir/tab/sort-string-utf.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/sort-string-utf.chr"); + system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $biblioserverdir/tab/sort-string-utf.chr"); print "Info: copied sort-string-utf.chr\n"; $created_dir_or_file++; } unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") { - system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/word-phrase-utf.chr"); + system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $biblioserverdir/tab/word-phrase-utf.chr"); print "Info: copied word-phase-utf.chr\n"; $created_dir_or_file++; } unless (-f "$biblioserverdir/tab/bib1.att") { - system("cp -f $kohadir/misc/zebra/bib1_biblios.att $biblioserverdir/tab/bib1.att"); + system("cp -f $kohadir/etc/zebradb/biblios/etc/bib1.att $biblioserverdir/tab/bib1.att"); print "Info: copied bib1.att\n"; $created_dir_or_file++; } unless (-f "$biblioserverdir/tab/default.idx") { - system("cp -f $kohadir/misc/zebra/default.idx $biblioserverdir/tab/default.idx"); + system("cp -f $kohadir/etc/zebradb/etc/default.idx $biblioserverdir/tab/default.idx"); print "Info: copied default.idx\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraconfig('biblioserver')->{ccl2rpn}) { -# system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); - system("cp -f $kohadir/misc/zebra/ccl.properties $biblioserverdir/etc/ccl.properties"); + unless (-f "$biblioserverdir/etc/ccl.properties") { +# system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/ccl.properties $biblioserverdir/etc/ccl.properties"); print "Info: copied ccl.properties\n"; $created_dir_or_file++; } - unless (-f C4::Context->zebraconfig('biblioserver')->{cql2rpn}) { -# system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); - system("cp -f $kohadir/misc/zebra/pqf.properties $biblioserverdir/etc/pqf.properties"); + unless (-f "$biblioserverdir/etc/pqf.properties") { +# system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); + system("cp -f $kohadir/etc/zebradb/pqf.properties $biblioserverdir/etc/pqf.properties"); print "Info: copied pqf.properties\n"; $created_dir_or_file++; } @@ -433,8 +457,6 @@ storeData:1 lockDir: $biblioserverdir/lock perm.anonymous:r perm.kohaadmin:rw -passw.kohalis -shadow register: $biblioserverdir/register:4G shadow: $biblioserverdir/shadow:4G @@ -475,19 +497,106 @@ rank:rank-1 open(OUT,">:utf8 ","$directory/biblios/export") or die $!; my $dbh=C4::Context->dbh; my $sth; - $sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit"); + if ($noxml){ + $sth=$dbh->prepare("select biblionumber,marc from biblioitems order by biblionumber $limit"); + $sth->execute(); + my $i=0; + while (my ($biblionumber,$marc) = $sth->fetchrow) { + my $record; + $record=MARC::Record->new_from_usmarc($marc); + my $record_correct=1; + # skip uncorrect records : isn't this bogus, as just after we reintroduce biblionumber if it's missing ? + # FIXME next unless $record->field($biblionumbertagfield); + # check if biblionumber is present, otherwise, add it on the fly + if ($biblionumbertagfield eq '001') { + unless ($record->field($biblionumbertagfield)->data()) { + $record_correct=0; + my $field; + # if the field where biblionumber is already exist, just update it, otherwise create it + if ($record->field($biblionumbertagfield)) { + $field = $record->field($biblionumbertagfield); + $field->update($biblionumber); + } else { + my $newfield; + $newfield = MARC::Field->new( $biblionumbertagfield, $biblionumber); + $record->append_fields($newfield); + } + } + } else { + unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) { + $record_correct=0; + my $field; + # if the field where biblionumber is already exist, just update it, otherwise create it + if ($record->field($biblionumbertagfield)) { + $field = $record->field($biblionumbertagfield); + $field->add_subfields($biblionumbertagsubfield => $biblionumber); + } else { + my $newfield; + $newfield = MARC::Field->new( $biblionumbertagfield,'','', $biblionumbertagsubfield => $biblionumber); + $record->append_fields($newfield); + } + } + # warn "FIXED BIBLIONUMBER".$record->as_formatted; + } + unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) { + $record_correct=0; + # warn "INCORRECT BIBLIOITEMNUMBER :".$record->as_formatted; + my $field; + # if the field where biblionumber is already exist, just update it, otherwise create it + if ($record->field($biblioitemnumbertagfield)) { + $field = $record->field($biblioitemnumbertagfield); + if ($biblioitemnumbertagfield <10) { + $field->update($biblionumber); + } else { + $field->add_subfields($biblioitemnumbertagsubfield => $biblionumber); + } + } else { + my $newfield; + if ($biblioitemnumbertagfield <10) { + $newfield = MARC::Field->new( $biblioitemnumbertagfield, $biblionumber); + } else { + $newfield = MARC::Field->new( $biblioitemnumbertagfield,'','', $biblioitemnumbertagsubfield => $biblionumber); + } + $record->insert_grouped_field($newfield); + } + # warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted; + } + my $leader=$record->leader; + substr($leader,0,5)=' '; + substr($leader,10,7)='22 '; + $record->leader(substr($leader,0,24)); + print OUT $record->as_usmarc(); + } + close (OUT); + } else { + $sth=$dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber $limit"); $sth->execute(); my $i=0; while (my ($biblionumber) = $sth->fetchrow) { - my $record = GetMarcBiblio($biblionumber); -# warn $record->as_formatted; + print "."; + print "\r$i" unless ($i++ %100); + my $record; + eval { + $record = GetMarcBiblio($biblionumber); + }; + if($@){ + print " There was some pb getting biblio : #".$biblionumber."\n"; + next; + } + next unless $record; # die if $record->subfield('090','9') eq 11; # print $record; # check that biblionumber & biblioitemnumber are stored in the MARC record, otherwise, add them & update the biblioitems.marcxml data. my $record_correct=1; - next unless $record->field($biblionumbertagfield); + # skip uncorrect records : isn't this bogus, as just after we reintroduce biblionumber if it's missing ? + # FIXME next unless $record->field($biblionumbertagfield); + # + # + # CHECK biblionumber + # + # if ($biblionumbertagfield eq '001') { - unless ($record->field($biblionumbertagfield)->data()) { + unless ($record->field($biblionumbertagfield) && $record->field($biblionumbertagfield)->data()) { $record_correct=0; my $field; # if the field where biblionumber is already exist, just update it, otherwise create it @@ -502,6 +611,7 @@ rank:rank-1 } } else { unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) { +# warn "fixing biblionumber for $biblionumbertagfield,$biblionumbertagsubfield = $biblionumber"; $record_correct=0; my $field; # if the field where biblionumber is already exist, just update it, otherwise create it @@ -514,11 +624,16 @@ rank:rank-1 $record->append_fields($newfield); } } - # warn "FIXED BIBLIONUMBER".$record->as_formatted; +# warn "FIXED BIBLIONUMBER".$record->as_formatted; } + # + # + # CHECK BIBLIOITEMNUMBER + # + # unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) { +# warn "fixing biblioitemnumber for $biblioitemnumbertagfield,$biblioitemnumbertagsubfield = $biblionumber"; $record_correct=0; - # warn "INCORRECT BIBLIOITEMNUMBER :".$record->as_formatted; my $field; # if the field where biblionumber is already exist, just update it, otherwise create it if ($record->field($biblioitemnumbertagfield)) { @@ -539,16 +654,47 @@ rank:rank-1 } # warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted; } + # + # + # CHECK FIELD 100 + # + # + my $encoding = C4::Context->preference("marcflavour"); + # deal with UNIMARC field 100 (encoding) : create it if needed & set encoding to unicode + if ( $encoding eq "UNIMARC" ) { + my $string; + if ( length($record->subfield( 100, "a" )) == 35 ) { + $string = $record->subfield( 100, "a" ); + my $f100 = $record->field(100); + $record->delete_field($f100); + } + else { + $string = POSIX::strftime( "%Y%m%d", localtime ); + $string =~ s/\-//g; + $string = sprintf( "%-*s", 35, $string ); + } + substr( $string, 22, 6, "frey50" ); + unless ( length($record->subfield( 100, "a" )) == 35 ) { + $record->delete_field($record->field(100)); + $record->insert_grouped_field( + MARC::Field->new( 100, "", "", "a" => $string ) ); + } + } unless ($record_correct) { my $update_xml = $dbh->prepare("update biblioitems set marcxml=? where biblionumber=?"); warn "UPDATING $biblionumber (missing biblionumber or biblioitemnumber in MARC record : ".$record->as_xml; $update_xml->execute($record->as_xml,$biblionumber); } - print "."; - print "\r$i" unless ($i++ %100); + # remove leader length, that could be wrong, it will be calculated automatically by as_usmarc + # otherwise, if it's wron, zebra will fail miserabily (and never index what is after the failing record) + my $leader=$record->leader; + substr($leader,0,5)=' '; + substr($leader,10,7)='22 '; + $record->leader(substr($leader,0,24)); print OUT $record->as_usmarc(); } close(OUT); + } } #