Updating rebuild_zebra.pl: it now uses the config files under etc/zebradb
[koha_fer] / misc / migration_tools / rebuild_zebra.pl
index 1bc8259..02f50b7 100755 (executable)
@@ -21,12 +21,14 @@ my $keep_export;
 my $reset;
 my $biblios;
 my $authorities;
+my $noxml;
 GetOptions(
        'd:s'      => \$directory,
        'reset'      => \$reset,
        's'        => \$skip_export,
        'k'        => \$keep_export,
        'b'        => \$biblios,
+       'noxml'        => \$noxml,
        'a'        => \$authorities,
        );
 
@@ -175,44 +177,44 @@ if ($authorities) {
     # the record model, depending on marc flavour
     unless (-f "$authorityserverdir/tab/record.abs") {
         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
-            system("cp -f $kohadir/misc/zebra/record_authorities_unimarc.abs $authorityserverdir/tab/record.abs");
+            system("cp -f $kohadir/etc/zebradb/authorities/etc/record_unimarc.abs $authorityserverdir/tab/record.abs");
             print "Info: copied record.abs for UNIMARC\n";
         } else {
-            system("cp -f $kohadir/misc/zebra/record_authorities_usmarc.abs $authorityserverdir/tab/record.abs");
+            system("cp -f $kohadir/etc/zebradb/authorities/etc/record.abs $authorityserverdir/tab/record.abs");
             print "Info: copied record.abs for USMARC\n";
         }
         $created_dir_or_file++;
     }
     unless (-f "$authorityserverdir/tab/sort-string-utf.chr") {
-        system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/sort-string-utf.chr");
+        system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $authorityserverdir/tab/sort-string-utf.chr");
         print "Info: copied sort-string-utf.chr\n";
         $created_dir_or_file++;
     }
     unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") {
-        system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $authorityserverdir/tab/word-phrase-utf.chr");
+        system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $authorityserverdir/tab/word-phrase-utf.chr");
         print "Info: copied word-phase-utf.chr\n";
         $created_dir_or_file++;
     }
     unless (-f "$authorityserverdir/tab/auth1.att") {
-        system("cp -f $kohadir/misc/zebra/bib1_authorities.att $authorityserverdir/tab/auth1.att");
+        system("cp -f $kohadir/etc/zebradb/authorities/etc/bib1.att $authorityserverdir/tab/auth1.att");
         print "Info: copied auth1.att\n";
         $created_dir_or_file++;
     }
     unless (-f "$authorityserverdir/tab/default.idx") {
-        system("cp -f $kohadir/misc/zebra/default.idx $authorityserverdir/tab/default.idx");
+        system("cp -f $kohadir/etc/zebradb/etc/default.idx $authorityserverdir/tab/default.idx");
         print "Info: copied default.idx\n";
         $created_dir_or_file++;
     }
     
     unless (-f "$authorityserverdir/etc/ccl.properties") {
-#         system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
-        system("cp -f $kohadir/misc/zebra/ccl.properties $authorityserverdir/etc/ccl.properties");
+#         system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
+        system("cp -f $kohadir/etc/zebradb/ccl.properties $authorityserverdir/etc/ccl.properties");
         print "Info: copied ccl.properties\n";
         $created_dir_or_file++;
     }
     unless (-f "$authorityserverdir/etc/pqf.properties") {
-#         system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
-        system("cp -f $kohadir/misc/zebra/pqf.properties $authorityserverdir/etc/pqf.properties");
+#         system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
+        system("cp -f $kohadir/etc/zebradb/pqf.properties $authorityserverdir/etc/pqf.properties");
         print "Info: copied pqf.properties\n";
         $created_dir_or_file++;
     }
@@ -245,8 +247,6 @@ storeData:1
 lockDir: $authorityserverdir/lock
 perm.anonymous:r
 perm.kohaadmin:rw
-passw.passwd
-shadow
 register: $authorityserverdir/register:4G
 shadow: $authorityserverdir/shadow:4G
 
@@ -286,28 +286,38 @@ rank:rank-1
         open(OUT,">:utf8","$directory/authorities/authorities.iso2709") or die $!;
         my $dbh=C4::Context->dbh;
         my $sth;
-        $sth=$dbh->prepare("select authid from auth_header $limit");
+        $sth=$dbh->prepare("select authid,marc from auth_header $limit");
         $sth->execute();
         my $i=0;
-        while (my ($authid) = $sth->fetchrow) {
+        while (my ($authid,$record) = $sth->fetchrow) {
+            # FIXME: we retrieve the iso2709 record too. If GetAuthority (which uses the XML) fails
+            # due to some MARC::File::XML failure, then try the iso2709 record
+            # (adding authid & authtype if needed).
             my $record;
             eval {
                 $record = GetAuthority($authid);
             };
+            next unless $record;
+            # force the authid into field 001 if it is not already there; otherwise zebra will die on this authority
+            unless ($record->field('001')->data() eq $authid){
+                print "$authid don't exist for this authority :".$record->as_formatted;
+                $record->delete_field($record->field('001'));
+                $record->insert_fields_ordered(MARC::Field->new('001',$authid));
+            }
             if($@){
                 print "  There was some pb getting authority : ".$authid."\n";
             next;
             }
-
+               
             print ".";
             print "\r$i" unless ($i++ %100);
-            # remove leader length, that could be wrong, it will be calculated automatically by as_usmarc
-            # otherwise, if it's wron, zebra will fail miserabily (and never index what is after the failing record)
+#            # remove the leader length, which could be wrong; it will be recalculated automatically by as_usmarc.
+#            # Otherwise, if it's wrong, zebra will fail miserably (and never index what comes after the failing record)
             my $leader=$record->leader;
             substr($leader,0,5)='     ';
             substr($leader,10,7)='22     ';
             $record->leader(substr($leader,0,24));
-            print OUT $record->as_usmarc();
+            print OUT $record->as_usmarc;
         }
         close(OUT);
     }
@@ -378,43 +388,43 @@ if ($biblios) {
     # the record model, depending on marc flavour
     unless (-f "$biblioserverdir/tab/record.abs") {
         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
-            system("cp -f $kohadir/misc/zebra/record_biblios_unimarc.abs $biblioserverdir/tab/record.abs");
+            system("cp -f $kohadir/etc/zebradb/biblios/etc/record_unimarc.abs $biblioserverdir/tab/record.abs");
             print "Info: copied record.abs for UNIMARC\n";
         } else {
-            system("cp -f $kohadir/misc/zebra/record_biblios_usmarc.abs $biblioserverdir/tab/record.abs");
+            system("cp -f $kohadir/etc/zebradb/biblios/etc/record.abs $biblioserverdir/tab/record.abs");
             print "Info: copied record.abs for USMARC\n";
         }
         $created_dir_or_file++;
     }
     unless (-f "$biblioserverdir/tab/sort-string-utf.chr") {
-        system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/sort-string-utf.chr");
+        system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $biblioserverdir/tab/sort-string-utf.chr");
         print "Info: copied sort-string-utf.chr\n";
         $created_dir_or_file++;
     }
     unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") {
-        system("cp -f $kohadir/misc/zebra/sort-string-utf_french.chr $biblioserverdir/tab/word-phrase-utf.chr");
+        system("cp -f $kohadir/etc/zebradb/etc/sort-string-utf_french.chr $biblioserverdir/tab/word-phrase-utf.chr");
         print "Info: copied word-phase-utf.chr\n";
         $created_dir_or_file++;
     }
     unless (-f "$biblioserverdir/tab/bib1.att") {
-        system("cp -f $kohadir/misc/zebra/bib1_biblios.att $biblioserverdir/tab/bib1.att");
+        system("cp -f $kohadir/etc/zebradb/biblios/etc/bib1.att $biblioserverdir/tab/bib1.att");
         print "Info: copied bib1.att\n";
         $created_dir_or_file++;
     }
     unless (-f "$biblioserverdir/tab/default.idx") {
-        system("cp -f $kohadir/misc/zebra/default.idx $biblioserverdir/tab/default.idx");
+        system("cp -f $kohadir/etc/zebradb/etc/default.idx $biblioserverdir/tab/default.idx");
         print "Info: copied default.idx\n";
         $created_dir_or_file++;
     }
     unless (-f "$biblioserverdir/etc/ccl.properties") {
-#         system("cp -f $kohadir/misc/zebra/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
-        system("cp -f $kohadir/misc/zebra/ccl.properties $biblioserverdir/etc/ccl.properties");
+#         system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
+        system("cp -f $kohadir/etc/zebradb/ccl.properties $biblioserverdir/etc/ccl.properties");
         print "Info: copied ccl.properties\n";
         $created_dir_or_file++;
     }
     unless (-f "$biblioserverdir/etc/pqf.properties") {
-#         system("cp -f $kohadir/misc/zebra/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
-        system("cp -f $kohadir/misc/zebra/pqf.properties $biblioserverdir/etc/pqf.properties");
+#         system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
+        system("cp -f $kohadir/etc/zebradb/pqf.properties $biblioserverdir/etc/pqf.properties");
         print "Info: copied pqf.properties\n";
         $created_dir_or_file++;
     }
@@ -447,8 +457,6 @@ storeData:1
 lockDir: $biblioserverdir/lock
 perm.anonymous:r
 perm.kohaadmin:rw
-passw.passwd
-shadow
 register: $biblioserverdir/register:4G
 shadow: $biblioserverdir/shadow:4G
 
@@ -489,26 +497,106 @@ rank:rank-1
         open(OUT,">:utf8 ","$directory/biblios/export") or die $!;
         my $dbh=C4::Context->dbh;
         my $sth;
-        $sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber $limit");
+       if ($noxml){
+        $sth=$dbh->prepare("select biblionumber,marc from biblioitems order by biblionumber $limit");
+        $sth->execute();
+        my $i=0;
+        while (my ($biblionumber,$marc) = $sth->fetchrow) {
+            my $record;
+            $record=MARC::Record->new_from_usmarc($marc);
+            my $record_correct=1;
+            # skip incorrect records: isn't this bogus, since just after we reintroduce the biblionumber if it's missing?
+            # FIXME next unless $record->field($biblionumbertagfield);
+            # check if biblionumber is present, otherwise, add it on the fly
+            if ($biblionumbertagfield eq '001') {
+                unless ($record->field($biblionumbertagfield)->data()) {
+                    $record_correct=0;
+                    my $field;
+                    # if the field that holds the biblionumber already exists, just update it; otherwise create it
+                if ($record->field($biblionumbertagfield)) {
+                $field =  $record->field($biblionumbertagfield);
+                $field->update($biblionumber);
+                } else {
+                my $newfield;
+                $newfield = MARC::Field->new( $biblionumbertagfield, $biblionumber);
+                $record->append_fields($newfield);
+                }
+            }
+            } else {
+            unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) {
+                $record_correct=0;
+                my $field;
+                # if the field that holds the biblionumber already exists, just update it; otherwise create it
+                if ($record->field($biblionumbertagfield)) {
+                $field =  $record->field($biblionumbertagfield);
+                $field->add_subfields($biblionumbertagsubfield => $biblionumber);
+                } else {
+                my $newfield;
+                $newfield = MARC::Field->new( $biblionumbertagfield,'','', $biblionumbertagsubfield => $biblionumber);
+                $record->append_fields($newfield);
+                }
+            }
+    #             warn "FIXED BIBLIONUMBER".$record->as_formatted;
+            }
+            unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) {
+                $record_correct=0;
+            #             warn "INCORRECT BIBLIOITEMNUMBER :".$record->as_formatted;
+            my $field;
+                # if the field that holds the biblioitemnumber already exists, just update it; otherwise create it
+                if ($record->field($biblioitemnumbertagfield)) {
+                    $field =  $record->field($biblioitemnumbertagfield);
+                    if ($biblioitemnumbertagfield <10) {
+                    $field->update($biblionumber);
+                    } else {
+                    $field->add_subfields($biblioitemnumbertagsubfield => $biblionumber);
+                    }
+                } else {
+                    my $newfield;
+                    if ($biblioitemnumbertagfield <10) {
+                    $newfield = MARC::Field->new( $biblioitemnumbertagfield, $biblionumber);
+                    } else {
+                    $newfield = MARC::Field->new( $biblioitemnumbertagfield,'','', $biblioitemnumbertagsubfield => $biblionumber);
+                    }
+                    $record->insert_grouped_field($newfield);
+            }
+        #             warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted;
+            }
+            my $leader=$record->leader;
+            substr($leader,0,5)='     ';
+            substr($leader,10,7)='22     ';
+            $record->leader(substr($leader,0,24));
+                print OUT $record->as_usmarc();
+        }
+        close (OUT);
+    } else {
+        $sth=$dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber $limit");
         $sth->execute();
         my $i=0;
         while (my ($biblionumber) = $sth->fetchrow) {
+            print ".";
+            print "\r$i" unless ($i++ %100);
             my $record;
             eval {
                 $record = GetMarcBiblio($biblionumber);
             };
             if($@){
                 print "  There was some pb getting biblio : #".$biblionumber."\n";
-            next;
+                next;
             }
-#             warn $record->as_formatted;
+            next unless $record;
 # die if $record->subfield('090','9') eq 11;
     #         print $record;
             # check that biblionumber & biblioitemnumber are stored in the MARC record, otherwise, add them & update the biblioitems.marcxml data.
             my $record_correct=1;
-            next unless $record->field($biblionumbertagfield);
+            # skip incorrect records: isn't this bogus, since just after we reintroduce the biblionumber if it's missing?
+            # FIXME next unless $record->field($biblionumbertagfield);
+            #
+            #
+            # CHECK  biblionumber
+            #
+            #
             if ($biblionumbertagfield eq '001') {
-                unless ($record->field($biblionumbertagfield)->data()) {
+                unless ($record->field($biblionumbertagfield) && $record->field($biblionumbertagfield)->data()) {
                     $record_correct=0;
                     my $field;
                     # if the field where biblionumber is already exist, just update it, otherwise create it
@@ -523,6 +611,7 @@ rank:rank-1
                 }
             } else {
                 unless ($record->subfield($biblionumbertagfield,$biblionumbertagsubfield)) {
+#                 warn "fixing biblionumber for $biblionumbertagfield,$biblionumbertagsubfield = $biblionumber";
                     $record_correct=0;
                     my $field;
                     # if the field where biblionumber is already exist, just update it, otherwise create it
@@ -535,11 +624,16 @@ rank:rank-1
                         $record->append_fields($newfield);
                     }
                 }
-    #             warn "FIXED BIBLIONUMBER".$record->as_formatted;
+#                 warn "FIXED BIBLIONUMBER".$record->as_formatted;
             }
+            #
+            #
+            # CHECK BIBLIOITEMNUMBER
+            #
+            #
             unless ($record->subfield($biblioitemnumbertagfield,$biblioitemnumbertagsubfield)) {
+#                 warn "fixing biblioitemnumber for $biblioitemnumbertagfield,$biblioitemnumbertagsubfield = $biblionumber";
                 $record_correct=0;
-    #             warn "INCORRECT BIBLIOITEMNUMBER :".$record->as_formatted;
                 my $field;
                 # if the field where biblionumber is already exist, just update it, otherwise create it
                 if ($record->field($biblioitemnumbertagfield)) {
@@ -560,13 +654,37 @@ rank:rank-1
                 }
     #             warn "FIXED BIBLIOITEMNUMBER".$record->as_formatted;
             }
+            #
+            #
+            # CHECK FIELD 100
+            #
+            #
+            my $encoding = C4::Context->preference("marcflavour");
+            # deal with UNIMARC field 100 (encoding) : create it if needed & set encoding to unicode
+            if ( $encoding eq "UNIMARC" ) {
+                my $string;
+                if ( length($record->subfield( 100, "a" )) == 35 ) {
+                    $string = $record->subfield( 100, "a" );
+                    my $f100 = $record->field(100);
+                    $record->delete_field($f100);
+                }
+                else {
+                    $string = POSIX::strftime( "%Y%m%d", localtime );
+                    $string =~ s/\-//g;
+                    $string = sprintf( "%-*s", 35, $string );
+                }
+                substr( $string, 22, 6, "frey50" );
+                unless ( length($record->subfield( 100, "a" )) == 35 ) {
+                    $record->delete_field($record->field(100));
+                    $record->insert_grouped_field(
+                        MARC::Field->new( 100, "", "", "a" => $string ) );
+                }
+            }
             unless ($record_correct) {
                 my $update_xml = $dbh->prepare("update biblioitems set marcxml=? where biblionumber=?");
                 warn "UPDATING $biblionumber (missing biblionumber or biblioitemnumber in MARC record : ".$record->as_xml;
                 $update_xml->execute($record->as_xml,$biblionumber);
             }
-            print ".";
-            print "\r$i" unless ($i++ %100);
             # remove leader length, that could be wrong, it will be calculated automatically by as_usmarc
             # otherwise, if it's wron, zebra will fail miserabily (and never index what is after the failing record)
             my $leader=$record->leader;
@@ -576,6 +694,7 @@ rank:rank-1
             print OUT $record->as_usmarc();
         }
         close(OUT);
+       }
     }
     
     #