3 # Copyright 2009 Liblime
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along with
17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
18 # Suite 330, Boston, MA 02111-1307 USA
25 use Getopt::Long qw(:config auto_help auto_version);
33 use vars qw($VERSION);
36 # find Koha's Perl modules
37 # test carefully before changing this
39 eval { require "$FindBin::Bin/../kohalib.pl" };
50 my $limit; # undef, not zero.
59 'verbose=i' => \$verbose,
61 'offset=i' => \$offset,
63 'summary!' => \$summary,
66 pod2usage( -verbose => 2 ) if ($man);
67 pod2usage( -verbose => 2 ) if ($help and $verbose);
68 pod2usage(1) if $help;
75 my $marcflavour = C4::Context->preference('marcflavour');
77 my $all = C4::Context->dbh->prepare("SELECT COUNT(*) FROM biblioitems");
79 my $total = $all->fetchrow;
81 my $count_query = "SELECT COUNT(*) FROM biblioitems WHERE substr(marc, 10, 1) = ?";
82 my $query = "SELECT * FROM biblioitems WHERE substr(marc, 10, 1) <> ?";
84 my $sth = C4::Context->dbh->prepare($count_query);
86 my $count = $sth->fetchrow;
87 my $badcount = $total-$count;
90 print "# biblioitems with leader/09 = 'a'\n";
91 printf "# %9s match\n", $count;
92 printf "# %9s BAD \n", $badcount;
93 printf "# %9s total\n\n", $total;
94 printf "# Examining %s BAD record(s), offset %d:\n", ($limit || 'all'), $offset;
97 my $bad_recs = C4::Context->dbh->prepare($query);
98 $bad_recs->execute('a');
99 $limit or $limit = $bad_recs->rows(); # limit becomes max if unspecified
100 $limit += $offset if $offset; # increase limit for offset
103 $marcflavour or die "No marcflavour (MARC21 or UNIMARC) set in syspref";
105 MARC::File::XML->default_record_format($marcflavour) or die "FAILED MARC::File::XML->default_record_format($marcflavour)";
107 while ( my $row = $bad_recs->fetchrow_hashref() ) {
108 (++$i > $limit) and last;
109 ( $i > $offset) or next;
110 my $xml = $row->{marcxml};
111 $xml =~ s/.*(\<leader\>)/$1/s;
112 $xml =~ s/(\<\/leader\>).*/$1/s;
113 # $xml now pared down to just the <leader> element
114 printf "# %4d of %4d: biblionumber %s : %s\n", $i, $badcount, $row->{biblionumber}, $xml;
115 my $stripped = StripNonXmlChars($row->{marcxml});
116 ($stripped eq $row->{marcxml}) or printf STDERR "%d NON-XML Characters removed!!\n", (length($row->{marcxml}) - length($stripped));
117 my $record = eval { MARC::Record::new_from_xml( $stripped, 'utf8', $marcflavour ) };
118 if ($@ or not $record) {
119 print STDERR "ERROR in MARC::Record::new_from_xml(\$marcxml, 'utf8', $marcflavour): $@\n\tSkipping $row->{biblionumber}\n";
123 $record->encoding('UTF-8');
124 if (ModBiblioMarc($record, $row->{biblionumber})) {
125 printf "# %4d of %4d: biblionumber %s : <leader>%s</leader>\n", $i, $badcount, $row->{biblionumber}, $record->leader();
127 print STDERR "ERROR in ModBiblioMarc(\$record, $row->{biblionumber})\n";
130 $dump and print $row->{marcxml}, "\n";
137 leader_fix.pl - Repair missing leader position 9 value ("a" for MARC21 - UTF8).
141 leader_fix.pl [ -h | -m ] [ -v ] [ -d ] [ -s ] [ -l N ] [ -o N ] [ -f ]
144 -h --help -? Brief help message
145 -m --man Full documentation, same as --help --verbose
146 --version Prints version info
149 -d --dump Dump MARCXML of biblioitems processed, default OFF
150 -s --summary Print initial summary of good and bad biblioitems counted, default ON
151 -v --verbose Increase verbosity of output, default OFF
154 -f --fix Save repaired leaders to biblioitems.marcxml,
155 -l --limit Number of biblioitems to display or fix
156 -o --offset Number of biblioitems to skip (not displayed or fixed)
164 This is the most important option. Without it, the script just tells you about the problem records.
165 With --fix, the script fixes the same records.
169 Like a LIMIT statement in SQL, this contrains the number of records targeted by the script to an integer N.
170 The default is to target all records with bad leaders.
174 Like an OFFSET statement in SQL, this tells the script to skip N of the targetted records.
175 The default is 0, i.e. skip none of them.
179 The binary ON/OFF options can be negated like:
180 B<--nosummary> Do not display summary.
181 B<--nodump> Do not dump MARCXML.
182 B<--nofix> Do not change any records. This is the default mode.
186 Koha expects to have all MARXML records internalized in UTF-8 encoding. This
187 presents a problem when records have been inserted with the leader/09 showing
188 blank for MARC8 encoding. This script is used to determine the extent of the
189 problem and to fix the affected leaders.
191 Run leader_fix.pl the first time with no options, and assuming you agree that the leaders
192 presented need fixing, run it again with B<--fix>.
194 =head1 USAGE EXAMPLES
198 In the most basic form, displays summary of biblioitems examined
199 and the leader from any found without /09 = a.
201 B<leader_fix.pl --fix>
203 Fixes the same biblioitems, displaying summary and each leader before/after change.
205 B<leader_fix.pl --limit=3 --offset=15 --nosummary --dump>
207 Dumps MARCXML from the 16th, 17th and 18th bad records found.
209 B<leader_fix.pl -l 3 -o 15 -s 0 -d>
211 Same thing as previous example in terse form.
215 Allow biblionumbers to be piped into STDIN as the selection mechanism.