2 # small script that imports an iso2709 file into koha 2.0
8 use MARC::File::USMARC;
9 # Uncomment the line below and use MARC::File::XML again when it works better.
11 # use MARC::File::XML;
16 # According to kados, an undocumented feature of setting MARC::Charset to
17 # ignore_errors(1) is that errors are not ignored. Instead of deleting the
18 # whole subfield when a character does not translate properly from MARC8 into
19 # UTF-8, just the problem characters are deleted. This should solve at least
20 # some of the fixme problems for fMARC8ToUTF8().
22 # Problems remain if there are MARC 21 records where 000/09 is set incorrectly.
24 # MARC::Charset->ignore_errors(1);
28 use Time::HiRes qw(gettimeofday);
30 binmode(STDOUT, ":utf8");
34 my ( $input_marc_file, $number) = ('',0);
35 my ($version, $delete, $test_parameter,$char_encoding, $verbose, $commit);
40 'commit:f' => \$commit,
41 'file:s' => \$input_marc_file,
45 't' => \$test_parameter,
46 'c:s' => \$char_encoding,
50 # FIXME: Management of error conditions needed for record parsing problems
51 # and MARC8 character sets with mappings to Unicode not yet included in
52 # MARC::Charset. The real world rarity of these problems is not fully tested.
53 # Unmapped character sets will throw a warning currently and processing will
54 # continue with the error condition. A fairly trivial correction should
55 # address some record parsing and unmapped character set problems but I need
56 # time to implement a test and correction for undef subfields and revert to
57 # MARC8 if mappings are missing. -- thd
58 sub fMARC8ToUTF8($$) {
60 my ($verbose) = shift;
63 my $leader = $record->leader();
65 print "\n000 " . $leader;
68 foreach my $field ($record->fields()) {
69 if ($field->is_control_field()) {
72 my $fieldName = $field->tag();
73 my $fieldValue = $field->data();
74 $fieldValue =~ s/ /#/g;
75 print "\n" . $fieldName;
76 print ' ' . $fieldValue;
81 my $fieldName = $field->tag();
82 my $indicator1Value = $field->indicator(1);
83 my $indicator2Value = $field->indicator(2);
86 $indicator1Value =~ s/ /#/;
87 $indicator2Value =~ s/ /#/;
88 print "\n" . $fieldName . ' ' .
93 foreach my $subfield ($field->subfields()) {
94 my $subfieldName = $subfield->[0];
95 my $subfieldValue = $subfield->[1];
96 $subfieldValue = MARC::Charset::marc8_to_utf8($subfieldValue);
98 # Alas, MARC::Field::update() does not work correctly.
99 ## push (@subfieldsArray, $subfieldName, $subfieldValue);
101 push @subfieldsArray, [$subfieldName, $subfieldValue];
104 print " \$" . $subfieldName . ' ' . $subfieldValue;
109 # Alas, MARC::Field::update() does not work correctly.
111 # The first instance in the field of a repeated subfield
112 # overwrites the content from later instances with the content
113 # from the first instance.
114 ## $field->update(@subfieldsArray);
116 foreach my $subfieldRow(@subfieldsArray) {
117 my $subfieldName = $subfieldRow->[0];
118 $field->delete_subfields($subfieldName);
120 foreach my $subfieldRow(@subfieldsArray) {
121 $field->add_subfields(@$subfieldRow);
126 # Reading the indicator values again is not necessary.
127 # They were not converted.
128 # $indicator1Value = $field->indicator(1);
129 # $indicator2Value = $field->indicator(2);
130 # $indicator1Value =~ s/ /#/;
131 # $indicator2Value =~ s/ /#/;
132 print "\nCONVERTED TO UTF-8:\n" . $fieldName . ' ' .
135 foreach my $subfield ($field->subfields()) {
136 my $subfieldName = $subfield->[0];
137 my $subfieldValue = $subfield->[1];
138 print " \$" . $subfieldName . ' ' . $subfieldValue;
144 print "\n" if $verbose;
149 $record->encoding('UTF-8');
154 if ($version || ($input_marc_file eq '')) {
156 small script to import an iso2709 file into Koha.
158 \th : this version/help screen
159 \tfile /path/to/file/to/dump : the file to dump
160 \tv : verbose mode. 1 means "some infos", 2 means "MARC dumping"
161 \tn : the number of records to import. If missing, all the file is imported
162 \tcommit : the number of records to wait before performing a 'commit' operation
163 \tt : test mode : parses the file, saying what he would do, but doing nothing.
164 \tc : the characteristic MARC flavour. At the moment, only MARC21 and UNIMARC
165 \tsupported. MARC21 by default.
166 \td : delete EVERYTHING related to biblio in koha-DB before import :tables :
167 \t\tbiblio, \t\tbiblioitems, \t\tsubjects,\titems
169 \t\tmarc_subfield_table, \tmarc_word, \t\tmarc_blob_subfield
170 IMPORTANT : don't use this script before you've entered and checked your MARC parameters tables twice (or more!).
171 Otherwise, the import won't work correctly and you will get invalid data.
174 \t\$ export KOHA_CONF=/etc/koha.conf
175 \t\$ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 -file /home/jmf/koha.mrc -n 3000
181 my $dbh = C4::Context->dbh;
184 print "deleting biblios\n";
185 $dbh->do("delete from biblio");
186 $dbh->do("delete from biblioitems");
187 $dbh->do("delete from items");
189 if ($test_parameter) {
190 print "TESTING MODE ONLY\n DOING NOTHING\n===============\n";
193 my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21';
195 print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
197 my $starttime = gettimeofday;
198 my $batch = MARC::Batch->new( 'USMARC', $input_marc_file );
199 $batch->warnings_off();
200 $batch->strict_off();
206 $commitnum = $commit;
210 #1st of all, find item MARC tag.
211 my ($tagfield,$tagsubfield) = &MARCfind_marc_from_kohafield($dbh,"items.itemnumber",'');
212 # $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write, stopwords write");
213 while ( my $record = $batch->next() ) {
214 # warn "=>".$record->as_formatted;
216 # warn "NUM:".$number;
219 print "\r$i" unless $i % 100;
221 # z3950_extended_services('commit',set_service_options('commit'));
222 # print "COMMIT OPERATION SUCCESSFUL\n";
224 # my $timeneeded = gettimeofday - $starttime;
225 # die "$i MARC records imported in $timeneeded seconds\n";
227 # # perform the commit operation every so often
229 # z3950_extended_services('commit',set_service_options('commit'));
230 # $commit+=$commitnum;
231 # print "COMMIT OPERATION SUCCESSFUL\n";
233 #now, parse the record, extract the item fields, and store them somewhere else.
235 ## create an empty record object to populate
236 my $newRecord = MARC::Record->new();
237 $newRecord->leader($record->leader());
239 # go through each field in the existing record
240 foreach my $oldField ( $record->fields() ) {
242 # just reproduce tags < 010 in our new record
244 # Fields are not necessarily only numeric in the actual world of records
245 # nor in what I would recommend for additional safe non-interfering local
246 # use fields. The following regular expression match is much safer than
247 # a numeric evaluation. -- thd
248 if ( $oldField->tag() =~ m/^00/ ) {
249 $newRecord->append_fields( $oldField );
253 # store our new subfield data in this list
254 my @newSubfields = ();
256 # go through each subfield code/data pair
257 foreach my $pair ( $oldField->subfields() ) {
258 #$pair->[1] =~ s/\<//g;
259 #$pair->[1] =~ s/\>//g;
260 push( @newSubfields, $pair->[0], $pair->[1] ); #char_decode($pair->[1],$char_encoding) );
263 # add the new field to our new record
264 my $newField = MARC::Field->new(
266 $oldField->indicator(1),
267 $oldField->indicator(2),
271 $newRecord->append_fields( $newField );
275 warn "$i ==>".$newRecord->as_formatted() if $verbose eq 2;
276 my @fields = $newRecord->field($tagfield);
280 foreach my $field (@fields) {
281 my $item = MARC::Record->new();
282 $item->append_fields($field);
284 $newRecord->delete_field($field);
287 print "$i : $nbitems items found\n" if $verbose;
288 # now, create biblio and items with Addbiblio call.
289 unless ($test_parameter) {
290 warn "NEWREC : ".$newRecord->as_formatted;
291 my ($bibid,$oldbibitemnum) = AddBiblio($newRecord,'');
292 warn "ADDED biblio NB $bibid in DB\n" if $verbose;
293 for (my $i=0;$i<=$#items;$i++) {
294 # warn "here is the biblioitemnumber $oldbibitemnum";
295 AddItem($items[$i],$bibid,$oldbibitemnum);
299 # final commit of the changes
300 z3950_extended_services('commit',set_service_options('commit'));
301 print "COMMIT OPERATION SUCCESSFUL\n";
303 my $timeneeded = gettimeofday - $starttime;
304 print "$i MARC records done in $timeneeded seconds\n";