X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=C4%2FRecord.pm;h=89bde3227f04f003f4615ce0941eba49b174b07a;hb=44ab1c531d1aca7bf5223ad011b7a1e848a291b7;hp=127d5214bd72d36a7b55932f63241152f155f9de;hpb=8ad2c7d7acc3cb0033426bd78928214a22ad9dd1;p=koha_gimpoz diff --git a/C4/Record.pm b/C4/Record.pm index 127d5214bd..89bde3227f 100644 --- a/C4/Record.pm +++ b/C4/Record.pm @@ -1,7 +1,7 @@ package C4::Record; # # Copyright 2006 (C) LibLime -# Joshua Ferraro +# Parts copyright 2010 BibLibre # # This file is part of Koha. # @@ -14,16 +14,17 @@ package C4::Record; # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # -# You should have received a copy of the GNU General Public License along with -# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, -# Suite 330, Boston, MA 02111-1307 USA +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # -use strict;# use warnings; #FIXME: turn off warnings before release +use strict; +#use warnings; FIXME - Bug 2505 # please specify in which methods a given module is used -use MARC::Record; # marc2marcxml, marcxml2marc, html2marc, changeEncoding -use MARC::File::XML; # marc2marcxml, marcxml2marc, html2marcxml, changeEncoding +use MARC::Record; # marc2marcxml, marcxml2marc, changeEncoding +use MARC::File::XML; # marc2marcxml, marcxml2marc, changeEncoding use MARC::Crosswalk::DublinCore; # marc2dcxml use Biblio::EndnoteStyle; use Unicode::Normalize; # _entity_encode @@ -31,7 +32,9 @@ use XML::LibXSLT; use XML::LibXML; use C4::Biblio; #marc2bibtex use C4::Csv; #marc2csv -use Text::CSV; #marc2csv +use C4::Koha; #marc2csv +use YAML; #marcrecords2csv +use Text::CSV::Encoded; #marc2csv use vars qw($VERSION @ISA @EXPORT); @@ -51,9 +54,6 @@ $VERSION = 3.00; &marc2modsxml &marc2bibtex &marc2csv - - &html2marcxml - &html2marc &changeEncoding ); @@ -69,32 +69,49 @@ New in Koha 3.x. This module handles all record-related management functions. =head2 marc2marc - Convert from one flavour of ISO-2709 to another -=over 4 - -my ($error,$newmarc) = marc2marc($marc,$to_flavour,$from_flavour,$encoding); + my ($error,$newmarc) = marc2marc($marc,$to_flavour,$from_flavour,$encoding); Returns an ISO-2709 scalar -=back - =cut sub marc2marc { my ($marc,$to_flavour,$from_flavour,$encoding) = @_; - my $error = "Feature not yet implemented\n"; + my $error; + if ($to_flavour =~ m/marcstd/) { + my $marc_record_obj; + if ($marc =~ /^MARC::Record/) { # it's already a MARC::Record object + $marc_record_obj = $marc; + } else { # it's not a MARC::Record object, make it one + eval { $marc_record_obj = MARC::Record->new_from_usmarc($marc) }; # handle exceptions + +# conversion to MARC::Record object failed, populate $error + if ($@) { $error .="\nCreation of MARC::Record object failed: ".$MARC::File::ERROR }; + } + unless ($error) { + my @privatefields; + foreach my $field ($marc_record_obj->fields()) { + if ($field->tag() =~ m/9/ && ($field->tag() != '490' || C4::Context->preference("marcflavour") eq 'UNIMARC')) { + push @privatefields, $field; + } elsif (! ($field->is_control_field())) { + $field->delete_subfield(code => '9') if ($field->subfield('9')); + } + } + $marc_record_obj->delete_field($_) for @privatefields; + $marc = $marc_record_obj->as_usmarc(); + } + } else { + $error = "Feature not yet implemented\n"; + } return ($error,$marc); } =head2 marc2marcxml - Convert from ISO-2709 to MARCXML -=over 4 - -my ($error,$marcxml) = marc2marcxml($marc,$encoding,$flavour); + my ($error,$marcxml) = marc2marcxml($marc,$encoding,$flavour); Returns a MARCXML scalar -=over 2 - C<$marc> - an ISO-2709 scalar or MARC::Record object C<$encoding> - UTF-8 or MARC-8 [UTF-8] @@ -103,10 +120,6 @@ C<$flavour> - MARC21 or UNIMARC C<$dont_entity_encode> - a flag that instructs marc2marcxml not to entity encode the xml before returning (optional) -=back - -=back - =cut sub marc2marcxml { @@ -173,24 +186,16 @@ sub marc2marcxml { =head2 marcxml2marc - Convert from MARCXML to ISO-2709 -=over 4 - -my ($error,$marc) = marcxml2marc($marcxml,$encoding,$flavour); + my ($error,$marc) = marcxml2marc($marcxml,$encoding,$flavour); Returns an ISO-2709 scalar -=over 2 - C<$marcxml> - a MARCXML record C<$encoding> - UTF-8 or MARC-8 [UTF-8] C<$flavour> - MARC21 or UNIMARC -=back - -=back - =cut sub marcxml2marc { @@ -213,24 +218,16 @@ sub marcxml2marc { =head2 marc2dcxml - Convert from ISO-2709 to Dublin Core -=over 4 - -my ($error,$dcxml) = marc2dcxml($marc,$qualified); + my ($error,$dcxml) = marc2dcxml($marc,$qualified); Returns a DublinCore::Record object, will eventually return a Dublin Core scalar FIXME: should return actual XML, not just an object -=over 2 - C<$marc> - an ISO-2709 scalar or MARC::Record object C<$qualified> - specify whether qualified Dublin Core should be used in the input or output [0] -=back - -=back - =cut sub marc2dcxml { @@ -267,16 +264,13 @@ sub marc2dcxml { $dcxmlfinal .= "\n"; return ($error,$dcxmlfinal); } -=head2 marc2modsxml - Convert from ISO-2709 to MODS -=over 4 +=head2 marc2modsxml - Convert from ISO-2709 to MODS -my ($error,$modsxml) = marc2modsxml($marc); + my ($error,$modsxml) = marc2modsxml($marc); Returns a MODS scalar -=back - =cut sub marc2modsxml { @@ -297,12 +291,19 @@ sub marc2modsxml { sub marc2endnote { my ($marc) = @_; my $marc_rec_obj = MARC::Record->new_from_usmarc($marc); - my $f260 = $marc_rec_obj->field('260'); - my $f260a = $f260->subfield('a') if $f260; + my ( $abstract, $f260a, $f710a ); + my $f260 = $marc_rec_obj->field('260'); + if ($f260) { + $f260a = $f260->subfield('a') if $f260; + } my $f710 = $marc_rec_obj->field('710'); - my $f710a = $f710->subfield('a') if $f710; - my $f500 = $marc_rec_obj->field('500'); - my $abstract = $f500->subfield('a') if $f500; + if ($f710) { + $f710a = $f710->subfield('a'); + } + my $f500 = $marc_rec_obj->field('500'); + if ($f500) { + $abstract = $f500->subfield('a'); + } my $fields = { DB => C4::Context->preference("LibraryName"), Title => $marc_rec_obj->title(), @@ -327,60 +328,143 @@ sub marc2endnote { } -=head2 marc2csv - Convert from UNIMARC to CSV +=head2 marc2csv - Convert several records from UNIMARC to CSV -=over 4 + my ($csv) = marc2csv($biblios, $csvprofileid); -my ($csv) = marc2csv($record, $csvprofileid); +Pre and postprocessing can be done through a YAML file Returns a CSV scalar -=over 2 +C<$biblio> - a list of biblionumbers -C<$record> - a MARC::Record object +C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv) + +=cut + +sub marc2csv { + my ($biblios, $id) = @_; + my $output; + my $csv = Text::CSV::Encoded->new(); + + # Getting yaml file + my $configfile = "../tools/csv-profiles/$id.yaml"; + my ($preprocess, $postprocess, $fieldprocessing); + if (-e $configfile){ + ($preprocess,$postprocess, $fieldprocessing) = YAML::LoadFile($configfile); + } + + # Preprocessing + eval $preprocess if ($preprocess); + + my $firstpass = 1; + foreach my $biblio (@$biblios) { + $output .= marcrecord2csv($biblio, $id, $firstpass, $csv, $fieldprocessing) ; + $firstpass = 0; + } + + # Postprocessing + eval $postprocess if ($postprocess); + + return $output; +} + +=head2 marcrecord2csv - Convert a single record from UNIMARC to CSV + + my ($csv) = marcrecord2csv($biblio, $csvprofileid, $header); + +Returns a CSV scalar + +C<$biblio> - a biblionumber C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv) -=back +C<$header> - true if the headers are to be printed (typically at first pass) -=back +C<$csv> - an already initialised Text::CSV object =cut -sub marc2csv { - my ($record, $id, $header) = @_; +sub marcrecord2csv { + my ($biblio, $id, $header, $csv, $fieldprocessing) = @_; my $output; - my $csv = Text::CSV->new(); - # Get the information about the csv profile - my $marcfieldslist = GetMarcFieldsForCsv($id); + # Getting the record + my $record = GetMarcBiblio($biblio, 1); + next unless $record; + # Getting the framework + my $frameworkcode = GetFrameworkCode($biblio); + + # Getting information about the csv profile + my $profile = GetCsvProfile($id); + + # Getting output encoding + my $encoding = $profile->{encoding} || 'utf8'; + # Getting separators + my $csvseparator = $profile->{csv_separator} || ','; + my $fieldseparator = $profile->{field_separator} || '#'; + my $subfieldseparator = $profile->{subfield_separator} || '|'; + + # TODO: Be more generic (in case we have to handle other protected chars or more separators) + if ($csvseparator eq '\t') { $csvseparator = "\t" } + if ($fieldseparator eq '\t') { $fieldseparator = "\t" } + if ($subfieldseparator eq '\t') { $subfieldseparator = "\t" } + if ($csvseparator eq '\n') { $csvseparator = "\n" } + if ($fieldseparator eq '\n') { $fieldseparator = "\n" } + if ($subfieldseparator eq '\n') { $subfieldseparator = "\n" } + + $csv = $csv->encoding_out($encoding) ; + $csv->sep_char($csvseparator); + + # Getting the marcfields + my $marcfieldslist = $profile->{marcfields}; # Getting the marcfields as an array - my @marcfields = split('\|', $marcfieldslist); + my @marcfieldsarray = split('\|', $marcfieldslist); + + # Separating the marcfields from the user-supplied headers + my @marcfields; + foreach (@marcfieldsarray) { + my @result = split('=', $_); + if (scalar(@result) == 2) { + push @marcfields, { header => $result[0], field => $result[1] }; + } else { + push @marcfields, { field => $result[0] } + } + } # If we have to insert the headers if ($header) { my @marcfieldsheaders; - my $dbh = C4::Context->dbh; # For each field or subfield foreach (@marcfields) { - # We get the matching tag name - if (index($_, '$') > 0) { - my ($fieldtag, $subfieldtag) = split('\$', $_); - my $query = "SELECT liblibrarian FROM marc_subfield_structure WHERE tagfield=? AND tagsubfield=?"; - my $sth = $dbh->prepare($query); - $sth->execute($fieldtag, $subfieldtag); - my @results = $sth->fetchrow_array(); - push @marcfieldsheaders, $results[0]; + + my $field = $_->{field}; + # Remove any blank char that might have unintentionally insered into the tag name + $field =~ s/\s+//g; + + # If we have a user-supplied header, we use it + if (exists $_->{header}) { + push @marcfieldsheaders, $_->{header}; } else { - my $query = "SELECT liblibrarian FROM marc_tag_structure WHERE tagfield=?"; - my $sth = $dbh->prepare($query); - $sth->execute($_); - my @results = $sth->fetchrow_array(); - push @marcfieldsheaders, $results[0]; + # If not, we get the matching tag name from koha + if (index($field, '$') > 0) { + my ($fieldtag, $subfieldtag) = split('\$', $field); + my $query = "SELECT liblibrarian FROM marc_subfield_structure WHERE tagfield=? AND tagsubfield=?"; + my $sth = $dbh->prepare($query); + $sth->execute($fieldtag, $subfieldtag); + my @results = $sth->fetchrow_array(); + push @marcfieldsheaders, $results[0]; + } else { + my $query = "SELECT liblibrarian FROM marc_tag_structure WHERE tagfield=?"; + my $sth = $dbh->prepare($query); + $sth->execute($field); + my @results = $sth->fetchrow_array(); + push @marcfieldsheaders, $results[0]; + } } } $csv->combine(@marcfieldsheaders); @@ -389,7 +473,8 @@ sub marc2csv { # For each marcfield to export my @fieldstab; - foreach my $marcfield (@marcfields) { + foreach (@marcfields) { + my $marcfield = $_->{field}; # If it is a subfield if (index($marcfield, '$') > 0) { my ($fieldtag, $subfieldtag) = split('\$', $marcfield); @@ -402,213 +487,48 @@ sub marc2csv { # We take every matching subfield my @subfields = $field->subfield($subfieldtag); foreach my $subfield (@subfields) { - push @tmpfields, $subfield; + + # Getting authorised value + my $authvalues = GetKohaAuthorisedValuesFromField($fieldtag, $subfieldtag, $frameworkcode, undef); + push @tmpfields, (defined $authvalues->{$subfield}) ? $authvalues->{$subfield} : $subfield; } } - push (@fieldstab, join(',', @tmpfields)); + push (@fieldstab, join($subfieldseparator, @tmpfields)); # Or a field } else { my @fields = ($record->field($marcfield)); - push (@fieldstab, join(',', map($_->as_string(), @fields))); - } - }; - - $csv->combine(@fieldstab); - $output .= $csv->string() . "\n"; - - return $output; - -} - - -=head2 html2marcxml - -=over 4 + my $authvalues = GetKohaAuthorisedValuesFromField($marcfield, undef, $frameworkcode, undef); -my ($error,$marcxml) = html2marcxml($tags,$subfields,$values,$indicator,$ind_tag); + my @valuesarray; + foreach (@fields) { + my $value; -Returns a MARCXML scalar + # Getting authorised value + $value = defined $authvalues->{$_->as_string} ? $authvalues->{$_->as_string} : $_->as_string; -this is used in addbiblio.pl and additem.pl to build the MARCXML record from -the form submission. + # Field processing + eval $fieldprocessing if ($fieldprocessing); -FIXME: this could use some better code documentation + push @valuesarray, $value; + } + push (@fieldstab, join($fieldseparator, @valuesarray)); + } + }; -=back + $csv->combine(@fieldstab); + $output .= $csv->string() . "\n"; -=cut + return $output; -sub html2marcxml { - my ($tags,$subfields,$values,$indicator,$ind_tag) = @_; - my $error; - # add the header info - my $marcxml= MARC::File::XML::header(C4::Context->preference('TemplateEncoding'),C4::Context->preference('marcflavour')); - - # some flags used to figure out where in the record we are - my $prevvalue; - my $prevtag=-1; - my $first=1; - my $j = -1; - - # handle characters that would cause the parser to choke FIXME: is there a more elegant solution? - for (my $i=0;$i<=@$tags;$i++){ - @$values[$i] =~ s/&/&/g; - @$values[$i] =~ s//>/g; - @$values[$i] =~ s/"/"/g; - @$values[$i] =~ s/'/'/g; - - if ((@$tags[$i] ne $prevtag)){ - $j++ unless (@$tags[$i] eq ""); - #warn "IND:".substr(@$indicator[$j],0,1).substr(@$indicator[$j],1,1)." ".@$tags[$i]; - if (!$first){ - $marcxml.="\n"; - if ((@$tags[$i] > 10) && (@$values[$i] ne "")){ - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $marcxml.="@$values[$i]\n"; - $first=0; - } else { - $first=1; - } - } else { - if (@$values[$i] ne "") { - # handle the leader - if (@$tags[$i] eq "000") { - $marcxml.="@$values[$i]\n"; - $first=1; - # rest of the fixed fields - } elsif (@$tags[$i] lt '010') { # don't compare numerically 010 == 8 - $marcxml.="@$values[$i]\n"; - $first=1; - } else { - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $marcxml.="@$values[$i]\n"; - $first=0; - } - } - } - } else { # @$tags[$i] eq $prevtag - if (@$values[$i] eq "") { - } else { - if ($first){ - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $first=0; - } - $marcxml.="@$values[$i]\n"; - } - } - $prevtag = @$tags[$i]; - } - $marcxml.= MARC::File::XML::footer(); - #warn $marcxml; - return ($error,$marcxml); } -=head2 html2marc - -=over 4 - -Probably best to avoid using this ... it has some rather striking problems: - -=over 2 - -* saves blank subfields - -* subfield order is hardcoded to always start with 'a' for repeatable tags (because it is hardcoded in the addfield routine). - -* only possible to specify one set of indicators for each set of tags (ie, one for all the 650s). (because they were stored in a hash with the tag as the key). - -* the underlying routines didn't support subfield reordering or subfield repeatability. - -=back - -I've left it in here because it could be useful if someone took the time to fix it. -- kados - -=back - -=cut - -sub html2marc { - my ($dbh,$rtags,$rsubfields,$rvalues,%indicators) = @_; - my $prevtag = -1; - my $record = MARC::Record->new(); -# my %subfieldlist=(); - my $prevvalue; # if tag <10 - my $field; # if tag >=10 - for (my $i=0; $i< @$rtags; $i++) { - # rebuild MARC::Record -# warn "0=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": "; - if (@$rtags[$i] ne $prevtag) { - if ($prevtag < 10) { - if ($prevvalue) { - if (($prevtag ne '000') && ($prevvalue ne "")) { - $record->add_fields((sprintf "%03s",$prevtag),$prevvalue); - } elsif ($prevvalue ne ""){ - $record->leader($prevvalue); - } - } - } else { - if (($field) && ($field ne "")) { - $record->add_fields($field); - } - } - $indicators{@$rtags[$i]}.=' '; - # skip blank tags, I hope this works - if (@$rtags[$i] eq ''){ - $prevtag = @$rtags[$i]; - undef $field; - next; - } - if (@$rtags[$i] <10) { - $prevvalue= @$rvalues[$i]; - undef $field; - } else { - undef $prevvalue; - if (@$rvalues[$i] eq "") { - undef $field; - } else { - $field = MARC::Field->new( (sprintf "%03s",@$rtags[$i]), substr($indicators{@$rtags[$i]},0,1),substr($indicators{@$rtags[$i]},1,1), @$rsubfields[$i] => @$rvalues[$i]); - } -# warn "1=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted; - } - $prevtag = @$rtags[$i]; - } else { - if (@$rtags[$i] <10) { - $prevvalue=@$rvalues[$i]; - } else { - if (length(@$rvalues[$i])>0) { - $field->add_subfields(@$rsubfields[$i] => @$rvalues[$i]); -# warn "2=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted; - } - } - $prevtag= @$rtags[$i]; - } - } - #} - # the last has not been included inside the loop... do it now ! - #use Data::Dumper; - #warn Dumper($field->{_subfields}); - $record->add_fields($field) if (($field) && $field ne ""); - #warn "HTML2MARC=".$record->as_formatted; - return $record; -} =head2 changeEncoding - Change the encoding of a record -=over 4 - -my ($error, $newrecord) = changeEncoding($record,$format,$flavour,$to_encoding,$from_encoding); + my ($error, $newrecord) = changeEncoding($record,$format,$flavour,$to_encoding,$from_encoding); Changes the encoding of a record -=over 2 - C<$record> - the record itself can be in ISO-2709, a MARC::Record object, or MARCXML for now (required) C<$format> - MARC or MARCXML (required) @@ -619,16 +539,12 @@ C<$to_encoding> - the encoding you want the record to end up in (optional) [UTF- C<$from_encoding> - the encoding the record is currently in (optional, it will probably be able to tell unless there's a problem with the record) -=back - FIXME: the from_encoding doesn't work yet FIXME: better handling for UNIMARC, it should allow management of 100 field FIXME: shouldn't have to convert to and from xml/marc just to change encoding someone needs to re-write MARC::Record's 'encoding' method to actually alter the encoding rather than just changing the leader -=back - =cut sub changeEncoding { @@ -664,22 +580,14 @@ sub changeEncoding { =head2 marc2bibtex - Convert from MARC21 and UNIMARC to BibTex -=over 4 - -my ($bibtex) = marc2bibtex($record, $id); + my ($bibtex) = marc2bibtex($record, $id); Returns a BibTex scalar -=over 2 - C<$record> - a MARC::Record object C<$id> - an id for the BibTex record (might be the biblionumber) -=back - -=back - =cut @@ -754,18 +662,14 @@ sub marc2bibtex { =head2 _entity_encode - Entity-encode an array of strings -=over 4 - -my ($entity_encoded_string) = _entity_encode($string); + my ($entity_encoded_string) = _entity_encode($string); or -my (@entity_encoded_strings) = _entity_encode(@strings); + my (@entity_encoded_strings) = _entity_encode(@strings); Entity-encode an array of strings -=back - =cut sub _entity_encode {