X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=C4%2FRecord.pm;h=89bde3227f04f003f4615ce0941eba49b174b07a;hb=0a44448bfb4a46c069889319136bece143841dbc;hp=7751bc09b9d99a19bd2eb677ac265369a462dcaf;hpb=866c3dd616202ad198276ea654388bb2fd7189f3;p=koha_gimpoz diff --git a/C4/Record.pm b/C4/Record.pm index 7751bc09b9..89bde3227f 100644 --- a/C4/Record.pm +++ b/C4/Record.pm @@ -1,7 +1,7 @@ package C4::Record; # # Copyright 2006 (C) LibLime -# Joshua Ferraro +# Parts copyright 2010 BibLibre # # This file is part of Koha. # @@ -14,21 +14,27 @@ package C4::Record; # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # -# You should have received a copy of the GNU General Public License along with -# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, -# Suite 330, Boston, MA 02111-1307 USA +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # -use strict;# use warnings; #FIXME: turn off warnings before release +use strict; +#use warnings; FIXME - Bug 2505 # please specify in which methods a given module is used -use MARC::Record; # marc2marcxml, marcxml2marc, html2marc, changeEncoding -use MARC::File::XML; # marc2marcxml, marcxml2marc, html2marcxml, changeEncoding +use MARC::Record; # marc2marcxml, marcxml2marc, changeEncoding +use MARC::File::XML; # marc2marcxml, marcxml2marc, changeEncoding use MARC::Crosswalk::DublinCore; # marc2dcxml use Biblio::EndnoteStyle; use Unicode::Normalize; # _entity_encode use XML::LibXSLT; use XML::LibXML; +use C4::Biblio; #marc2bibtex +use C4::Csv; #marc2csv +use C4::Koha; #marc2csv +use YAML; #marcrecords2csv +use Text::CSV::Encoded; #marc2csv use vars qw($VERSION @ISA @EXPORT); @@ -46,9 +52,8 @@ $VERSION = 3.00; &marcxml2marc &marc2dcxml &marc2modsxml - - &html2marcxml - &html2marc + &marc2bibtex + &marc2csv &changeEncoding ); @@ -64,32 +69,49 @@ New in Koha 3.x. This module handles all record-related management functions. =head2 marc2marc - Convert from one flavour of ISO-2709 to another -=over 4 - -my ($error,$newmarc) = marc2marc($marc,$to_flavour,$from_flavour,$encoding); + my ($error,$newmarc) = marc2marc($marc,$to_flavour,$from_flavour,$encoding); Returns an ISO-2709 scalar -=back - =cut sub marc2marc { my ($marc,$to_flavour,$from_flavour,$encoding) = @_; - my $error = "Feature not yet implemented\n"; + my $error; + if ($to_flavour =~ m/marcstd/) { + my $marc_record_obj; + if ($marc =~ /^MARC::Record/) { # it's already a MARC::Record object + $marc_record_obj = $marc; + } else { # it's not a MARC::Record object, make it one + eval { $marc_record_obj = MARC::Record->new_from_usmarc($marc) }; # handle exceptions + +# conversion to MARC::Record object failed, populate $error + if ($@) { $error .="\nCreation of MARC::Record object failed: ".$MARC::File::ERROR }; + } + unless ($error) { + my @privatefields; + foreach my $field ($marc_record_obj->fields()) { + if ($field->tag() =~ m/9/ && ($field->tag() != '490' || C4::Context->preference("marcflavour") eq 'UNIMARC')) { + push @privatefields, $field; + } elsif (! ($field->is_control_field())) { + $field->delete_subfield(code => '9') if ($field->subfield('9')); + } + } + $marc_record_obj->delete_field($_) for @privatefields; + $marc = $marc_record_obj->as_usmarc(); + } + } else { + $error = "Feature not yet implemented\n"; + } return ($error,$marc); } =head2 marc2marcxml - Convert from ISO-2709 to MARCXML -=over 4 - -my ($error,$marcxml) = marc2marcxml($marc,$encoding,$flavour); + my ($error,$marcxml) = marc2marcxml($marc,$encoding,$flavour); Returns a MARCXML scalar -=over 2 - C<$marc> - an ISO-2709 scalar or MARC::Record object C<$encoding> - UTF-8 or MARC-8 [UTF-8] @@ -98,10 +120,6 @@ C<$flavour> - MARC21 or UNIMARC C<$dont_entity_encode> - a flag that instructs marc2marcxml not to entity encode the xml before returning (optional) -=back - -=back - =cut sub marc2marcxml { @@ -168,24 +186,16 @@ sub marc2marcxml { =head2 marcxml2marc - Convert from MARCXML to ISO-2709 -=over 4 - -my ($error,$marc) = marcxml2marc($marcxml,$encoding,$flavour); + my ($error,$marc) = marcxml2marc($marcxml,$encoding,$flavour); Returns an ISO-2709 scalar -=over 2 - C<$marcxml> - a MARCXML record C<$encoding> - UTF-8 or MARC-8 [UTF-8] C<$flavour> - MARC21 or UNIMARC -=back - -=back - =cut sub marcxml2marc { @@ -208,24 +218,16 @@ sub marcxml2marc { =head2 marc2dcxml - Convert from ISO-2709 to Dublin Core -=over 4 - -my ($error,$dcxml) = marc2dcxml($marc,$qualified); + my ($error,$dcxml) = marc2dcxml($marc,$qualified); Returns a DublinCore::Record object, will eventually return a Dublin Core scalar FIXME: should return actual XML, not just an object -=over 2 - C<$marc> - an ISO-2709 scalar or MARC::Record object C<$qualified> - specify whether qualified Dublin Core should be used in the input or output [0] -=back - -=back - =cut sub marc2dcxml { @@ -248,7 +250,7 @@ sub marc2dcxml { $crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 1 ); } my $dcxml = $crosswalk->as_dublincore($marc_record_obj); - my $dcxmlfinal = "\n"; + my $dcxmlfinal = "\n"; $dcxmlfinal .= ""; foreach my $element ( $dcxml->elements() ) { - $dcxmlfinal.="<"."dc:".$element->name().">".$element->content()."name()."\n"; + $dcxmlfinal.="<"."dc:".$element->name().">".$element->content()."name().">\n"; } $dcxmlfinal .= "\n"; return ($error,$dcxmlfinal); } -=head2 marc2modsxml - Convert from ISO-2709 to MODS -=over 4 +=head2 marc2modsxml - Convert from ISO-2709 to MODS -my ($error,$modsxml) = marc2modsxml($marc); + my ($error,$modsxml) = marc2modsxml($marc); Returns a MODS scalar -=back - =cut sub marc2modsxml { my ($marc) = @_; # grab the XML, run it through our stylesheet, push it out to the browser my $xmlrecord = marc2marcxml($marc); - my $xslfile = C4::Context->config('intranetdir')."/koha-tmpl/intranet-tmpl/prog/en/xslt/MARC21slim2MODS3-1.xsl"; + my $xslfile = C4::Context->config('intrahtdocs')."/prog/en/xslt/MARC21slim2MODS3-1.xsl"; my $parser = XML::LibXML->new(); my $xslt = XML::LibXSLT->new(); my $source = $parser->parse_string($xmlrecord); @@ -292,12 +291,19 @@ sub marc2modsxml { sub marc2endnote { my ($marc) = @_; my $marc_rec_obj = MARC::Record->new_from_usmarc($marc); - my $f260 = $marc_rec_obj->field('260'); - my $f260a = $f260->subfield('a') if $f260; + my ( $abstract, $f260a, $f710a ); + my $f260 = $marc_rec_obj->field('260'); + if ($f260) { + $f260a = $f260->subfield('a') if $f260; + } my $f710 = $marc_rec_obj->field('710'); - my $f710a = $f710->subfield('a') if $f710; - my $f500 = $marc_rec_obj->field('500'); - my $abstract = $f500->subfield('a') if $f500; + if ($f710) { + $f710a = $f710->subfield('a'); + } + my $f500 = $marc_rec_obj->field('500'); + if ($f500) { + $abstract = $f500->subfield('a'); + } my $fields = { DB => C4::Context->preference("LibraryName"), Title => $marc_rec_obj->title(), @@ -322,195 +328,207 @@ sub marc2endnote { } +=head2 marc2csv - Convert several records from UNIMARC to CSV -=head2 html2marcxml - -=over 4 + my ($csv) = marc2csv($biblios, $csvprofileid); -my ($error,$marcxml) = html2marcxml($tags,$subfields,$values,$indicator,$ind_tag); +Pre and postprocessing can be done through a YAML file -Returns a MARCXML scalar +Returns a CSV scalar -this is used in addbiblio.pl and additem.pl to build the MARCXML record from -the form submission. +C<$biblio> - a list of biblionumbers -FIXME: this could use some better code documentation - -=back +C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv) =cut -sub html2marcxml { - my ($tags,$subfields,$values,$indicator,$ind_tag) = @_; - my $error; - # add the header info - my $marcxml= MARC::File::XML::header(C4::Context->preference('TemplateEncoding'),C4::Context->preference('marcflavour')); - - # some flags used to figure out where in the record we are - my $prevvalue; - my $prevtag=-1; - my $first=1; - my $j = -1; - - # handle characters that would cause the parser to choke FIXME: is there a more elegant solution? - for (my $i=0;$i<=@$tags;$i++){ - @$values[$i] =~ s/&/&/g; - @$values[$i] =~ s//>/g; - @$values[$i] =~ s/"/"/g; - @$values[$i] =~ s/'/'/g; - - if ((@$tags[$i] ne $prevtag)){ - $j++ unless (@$tags[$i] eq ""); - #warn "IND:".substr(@$indicator[$j],0,1).substr(@$indicator[$j],1,1)." ".@$tags[$i]; - if (!$first){ - $marcxml.="\n"; - if ((@$tags[$i] > 10) && (@$values[$i] ne "")){ - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $marcxml.="@$values[$i]\n"; - $first=0; - } else { - $first=1; - } - } else { - if (@$values[$i] ne "") { - # handle the leader - if (@$tags[$i] eq "000") { - $marcxml.="@$values[$i]\n"; - $first=1; - # rest of the fixed fields - } elsif (@$tags[$i] < 010) { #FIXME: <10 was the way it was, there might even be a better way - $marcxml.="@$values[$i]\n"; - $first=1; - } else { - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $marcxml.="@$values[$i]\n"; - $first=0; - } - } - } - } else { # @$tags[$i] eq $prevtag - if (@$values[$i] eq "") { - } else { - if ($first){ - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $first=0; - } - $marcxml.="@$values[$i]\n"; - } - } - $prevtag = @$tags[$i]; - } - $marcxml.= MARC::File::XML::footer(); - #warn $marcxml; - return ($error,$marcxml); -} +sub marc2csv { + my ($biblios, $id) = @_; + my $output; + my $csv = Text::CSV::Encoded->new(); -=head2 html2marc + # Getting yaml file + my $configfile = "../tools/csv-profiles/$id.yaml"; + my ($preprocess, $postprocess, $fieldprocessing); + if (-e $configfile){ + ($preprocess,$postprocess, $fieldprocessing) = YAML::LoadFile($configfile); + } -=over 4 + # Preprocessing + eval $preprocess if ($preprocess); -Probably best to avoid using this ... it has some rather striking problems: + my $firstpass = 1; + foreach my $biblio (@$biblios) { + $output .= marcrecord2csv($biblio, $id, $firstpass, $csv, $fieldprocessing) ; + $firstpass = 0; + } -=over 2 + # Postprocessing + eval $postprocess if ($postprocess); -* saves blank subfields + return $output; +} -* subfield order is hardcoded to always start with 'a' for repeatable tags (because it is hardcoded in the addfield routine). +=head2 marcrecord2csv - Convert a single record from UNIMARC to CSV -* only possible to specify one set of indicators for each set of tags (ie, one for all the 650s). (because they were stored in a hash with the tag as the key). + my ($csv) = marcrecord2csv($biblio, $csvprofileid, $header); -* the underlying routines didn't support subfield reordering or subfield repeatability. +Returns a CSV scalar -=back +C<$biblio> - a biblionumber -I've left it in here because it could be useful if someone took the time to fix it. -- kados +C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv) -=back +C<$header> - true if the headers are to be printed (typically at first pass) + +C<$csv> - an already initialised Text::CSV object =cut -sub html2marc { - my ($dbh,$rtags,$rsubfields,$rvalues,%indicators) = @_; - my $prevtag = -1; - my $record = MARC::Record->new(); -# my %subfieldlist=(); - my $prevvalue; # if tag <10 - my $field; # if tag >=10 - for (my $i=0; $i< @$rtags; $i++) { - # rebuild MARC::Record -# warn "0=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": "; - if (@$rtags[$i] ne $prevtag) { - if ($prevtag < 10) { - if ($prevvalue) { - if (($prevtag ne '000') && ($prevvalue ne "")) { - $record->add_fields((sprintf "%03s",$prevtag),$prevvalue); - } elsif ($prevvalue ne ""){ - $record->leader($prevvalue); - } - } - } else { - if (($field) && ($field ne "")) { - $record->add_fields($field); - } - } - $indicators{@$rtags[$i]}.=' '; - # skip blank tags, I hope this works - if (@$rtags[$i] eq ''){ - $prevtag = @$rtags[$i]; - undef $field; - next; - } - if (@$rtags[$i] <10) { - $prevvalue= @$rvalues[$i]; - undef $field; - } else { - undef $prevvalue; - if (@$rvalues[$i] eq "") { - undef $field; - } else { - $field = MARC::Field->new( (sprintf "%03s",@$rtags[$i]), substr($indicators{@$rtags[$i]},0,1),substr($indicators{@$rtags[$i]},1,1), @$rsubfields[$i] => @$rvalues[$i]); - } -# warn "1=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted; - } - $prevtag = @$rtags[$i]; + +sub marcrecord2csv { + my ($biblio, $id, $header, $csv, $fieldprocessing) = @_; + my $output; + + # Getting the record + my $record = GetMarcBiblio($biblio, 1); + next unless $record; + # Getting the framework + my $frameworkcode = GetFrameworkCode($biblio); + + # Getting information about the csv profile + my $profile = GetCsvProfile($id); + + # Getting output encoding + my $encoding = $profile->{encoding} || 'utf8'; + # Getting separators + my $csvseparator = $profile->{csv_separator} || ','; + my $fieldseparator = $profile->{field_separator} || '#'; + my $subfieldseparator = $profile->{subfield_separator} || '|'; + + # TODO: Be more generic (in case we have to handle other protected chars or more separators) + if ($csvseparator eq '\t') { $csvseparator = "\t" } + if ($fieldseparator eq '\t') { $fieldseparator = "\t" } + if ($subfieldseparator eq '\t') { $subfieldseparator = "\t" } + if ($csvseparator eq '\n') { $csvseparator = "\n" } + if ($fieldseparator eq '\n') { $fieldseparator = "\n" } + if ($subfieldseparator eq '\n') { $subfieldseparator = "\n" } + + $csv = $csv->encoding_out($encoding) ; + $csv->sep_char($csvseparator); + + # Getting the marcfields + my $marcfieldslist = $profile->{marcfields}; + + # Getting the marcfields as an array + my @marcfieldsarray = split('\|', $marcfieldslist); + + # Separating the marcfields from the user-supplied headers + my @marcfields; + foreach (@marcfieldsarray) { + my @result = split('=', $_); + if (scalar(@result) == 2) { + push @marcfields, { header => $result[0], field => $result[1] }; } else { - if (@$rtags[$i] <10) { - $prevvalue=@$rvalues[$i]; - } else { - if (length(@$rvalues[$i])>0) { - $field->add_subfields(@$rsubfields[$i] => @$rvalues[$i]); -# warn "2=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted; - } - } - $prevtag= @$rtags[$i]; + push @marcfields, { field => $result[0] } } } - #} - # the last has not been included inside the loop... do it now ! - #use Data::Dumper; - #warn Dumper($field->{_subfields}); - $record->add_fields($field) if (($field) && $field ne ""); - #warn "HTML2MARC=".$record->as_formatted; - return $record; + + # If we have to insert the headers + if ($header) { + my @marcfieldsheaders; + my $dbh = C4::Context->dbh; + + # For each field or subfield + foreach (@marcfields) { + + my $field = $_->{field}; + # Remove any blank char that might have unintentionally insered into the tag name + $field =~ s/\s+//g; + + # If we have a user-supplied header, we use it + if (exists $_->{header}) { + push @marcfieldsheaders, $_->{header}; + } else { + # If not, we get the matching tag name from koha + if (index($field, '$') > 0) { + my ($fieldtag, $subfieldtag) = split('\$', $field); + my $query = "SELECT liblibrarian FROM marc_subfield_structure WHERE tagfield=? AND tagsubfield=?"; + my $sth = $dbh->prepare($query); + $sth->execute($fieldtag, $subfieldtag); + my @results = $sth->fetchrow_array(); + push @marcfieldsheaders, $results[0]; + } else { + my $query = "SELECT liblibrarian FROM marc_tag_structure WHERE tagfield=?"; + my $sth = $dbh->prepare($query); + $sth->execute($field); + my @results = $sth->fetchrow_array(); + push @marcfieldsheaders, $results[0]; + } + } + } + $csv->combine(@marcfieldsheaders); + $output = $csv->string() . "\n"; + } + + # For each marcfield to export + my @fieldstab; + foreach (@marcfields) { + my $marcfield = $_->{field}; + # If it is a subfield + if (index($marcfield, '$') > 0) { + my ($fieldtag, $subfieldtag) = split('\$', $marcfield); + my @fields = $record->field($fieldtag); + my @tmpfields; + + # For each field + foreach my $field (@fields) { + + # We take every matching subfield + my @subfields = $field->subfield($subfieldtag); + foreach my $subfield (@subfields) { + + # Getting authorised value + my $authvalues = GetKohaAuthorisedValuesFromField($fieldtag, $subfieldtag, $frameworkcode, undef); + push @tmpfields, (defined $authvalues->{$subfield}) ? $authvalues->{$subfield} : $subfield; + } + } + push (@fieldstab, join($subfieldseparator, @tmpfields)); + # Or a field + } else { + my @fields = ($record->field($marcfield)); + my $authvalues = GetKohaAuthorisedValuesFromField($marcfield, undef, $frameworkcode, undef); + + my @valuesarray; + foreach (@fields) { + my $value; + + # Getting authorised value + $value = defined $authvalues->{$_->as_string} ? $authvalues->{$_->as_string} : $_->as_string; + + # Field processing + eval $fieldprocessing if ($fieldprocessing); + + push @valuesarray, $value; + } + push (@fieldstab, join($fieldseparator, @valuesarray)); + } + }; + + $csv->combine(@fieldstab); + $output .= $csv->string() . "\n"; + + return $output; + } -=head2 changeEncoding - Change the encoding of a record -=over 4 +=head2 changeEncoding - Change the encoding of a record -my ($error, $newrecord) = changeEncoding($record,$format,$flavour,$to_encoding,$from_encoding); + my ($error, $newrecord) = changeEncoding($record,$format,$flavour,$to_encoding,$from_encoding); Changes the encoding of a record -=over 2 - C<$record> - the record itself can be in ISO-2709, a MARC::Record object, or MARCXML for now (required) C<$format> - MARC or MARCXML (required) @@ -521,16 +539,12 @@ C<$to_encoding> - the encoding you want the record to end up in (optional) [UTF- C<$from_encoding> - the encoding the record is currently in (optional, it will probably be able to tell unless there's a problem with the record) -=back - FIXME: the from_encoding doesn't work yet FIXME: better handling for UNIMARC, it should allow management of 100 field FIXME: shouldn't have to convert to and from xml/marc just to change encoding someone needs to re-write MARC::Record's 'encoding' method to actually alter the encoding rather than just changing the leader -=back - =cut sub changeEncoding { @@ -564,22 +578,98 @@ sub changeEncoding { return ($error,$newrecord); } +=head2 marc2bibtex - Convert from MARC21 and UNIMARC to BibTex + + my ($bibtex) = marc2bibtex($record, $id); + +Returns a BibTex scalar + +C<$record> - a MARC::Record object + +C<$id> - an id for the BibTex record (might be the biblionumber) + +=cut + + +sub marc2bibtex { + my ($record, $id) = @_; + my $tex; + + # Authors + my $marcauthors = GetMarcAuthors($record,C4::Context->preference("marcflavour")); + my $author; + for my $authors ( map { map { @$_ } values %$_ } @$marcauthors ) { + $author .= " and " if ($author && $$authors{value}); + $author .= $$authors{value} if ($$authors{value}); + } + + # Defining the conversion hash according to the marcflavour + my %bh; + if (C4::Context->preference("marcflavour") eq "UNIMARC") { + + # FIXME, TODO : handle repeatable fields + # TODO : handle more types of documents + + # Unimarc to bibtex hash + %bh = ( + + # Mandatory + author => $author, + title => $record->subfield("200", "a") || "", + editor => $record->subfield("210", "g") || "", + publisher => $record->subfield("210", "c") || "", + year => $record->subfield("210", "d") || $record->subfield("210", "h") || "", + + # Optional + volume => $record->subfield("200", "v") || "", + series => $record->subfield("225", "a") || "", + address => $record->subfield("210", "a") || "", + edition => $record->subfield("205", "a") || "", + note => $record->subfield("300", "a") || "", + url => $record->subfield("856", "u") || "" + ); + } else { + + # Marc21 to bibtex hash + %bh = ( + + # Mandatory + author => $author, + title => $record->subfield("245", "a") || "", + editor => $record->subfield("260", "f") || "", + publisher => $record->subfield("260", "b") || "", + year => $record->subfield("260", "c") || $record->subfield("260", "g") || "", + + # Optional + # unimarc to marc21 specification says not to convert 200$v to marc21 + series => $record->subfield("490", "a") || "", + address => $record->subfield("260", "a") || "", + edition => $record->subfield("250", "a") || "", + note => $record->subfield("500", "a") || "", + url => $record->subfield("856", "u") || "" + ); + } + + $tex .= "\@book{"; + $tex .= join(",\n", $id, map { $bh{$_} ? qq(\t$_ = "$bh{$_}") : () } keys %bh); + $tex .= "\n}\n"; + + return $tex; +} + + =head1 INTERNAL FUNCTIONS =head2 _entity_encode - Entity-encode an array of strings -=over 4 - -my ($entity_encoded_string) = _entity_encode($string); + my ($entity_encoded_string) = _entity_encode($string); or -my (@entity_encoded_strings) = _entity_encode(@strings); + my (@entity_encoded_strings) = _entity_encode(@strings); Entity-encode an array of strings -=back - =cut sub _entity_encode {