X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=C4%2FRecord.pm;h=89bde3227f04f003f4615ce0941eba49b174b07a;hb=31a3a7ed9353958e3f684fffc62316caf21e3e77;hp=804e79f7e51734530db39afa61822a07d86da320;hpb=0ee6cf80a58407ca96bf65b9dd0e71a79553f8e9;p=koha_gimpoz diff --git a/C4/Record.pm b/C4/Record.pm index 804e79f7e5..89bde3227f 100644 --- a/C4/Record.pm +++ b/C4/Record.pm @@ -1,7 +1,7 @@ package C4::Record; # # Copyright 2006 (C) LibLime -# Joshua Ferraro +# Parts copyright 2010 BibLibre # # This file is part of Koha. # @@ -14,312 +14,684 @@ package C4::Record; # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # -# You should have received a copy of the GNU General Public License along with -# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, -# Suite 330, Boston, MA 02111-1307 USA +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # -# $Id$ # -use strict; use warnings; #FIXME: turn off warnings before release +use strict; +#use warnings; FIXME - Bug 2505 # please specify in which methods a given module is used -use MARC::Record; #marc2marcxml, marcxml2marc, html2marc, changeEncoding -use MARC::File::XML; #marc2marcxml, marcxml2marc, html2marcxml, changeEncoding +use MARC::Record; # marc2marcxml, marcxml2marc, changeEncoding +use MARC::File::XML; # marc2marcxml, marcxml2marc, changeEncoding +use MARC::Crosswalk::DublinCore; # marc2dcxml +use Biblio::EndnoteStyle; +use Unicode::Normalize; # _entity_encode +use XML::LibXSLT; +use XML::LibXML; +use C4::Biblio; #marc2bibtex +use C4::Csv; #marc2csv +use C4::Koha; #marc2csv +use YAML; #marcrecords2csv +use Text::CSV::Encoded; #marc2csv use vars qw($VERSION @ISA @EXPORT); # set the version for version checking -$VERSION = do { my @v = '$Revision$' =~ /\d+/g; - shift(@v) . "." . join("_", map {sprintf "%03d", $_ } @v); }; +$VERSION = 3.00; @ISA = qw(Exporter); # only export API methods @EXPORT = qw( + &marc2endnote + &marc2marc &marc2marcxml &marcxml2marc - &html2marcxml - &html2marc + &marc2dcxml + &marc2modsxml + &marc2bibtex + &marc2csv &changeEncoding ); =head1 NAME -C4::Record - MARC, MARCXML, XML, etc. Record Management Functions and API +C4::Record - MARC, MARCXML, DC, MODS, XML, etc. Record Management Functions and API =head1 SYNOPSIS New in Koha 3.x. This module handles all record-related management functions. -=head1 API +=head1 API (EXPORTED FUNCTIONS) -=head2 marc2marcxml +=head2 marc2marc - Convert from one flavour of ISO-2709 to another -my $marcxml = marc2marcxml($marc,$encoding,$flavour); + my ($error,$newmarc) = marc2marc($marc,$to_flavour,$from_flavour,$encoding); -returns an XML scalar variable +Returns an ISO-2709 scalar -C<$marc> a MARC::Record object or binary MARC record +=cut + +sub marc2marc { + my ($marc,$to_flavour,$from_flavour,$encoding) = @_; + my $error; + if ($to_flavour =~ m/marcstd/) { + my $marc_record_obj; + if ($marc =~ /^MARC::Record/) { # it's already a MARC::Record object + $marc_record_obj = $marc; + } else { # it's not a MARC::Record object, make it one + eval { $marc_record_obj = MARC::Record->new_from_usmarc($marc) }; # handle exceptions + +# conversion to MARC::Record object failed, populate $error + if ($@) { $error .="\nCreation of MARC::Record object failed: ".$MARC::File::ERROR }; + } + unless ($error) { + my @privatefields; + foreach my $field ($marc_record_obj->fields()) { + if ($field->tag() =~ m/9/ && ($field->tag() != '490' || C4::Context->preference("marcflavour") eq 'UNIMARC')) { + push @privatefields, $field; + } elsif (! ($field->is_control_field())) { + $field->delete_subfield(code => '9') if ($field->subfield('9')); + } + } + $marc_record_obj->delete_field($_) for @privatefields; + $marc = $marc_record_obj->as_usmarc(); + } + } else { + $error = "Feature not yet implemented\n"; + } + return ($error,$marc); +} -C<$encoding> UTF-8 or MARC-8 [UTF-8} +=head2 marc2marcxml - Convert from ISO-2709 to MARCXML -C<$flavour> MARC21 or UNIMARC + my ($error,$marcxml) = marc2marcxml($marc,$encoding,$flavour); + +Returns a MARCXML scalar + +C<$marc> - an ISO-2709 scalar or MARC::Record object + +C<$encoding> - UTF-8 or MARC-8 [UTF-8] + +C<$flavour> - MARC21 or UNIMARC + +C<$dont_entity_encode> - a flag that instructs marc2marcxml not to entity encode the xml before returning (optional) =cut sub marc2marcxml { - my ($marc,$encoding,$flavour) = @_; - unless($encoding) {$encoding = "UTF-8"}; - unless($flavour) {$flavour = C4::Context->preference("TemplateEncoding")}; - #FIXME: add error handling - my $marcxml = $record->as_xml_record($marc,$encoding,$flavour); - return $marcxml; + my ($marc,$encoding,$flavour,$dont_entity_encode) = @_; + my $error; # the error string + my $marcxml; # the final MARCXML scalar + + # test if it's already a MARC::Record object, if not, make it one + my $marc_record_obj; + if ($marc =~ /^MARC::Record/) { # it's already a MARC::Record object + $marc_record_obj = $marc; + } else { # it's not a MARC::Record object, make it one + eval { $marc_record_obj = MARC::Record->new_from_usmarc($marc) }; # handle exceptions + + # conversion to MARC::Record object failed, populate $error + if ($@) { $error .="\nCreation of MARC::Record object failed: ".$MARC::File::ERROR }; + } + # only proceed if no errors so far + unless ($error) { + + # check the record for warnings + my @warnings = $marc_record_obj->warnings(); + if (@warnings) { + warn "\nWarnings encountered while processing ISO-2709 record with title \"".$marc_record_obj->title()."\":\n"; + foreach my $warn (@warnings) { warn "\t".$warn }; + } + unless($encoding) {$encoding = "UTF-8"}; # set default encoding + unless($flavour) {$flavour = C4::Context->preference("marcflavour")}; # set default MARC flavour + + # attempt to convert the record to MARCXML + eval { $marcxml = $marc_record_obj->as_xml_record($flavour) }; #handle exceptions + + # record creation failed, populate $error + if ($@) { + $error .= "Creation of MARCXML failed:".$MARC::File::ERROR; + $error .= "Additional information:\n"; + my @warnings = $@->warnings(); + foreach my $warn (@warnings) { $error.=$warn."\n" }; + + # record creation was successful + } else { + + # check the record for warning flags again (warnings() will be cleared already if there was an error, see above block + @warnings = $marc_record_obj->warnings(); + if (@warnings) { + warn "\nWarnings encountered while processing ISO-2709 record with title \"".$marc_record_obj->title()."\":\n"; + foreach my $warn (@warnings) { warn "\t".$warn }; + } + } + + # only proceed if no errors so far + unless ($error) { + + # entity encode the XML unless instructed not to + unless ($dont_entity_encode) { + my ($marcxml_entity_encoded) = _entity_encode($marcxml); + $marcxml = $marcxml_entity_encoded; + } + } + } + # return result to calling program + return ($error,$marcxml); } -=head2 marcxml2marc +=head2 marcxml2marc - Convert from MARCXML to ISO-2709 -my $marc = marcxml2marc($marcxml,$encoding,$flavour); + my ($error,$marc) = marcxml2marc($marcxml,$encoding,$flavour); -returns a binary MARC scalar variable +Returns an ISO-2709 scalar -C<$marcxml> a MARCXML record +C<$marcxml> - a MARCXML record -C<$encoding> UTF-8 or MARC-8 [UTF-8] +C<$encoding> - UTF-8 or MARC-8 [UTF-8] -C<$flavour> MARC21 or UNIMARC +C<$flavour> - MARC21 or UNIMARC =cut sub marcxml2marc { my ($marcxml,$encoding,$flavour) = @_; - unless($encoding) {$encoding = "UTF-8"}; - unless($flavour) {$flavour = C4::Context->preference("TemplateEncoding")}; - #FIXME: add error handling - my $marc = $marcxml->new_from_xml($record,$encoding,$flavour); - return $marc; + my $error; # the error string + my $marc; # the final ISO-2709 scalar + unless($encoding) {$encoding = "UTF-8"}; # set the default encoding + unless($flavour) {$flavour = C4::Context->preference("marcflavour")}; # set the default MARC flavour + + # attempt to do the conversion + eval { $marc = MARC::Record->new_from_xml($marcxml,$encoding,$flavour) }; # handle exceptions + + # record creation failed, populate $error + if ($@) {$error .="\nCreation of MARCXML Record failed: ".$@; + $error.=$MARC::File::ERROR if ($MARC::File::ERROR); + }; + # return result to calling program + return ($error,$marc); } -=head2 html2marcxml +=head2 marc2dcxml - Convert from ISO-2709 to Dublin Core -my $marcxml = html2marcxml($tags,$subfields,$values,$indicator,$ind_tag); + my ($error,$dcxml) = marc2dcxml($marc,$qualified); -returns a MARCXML scalar variable +Returns a DublinCore::Record object, will eventually return a Dublin Core scalar -this is used in addbiblio.pl and additem.pl to build the MARCXML record from -the form submission. +FIXME: should return actual XML, not just an object -FIXME: this could use some better code documentation +C<$marc> - an ISO-2709 scalar or MARC::Record object + +C<$qualified> - specify whether qualified Dublin Core should be used in the input or output [0] =cut -sub html2marcxml { - my ($tags,$subfields,$values,$indicator,$ind_tag) = @_; - # add the header info - my $marcxml= MARC::File::XML::header(C4::Context->preference('TemplateEncoding'),C4::Context->preference('marcflavour')); - - # some flags used to figure out where in the record we are - my $prevvalue; - my $prevtag=-1; - my $first=1; - my $j = -1; - - # handle characters that would cause the parser to choke FIXME: is there a more elegant solution? - for (my $i=0;$i<=@$tags;$i++){ - @$values[$i] =~ s/&/&/g; - @$values[$i] =~ s//>/g; - @$values[$i] =~ s/"/"/g; - @$values[$i] =~ s/'/'/g; - - if ((@$tags[$i] ne $prevtag)){ - $j++ unless (@$tags[$i] eq ""); - #warn "IND:".substr(@$indicator[$j],0,1).substr(@$indicator[$j],1,1)." ".@$tags[$i]; - if (!$first){ - $marcxml.="\n"; - if ((@$tags[$i] > 10) && (@$values[$i] ne "")){ - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $marcxml.="@$values[$i]\n"; - $first=0; - } else { - $first=1; - } - } else { - if (@$values[$i] ne "") { - # handle the leader - if (@$tags[$i] eq "000") { - $marcxml.="@$values[$i]\n"; - $first=1; - # rest of the fixed fields - } elsif (@$tags[$i] < 010) { #FIXME: <10 was the way it was, there might even be a better way - $marcxml.="@$values[$i]\n"; - $first=1; - } else { - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $marcxml.="@$values[$i]\n"; - $first=0; - } - } - } - } else { # @$tags[$i] eq $prevtag - if (@$values[$i] eq "") { - } else { - if ($first){ - my $ind1 = substr(@$indicator[$j],0,1); - my $ind2 = substr(@$indicator[$j],1,1); - $marcxml.="\n"; - $first=0; - } - $marcxml.="@$values[$i]\n"; - } +sub marc2dcxml { + my ($marc,$qualified) = @_; + my $error; + # test if it's already a MARC::Record object, if not, make it one + my $marc_record_obj; + if ($marc =~ /^MARC::Record/) { # it's already a MARC::Record object + $marc_record_obj = $marc; + } else { # it's not a MARC::Record object, make it one + eval { $marc_record_obj = MARC::Record->new_from_usmarc($marc) }; # handle exceptions + + # conversion to MARC::Record object failed, populate $error + if ($@) { + $error .="\nCreation of MARC::Record object failed: ".$MARC::File::ERROR; } - $prevtag = @$tags[$i]; } - $marcxml.= MARC::File::XML::footer(); - #warn $marcxml; - return $marcxml; + my $crosswalk = MARC::Crosswalk::DublinCore->new; + if ($qualified) { + $crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 1 ); + } + my $dcxml = $crosswalk->as_dublincore($marc_record_obj); + my $dcxmlfinal = "\n"; + $dcxmlfinal .= ""; + + foreach my $element ( $dcxml->elements() ) { + $dcxmlfinal.="<"."dc:".$element->name().">".$element->content()."name().">\n"; + } + $dcxmlfinal .= "\n"; + return ($error,$dcxmlfinal); } -=head2 html2marc +=head2 marc2modsxml - Convert from ISO-2709 to MODS -Probably best to avoid using this ... it has some rather striking problems: + my ($error,$modsxml) = marc2modsxml($marc); -* saves blank subfields -* subfield order is hardcoded to always start - with 'a' for repeatable tags (because it is hardcoded in the - addfield routine). -* only possible to specify one set of indicators for each set of - tags (ie, one for all the 650s). (because they were stored in a - hash with the tag as the key). -* the underlying routines didn't support subfield - reordering or subfield repeatability. +Returns a MODS scalar -I've left it in here because it could be useful if someone took the time to -fix it. +=cut + +sub marc2modsxml { + my ($marc) = @_; + # grab the XML, run it through our stylesheet, push it out to the browser + my $xmlrecord = marc2marcxml($marc); + my $xslfile = C4::Context->config('intrahtdocs')."/prog/en/xslt/MARC21slim2MODS3-1.xsl"; + my $parser = XML::LibXML->new(); + my $xslt = XML::LibXSLT->new(); + my $source = $parser->parse_string($xmlrecord); + my $style_doc = $parser->parse_file($xslfile); + my $stylesheet = $xslt->parse_stylesheet($style_doc); + my $results = $stylesheet->transform($source); + my $newxmlrecord = $stylesheet->output_string($results); + return ($newxmlrecord); +} + +sub marc2endnote { + my ($marc) = @_; + my $marc_rec_obj = MARC::Record->new_from_usmarc($marc); + my ( $abstract, $f260a, $f710a ); + my $f260 = $marc_rec_obj->field('260'); + if ($f260) { + $f260a = $f260->subfield('a') if $f260; + } + my $f710 = $marc_rec_obj->field('710'); + if ($f710) { + $f710a = $f710->subfield('a'); + } + my $f500 = $marc_rec_obj->field('500'); + if ($f500) { + $abstract = $f500->subfield('a'); + } + my $fields = { + DB => C4::Context->preference("LibraryName"), + Title => $marc_rec_obj->title(), + Author => $marc_rec_obj->author(), + Publisher => $f710a, + City => $f260a, + Year => $marc_rec_obj->publication_date, + Abstract => $abstract, + }; + my $endnote; + my $style = new Biblio::EndnoteStyle(); + my $template; + $template.= "DB - DB\n" if C4::Context->preference("LibraryName"); + $template.="T1 - Title\n" if $marc_rec_obj->title(); + $template.="A1 - Author\n" if $marc_rec_obj->author(); + $template.="PB - Publisher\n" if $f710a; + $template.="CY - City\n" if $f260a; + $template.="Y1 - Year\n" if $marc_rec_obj->publication_date; + $template.="AB - Abstract\n" if $abstract; + my ($text, $errmsg) = $style->format($template, $fields); + return ($text); + +} + +=head2 marc2csv - Convert several records from UNIMARC to CSV + + my ($csv) = marc2csv($biblios, $csvprofileid); + +Pre and postprocessing can be done through a YAML file + +Returns a CSV scalar + +C<$biblio> - a list of biblionumbers + +C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv) =cut -sub html2marc { - my ($dbh,$rtags,$rsubfields,$rvalues,%indicators) = @_; - my $prevtag = -1; - my $record = MARC::Record->new(); -# my %subfieldlist=(); - my $prevvalue; # if tag <10 - my $field; # if tag >=10 - for (my $i=0; $i< @$rtags; $i++) { - # rebuild MARC::Record -# warn "0=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": "; - if (@$rtags[$i] ne $prevtag) { - if ($prevtag < 10) { - if ($prevvalue) { - if (($prevtag ne '000') && ($prevvalue ne "")) { - $record->add_fields((sprintf "%03s",$prevtag),$prevvalue); - } elsif ($prevvalue ne ""){ - $record->leader($prevvalue); - } - } - } else { - if (($field) && ($field ne "")) { - $record->add_fields($field); - } - } - $indicators{@$rtags[$i]}.=' '; - # skip blank tags, I hope this works - if (@$rtags[$i] eq ''){ - $prevtag = @$rtags[$i]; - undef $field; - next; - } - if (@$rtags[$i] <10) { - $prevvalue= @$rvalues[$i]; - undef $field; - } else { - undef $prevvalue; - if (@$rvalues[$i] eq "") { - undef $field; - } else { - $field = MARC::Field->new( (sprintf "%03s",@$rtags[$i]), substr($indicators{@$rtags[$i]},0,1),substr($indicators{@$rtags[$i]},1,1), @$rsubfields[$i] => @$rvalues[$i]); - } -# warn "1=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted; - } - $prevtag = @$rtags[$i]; +sub marc2csv { + my ($biblios, $id) = @_; + my $output; + my $csv = Text::CSV::Encoded->new(); + + # Getting yaml file + my $configfile = "../tools/csv-profiles/$id.yaml"; + my ($preprocess, $postprocess, $fieldprocessing); + if (-e $configfile){ + ($preprocess,$postprocess, $fieldprocessing) = YAML::LoadFile($configfile); + } + + # Preprocessing + eval $preprocess if ($preprocess); + + my $firstpass = 1; + foreach my $biblio (@$biblios) { + $output .= marcrecord2csv($biblio, $id, $firstpass, $csv, $fieldprocessing) ; + $firstpass = 0; + } + + # Postprocessing + eval $postprocess if ($postprocess); + + return $output; +} + +=head2 marcrecord2csv - Convert a single record from UNIMARC to CSV + + my ($csv) = marcrecord2csv($biblio, $csvprofileid, $header); + +Returns a CSV scalar + +C<$biblio> - a biblionumber + +C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv) + +C<$header> - true if the headers are to be printed (typically at first pass) + +C<$csv> - an already initialised Text::CSV object + +=cut + + +sub marcrecord2csv { + my ($biblio, $id, $header, $csv, $fieldprocessing) = @_; + my $output; + + # Getting the record + my $record = GetMarcBiblio($biblio, 1); + next unless $record; + # Getting the framework + my $frameworkcode = GetFrameworkCode($biblio); + + # Getting information about the csv profile + my $profile = GetCsvProfile($id); + + # Getting output encoding + my $encoding = $profile->{encoding} || 'utf8'; + # Getting separators + my $csvseparator = $profile->{csv_separator} || ','; + my $fieldseparator = $profile->{field_separator} || '#'; + my $subfieldseparator = $profile->{subfield_separator} || '|'; + + # TODO: Be more generic (in case we have to handle other protected chars or more separators) + if ($csvseparator eq '\t') { $csvseparator = "\t" } + if ($fieldseparator eq '\t') { $fieldseparator = "\t" } + if ($subfieldseparator eq '\t') { $subfieldseparator = "\t" } + if ($csvseparator eq '\n') { $csvseparator = "\n" } + if ($fieldseparator eq '\n') { $fieldseparator = "\n" } + if ($subfieldseparator eq '\n') { $subfieldseparator = "\n" } + + $csv = $csv->encoding_out($encoding) ; + $csv->sep_char($csvseparator); + + # Getting the marcfields + my $marcfieldslist = $profile->{marcfields}; + + # Getting the marcfields as an array + my @marcfieldsarray = split('\|', $marcfieldslist); + + # Separating the marcfields from the user-supplied headers + my @marcfields; + foreach (@marcfieldsarray) { + my @result = split('=', $_); + if (scalar(@result) == 2) { + push @marcfields, { header => $result[0], field => $result[1] }; } else { - if (@$rtags[$i] <10) { - $prevvalue=@$rvalues[$i]; - } else { - if (length(@$rvalues[$i])>0) { - $field->add_subfields(@$rsubfields[$i] => @$rvalues[$i]); -# warn "2=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted; - } - } - $prevtag= @$rtags[$i]; + push @marcfields, { field => $result[0] } } } - #} - # the last has not been included inside the loop... do it now ! - #use Data::Dumper; - #warn Dumper($field->{_subfields}); - $record->add_fields($field) if (($field) && $field ne ""); - #warn "HTML2MARC=".$record->as_formatted; - return $record; + + # If we have to insert the headers + if ($header) { + my @marcfieldsheaders; + my $dbh = C4::Context->dbh; + + # For each field or subfield + foreach (@marcfields) { + + my $field = $_->{field}; + # Remove any blank char that might have unintentionally insered into the tag name + $field =~ s/\s+//g; + + # If we have a user-supplied header, we use it + if (exists $_->{header}) { + push @marcfieldsheaders, $_->{header}; + } else { + # If not, we get the matching tag name from koha + if (index($field, '$') > 0) { + my ($fieldtag, $subfieldtag) = split('\$', $field); + my $query = "SELECT liblibrarian FROM marc_subfield_structure WHERE tagfield=? AND tagsubfield=?"; + my $sth = $dbh->prepare($query); + $sth->execute($fieldtag, $subfieldtag); + my @results = $sth->fetchrow_array(); + push @marcfieldsheaders, $results[0]; + } else { + my $query = "SELECT liblibrarian FROM marc_tag_structure WHERE tagfield=?"; + my $sth = $dbh->prepare($query); + $sth->execute($field); + my @results = $sth->fetchrow_array(); + push @marcfieldsheaders, $results[0]; + } + } + } + $csv->combine(@marcfieldsheaders); + $output = $csv->string() . "\n"; + } + + # For each marcfield to export + my @fieldstab; + foreach (@marcfields) { + my $marcfield = $_->{field}; + # If it is a subfield + if (index($marcfield, '$') > 0) { + my ($fieldtag, $subfieldtag) = split('\$', $marcfield); + my @fields = $record->field($fieldtag); + my @tmpfields; + + # For each field + foreach my $field (@fields) { + + # We take every matching subfield + my @subfields = $field->subfield($subfieldtag); + foreach my $subfield (@subfields) { + + # Getting authorised value + my $authvalues = GetKohaAuthorisedValuesFromField($fieldtag, $subfieldtag, $frameworkcode, undef); + push @tmpfields, (defined $authvalues->{$subfield}) ? $authvalues->{$subfield} : $subfield; + } + } + push (@fieldstab, join($subfieldseparator, @tmpfields)); + # Or a field + } else { + my @fields = ($record->field($marcfield)); + my $authvalues = GetKohaAuthorisedValuesFromField($marcfield, undef, $frameworkcode, undef); + + my @valuesarray; + foreach (@fields) { + my $value; + + # Getting authorised value + $value = defined $authvalues->{$_->as_string} ? $authvalues->{$_->as_string} : $_->as_string; + + # Field processing + eval $fieldprocessing if ($fieldprocessing); + + push @valuesarray, $value; + } + push (@fieldstab, join($fieldseparator, @valuesarray)); + } + }; + + $csv->combine(@fieldstab); + $output .= $csv->string() . "\n"; + + return $output; + } -=head2 changeEncoding -$newrecord = changeEncoding($record,$format,$flavour,$toencoding,$fromencoding); +=head2 changeEncoding - Change the encoding of a record + + my ($error, $newrecord) = changeEncoding($record,$format,$flavour,$to_encoding,$from_encoding); -changes the encoding of a record +Changes the encoding of a record - - the record itself can be in ISO-2709, a MARC::Record object, or MARCXML for now (required) - - MARC or MARCXML (required) - - MARC21 or UNIMARC, if MARC21, it will change the leader (optional) [defaults to Koha system preference] - - the encoding you want the record to end up in (optional) [UTF-8] - - the encoding the record is currently in (optional, it will probably be able to tell unless there's a problem with the record) + +FIXME: the from_encoding doesn't work yet -FIXME: the fromencoding doesn't work yet FIXME: better handling for UNIMARC, it should allow management of 100 field -FIXME: shouldn't have to convert to and from xml/marc just to change encoding, - someone needs to re-write MARC::Record's 'encoding' method to actually - alter the encoding rather than just changing the leader + +FIXME: shouldn't have to convert to and from xml/marc just to change encoding someone needs to re-write MARC::Record's 'encoding' method to actually alter the encoding rather than just changing the leader =cut sub changeEncoding { - my ($record,$format,$flavour,$toencoding,$fromencoding) = @_; + my ($record,$format,$flavour,$to_encoding,$from_encoding) = @_; my $newrecord; + my $error; unless($flavour) {$flavour = C4::Context->preference("marcflavour")}; - unless($toencoding) {$toencoding = "UTF-8"}; - if (lc($format) =~ /^MARC$/o) { # ISO2790 Record - my $marcxml = marc2marcxml($record,$encoding,$flavour); - $newrecord = marcxml2marc($marcxml,$encoding,$flavour); - } elsif (lc($format) =~ /^MARCXML$/o) { # MARCXML Record - my $marc = marcxml2marc($record,$encoding,$flavour); - $newrecord = marc2marcxml($record,$encoding,$flavour); + unless($to_encoding) {$to_encoding = "UTF-8"}; + + # ISO-2709 Record (MARC21 or UNIMARC) + if (lc($format) =~ /^marc$/o) { + # if we're converting encoding of an ISO2709 file, we need to roundtrip through XML + # because MARC::Record doesn't directly provide us with an encoding method + # It's definitely less than idea and should be fixed eventually - kados + my $marcxml; # temporary storage of MARCXML scalar + ($error,$marcxml) = marc2marcxml($record,$to_encoding,$flavour); + unless ($error) { + ($error,$newrecord) = marcxml2marc($marcxml,$to_encoding,$flavour); + } + + # MARCXML Record + } elsif (lc($format) =~ /^marcxml$/o) { # MARCXML Record + my $marc; + ($error,$marc) = marcxml2marc($record,$to_encoding,$flavour); + unless ($error) { + ($error,$newrecord) = marc2marcxml($record,$to_encoding,$flavour); + } } else { - #FIXME: handle other record formats, and finally, handle errors + $error.="Unsupported record format:".$format; } - return $newrecord; + return ($error,$newrecord); +} + +=head2 marc2bibtex - Convert from MARC21 and UNIMARC to BibTex + + my ($bibtex) = marc2bibtex($record, $id); + +Returns a BibTex scalar + +C<$record> - a MARC::Record object + +C<$id> - an id for the BibTex record (might be the biblionumber) + +=cut + + +sub marc2bibtex { + my ($record, $id) = @_; + my $tex; + + # Authors + my $marcauthors = GetMarcAuthors($record,C4::Context->preference("marcflavour")); + my $author; + for my $authors ( map { map { @$_ } values %$_ } @$marcauthors ) { + $author .= " and " if ($author && $$authors{value}); + $author .= $$authors{value} if ($$authors{value}); + } + + # Defining the conversion hash according to the marcflavour + my %bh; + if (C4::Context->preference("marcflavour") eq "UNIMARC") { + + # FIXME, TODO : handle repeatable fields + # TODO : handle more types of documents + + # Unimarc to bibtex hash + %bh = ( + + # Mandatory + author => $author, + title => $record->subfield("200", "a") || "", + editor => $record->subfield("210", "g") || "", + publisher => $record->subfield("210", "c") || "", + year => $record->subfield("210", "d") || $record->subfield("210", "h") || "", + + # Optional + volume => $record->subfield("200", "v") || "", + series => $record->subfield("225", "a") || "", + address => $record->subfield("210", "a") || "", + edition => $record->subfield("205", "a") || "", + note => $record->subfield("300", "a") || "", + url => $record->subfield("856", "u") || "" + ); + } else { + + # Marc21 to bibtex hash + %bh = ( + + # Mandatory + author => $author, + title => $record->subfield("245", "a") || "", + editor => $record->subfield("260", "f") || "", + publisher => $record->subfield("260", "b") || "", + year => $record->subfield("260", "c") || $record->subfield("260", "g") || "", + + # Optional + # unimarc to marc21 specification says not to convert 200$v to marc21 + series => $record->subfield("490", "a") || "", + address => $record->subfield("260", "a") || "", + edition => $record->subfield("250", "a") || "", + note => $record->subfield("500", "a") || "", + url => $record->subfield("856", "u") || "" + ); + } + + $tex .= "\@book{"; + $tex .= join(",\n", $id, map { $bh{$_} ? qq(\t$_ = "$bh{$_}") : () } keys %bh); + $tex .= "\n}\n"; + + return $tex; +} + + +=head1 INTERNAL FUNCTIONS + +=head2 _entity_encode - Entity-encode an array of strings + + my ($entity_encoded_string) = _entity_encode($string); + +or + + my (@entity_encoded_strings) = _entity_encode(@strings); + +Entity-encode an array of strings + +=cut + +sub _entity_encode { + my @strings = @_; + my @strings_entity_encoded; + foreach my $string (@strings) { + my $nfc_string = NFC($string); + $nfc_string =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe; + push @strings_entity_encoded, $nfc_string; + } + return @strings_entity_encoded; } END { } # module clean-up code here (global destructor) 1; __END__ -=back - =head1 AUTHOR Joshua Ferraro -=cut -=head MODIFICATIONS -# $Id$ +=head1 MODIFICATIONS + + =cut