# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License along with
-# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
-# Suite 330, Boston, MA 02111-1307 USA
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
-# $Id$
#
-use strict;# use warnings; #FIXME: turn off warnings before release
+use strict;
+#use warnings; FIXME - Bug 2505
# please specify in which methods a given module is used
-use MARC::Record; # marc2marcxml, marcxml2marc, html2marc, changeEncoding
-use MARC::File::XML; # marc2marcxml, marcxml2marc, html2marcxml, changeEncoding
+use MARC::Record; # marc2marcxml, marcxml2marc, changeEncoding
+use MARC::File::XML; # marc2marcxml, marcxml2marc, changeEncoding
use MARC::Crosswalk::DublinCore; # marc2dcxml
+use Biblio::EndnoteStyle;
use Unicode::Normalize; # _entity_encode
use XML::LibXSLT;
use XML::LibXML;
+use C4::Biblio; #marc2bibtex
+use C4::Csv; #marc2csv
+use C4::Koha; #marc2csv
+use YAML; #marcrecords2csv
+use Text::CSV::Encoded; #marc2csv
use vars qw($VERSION @ISA @EXPORT);
# set the version for version checking
-$VERSION = do { my @v = '$Revision$' =~ /\d+/g;
- shift(@v) . "." . join("_", map {sprintf "%03d", $_ } @v); };
+$VERSION = 3.00;
@ISA = qw(Exporter);
# only export API methods
@EXPORT = qw(
+ &marc2endnote
&marc2marc
&marc2marcxml
&marcxml2marc
&marc2dcxml
&marc2modsxml
-
- &html2marcxml
- &html2marc
+ &marc2bibtex
+ &marc2csv
&changeEncoding
);
=head2 marc2marc - Convert from one flavour of ISO-2709 to another
-=over 4
-
-my ($error,$newmarc) = marc2marc($marc,$to_flavour,$from_flavour,$encoding);
+ my ($error,$newmarc) = marc2marc($marc,$to_flavour,$from_flavour,$encoding);
Returns an ISO-2709 scalar
-=back
-
=cut
sub marc2marc {
=head2 marc2marcxml - Convert from ISO-2709 to MARCXML
-=over 4
-
-my ($error,$marcxml) = marc2marcxml($marc,$encoding,$flavour);
+ my ($error,$marcxml) = marc2marcxml($marc,$encoding,$flavour);
Returns a MARCXML scalar
-=over 2
-
C<$marc> - an ISO-2709 scalar or MARC::Record object
C<$encoding> - UTF-8 or MARC-8 [UTF-8]
C<$dont_entity_encode> - a flag that instructs marc2marcxml not to entity encode the xml before returning (optional)
-=back
-
-=back
-
=cut
sub marc2marcxml {
=head2 marcxml2marc - Convert from MARCXML to ISO-2709
-=over 4
-
-my ($error,$marc) = marcxml2marc($marcxml,$encoding,$flavour);
+ my ($error,$marc) = marcxml2marc($marcxml,$encoding,$flavour);
Returns an ISO-2709 scalar
-=over 2
-
C<$marcxml> - a MARCXML record
C<$encoding> - UTF-8 or MARC-8 [UTF-8]
C<$flavour> - MARC21 or UNIMARC
-=back
-
-=back
-
=cut
sub marcxml2marc {
=head2 marc2dcxml - Convert from ISO-2709 to Dublin Core
-=over 4
-
-my ($error,$dcxml) = marc2dcxml($marc,$qualified);
+ my ($error,$dcxml) = marc2dcxml($marc,$qualified);
Returns a DublinCore::Record object, will eventually return a Dublin Core scalar
FIXME: should return actual XML, not just an object
-=over 2
-
C<$marc> - an ISO-2709 scalar or MARC::Record object
C<$qualified> - specify whether qualified Dublin Core should be used in the input or output [0]
-=back
-
-=back
-
=cut
sub marc2dcxml {
$crosswalk = MARC::Crosswalk::DublinCore->new( qualified => 1 );
}
my $dcxml = $crosswalk->as_dublincore($marc_record_obj);
- my $dcxmlfinal = "<?xml version=\"1.0\"?>\n";
+ my $dcxmlfinal = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
$dcxmlfinal .= "<metadata
xmlns=\"http://example.org/myapp/\"
xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"
xmlns:dcterms=\"http://purl.org/dc/terms/\">";
foreach my $element ( $dcxml->elements() ) {
- $dcxmlfinal.="<"."dc:".$element->name().">".$element->content()."</"."dc:".$element->name()."\n";
+ $dcxmlfinal.="<"."dc:".$element->name().">".$element->content()."</"."dc:".$element->name().">\n";
}
$dcxmlfinal .= "\n</metadata>";
return ($error,$dcxmlfinal);
}
-=head2 marc2modsxml - Convert from ISO-2709 to MODS
-=over 4
+=head2 marc2modsxml - Convert from ISO-2709 to MODS
-my ($error,$modsxml) = marc2modsxml($marc);
+ my ($error,$modsxml) = marc2modsxml($marc);
Returns a MODS scalar
-=back
-
=cut
sub marc2modsxml {
my ($marc) = @_;
# grab the XML, run it through our stylesheet, push it out to the browser
my $xmlrecord = marc2marcxml($marc);
- my $xslfile = C4::Context->config('intranetdir')."/misc/xslt/MARC21slim2MODS3-1.xsl";
+ my $xslfile = C4::Context->config('intrahtdocs')."/prog/en/xslt/MARC21slim2MODS3-1.xsl";
my $parser = XML::LibXML->new();
my $xslt = XML::LibXSLT->new();
my $source = $parser->parse_string($xmlrecord);
my $newxmlrecord = $stylesheet->output_string($results);
return ($newxmlrecord);
}
-=head2 html2marcxml
-=over 4
+sub marc2endnote {
+ my ($marc) = @_;
+ my $marc_rec_obj = MARC::Record->new_from_usmarc($marc);
+ my $f260 = $marc_rec_obj->field('260');
+ my $f260a = $f260->subfield('a') if $f260;
+ my $f710 = $marc_rec_obj->field('710');
+ my $f710a = $f710->subfield('a') if $f710;
+ my $f500 = $marc_rec_obj->field('500');
+ my $abstract = $f500->subfield('a') if $f500;
+ my $fields = {
+ DB => C4::Context->preference("LibraryName"),
+ Title => $marc_rec_obj->title(),
+ Author => $marc_rec_obj->author(),
+ Publisher => $f710a,
+ City => $f260a,
+ Year => $marc_rec_obj->publication_date,
+ Abstract => $abstract,
+ };
+ my $endnote;
+ my $style = new Biblio::EndnoteStyle();
+ my $template;
+ $template.= "DB - DB\n" if C4::Context->preference("LibraryName");
+ $template.="T1 - Title\n" if $marc_rec_obj->title();
+ $template.="A1 - Author\n" if $marc_rec_obj->author();
+ $template.="PB - Publisher\n" if $f710a;
+ $template.="CY - City\n" if $f260a;
+ $template.="Y1 - Year\n" if $marc_rec_obj->publication_date;
+ $template.="AB - Abstract\n" if $abstract;
+ my ($text, $errmsg) = $style->format($template, $fields);
+ return ($text);
+
+}
-my ($error,$marcxml) = html2marcxml($tags,$subfields,$values,$indicator,$ind_tag);
+=head2 marc2csv - Convert several records from UNIMARC to CSV
-Returns a MARCXML scalar
+ my ($csv) = marc2csv($biblios, $csvprofileid);
+
+Pre and postprocessing can be done through a YAML file
-this is used in addbiblio.pl and additem.pl to build the MARCXML record from
-the form submission.
+Returns a CSV scalar
-FIXME: this could use some better code documentation
+C<$biblio> - a list of biblionumbers
-=back
+C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv)
=cut
-sub html2marcxml {
- my ($tags,$subfields,$values,$indicator,$ind_tag) = @_;
- my $error;
- # add the header info
- my $marcxml= MARC::File::XML::header(C4::Context->preference('TemplateEncoding'),C4::Context->preference('marcflavour'));
-
- # some flags used to figure out where in the record we are
- my $prevvalue;
- my $prevtag=-1;
- my $first=1;
- my $j = -1;
-
- # handle characters that would cause the parser to choke FIXME: is there a more elegant solution?
- for (my $i=0;$i<=@$tags;$i++){
- @$values[$i] =~ s/&/&/g;
- @$values[$i] =~ s/</</g;
- @$values[$i] =~ s/>/>/g;
- @$values[$i] =~ s/"/"/g;
- @$values[$i] =~ s/'/'/g;
-
- if ((@$tags[$i] ne $prevtag)){
- $j++ unless (@$tags[$i] eq "");
- #warn "IND:".substr(@$indicator[$j],0,1).substr(@$indicator[$j],1,1)." ".@$tags[$i];
- if (!$first){
- $marcxml.="</datafield>\n";
- if ((@$tags[$i] > 10) && (@$values[$i] ne "")){
- my $ind1 = substr(@$indicator[$j],0,1);
- my $ind2 = substr(@$indicator[$j],1,1);
- $marcxml.="<datafield tag=\"@$tags[$i]\" ind1=\"$ind1\" ind2=\"$ind2\">\n";
- $marcxml.="<subfield code=\"@$subfields[$i]\">@$values[$i]</subfield>\n";
- $first=0;
- } else {
- $first=1;
- }
- } else {
- if (@$values[$i] ne "") {
- # handle the leader
- if (@$tags[$i] eq "000") {
- $marcxml.="<leader>@$values[$i]</leader>\n";
- $first=1;
- # rest of the fixed fields
- } elsif (@$tags[$i] < 010) { #FIXME: <10 was the way it was, there might even be a better way
- $marcxml.="<controlfield tag=\"@$tags[$i]\">@$values[$i]</controlfield>\n";
- $first=1;
- } else {
- my $ind1 = substr(@$indicator[$j],0,1);
- my $ind2 = substr(@$indicator[$j],1,1);
- $marcxml.="<datafield tag=\"@$tags[$i]\" ind1=\"$ind1\" ind2=\"$ind2\">\n";
- $marcxml.="<subfield code=\"@$subfields[$i]\">@$values[$i]</subfield>\n";
- $first=0;
- }
- }
- }
- } else { # @$tags[$i] eq $prevtag
- if (@$values[$i] eq "") {
- } else {
- if ($first){
- my $ind1 = substr(@$indicator[$j],0,1);
- my $ind2 = substr(@$indicator[$j],1,1);
- $marcxml.="<datafield tag=\"@$tags[$i]\" ind1=\"$ind1\" ind2=\"$ind2\">\n";
- $first=0;
- }
- $marcxml.="<subfield code=\"@$subfields[$i]\">@$values[$i]</subfield>\n";
- }
- }
- $prevtag = @$tags[$i];
- }
- $marcxml.= MARC::File::XML::footer();
- #warn $marcxml;
- return ($error,$marcxml);
-}
+sub marc2csv {
+ my ($biblios, $id) = @_;
+ my $output;
+ my $csv = Text::CSV::Encoded->new();
-=head2 html2marc
+ # Getting yaml file
+ my $configfile = "../tools/csv-profiles/$id.yaml";
+ my ($preprocess, $postprocess, $fieldprocessing);
+ if (-e $configfile){
+ ($preprocess,$postprocess, $fieldprocessing) = YAML::LoadFile($configfile);
+ }
-=over 4
+ # Preprocessing
+ eval $preprocess if ($preprocess);
-Probably best to avoid using this ... it has some rather striking problems:
+ my $firstpass = 1;
+ foreach my $biblio (@$biblios) {
+ $output .= marcrecord2csv($biblio, $id, $firstpass, $csv, $fieldprocessing) ;
+ $firstpass = 0;
+ }
-=over 2
+ # Postprocessing
+ eval $postprocess if ($postprocess);
+
+ return $output;
+}
-* saves blank subfields
+=head2 marcrecord2csv - Convert a single record from UNIMARC to CSV
-* subfield order is hardcoded to always start with 'a' for repeatable tags (because it is hardcoded in the addfield routine).
+ my ($csv) = marcrecord2csv($biblio, $csvprofileid, $header);
-* only possible to specify one set of indicators for each set of tags (ie, one for all the 650s). (because they were stored in a hash with the tag as the key).
+Returns a CSV scalar
-* the underlying routines didn't support subfield reordering or subfield repeatability.
+C<$biblio> - a biblionumber
-=back
+C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv)
-I've left it in here because it could be useful if someone took the time to fix it. -- kados
+C<$header> - true if the headers are to be printed (typically at first pass)
-=back
+C<$csv> - an already initialised Text::CSV object
=cut
-sub html2marc {
- my ($dbh,$rtags,$rsubfields,$rvalues,%indicators) = @_;
- my $prevtag = -1;
- my $record = MARC::Record->new();
-# my %subfieldlist=();
- my $prevvalue; # if tag <10
- my $field; # if tag >=10
- for (my $i=0; $i< @$rtags; $i++) {
- # rebuild MARC::Record
-# warn "0=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ";
- if (@$rtags[$i] ne $prevtag) {
- if ($prevtag < 10) {
- if ($prevvalue) {
- if (($prevtag ne '000') && ($prevvalue ne "")) {
- $record->add_fields((sprintf "%03s",$prevtag),$prevvalue);
- } elsif ($prevvalue ne ""){
- $record->leader($prevvalue);
- }
- }
- } else {
- if (($field) && ($field ne "")) {
- $record->add_fields($field);
- }
- }
- $indicators{@$rtags[$i]}.=' ';
- # skip blank tags, I hope this works
- if (@$rtags[$i] eq ''){
- $prevtag = @$rtags[$i];
- undef $field;
- next;
- }
- if (@$rtags[$i] <10) {
- $prevvalue= @$rvalues[$i];
- undef $field;
- } else {
- undef $prevvalue;
- if (@$rvalues[$i] eq "") {
- undef $field;
- } else {
- $field = MARC::Field->new( (sprintf "%03s",@$rtags[$i]), substr($indicators{@$rtags[$i]},0,1),substr($indicators{@$rtags[$i]},1,1), @$rsubfields[$i] => @$rvalues[$i]);
- }
-# warn "1=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted;
- }
- $prevtag = @$rtags[$i];
+
+sub marcrecord2csv {
+ my ($biblio, $id, $header, $csv, $fieldprocessing) = @_;
+ my $output;
+
+ # Getting the record
+ my $record = GetMarcBiblio($biblio);
+ next unless $record;
+ # Getting the framework
+ my $frameworkcode = GetFrameworkCode($biblio);
+
+ # Getting information about the csv profile
+ my $profile = GetCsvProfile($id);
+
+ # Getting output encoding
+ my $encoding = $profile->{encoding} || 'utf8';
+ # Getting separators
+ my $csvseparator = $profile->{csv_separator} || ',';
+ my $fieldseparator = $profile->{field_separator} || '#';
+ my $subfieldseparator = $profile->{subfield_separator} || '|';
+
+ # TODO: Be more generic (in case we have to handle other protected chars or more separators)
+ if ($csvseparator eq '\t') { $csvseparator = "\t" }
+ if ($fieldseparator eq '\t') { $fieldseparator = "\t" }
+ if ($subfieldseparator eq '\t') { $subfieldseparator = "\t" }
+ if ($csvseparator eq '\n') { $csvseparator = "\n" }
+ if ($fieldseparator eq '\n') { $fieldseparator = "\n" }
+ if ($subfieldseparator eq '\n') { $subfieldseparator = "\n" }
+
+ $csv = $csv->encoding_out($encoding) ;
+ $csv->sep_char($csvseparator);
+
+ # Getting the marcfields
+ my $marcfieldslist = $profile->{marcfields};
+
+ # Getting the marcfields as an array
+ my @marcfieldsarray = split('\|', $marcfieldslist);
+
+ # Separating the marcfields from the the user-supplied headers
+ my @marcfields;
+ foreach (@marcfieldsarray) {
+ my @result = split('=', $_);
+ if (scalar(@result) == 2) {
+ push @marcfields, { header => $result[0], field => $result[1] };
} else {
- if (@$rtags[$i] <10) {
- $prevvalue=@$rvalues[$i];
- } else {
- if (length(@$rvalues[$i])>0) {
- $field->add_subfields(@$rsubfields[$i] => @$rvalues[$i]);
-# warn "2=>".@$rtags[$i].@$rsubfields[$i]." = ".@$rvalues[$i].": ".$field->as_formatted;
- }
- }
- $prevtag= @$rtags[$i];
+ push @marcfields, { field => $result[0] }
}
}
- #}
- # the last has not been included inside the loop... do it now !
- #use Data::Dumper;
- #warn Dumper($field->{_subfields});
- $record->add_fields($field) if (($field) && $field ne "");
- #warn "HTML2MARC=".$record->as_formatted;
- return $record;
+
+ # If we have to insert the headers
+ if ($header) {
+ my @marcfieldsheaders;
+ my $dbh = C4::Context->dbh;
+
+ # For each field or subfield
+ foreach (@marcfields) {
+
+ my $field = $_->{field};
+
+ # If we have a user-supplied header, we use it
+ if (exists $_->{header}) {
+ push @marcfieldsheaders, $_->{header};
+ } else {
+ # If not, we get the matching tag name from koha
+ if (index($field, '$') > 0) {
+ my ($fieldtag, $subfieldtag) = split('\$', $field);
+ my $query = "SELECT liblibrarian FROM marc_subfield_structure WHERE tagfield=? AND tagsubfield=?";
+ my $sth = $dbh->prepare($query);
+ $sth->execute($fieldtag, $subfieldtag);
+ my @results = $sth->fetchrow_array();
+ push @marcfieldsheaders, $results[0];
+ } else {
+ my $query = "SELECT liblibrarian FROM marc_tag_structure WHERE tagfield=?";
+ my $sth = $dbh->prepare($query);
+ $sth->execute($field);
+ my @results = $sth->fetchrow_array();
+ push @marcfieldsheaders, $results[0];
+ }
+ }
+ }
+ $csv->combine(@marcfieldsheaders);
+ $output = $csv->string() . "\n";
+ }
+
+ # For each marcfield to export
+ my @fieldstab;
+ foreach (@marcfields) {
+ my $marcfield = $_->{field};
+ # If it is a subfield
+ if (index($marcfield, '$') > 0) {
+ my ($fieldtag, $subfieldtag) = split('\$', $marcfield);
+ my @fields = $record->field($fieldtag);
+ my @tmpfields;
+
+ # For each field
+ foreach my $field (@fields) {
+
+ # We take every matching subfield
+ my @subfields = $field->subfield($subfieldtag);
+ foreach my $subfield (@subfields) {
+
+ # Getting authorised value
+ my $authvalues = GetKohaAuthorisedValuesFromField($fieldtag, $subfieldtag, $frameworkcode, undef);
+ push @tmpfields, (defined $authvalues->{$subfield}) ? $authvalues->{$subfield} : $subfield;
+ }
+ }
+ push (@fieldstab, join($subfieldseparator, @tmpfields));
+ # Or a field
+ } else {
+ my @fields = ($record->field($marcfield));
+ my $authvalues = GetKohaAuthorisedValuesFromField($marcfield, undef, $frameworkcode, undef);
+
+ my @valuesarray;
+ foreach (@fields) {
+ my $value;
+
+ # Getting authorised value
+ $value = defined $authvalues->{$_->as_string} ? $authvalues->{$_->as_string} : $_->as_string;
+
+ # Field processing
+ eval $fieldprocessing if ($fieldprocessing);
+
+ push @valuesarray, $value;
+ }
+ push (@fieldstab, join($fieldseparator, @valuesarray));
+ }
+ };
+
+ $csv->combine(@fieldstab);
+ $output .= $csv->string() . "\n";
+
+ return $output;
+
}
-=head2 changeEncoding - Change the encoding of a record
-=over 4
+=head2 changeEncoding - Change the encoding of a record
-my ($error, $newrecord) = changeEncoding($record,$format,$flavour,$to_encoding,$from_encoding);
+ my ($error, $newrecord) = changeEncoding($record,$format,$flavour,$to_encoding,$from_encoding);
Changes the encoding of a record
-=over 2
-
C<$record> - the record itself can be in ISO-2709, a MARC::Record object, or MARCXML for now (required)
C<$format> - MARC or MARCXML (required)
C<$from_encoding> - the encoding the record is currently in (optional, it will probably be able to tell unless there's a problem with the record)
-=back
-
FIXME: the from_encoding doesn't work yet
FIXME: better handling for UNIMARC, it should allow management of 100 field
FIXME: shouldn't have to convert to and from xml/marc just to change encoding someone needs to re-write MARC::Record's 'encoding' method to actually alter the encoding rather than just changing the leader
-=back
-
=cut
sub changeEncoding {
return ($error,$newrecord);
}
+=head2 marc2bibtex - Convert from MARC21 and UNIMARC to BibTex
+
+ my ($bibtex) = marc2bibtex($record, $id);
+
+Returns a BibTex scalar
+
+C<$record> - a MARC::Record object
+
+C<$id> - an id for the BibTex record (might be the biblionumber)
+
+=cut
+
+
+sub marc2bibtex {
+ my ($record, $id) = @_;
+ my $tex;
+
+ # Authors
+ my $marcauthors = GetMarcAuthors($record,C4::Context->preference("marcflavour"));
+ my $author;
+ for my $authors ( map { map { @$_ } values %$_ } @$marcauthors ) {
+ $author .= " and " if ($author && $$authors{value});
+ $author .= $$authors{value} if ($$authors{value});
+ }
+
+ # Defining the conversion hash according to the marcflavour
+ my %bh;
+ if (C4::Context->preference("marcflavour") eq "UNIMARC") {
+
+ # FIXME, TODO : handle repeatable fields
+ # TODO : handle more types of documents
+
+ # Unimarc to bibtex hash
+ %bh = (
+
+ # Mandatory
+ author => $author,
+ title => $record->subfield("200", "a") || "",
+ editor => $record->subfield("210", "g") || "",
+ publisher => $record->subfield("210", "c") || "",
+ year => $record->subfield("210", "d") || $record->subfield("210", "h") || "",
+
+ # Optional
+ volume => $record->subfield("200", "v") || "",
+ series => $record->subfield("225", "a") || "",
+ address => $record->subfield("210", "a") || "",
+ edition => $record->subfield("205", "a") || "",
+ note => $record->subfield("300", "a") || "",
+ url => $record->subfield("856", "u") || ""
+ );
+ } else {
+
+ # Marc21 to bibtex hash
+ %bh = (
+
+ # Mandatory
+ author => $author,
+ title => $record->subfield("245", "a") || "",
+ editor => $record->subfield("260", "f") || "",
+ publisher => $record->subfield("260", "b") || "",
+ year => $record->subfield("260", "c") || $record->subfield("260", "g") || "",
+
+ # Optional
+ # unimarc to marc21 specification says not to convert 200$v to marc21
+ series => $record->subfield("490", "a") || "",
+ address => $record->subfield("260", "a") || "",
+ edition => $record->subfield("250", "a") || "",
+ note => $record->subfield("500", "a") || "",
+ url => $record->subfield("856", "u") || ""
+ );
+ }
+
+ $tex .= "\@book{";
+ $tex .= join(",\n", $id, map { $bh{$_} ? qq(\t$_ = "$bh{$_}") : () } keys %bh);
+ $tex .= "\n}\n";
+
+ return $tex;
+}
+
+
=head1 INTERNAL FUNCTIONS
=head2 _entity_encode - Entity-encode an array of strings
-=over 4
-
-my ($entity_encoded_string) = _entity_encode($string);
+ my ($entity_encoded_string) = _entity_encode($string);
or
-my (@entity_encoded_strings) = _entity_encode(@strings);
+ my (@entity_encoded_strings) = _entity_encode(@strings);
Entity-encode an array of strings
-=back
-
=cut
sub _entity_encode {
=head1 MODIFICATIONS
-# $Id$
=cut