# Modified 2008 by BibLibre for Koha
# Modified 2011 by Catalyst
# Modified 2011 by Equinox Software, Inc.
+# Modified 2016 by Universidad de El Salvador
#
# This file is part of Koha.
#
#
#
-#use strict;
-#use warnings;
+use Modern::Perl;
-use List::MoreUtils qw/uniq/;
-use vars qw($VERSION @ISA @EXPORT);
+use List::MoreUtils qw( uniq );
+use YAML::XS;
+use Encode;
+use vars qw(@ISA @EXPORT);
+
+use Koha::SimpleMARC qw( read_field );
-# set the version for version checking
-$VERSION = 3.07.00.049;
@ISA = qw(Exporter);
# only export API methods
@EXPORT = qw(
- &marc2ris
+ marc2ris
);
+our $marcprint = 0; # Debug flag;
=head1 marc2bibtex - Convert from UNIMARC to RIS
sub marc2ris {
my ($record) = @_;
- my $output;
my $marcflavour = C4::Context->preference("marcflavour");
my $intype = lc($marcflavour);
- my $marcprint = 0; # Debug flag;
# Let's redirect stdout
open my $oldout, ">&STDOUT";
my $outvar;
close STDOUT;
- open STDOUT,'>', \$outvar;
-
-
- ## First we should check the character encoding. This may be
- ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
- ## by 'a' at position 09 (zero-based) of the leader
- my $leader = $record->leader();
- if ($intype eq "marc21") {
- if ($leader =~ /^.{9}a/) {
- print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
- $utf = 1;
- }
- else {
- print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
- }
- }
- ## else: other MARC formats do not specify the character encoding
- ## we assume it's *not* UTF-8
+ open STDOUT,'>:encoding(utf8)', \$outvar;
+
+ ## First we should check the character encoding. This may be
+ ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
+ ## by 'a' at position 09 (zero-based) of the leader
+ my $leader = $record->leader();
+ if ( $intype eq "marc21" ) {
+ if ( $leader =~ /^.{9}a/ ) {
+ print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
+ }
+ else {
+ print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
+ }
+ }
+ ## else: other MARC formats do not specify the character encoding
+ ## we assume it's *not* UTF-8
+
+ my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
+ my $ris_additional_fields;
+ if ($RisExportAdditionalFields) {
+ $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
+ $ris_additional_fields = eval { YAML::XS::Load(Encode::encode_utf8($RisExportAdditionalFields)); };
+ if ($@) {
+ warn "Unable to parse RisExportAdditionalFields : $@";
+ $ris_additional_fields = undef;
+ }
+ }
- ## start RIS dataset
- &print_typetag($leader);
+ ## start RIS dataset
+ if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
+ my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
+ my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
+ if ($type) {
+ print "TY - $type\r\n";
+ }
+ else {
+ &print_typetag($leader);
+ }
+ }
+ else {
+ &print_typetag($leader);
+ }
## retrieve all author fields and collect them in a list
my @author_fields;
foreach my $field (@author_fields) {
if (length($field)) {
my $author = &get_author($field);
- print "AU - ",&charconv($author),"\r\n";
+ print "AU - ",$author,"\r\n";
}
}
foreach my $field (@editor_fields) {
if (length($field)) {
my $editor = &get_editor($field);
- print "ED - ",&charconv($editor),"\r\n";
+ print "ED - ",$editor,"\r\n";
}
}
@kwpool = uniq @kwpool;
for my $kw ( @kwpool ) {
- print "KW - ", &charconv($kw), "\r\n";
+ print "KW - ", $kw, "\r\n";
}
## 5XX have various candidates for notes and abstracts. We pool
my $allnotes = join "; ", @notepool;
if (length($allnotes) > 0) {
- print "N1 - ", &charconv($allnotes), "\r\n";
+ print "N1 - ", $allnotes, "\r\n";
}
## 320/520 have the abstract
print_uri($record->field('856'));
}
+ if ($ris_additional_fields) {
+ foreach my $ris_tag ( keys %$ris_additional_fields ) {
+ next if $ris_tag eq 'TY';
+
+ my @fields =
+ ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
+ ? @{ $ris_additional_fields->{$ris_tag} }
+ : $ris_additional_fields->{$ris_tag};
+
+ for my $tag (@fields) {
+ my ( $f, $sf ) = split( /\$/, $tag );
+ my @values = read_field( { record => $record, field => $f, subfield => $sf } );
+ foreach my $v (@values) {
+ print "$ris_tag - $v\r\n";
+ }
+ }
+ }
+ }
+
## end RIS dataset
print "ER - \r\n";
## of the leader of a MARC record, the values are the RIS types
## that might appropriately represent these types.
my %ustypehash = (
- "a" => "BOOK",
- "c" => "MUSIC",
- "d" => "MUSIC",
- "e" => "MAP",
- "f" => "MAP",
- "g" => "ADVS",
- "i" => "SOUND",
- "j" => "SOUND",
- "k" => "ART",
- "m" => "DATA",
- "o" => "GEN",
- "p" => "GEN",
- "r" => "ART",
- "t" => "GEN",
- );
-
+ "a" => "BOOK",
+ "c" => "MUSIC",
+ "d" => "MUSIC",
+ "e" => "MAP",
+ "f" => "MAP",
+ "g" => "ADVS",
+ "i" => "SOUND",
+ "j" => "SOUND",
+ "k" => "ART",
+ "m" => "DATA",
+ "o" => "GEN",
+ "p" => "GEN",
+ "r" => "ART",
+ "t" => "MANSCPT",
+ );
+
my %unitypehash = (
- "a" => "BOOK",
- "b" => "BOOK",
- "c" => "MUSIC",
- "d" => "MUSIC",
- "e" => "MAP",
- "f" => "MAP",
- "g" => "ADVS",
- "i" => "SOUND",
- "j" => "SOUND",
- "k" => "ART",
- "l" => "ELEC",
- "m" => "ADVS",
- "r" => "ART",
- );
-
+ "a" => "BOOK",
+ "b" => "MANSCPT",
+ "c" => "MUSIC",
+ "d" => "MUSIC",
+ "e" => "MAP",
+ "f" => "MAP",
+ "g" => "ADVS",
+ "i" => "SOUND",
+ "j" => "SOUND",
+ "k" => "ART",
+ "l" => "ELEC",
+ "m" => "GEN",
+ "r" => "ART",
+ );
+
## The type of a MARC record is found at position 06 of the leader
- my $typeofrecord = substr($leader, 6, 1);
+ my $typeofrecord = defined($leader) && length $leader >=6 ?
+ substr($leader, 6, 1): undef;
+ ## Pos 07 == Bibliographic level
+ my $biblevel = defined($leader) && length $leader >=7 ?
+ substr($leader, 7, 1): '';
- ## ToDo: for books, field 008 positions 24-27 might have a few more
+ ## TODO: for books, field 008 positions 24-27 might have a few more
## hints
my %typehash;
-
- ## the ukmarc here is just a guess
- if ($intype eq "marc21" || $intype eq "ukmarc") {
- %typehash = %ustypehash;
- }
- elsif ($intype eq "unimarc") {
- %typehash = %unitypehash;
- }
- else {
- ## assume MARC21 as default
- %typehash = %ustypehash;
- }
-
- if (!exists $typehash{$typeofrecord}) {
- print "TY - BOOK\r\n"; ## most reasonable default
- warn ("no type found - assume BOOK") if $marcprint;
+ my $marcflavour = C4::Context->preference("marcflavour");
+ my $intype = lc($marcflavour);
+ if ($intype eq "unimarc") {
+ %typehash = %unitypehash;
}
else {
- print "TY - $typehash{$typeofrecord}\r\n";
+ %typehash = %ustypehash;
+ }
+
+ if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
+ print "TY - GEN\r\n"; ## most reasonable default
+ warn ("no type found - assume GEN") if $marcprint;
+ } elsif ( $typeofrecord =~ "a" ) {
+ if ( $biblevel eq 'a' ) {
+ print "TY - GEN\r\n"; ## monographic component part
+ } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
+ print "TY - SER\r\n"; ## serial or serial component part
+ } elsif ( $biblevel eq 'm' ) {
+ print "TY - $typehash{$typeofrecord}\r\n"; ## book
+ } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
+ print "TY - GEN\r\n"; ## collections, part of collections or made-up collections
+ } elsif ( $biblevel eq 'i' ) {
+ print "TY - DATA\r\n"; ## updating loose-leafe as Dataset
+ }
+ } else {
+ print "TY - $typehash{$typeofrecord}\r\n";
}
## use $typeofrecord as the return value, just in case
## we currently ignore subfield c until someone complains
if (length($rawauthorb) > 0) {
- return join ",", ($rawauthora, $rawauthorb);
+ return join ", ", ($rawauthora, $rawauthorb);
}
else {
return $rawauthora;
## the sequence of the name parts is encoded either in indicator
## 1 (marc21) or 2 (unimarc)
+ my $marcflavour = C4::Context->preference("marcflavour");
+ my $intype = lc($marcflavour);
if ($intype eq "unimarc") {
$indicator = 2;
}
normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
}
else {
- normalize_author($authorfield->subfield('a'), $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
+ normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
}
}
my $clean_title = $titlefield->subfield('a');
my $clean_subtitle = $titlefield->subfield('b');
+$clean_subtitle ||= q{};
$clean_title =~ s% *[/:;.]$%%;
$clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
+ my $marcflavour = C4::Context->preference("marcflavour");
+ my $intype = lc($marcflavour);
if (length($clean_title) > 0
|| (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
- print "TI - ", &charconv($clean_title);
+ print "TI - ", $clean_title;
## subfield $b is relevant only for marc21/ukmarc
if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
- print ": ",&charconv($clean_subtitle);
+ print ": ",$clean_subtitle;
}
print "\r\n";
}
## be written and designated. The field is free-form and resistant
## to all parsing efforts, so this information is lost on me
}
+ return;
}
##********************************************************************
$clean_title =~ s% *[/:;.]$%%;
if (length($clean_title) > 0) {
- print "T2 - ", &charconv($clean_title),"\r\n";
+ print "T2 - ", $clean_title,"\r\n";
}
+ my $marcflavour = C4::Context->preference("marcflavour");
+ my $intype = lc($marcflavour);
if ($intype eq "unimarc") {
print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
if (length($titlefield->subfield('v')) > 0) {
- print "VL - ", &charconv($titlefield->subfield('v')),"\r\n";
+ print "VL - ", $titlefield->subfield('v'),"\r\n";
}
}
}
+ return;
}
##********************************************************************
warn("truncated isbn") if $marcprint;
}
- my $isbn = substr($isbnfield->subfield('a'), 0, 10);
- print "SN - ", &charconv($isbn), "\r\n";
+ my $isbn = $isbnfield->subfield('a');
+ print "SN - ", $isbn, "\r\n";
}
}
}
my $issn = substr($issnfield->subfield('a'), 0, 9);
- print "SN - ", &charconv($issn), "\r\n";
+ print "SN - ", $issn, "\r\n";
}
}
foreach my $f856 (@f856s) {
if (my $uri = $f856->subfield('u')) {
- print "UR - ", charconv($uri), "\r\n";
+ print "UR - ", $uri, "\r\n";
}
}
}
warn("no LOC call number found") if $marcprint;
}
else {
- print "AV - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
+ print "AV - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
}
}
warn("no Dewey number found") if $marcprint;
}
else {
- print "U1 - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
+ print "U1 - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
}
}
my $pubsub_publisher;
my $pubsub_date;
+ my $marcflavour = C4::Context->preference("marcflavour");
+ my $intype = lc($marcflavour);
if ($intype eq "unimarc") {
$pubsub_place = "a";
$pubsub_publisher = "c";
## the dates are free-form, so we want to extract
## a four-digit year and leave the rest as
## "other info"
- $protoyear = @$tuple[1];
+ my $protoyear = @$tuple[1];
print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
## strip any separator chars at the end
## now dump the collected CY and PB lists
if (@cities > 0) {
- print "CY - ", &charconv(join(", ", @cities)), "\r\n";
+ print "CY - ", join(", ", @cities), "\r\n";
}
if (@publishers > 0) {
- print "PB - ", &charconv(join(", ", @publishers)), "\r\n";
+ print "PB - ", join(", ", @publishers), "\r\n";
}
}
}
## loop over all 6XX fields
foreach my $kwfield (@keywords) {
- if ($kwfield != undef) {
- ## authornames get special treatment
- if ($fieldname eq "600") {
- my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
- push @kw, $val;
- print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
- }
- else {
- ## retrieve all available subfields
- @kwsubfields = $kwfield->subfields();
-
- ## loop over all available subfield tuples
- foreach my $kwtuple (@kwsubfields) {
- ## loop over all subfields to check
- foreach my $subfield (@subfields) {
- ## [0] contains subfield code
- if (@$kwtuple[0] eq $subfield) {
- ## [1] contains value, remove trailing separators
- @$kwtuple[1] =~ s% *[,;.:/]*$%%;
- if (length(@$kwtuple[1]) > 0) {
- push @kw, @$kwtuple[1];
- print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
- }
- ## we can leave the subfields loop here
- last;
- }
- }
- }
- }
- }
+ if ($kwfield != undef) {
+ ## authornames get special treatment
+ if ($fieldname eq "600") {
+ my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
+ push @kw, $val;
+ print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
+ }
+ else {
+ ## retrieve all available subfields
+ my @kwsubfields = $kwfield->subfields();
+
+ ## loop over all available subfield tuples
+ foreach my $kwtuple (@kwsubfields) {
+ ## loop over all subfields to check
+ foreach my $subfield (@subfields) {
+ ## [0] contains subfield code
+ if (@$kwtuple[0] eq $subfield) {
+ ## [1] contains value, remove trailing separators
+ @$kwtuple[1] =~ s% *[,;.:/]*$%%;
+ if (length(@$kwtuple[1]) > 0) {
+ push @kw, @$kwtuple[1];
+ print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
+ }
+ ## we can leave the subfields loop here
+ last;
+ }
+ }
+ }
+ }
+ }
}
return @kw;
}
## loop over all notefields
foreach my $notefield (@notefields) {
- if ($notefield != undef) {
- ## retrieve all available subfield tuples
- @notesubfields = $notefield->subfields();
-
- ## loop over all subfield tuples
- foreach my $notetuple (@notesubfields) {
- ## loop over all subfields to check
- foreach my $subfield (@subfields) {
- ## [0] contains subfield code
- if (@$notetuple[0] eq $subfield) {
- ## [1] contains value, remove trailing separators
- print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
- @$notetuple[1] =~ s% *[,;.:/]*$%%;
- if (length(@$notetuple[1]) > 0) {
- ## add to list
- push @{$aref}, @$notetuple[1];
- }
- last;
- }
- }
- }
- }
+ if (defined $notefield) {
+ ## retrieve all available subfield tuples
+ my @notesubfields = $notefield->subfields();
+
+ ## loop over all subfield tuples
+ foreach my $notetuple (@notesubfields) {
+ ## loop over all subfields to check
+ foreach my $subfield (@subfields) {
+ ## [0] contains subfield code
+ if (@$notetuple[0] eq $subfield) {
+ ## [1] contains value, remove trailing separators
+ print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
+ @$notetuple[1] =~ s% *[,;.:/]*$%%;
+ if (length(@$notetuple[1]) > 0) {
+ ## add to list
+ push @{$aref}, @$notetuple[1];
+ }
+ last;
+ }
+ }
+ }
+ }
}
}
my $allabs = join "; ", @abstrings;
if (length($allabs) > 0) {
- print "N2 - ", &charconv($allabs), "\r\n";
+ print "N2 - ", $allabs, "\r\n";
}
}
-
-
-##********************************************************************
-## charconv(): converts to a different charset based on a global var
-## Arguments: string
-## Returns: string
-##********************************************************************
-sub charconv {
- if ($utf) {
- ## return unaltered if already utf-8
- return @_;
- }
- elsif ($uniout eq "t") {
- ## convert to utf-8
- return marc8_to_utf8("@_");
- }
- else {
- ## return unaltered if no utf-8 requested
- return @_;
- }
-}
1;