Bug 17600: Standardize our EXPORT_OK

[srvgit] / C4 / Ris.pm
diff --git a/C4/Ris.pm b/C4/Ris.pm

index 0a0f181..9ab50d9 100644 (file)
--- a/C4/Ris.pm
+++ b/C4/Ris.pm
@@ -41,6 +41,7 @@ package C4::Ris;
  # Modified 2008 by BibLibre for Koha
  # Modified 2011 by Catalyst
  # Modified 2011 by Equinox Software, Inc.
+# Modified 2016 by Universidad de El Salvador
  #
  # This file is part of Koha.
  #
@@ -59,23 +60,25 @@ package C4::Ris;
  #
  #
  
-#use strict;
-#use warnings;
+use Modern::Perl;
  
-use List::MoreUtils qw/uniq/;
-use vars qw($VERSION @ISA @EXPORT);
+use List::MoreUtils qw( uniq );
+use YAML::XS;
+use Encode;
+use vars qw(@ISA @EXPORT);
+
+use Koha::SimpleMARC qw( read_field );
  
-# set the version for version checking
-$VERSION = 3.07.00.049;
  
  @ISA = qw(Exporter);
  
  # only export API methods
  
  @EXPORT = qw(
-  &marc2ris
+  marc2ris
  );
  
+our $marcprint = 0; # Debug flag;
  
  =head1 marc2bibtex - Convert from UNIMARC to RIS
  
@@ -89,37 +92,56 @@ C<$record> - a MARC::Record object
  
  sub marc2ris {
      my ($record) = @_;
-    my $output;
  
      my $marcflavour = C4::Context->preference("marcflavour");
      my $intype = lc($marcflavour);
-    my $marcprint = 0; # Debug flag;
  
      # Let's redirect stdout
      open my $oldout, ">&STDOUT";
      my $outvar;
      close STDOUT;
-    open STDOUT,'>', \$outvar;
-
-
-       ## First we should check the character encoding. This may be
-       ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
-       ## by 'a' at position 09 (zero-based) of the leader
-       my $leader = $record->leader();
-       if ($intype eq "marc21") {
-           if ($leader =~ /^.{9}a/) {
-               print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
-               $utf = 1;
-           }
-           else {
-               print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
-           }
-       }
-       ## else: other MARC formats do not specify the character encoding
-       ## we assume it's *not* UTF-8
+    open STDOUT,'>:encoding(utf8)', \$outvar;
+
+    ## First we should check the character encoding. This may be
+    ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
+    ## by 'a' at position 09 (zero-based) of the leader
+    my $leader = $record->leader();
+    if ( $intype eq "marc21" ) {
+        if ( $leader =~ /^.{9}a/ ) {
+            print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
+        }
+        else {
+            print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
+        }
+    }
+    ## else: other MARC formats do not specify the character encoding
+    ## we assume it's *not* UTF-8
+
+    my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
+    my $ris_additional_fields;
+    if ($RisExportAdditionalFields) {
+        $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
+        $ris_additional_fields = eval { YAML::XS::Load(Encode::encode_utf8($RisExportAdditionalFields)); };
+        if ($@) {
+            warn "Unable to parse RisExportAdditionalFields : $@";
+            $ris_additional_fields = undef;
+        }
+    }
  
-       ## start RIS dataset
-       &print_typetag($leader);
+    ## start RIS dataset
+    if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
+        my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
+        my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
+        if ($type) {
+            print "TY  - $type\r\n";
+        }
+        else {
+            &print_typetag($leader);
+        }
+    }
+    else {
+        &print_typetag($leader);
+    }
  
         ## retrieve all author fields and collect them in a list
         my @author_fields;
@@ -138,7 +160,7 @@ sub marc2ris {
         foreach my $field (@author_fields) {
             if (length($field)) {
                 my $author = &get_author($field);
-               print "AU  - ",&charconv($author),"\r\n";
+               print "AU  - ",$author,"\r\n";
             }
         }
  
@@ -164,7 +186,7 @@ sub marc2ris {
         foreach my $field (@editor_fields) {
             if (length($field)) {
                 my $editor = &get_editor($field);
-               print "ED  - ",&charconv($editor),"\r\n";
+               print "ED  - ",$editor,"\r\n";
             }
         }
  
@@ -239,7 +261,7 @@ sub marc2ris {
      @kwpool = uniq @kwpool;
  
      for my $kw ( @kwpool ) {
-        print "KW  - ", &charconv($kw), "\r\n";
+        print "KW  - ", $kw, "\r\n";
      }
  
         ## 5XX have various candidates for notes and abstracts. We pool
@@ -266,7 +288,7 @@ sub marc2ris {
         my $allnotes = join "; ", @notepool;
  
         if (length($allnotes) > 0) {
-           print "N1  - ", &charconv($allnotes), "\r\n";
+           print "N1  - ", $allnotes, "\r\n";
         }
  
         ## 320/520 have the abstract
@@ -285,6 +307,25 @@ sub marc2ris {
          print_uri($record->field('856'));
      }
  
+    if ($ris_additional_fields) {
+        foreach my $ris_tag ( keys %$ris_additional_fields ) {
+            next if $ris_tag eq 'TY';
+
+            my @fields =
+              ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
+              ? @{ $ris_additional_fields->{$ris_tag} }
+              : $ris_additional_fields->{$ris_tag};
+
+            for my $tag (@fields) {
+                my ( $f, $sf ) = split( /\$/, $tag );
+                my @values = read_field( { record => $record, field => $f, subfield => $sf } );
+                foreach my $v (@values) {
+                    print "$ris_tag  - $v\r\n";
+                }
+            }
+        }
+    }
+
         ## end RIS dataset
         print "ER  - \r\n";
  
@@ -309,64 +350,75 @@ sub print_typetag {
      ## of the leader of a MARC record, the values are the RIS types
      ## that might appropriately represent these types.
      my %ustypehash = (
-                   "a" => "BOOK",
-                   "c" => "MUSIC",
-                   "d" => "MUSIC",
-                   "e" => "MAP",
-                   "f" => "MAP",
-                   "g" => "ADVS",
-                   "i" => "SOUND",
-                   "j" => "SOUND",
-                   "k" => "ART",
-                   "m" => "DATA",
-                   "o" => "GEN",
-                   "p" => "GEN",
-                   "r" => "ART",
-                   "t" => "GEN",
-               );
-    
+            "a" => "BOOK",
+            "c" => "MUSIC",
+            "d" => "MUSIC",
+            "e" => "MAP",
+            "f" => "MAP",
+            "g" => "ADVS",
+            "i" => "SOUND",
+            "j" => "SOUND",
+            "k" => "ART",
+            "m" => "DATA",
+            "o" => "GEN",
+            "p" => "GEN",
+            "r" => "ART",
+            "t" => "MANSCPT",
+            );
+
      my %unitypehash = (
-                   "a" => "BOOK",
-                   "b" => "BOOK",
-                   "c" => "MUSIC",
-                   "d" => "MUSIC",
-                   "e" => "MAP",
-                   "f" => "MAP",
-                   "g" => "ADVS",
-                   "i" => "SOUND",
-                   "j" => "SOUND",
-                   "k" => "ART",
-                   "l" => "ELEC",
-                   "m" => "ADVS",
-                   "r" => "ART",
-               );
-    
+            "a" => "BOOK",
+            "b" => "MANSCPT",
+            "c" => "MUSIC",
+            "d" => "MUSIC",
+            "e" => "MAP",
+            "f" => "MAP",
+            "g" => "ADVS",
+            "i" => "SOUND",
+            "j" => "SOUND",
+            "k" => "ART",
+            "l" => "ELEC",
+            "m" => "GEN",
+            "r" => "ART",
+            );
+
      ## The type of a MARC record is found at position 06 of the leader
-    my $typeofrecord = substr($leader, 6, 1);
+    my $typeofrecord = defined($leader) && length $leader >=6 ?
+                       substr($leader, 6, 1): undef;
+    ## Pos 07 == Bibliographic level
+    my $biblevel = defined($leader) && length $leader >=7 ?
+                       substr($leader, 7, 1): '';
  
-    ## ToDo: for books, field 008 positions 24-27 might have a few more
+    ## TODO: for books, field 008 positions 24-27 might have a few more
      ## hints
  
      my %typehash;
-    
-    ## the ukmarc here is just a guess
-    if ($intype eq "marc21" || $intype eq "ukmarc") {
-       %typehash = %ustypehash;
-    }
-    elsif ($intype eq "unimarc") {
-       %typehash = %unitypehash;
-    }
-    else {
-       ## assume MARC21 as default
-       %typehash = %ustypehash;
-    }
-
-    if (!exists $typehash{$typeofrecord}) {
-       print "TY  - BOOK\r\n"; ## most reasonable default
-       warn ("no type found - assume BOOK") if $marcprint;
+    my $marcflavour = C4::Context->preference("marcflavour");
+    my $intype = lc($marcflavour);
+    if ($intype eq "unimarc") {
+        %typehash = %unitypehash;
      }
      else {
-       print "TY  - $typehash{$typeofrecord}\r\n";
+        %typehash = %ustypehash;
+    }
+
+    if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
+        print "TY  - GEN\r\n"; ## most reasonable default
+        warn ("no type found - assume GEN") if $marcprint;
+    } elsif ( $typeofrecord =~ "a" ) {
+        if ( $biblevel eq 'a' ) {
+            print "TY  - GEN\r\n"; ## monographic component part
+        } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
+            print "TY  - SER\r\n"; ## serial or serial component part
+        } elsif ( $biblevel eq 'm' ) {
+            print "TY  - $typehash{$typeofrecord}\r\n"; ## book
+        } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
+            print "TY  - GEN\r\n"; ## collections, part of collections or made-up collections
+        } elsif ( $biblevel eq 'i' ) {
+            print "TY  - DATA\r\n"; ## updating loose-leafe as Dataset
+        }
+    } else {
+        print "TY  - $typehash{$typeofrecord}\r\n";
      }
  
      ## use $typeofrecord as the return value, just in case
@@ -409,7 +461,7 @@ sub normalize_author {
  
         ## we currently ignore subfield c until someone complains
         if (length($rawauthorb) > 0) {
-           return join ",", ($rawauthora, $rawauthorb);
+        return join ", ", ($rawauthora, $rawauthorb);
         }
         else {
             return $rawauthora;
@@ -431,6 +483,8 @@ sub get_author {
  
      ## the sequence of the name parts is encoded either in indicator
      ## 1 (marc21) or 2 (unimarc)
+    my $marcflavour = C4::Context->preference("marcflavour");
+    my $intype = lc($marcflavour);
      if ($intype eq "unimarc") {
         $indicator = 2;
      }
@@ -448,7 +502,7 @@ sub get_author {
         normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
      }
      else {
-       normalize_author($authorfield->subfield('a'), $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
+        normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
      }
  }
  
@@ -497,16 +551,19 @@ sub print_title {
         my $clean_title = $titlefield->subfield('a');
  
         my $clean_subtitle = $titlefield->subfield('b');
+$clean_subtitle ||= q{};
         $clean_title =~ s% *[/:;.]$%%;
         $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
  
+    my $marcflavour = C4::Context->preference("marcflavour");
+    my $intype = lc($marcflavour);
         if (length($clean_title) > 0
             || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
-           print "TI  - ", &charconv($clean_title);
+           print "TI  - ", $clean_title;
  
             ## subfield $b is relevant only for marc21/ukmarc
             if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
-               print ": ",&charconv($clean_subtitle);
+               print ": ",$clean_subtitle;
             }
             print "\r\n";
         }
@@ -516,6 +573,7 @@ sub print_title {
         ## be written and designated. The field is free-form and resistant
         ## to all parsing efforts, so this information is lost on me
      }
+    return;
  }
  
  ##********************************************************************
@@ -536,16 +594,19 @@ sub print_stitle {
         $clean_title =~ s% *[/:;.]$%%;
  
         if (length($clean_title) > 0) {
-           print "T2  - ", &charconv($clean_title),"\r\n";
+           print "T2  - ", $clean_title,"\r\n";
         }
  
+    my $marcflavour = C4::Context->preference("marcflavour");
+    my $intype = lc($marcflavour);
         if ($intype eq "unimarc") {
             print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
             if (length($titlefield->subfield('v')) > 0) {
-               print "VL  - ", &charconv($titlefield->subfield('v')),"\r\n";
+               print "VL  - ", $titlefield->subfield('v'),"\r\n";
             }
         }
      }
+    return;
  }
  
  ##********************************************************************
@@ -565,8 +626,8 @@ sub print_isbn {
             warn("truncated isbn") if $marcprint;
         }
  
-       my $isbn = substr($isbnfield->subfield('a'), 0, 10);
-       print "SN  - ", &charconv($isbn), "\r\n";
+    my $isbn = $isbnfield->subfield('a');
+       print "SN  - ", $isbn, "\r\n";
      }
  }
  
@@ -588,7 +649,7 @@ sub print_issn {
         }
  
         my $issn = substr($issnfield->subfield('a'), 0, 9);
-       print "SN  - ", &charconv($issn), "\r\n";
+       print "SN  - ", $issn, "\r\n";
      }
  }
  
@@ -600,7 +661,7 @@ sub print_uri {
  
      foreach my $f856 (@f856s) {
          if (my $uri = $f856->subfield('u')) {
-               print "UR  - ", charconv($uri), "\r\n";
+               print "UR  - ", $uri, "\r\n";
          }
      }
  }
@@ -617,7 +678,7 @@ sub print_loc_callno {
         warn("no LOC call number found") if $marcprint;
      }
      else {
-       print "AV  - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
+       print "AV  - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
      }
  }
  
@@ -633,7 +694,7 @@ sub print_dewey {
         warn("no Dewey number found") if $marcprint;
      }
      else {
-       print "U1  - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
+       print "U1  - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
      }
  }
  
@@ -674,6 +735,8 @@ sub print_pubinfo {
         my $pubsub_publisher;
         my $pubsub_date;
  
+    my $marcflavour = C4::Context->preference("marcflavour");
+    my $intype = lc($marcflavour);
         if ($intype eq "unimarc") {
             $pubsub_place = "a";
             $pubsub_publisher = "c";
@@ -706,7 +769,7 @@ sub print_pubinfo {
                 ## the dates are free-form, so we want to extract
                 ## a four-digit year and leave the rest as
                 ## "other info"
-               $protoyear = @$tuple[1];
+        my $protoyear = @$tuple[1];
                 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
  
                 ## strip any separator chars at the end
@@ -747,10 +810,10 @@ sub print_pubinfo {
  
         ## now dump the collected CY and PB lists
         if (@cities > 0) {
-           print "CY  - ", &charconv(join(", ", @cities)), "\r\n";
+           print "CY  - ", join(", ", @cities), "\r\n";
         }
         if (@publishers > 0) {
-           print "PB  - ", &charconv(join(", ", @publishers)), "\r\n";
+           print "PB  - ", join(", ", @publishers), "\r\n";
         }
      }
  }
@@ -768,36 +831,36 @@ sub get_keywords {
  
      ## loop over all 6XX fields
      foreach my $kwfield (@keywords) {
-       if ($kwfield != undef) {
-           ## authornames get special treatment
-           if ($fieldname eq "600") {
-               my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
-        push @kw, $val;
-               print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
-           }
-           else {
-               ## retrieve all available subfields
-               @kwsubfields = $kwfield->subfields();
-               
-               ## loop over all available subfield tuples
-        foreach my $kwtuple (@kwsubfields) {
-                   ## loop over all subfields to check
-            foreach my $subfield (@subfields) {
-                       ## [0] contains subfield code
-                       if (@$kwtuple[0] eq $subfield) {
-                           ## [1] contains value, remove trailing separators
-                           @$kwtuple[1] =~ s% *[,;.:/]*$%%;
-                           if (length(@$kwtuple[1]) > 0) {
-                push @kw, @$kwtuple[1];
-                               print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
-                           }
-                           ## we can leave the subfields loop here
-                           last;
-                       }
-                   }
-               }
-           }
-       }
+        if ($kwfield != undef) {
+            ## authornames get special treatment
+            if ($fieldname eq "600") {
+                my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
+                push @kw, $val;
+                print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
+            }
+            else {
+                ## retrieve all available subfields
+                my @kwsubfields = $kwfield->subfields();
+
+                ## loop over all available subfield tuples
+                foreach my $kwtuple (@kwsubfields) {
+                    ## loop over all subfields to check
+                    foreach my $subfield (@subfields) {
+                        ## [0] contains subfield code
+                        if (@$kwtuple[0] eq $subfield) {
+                            ## [1] contains value, remove trailing separators
+                            @$kwtuple[1] =~ s% *[,;.:/]*$%%;
+                            if (length(@$kwtuple[1]) > 0) {
+                                push @kw, @$kwtuple[1];
+                                print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
+                            }
+                            ## we can leave the subfields loop here
+                            last;
+                        }
+                    }
+                }
+            }
+        }
      }
      return @kw;
  }
@@ -891,28 +954,28 @@ sub pool_subx {
  
      ## loop over all notefields
      foreach my $notefield (@notefields) {
-       if ($notefield != undef) {
-           ## retrieve all available subfield tuples
-           @notesubfields = $notefield->subfields();
-
-           ## loop over all subfield tuples
-        foreach my $notetuple (@notesubfields) {
-               ## loop over all subfields to check
-        foreach my $subfield (@subfields) {
-                   ## [0] contains subfield code
-                   if (@$notetuple[0] eq $subfield) {
-                       ## [1] contains value, remove trailing separators
-                       print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
-                       @$notetuple[1] =~ s% *[,;.:/]*$%%;
-                       if (length(@$notetuple[1]) > 0) {
-                           ## add to list
-                           push @{$aref}, @$notetuple[1];
-                       }
-                       last;
-                   }
-               }
-           }
-       }
+        if (defined $notefield) {
+            ## retrieve all available subfield tuples
+            my @notesubfields = $notefield->subfields();
+
+            ## loop over all subfield tuples
+            foreach my $notetuple (@notesubfields) {
+                ## loop over all subfields to check
+                foreach my $subfield (@subfields) {
+                    ## [0] contains subfield code
+                    if (@$notetuple[0] eq $subfield) {
+                        ## [1] contains value, remove trailing separators
+                        print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
+                        @$notetuple[1] =~ s% *[,;.:/]*$%%;
+                        if (length(@$notetuple[1]) > 0) {
+                            ## add to list
+                            push @{$aref}, @$notetuple[1];
+                        }
+                        last;
+                    }
+                }
+            }
+        }
      }
  }
  
@@ -950,30 +1013,9 @@ sub print_abstract {
      my $allabs = join "; ", @abstrings;
  
      if (length($allabs) > 0) {
-       print "N2  - ", &charconv($allabs), "\r\n";
+       print "N2  - ", $allabs, "\r\n";
      }
  
  }
  
-    
-    
-##********************************************************************
-## charconv(): converts to a different charset based on a global var
-## Arguments: string
-## Returns: string
-##********************************************************************
-sub charconv {
-    if ($utf) {
-       ## return unaltered if already utf-8
-       return @_;
-    }
-    elsif ($uniout eq "t") {
-       ## convert to utf-8
-       return marc8_to_utf8("@_");
-    }
-    else {
-       ## return unaltered if no utf-8 requested
-       return @_;
-    }
-}
  1;