Bug 17600: Standardize our EXPORT_OK

[srvgit] / C4 / Charset.pm
diff --git a/C4/Charset.pm b/C4/Charset.pm

index 4521262..e780cef 100644 (file)
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -4,36 +4,33 @@ package C4::Charset;
  #
  # This file is part of Koha.
  #
-# Koha is free software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any later
-# version.
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
  #
-# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
  #
-# You should have received a copy of the GNU General Public License along
-# with Koha; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
  
-use strict;
-use warnings;
+use Modern::Perl;
  
-use MARC::Charset qw/marc8_to_utf8/;
+use MARC::Charset;
  use Text::Iconv;
-use C4::Context;
-use C4::Debug;
-use Unicode::Normalize;
+use Unicode::Normalize qw( NFC NFD );
+use Encode;
  
-use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+use Koha::Logger;
  
+our (@ISA, @EXPORT_OK);
  BEGIN {
-    # set the version for version checking
-    $VERSION = 3.07.00.049;
      require Exporter;
      @ISA    = qw(Exporter);
-    @EXPORT = qw(
+    @EXPORT_OK = qw(
          NormalizeString
          IsStringUTF8ish
          MarcToUTF8Record
@@ -111,8 +108,8 @@ will assume that this situation occur does not very often.
  sub IsStringUTF8ish {
      my $str = shift;
  
-    return 1 if utf8::is_utf8($str);
-    return utf8::decode($str);
+    return 1 if Encode::is_utf8($str);
+    return utf8::decode( $str );
  }
  
  =head2 SetUTF8Flag
@@ -180,7 +177,7 @@ Sample code :
  sub NormalizeString{
         my ($string,$nfd,$transform)=@_;
      return $string unless defined($string); # force scalar context return.
-       utf8::decode($string) unless (utf8::is_utf8($string));
+    $string = Encode::decode('UTF-8', $string) unless (Encode::is_utf8($string));
         if ($nfd){
                 $string= NFD($string);
         }
@@ -332,6 +329,7 @@ sub SetMarcUnicodeFlag {
          substr($leader, 9, 1) = 'a';
          $marc_record->leader($leader); 
      } elsif ($marc_flavour =~/UNIMARC/) {
+        require C4::Context;
         my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
          $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);
          my $string; 
@@ -354,7 +352,7 @@ sub SetMarcUnicodeFlag {
              $marc_record->insert_grouped_field( 
                  MARC::Field->new( 100, '', '', "a" => $string ) ); 
          }
-               $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 );
+        Koha::Logger->get->debug("encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 ));
      } else {
          warn "Unrecognized marcflavour: $marc_flavour";
      }
@@ -441,7 +439,7 @@ sub SanitizeRecord {
      my $record_modified = 0;
      my $frameworkcode   = C4::Biblio::GetFrameworkCode($biblionumber);
      my ( $url_field, $url_subfield ) =
-      C4::Biblio::GetMarcFromKohaField( 'biblioitems.url', $frameworkcode );
+      C4::Biblio::GetMarcFromKohaField( 'biblioitems.url' );
      foreach my $field ( $record->fields() ) {
          if ( $field->is_control_field() ) {
              my $value           = $field->data();
@@ -624,7 +622,7 @@ sub _marc_marc8_to_utf8 {
                      # occurs, upgrade the string in place.  Moral of the story seems to be
                      # that pack("U", ...) is better than chr(...) if you need to guarantee
                      # that the resulting string is UTF-8.
-                    utf8::upgrade($utf8sf);
+                    $utf8sf = Encode::encode('UTF-8', $utf8sf);
                  }
                  push @converted_subfields, $subfield->[0], $utf8sf;
              }
@@ -764,7 +762,7 @@ where the eight bit is set) octet with the Unicode
  replacement character.  This is meant as a last-ditch
  method, and would be best used as part of a UI that
  lets a cataloguer pick various character conversions
-until he or she finds the right one.
+until they find the right one.
  
  =cut
  
@@ -803,6 +801,9 @@ Converts a string from ISO-5426 to UTF-8.
  
  
  my %chars;
+
+####
+## 0xb0-0xbf
  $chars{0xb0}=0x0101;#3/0ayn[ain]
  $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
  #$chars{0xb2}=0x00e0;#'à';
@@ -811,15 +812,47 @@ $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
  $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
  # $chars{0xb4}='è';
  $chars{0xb4}=0x00e8;
-$chars{0xbd}=0x02b9;
-$chars{0xbe}=0x02ba;
  # $chars{0xb5}='é';
  $chars{0xb5}=0x00e9;
+$chars{0xb6}=0x2021; # double dagger
+$chars{0xb7}=0x00b7; # middle dot
+$chars{0xb8}=0x2033; # double prime
+$chars{0xb9}=0x2019; # right single quotation mark
+$chars{0xba}=0x201d; # right double quotation mark
+$chars{0xbb}=0x00bb; # right-pointing double angle quotation mark
+$chars{0xbc}=0x266f; # music sharp sign
+$chars{0xbd}=0x02b9; # modifier letter prime
+$chars{0xbe}=0x02ba; # modifier letter double prime
+$chars{0xbf}=0x00bf; # inverted question mark
+
+####
+## 0xe0-0xef
+$chars{0xe1}=0x00c6; # latin capital letter ae
+$chars{0xe2}=0x0110; # latin capital letter d with stroke
+$chars{0xe6}=0x0132; # latin capital ligature ij
+$chars{0xe8}=0x0141; # latin capital letter l with stroke
+$chars{0xe9}=0x00d8; # latin capital letter o with stroke
+$chars{0xea}=0x0152; # latin capital ligature oe
+$chars{0xec}=0x00de; # latin capital letter thorn
+
+####
+## 0xf0-0xff
+$chars{0xf1}=0x00e6; # latin small letter ae
+$chars{0xf2}=0x0111; # latin small letter d with stroke
+$chars{0xf3}=0x00f0; # latin small letter eth
+$chars{0xf5}=0x0131; # latin small letter dotless i
+$chars{0xf6}=0x0133; # latin small ligature ij
+$chars{0xf8}=0x0142; # latin small letter l with stroke
+$chars{0xf9}=0x00f8; # latin small letter o with stroke
+$chars{0xfa}=0x0153; # latin small ligature oe
+$chars{0xfb}=0x00df; # latin small letter sharp s
+$chars{0xfc}=0x00fe; # latin small letter thorn
+
+####
+## Others
  $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
  $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
-$chars{0xfa}=0x0153; #oe
-$chars{0xea}=0x0152; #oe
-$chars{0x81d1}=0x00b0;
+#$chars{0x81d1}=0x00b0; # FIXME useless
  
  ####
  ## combined characters iso5426
@@ -1121,8 +1154,8 @@ $chars{0xd375}=0x0173; # small u with ogonek
  $chars{0xd441}=0x1e00; # capital a with ring below
  $chars{0xd461}=0x1e01; # small a with ring below
          # 5/5 half circle below
-$chars{0xf948}=0x1e2a; # capital h with breve below
-$chars{0xf968}=0x1e2b; # small h with breve below
+$chars{0xd548}=0x1e2a; # capital h with breve below
+$chars{0xd568}=0x1e2b; # small h with breve below
          # 5/6 dot below
  $chars{0xd641}=0x1ea0; # capital a with dot below
  $chars{0xd642}=0x1e04; # capital b with dot below