#
# This file is part of Koha.
#
-# Koha is free software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any later
-# version.
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
#
-# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License along
-# with Koha; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
-use strict;
-use warnings;
+use Modern::Perl;
-use MARC::Charset qw/marc8_to_utf8/;
+use MARC::Charset;
use Text::Iconv;
-use C4::Context;
-use C4::Debug;
-use Unicode::Normalize;
+use Unicode::Normalize qw( NFC NFD );
+use Encode;
-use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+use Koha::Logger;
+our (@ISA, @EXPORT_OK);
BEGIN {
- # set the version for version checking
- $VERSION = 3.07.00.049;
require Exporter;
@ISA = qw(Exporter);
- @EXPORT = qw(
+ @EXPORT_OK = qw(
NormalizeString
IsStringUTF8ish
MarcToUTF8Record
# IsStringUTF8ish($str)
#
# Reports whether $str already is, or can be decoded as, UTF-8.
#   * Returns 1 immediately when Perl already flags the scalar as a
#     character (UTF-8) string.
#   * Otherwise returns the result of utf8::decode, which is true only when
#     the octets form well-formed UTF-8.
# $str is a private copy taken from @_, so the in-place decode below never
# alters the caller's variable.
sub IsStringUTF8ish {
my $str = shift;
- return 1 if utf8::is_utf8($str);
- return utf8::decode($str);
+ return 1 if Encode::is_utf8($str);
+ return utf8::decode( $str );
}
=head2 SetUTF8Flag
sub NormalizeString{
my ($string,$nfd,$transform)=@_;
return $string unless defined($string); # force scalar context return.
- utf8::decode($string) unless (utf8::is_utf8($string));
+ $string = Encode::decode('UTF-8', $string) unless (Encode::is_utf8($string));
if ($nfd){
$string= NFD($string);
}
substr($leader, 9, 1) = 'a';
$marc_record->leader($leader);
} elsif ($marc_flavour =~/UNIMARC/) {
+ require C4::Context;
my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
$defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);
my $string;
$marc_record->insert_grouped_field(
MARC::Field->new( 100, '', '', "a" => $string ) );
}
- $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 );
+ Koha::Logger->get->debug("encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 ));
} else {
warn "Unrecognized marcflavour: $marc_flavour";
}
my $record_modified = 0;
my $frameworkcode = C4::Biblio::GetFrameworkCode($biblionumber);
my ( $url_field, $url_subfield ) =
- C4::Biblio::GetMarcFromKohaField( 'biblioitems.url', $frameworkcode );
+ C4::Biblio::GetMarcFromKohaField( 'biblioitems.url' );
foreach my $field ( $record->fields() ) {
if ( $field->is_control_field() ) {
my $value = $field->data();
# occurs, upgrade the string in place. Moral of the story seems to be
# that pack("U", ...) is better than chr(...) if you need to guarantee
# that the resulting string is UTF-8.
- utf8::upgrade($utf8sf);
+ $utf8sf = Encode::encode('UTF-8', $utf8sf);
}
push @converted_subfields, $subfield->[0], $utf8sf;
}
replacement character. This is meant as a last-ditch
method, and would be best used as part of a UI that
lets a cataloguer pick various character conversions
-until he or she finds the right one.
+until they find the right one.
=cut
# Lookup table used for character conversion: each key is a source byte
# value and each value is the Unicode code point it is converted to
# (presumably ISO 5426 input, going by the section headings — confirm).
my %chars;
+
+####
+## 0xb
# NOTE(review): the trailing comment names ayn, but 0x0101 is U+0101
# (latin small letter a with macron) — confirm which one is intended.
$chars{0xb0}=0x0101;#3/0ayn[ain]
$chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
#$chars{0xb2}=0x00e0;#'à';
$chars{0xb3}=0x00e7;# latin small letter c with cedilla
# $chars{0xb4}='è';
$chars{0xb4}=0x00e8; # latin small letter e with grave
-$chars{0xbd}=0x02b9;
-$chars{0xbe}=0x02ba;
# $chars{0xb5}='é';
$chars{0xb5}=0x00e9; # latin small letter e with acute
+$chars{0xb6}=0x2021; # double dagger
+$chars{0xb7}=0x00b7; # middle dot
+$chars{0xb8}=0x2033; # double prime
+$chars{0xb9}=0x2019; # right single quotation mark
+$chars{0xba}=0x201d; # right double quotation mark
+$chars{0xbb}=0x00bb; # right-pointing double angle quotation mark
+$chars{0xbc}=0x266f; # music sharp sign
+$chars{0xbd}=0x02b9; # modifier letter prime
+$chars{0xbe}=0x02ba; # modifier letter double prime
+$chars{0xbf}=0x00bf; # inverted question mark
+
+####
+## 0xe
+$chars{0xe1}=0x00c6; # latin capital letter ae
+$chars{0xe2}=0x0110; # latin capital letter d with stroke
+$chars{0xe6}=0x0132; # latin capital ligature ij
+$chars{0xe8}=0x0141; # latin capital letter l with stroke
+$chars{0xe9}=0x00d8; # latin capital letter o with stroke
+$chars{0xea}=0x0152; # latin capital ligature oe
+$chars{0xec}=0x00de; # latin capital letter thorn
+
+####
+## 0xf
+$chars{0xf1}=0x00e6; # latin small letter ae
+$chars{0xf2}=0x0111; # latin small letter d with stroke
+$chars{0xf3}=0x00f0; # latin small letter eth
+$chars{0xf5}=0x0131; # latin small letter dotless i
+$chars{0xf6}=0x0133; # latin small ligature ij
+$chars{0xf8}=0x0142; # latin small letter l with stroke
+$chars{0xf9}=0x00f8; # latin small letter o with stroke
+$chars{0xfa}=0x0153; # latin small ligature oe
+$chars{0xfb}=0x00df; # latin small letter sharp s
+$chars{0xfc}=0x00fe; # latin small letter thorn
+
+####
+## Others
$chars{0x97}=0x003c;# less-than sign
$chars{0x98}=0x003e;# greater-than sign
-$chars{0xfa}=0x0153; #oe
-$chars{0xea}=0x0152; #oe
-$chars{0x81d1}=0x00b0;
+#$chars{0x81d1}=0x00b0; # FIXME useless
####
## combined characters iso5426
# Two-byte keys: in the entries below the low byte is the ASCII code of the
# base letter (0x41 'A', 0x61 'a', 0x48 'H', 0x68 'h', 0x42 'B') and the high
# byte selects the diacritic. The "5/5" and "5/6" headings line up with the
# high bytes 0xd5 and 0xd6 respectively.
$chars{0xd441}=0x1e00; # capital a with ring below
$chars{0xd461}=0x1e01; # small a with ring below
# 5/5 half circle below
-$chars{0xf948}=0x1e2a; # capital h with breve below
-$chars{0xf968}=0x1e2b; # small h with breve below
+$chars{0xd548}=0x1e2a; # capital h with breve below
+$chars{0xd568}=0x1e2b; # small h with breve below
# 5/6 dot below
$chars{0xd641}=0x1ea0; # capital a with dot below
$chars{0xd642}=0x1e04; # capital b with dot below