#
# This file is part of Koha.
#
-# Koha is free software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any later
-# version.
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
#
-# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License along
-# with Koha; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
use strict;
use warnings;
use Text::Iconv;
use C4::Debug;
use Unicode::Normalize;
+use Encode qw( decode encode is_utf8 );
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
BEGIN {
# set the version for version checking
- $VERSION = 3.01;
+ $VERSION = 3.07.00.049; # NOTE(review): a bare literal with two or more dots is parsed by Perl as a v-string, not a decimal number - confirm this is intended
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(
SetMarcUnicodeFlag
StripNonXmlChars
nsb_clean
+ SanitizeRecord
);
}
+=encoding UTF-8
+
=head1 NAME
C4::Charset - utilities for handling character set conversions.
sub IsStringUTF8ish {
my $str = shift;
- return 1 if utf8::is_utf8($str);
- return utf8::decode($str);
+ return 1 if Encode::is_utf8($str); # the scalar already carries Perl's UTF-8 flag
+ return utf8::decode( $str ); # otherwise try decoding the local copy; the result of utf8::decode is returned as the verdict
}
=head2 SetUTF8Flag
=cut
sub SetUTF8Flag{
- my ($record, $nfd)=@_;
- return unless ($record && $record->fields());
- foreach my $field ($record->fields()){
- if ($field->tag()>=10){
- my @subfields;
- foreach my $subfield ($field->subfields()){
- push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd));
- }
- my $newfield=MARC::Field->new(
- $field->tag(),
- $field->indicator(1),
- $field->indicator(2),
- @subfields
- );
- $field->replace_with($newfield);
- }
- }
+ my ($record, $nfd)=@_;
+ return unless ($record && $record->fields()); # nothing to do without a record that has fields
+ foreach my $field ($record->fields()){
+ if ($field->tag()>=10){ # only fields whose tag compares numerically >= 10 are rebuilt
+ my @subfields;
+ foreach my $subfield ($field->subfields()){
+ push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd)); # (code, value) pairs, each value passed through NormalizeString
+ }
+ eval { # trap failures while building or swapping in the field so the loop carries on
+ my $newfield=MARC::Field->new(
+ $field->tag(),
+ $field->indicator(1),
+ $field->indicator(2),
+ @subfields
+ );
+ $field->replace_with($newfield);
+ };
+ warn "ERROR occurred in SetUTF8Flag $@" if $@; # report the trapped error instead of dying
+ }
+ }
}
=head2 NormalizeString
sub NormalizeString{
my ($string,$nfd,$transform)=@_;
- utf8::decode($string) unless (utf8::is_utf8($string));
+ return $string unless defined($string); # force scalar context return.
+ $string = Encode::decode('UTF-8', $string) unless (Encode::is_utf8($string));
if ($nfd){
$string= NFD($string);
}
substr($leader, 9, 1) = 'a';
$marc_record->leader($leader);
} elsif ($marc_flavour =~/UNIMARC/) {
+ require C4::Context;
+ my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
+ $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);
my $string;
- my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,9):(36,22));
+ my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,12):(36,25));
$string=$marc_record->subfield( 100, "a" );
if (defined $string && length($string)==$subflength) {
$string = substr $string, 0,$subflength if (length($string)>$subflength);
else {
$string = POSIX::strftime( "%Y%m%d", localtime );
$string =~ s/\-//g;
- $string = sprintf( "%-*s", $subflength, $string );
+ $string = sprintf( "%-*s", $subflength, $string );
+ substr ( $string, ($encodingposition - 3), 3, $defaultlanguage);
}
- substr( $string, $encodingposition, 8, "frey50 " );
+ substr( $string, $encodingposition, 3, "y50" );
if ( $marc_record->subfield( 100, "a" ) ) {
$marc_record->field('100')->update(a=>$string);
}
$marc_record->insert_grouped_field(
MARC::Field->new( 100, '', '', "a" => $string ) );
}
- $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 8 );
+ $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 );
} else {
warn "Unrecognized marcflavour: $marc_flavour";
}
# handles non sorting blocks
my ($string) = @_ ;
$_ = $string ;
- s/$NSB//g ;
- s/$NSE//g ;
- s/$NSB2//g ;
- s/$NSE2//g ;
- s/$C2//g ;
+ s/($C2){0,1}($NSB|$NSB2)//g ;
+ s/($C2){0,1}($NSE|$NSE2)//g ;
$string = $_ ;
return($string) ;
}
+=head2 SanitizeRecord
+
+    my ( $record, $record_modified ) = SanitizeRecord( $marcrecord, $biblionumber );
+
+Sanitize a record.
+This routine is called in the maintenance script misc/maintenance/sanitize_records.pl.
+It replaces every '&amp;' sequence (including repeated forms such as
+'&amp;amp;') by a single '&' in control field data and in subfield values.
+
+C<$biblionumber> is used to look up the framework of the record and, through
+it, the field/subfield mapped to C<biblioitems.url>. That subfield is never
+sanitized and is kept unchanged in the record.
+
+The fields of C<$marcrecord> are updated through the field objects returned
+by C<< $marcrecord->fields() >>. The routine returns the record followed by a
+flag that is 1 when at least one value was changed and 0 otherwise.
+
+=cut
+
+sub SanitizeRecord {
+    my ( $record, $biblionumber ) = @_;
+    my $record_modified = 0;
+    my $frameworkcode   = C4::Biblio::GetFrameworkCode($biblionumber);
+    my ( $url_field, $url_subfield ) =
+      C4::Biblio::GetMarcFromKohaField( 'biblioitems.url', $frameworkcode );
+
+    # The URL mapping may be absent; only compare against it when both parts
+    # are defined, so that "eq" never sees an undefined value.
+    my $has_url_mapping = defined $url_field && defined $url_subfield;
+
+    foreach my $field ( $record->fields() ) {
+        if ( $field->is_control_field() ) {
+            my $value           = $field->data();
+            my $sanitized_value = _clean_ampersand($value);
+            if ( $sanitized_value ne $value ) {
+                $field->update($sanitized_value);
+                $record_modified = 1;
+            }
+            next;
+        }
+
+        my $is_url_field   = $has_url_mapping && $url_field eq $field->tag();
+        my $field_modified = 0;
+        my @new_subfields;
+        foreach my $subfield ( $field->subfields() ) {
+            my ( $code, $value ) = @{$subfield};
+
+            # The URL subfield is deliberately not sanitized. It still has to
+            # be copied into the rebuilt field, otherwise replacing the field
+            # would silently drop the URL from the record.
+            if ( $is_url_field && $url_subfield eq $code ) {
+                push @new_subfields, $code => $value;
+                next;
+            }
+
+            my $sanitized_value = _clean_ampersand($value);
+            $field_modified = 1 if $sanitized_value ne $value;
+            push @new_subfields, $code => $sanitized_value;
+        }
+
+        # Leave fields alone when none of their subfields changed.
+        next unless $field_modified;
+
+        my $new_field = eval {
+            MARC::Field->new(
+                $field->tag(),        $field->indicator(1),
+                $field->indicator(2), @new_subfields
+            );
+        };
+        if ( $@ || !$new_field ) {
+            # The original field is kept as is when the replacement cannot
+            # be built, so the record is not reported as modified for it.
+            warn "error : $@";
+            next;
+        }
+
+        $field->replace_with($new_field);
+        $record_modified = 1;
+    }
+
+    return $record, $record_modified;
+}
+
+# Return a copy of the given string in which every '&' followed by one or
+# more 'amp;' groups has been reduced to the bare '&'.
+sub _clean_ampersand {
+    my $cleaned = shift;
+    $cleaned =~ s/(&)(amp;)+/$1/g;
+    return $cleaned;
+}
+
=head1 INTERNAL FUNCTIONS
=head2 _default_marc21_charconv_to_utf8
# occurs, upgrade the string in place. Moral of the story seems to be
# that pack("U", ...) is better than chr(...) if you need to guarantee
# that the resulting string is UTF-8.
- utf8::upgrade($utf8sf);
+ $utf8sf = Encode::encode('UTF-8', $utf8sf);
}
push @converted_subfields, $subfield->[0], $utf8sf;
}