BEGIN {
# set the version for version checking
- $VERSION = 3.01;
+ $VERSION = 3.07.00.049;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(
SetUTF8Flag
SetMarcUnicodeFlag
StripNonXmlChars
+ nsb_clean
);
}
=head2 SetUTF8Flag
- my $marc_record = SetUTF8Flag($marc_record);
+ my $marc_record = SetUTF8Flag($marc_record, $nfd);
This function sets the PERL UTF8 flag for data.
It is required when using new_from_usmarc
When editing unicode marc records fields and subfields, you
would end up in double encoding without using this function.
+If $nfd is set, string normalization will use NFD instead of NFC
+
FIXME
In my opinion, this function belongs to MARC::Record and not
to this package.
=cut
sub SetUTF8Flag{
- my ($record)=@_;
- return unless ($record && $record->fields());
- foreach my $field ($record->fields()){
- if ($field->tag()>=10){
- my @subfields;
- foreach my $subfield ($field->subfields()){
- push @subfields,($$subfield[0],NormalizeString($$subfield[1]));
- }
- my $newfield=MARC::Field->new(
- $field->tag(),
- $field->indicator(1),
- $field->indicator(2),
- @subfields
- );
- $field->replace_with($newfield);
- }
- }
+ my ($record, $nfd)=@_;
+ return unless ($record && $record->fields());
+ foreach my $field ($record->fields()){
+ if ($field->tag()>=10){
+ my @subfields;
+ foreach my $subfield ($field->subfields()){
+ push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd));
+ }
+ eval {
+ my $newfield=MARC::Field->new(
+ $field->tag(),
+ $field->indicator(1),
+ $field->indicator(2),
+ @subfields
+ );
+ $field->replace_with($newfield);
+ };
+ warn "ERROR occurred in SetUTF8Flag $@" if $@;
+ }
+ }
}
=head2 NormalizeString
substr($leader, 9, 1) = 'a';
$marc_record->leader($leader);
} elsif ($marc_flavour =~/UNIMARC/) {
+ my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
+ $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);
my $string;
- my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,9):(36,22));
+ my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,12):(36,25));
$string=$marc_record->subfield( 100, "a" );
if (defined $string && length($string)==$subflength) {
$string = substr $string, 0,$subflength if (length($string)>$subflength);
else {
$string = POSIX::strftime( "%Y%m%d", localtime );
$string =~ s/\-//g;
- $string = sprintf( "%-*s", $subflength, $string );
+ $string = sprintf( "%-*s", $subflength, $string );
+ substr ( $string, ($encodingposition - 3), 3, $defaultlanguage);
}
- substr( $string, $encodingposition, 8, "frey50 " );
+ substr( $string, $encodingposition, 3, "y50" );
if ( $marc_record->subfield( 100, "a" ) ) {
$marc_record->field('100')->update(a=>$string);
}
$marc_record->insert_grouped_field(
MARC::Field->new( 100, '', '', "a" => $string ) );
}
- $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 8 );
+ $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 );
} else {
warn "Unrecognized marcflavour: $marc_flavour";
}
return $str;
}
+
+
+=head2 nsb_clean
+
+=over 4
+
+nsb_clean($string);
+
+=back
+
+Removes Non Sorting Block characters
+
+=cut
+sub nsb_clean {
+ my $NSB = '\x88' ; # NSB : begin Non Sorting Block
+ my $NSE = '\x89' ; # NSE : Non Sorting Block end
+ my $NSB2 = '\x98' ; # NSB : begin Non Sorting Block
+ my $NSE2 = '\x9C' ; # NSE : Non Sorting Block end
+ my $C2 = '\xC2' ; # What is this char ? It is sometimes left by the regexp after removing NSB / NSE
+
+ # handles non sorting blocks
+ my ($string) = @_ ;
+ $_ = $string ;
+ s/$NSB//g ;
+ s/$NSE//g ;
+ s/$NSB2//g ;
+ s/$NSE2//g ;
+ s/$C2//g ;
+ $string = $_ ;
+
+ return($string) ;
+}
+
+
=head1 INTERNAL FUNCTIONS
=head2 _default_marc21_charconv_to_utf8