SetUTF8Flag
SetMarcUnicodeFlag
StripNonXmlChars
+ nsb_clean
);
}
=head2 SetUTF8Flag
- my $marc_record = SetUTF8Flag($marc_record);
+ my $marc_record = SetUTF8Flag($marc_record, $nfd);
This function sets the Perl UTF8 flag for data.
It is required when using new_from_usmarc.
When editing the fields and subfields of Unicode MARC records, you
would end up with double-encoded data without using this function.
+If $nfd is set, string normalization will use NFD instead of NFC.
+
FIXME
In my opinion, this function belongs to MARC::Record and not
to this package.
=cut
sub SetUTF8Flag{
- my ($record)=@_;
+ my ($record, $nfd)=@_;
return unless ($record && $record->fields());
foreach my $field ($record->fields()){
if ($field->tag()>=10){
my @subfields;
foreach my $subfield ($field->subfields()){
- push @subfields,($$subfield[0],NormalizeString($$subfield[1]));
+ push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd));
}
my $newfield=MARC::Field->new(
$field->tag(),
return $str;
}
+
+
+=head2 nsb_clean
+
+=over 4
+
+nsb_clean($string);
+
+=back
+
+Removes the Non Sorting Block (NSB / NSE) delimiter characters from $string
+and returns the cleaned string.
+
+=cut
+sub nsb_clean {
+    # Strips the Non Sorting Block delimiters from $string and returns the
+    # cleaned copy.
+    #
+    # Parameter: $string - the text to clean (may be undef).
+    # Returns:   the text without the delimiter characters, or undef when
+    #            $string is undef.
+    my ($string) = @_;
+
+    # Nothing to strip; return undef as-is instead of raising
+    # "uninitialized value" warnings.
+    return $string unless defined $string;
+
+    # Characters removed:
+    #   \x88 / \x89 : NSB / NSE (begin / end of Non Sorting Block)
+    #   \x98 / \x9C : NSB / NSE, second pair
+    #   \xC2        : in UTF-8 each of the four characters above is encoded
+    #                 as the byte 0xC2 followed by the character's own byte,
+    #                 so 0xC2 is what remains once a delimiter has been
+    #                 removed from an undecoded byte string.
+    # NOTE(review): on a decoded character string \xC2 is also U+00C2, so
+    # that letter is removed as well. Kept for backward compatibility;
+    # confirm this is intended.
+    #
+    # tr///d operates on the lexical copy rather than on the global $_,
+    # which may be aliased to the caller's own data inside for/map/grep.
+    $string =~ tr/\x88\x89\x98\x9C\xC2//d;
+
+    return $string;
+}
+
+
=head1 INTERNAL FUNCTIONS
=head2 _default_marc21_charconv_to_utf8