=head2 nsb_clean

=over 4

my $cleaned = nsb_clean($string);

=back

Removes Non Sorting Block characters.

Returns a copy of C<$string> from which every occurrence of the following
characters has been deleted:

    \x88  NSB, begin Non Sorting Block
    \x89  NSE, Non Sorting Block end
    \x98  alternate begin Non Sorting Block marker
    \x9C  alternate Non Sorting Block end marker
    \xC2  stray character that can remain next to a removed marker

The argument itself is never modified, and the caller's C<$_> is left
untouched.  An undefined argument is returned as-is, without warnings.

=cut

sub nsb_clean {
    my ($string) = @_;

    # Nothing to strip; hand undef straight back instead of emitting
    # "uninitialized value" warnings.
    return $string unless defined $string;

    # Work on the lexical copy only.  Assigning to the global $_ here would
    # overwrite the caller's topic variable (and, inside a for/map/grep,
    # whatever element it is aliased to).
    #
    # NOTE(review): \xC2 is deleted unconditionally, as it always has been.
    # It is presumably the UTF-8 lead byte of an encoded NSB/NSE marker
    # (U+0088 is the byte pair C2 88), but removing every \xC2 also strips
    # U+00C2 from character strings and the lead byte of other two-byte
    # sequences from byte strings -- confirm whether callers depend on this.
    $string =~ tr/\x88\x89\x98\x9C\xC2//d;

    return $string;
}