X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=C4%2FCharset.pm;h=8b69848d5dd5f8bdd6f25aad5c7b969821ac9ad0;hb=df0a6a71d7301e4e2e346bfe50e00a8e135da33e;hp=a4e6b716f8e5b44d251c0cebb1475e827ec9ac88;hpb=c2043933f7f9b30bf87a4c5656c50fd10584ef4f;p=koha_gimpoz diff --git a/C4/Charset.pm b/C4/Charset.pm index a4e6b716f8..8b69848d5d 100644 --- a/C4/Charset.pm +++ b/C4/Charset.pm @@ -39,6 +39,7 @@ BEGIN { SetUTF8Flag SetMarcUnicodeFlag StripNonXmlChars + nsb_clean ); } @@ -382,6 +383,40 @@ sub StripNonXmlChars { return $str; } + + +=head2 nsb_clean + +=over 4 + +nsb_clean($string); + +=back + +Removes Non Sorting Block characters + +=cut +sub nsb_clean { + my $NSB = '\x88' ; # NSB : begin Non Sorting Block + my $NSE = '\x89' ; # NSE : Non Sorting Block end + my $NSB2 = '\x98' ; # NSB : begin Non Sorting Block + my $NSE2 = '\x9C' ; # NSE : Non Sorting Block end + my $C2 = '\xC2' ; # What is this char ? It is sometimes left by the regexp after removing NSB / NSE + + # handles non sorting blocks + my ($string) = @_ ; + $_ = $string ; + s/$NSB//g ; + s/$NSE//g ; + s/$NSB2//g ; + s/$NSE2//g ; + s/$C2//g ; + $string = $_ ; + + return($string) ; +} + + =head1 INTERNAL FUNCTIONS =head2 _default_marc21_charconv_to_utf8