7310 Indentation followup replacing leading tabs with spaces

[koha_gimpoz] / C4 / Charset.pm
diff --git a/C4/Charset.pm b/C4/Charset.pm

index a4a06e7..8b69848 100644 (file)
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -39,6 +39,7 @@ BEGIN {
          SetUTF8Flag
          SetMarcUnicodeFlag
          StripNonXmlChars
          SetUTF8Flag
          SetMarcUnicodeFlag
          StripNonXmlChars
+        nsb_clean
      );
  }
  
      );
  }
  
@@ -112,7 +113,7 @@ sub IsStringUTF8ish {
  
  =head2 SetUTF8Flag
  
  
  =head2 SetUTF8Flag
  
-  my $marc_record = SetUTF8Flag($marc_record);
+  my $marc_record = SetUTF8Flag($marc_record, $nfd);
  
  This function sets the PERL UTF8 flag for data.
  It is required when using new_from_usmarc 
  
  This function sets the PERL UTF8 flag for data.
  It is required when using new_from_usmarc 
@@ -120,6 +121,8 @@ since MARC::File::USMARC does not handle PERL UTF8 setting.
  When editing unicode marc records fields and subfields, you
  would end up in double encoding without using this function. 
  
  When editing unicode marc records fields and subfields, you
  would end up in double encoding without using this function. 
  
+If $nfd is set, string normalization will use NFD instead of NFC
+
  FIXME
  In my opinion, this function belongs to MARC::Record and not
  to this package.
  FIXME
  In my opinion, this function belongs to MARC::Record and not
  to this package.
@@ -128,13 +131,13 @@ But since it handles charset, and MARC::Record, it finds its way in that package
  =cut
  
  sub SetUTF8Flag{
  =cut
  
  sub SetUTF8Flag{
-       my ($record)=@_;
+       my ($record, $nfd)=@_;
         return unless ($record && $record->fields());
         foreach my $field ($record->fields()){
                 if ($field->tag()>=10){
                         my @subfields;
                         foreach my $subfield ($field->subfields()){
         return unless ($record && $record->fields());
         foreach my $field ($record->fields()){
                 if ($field->tag()>=10){
                         my @subfields;
                         foreach my $subfield ($field->subfields()){
-                               push @subfields,($$subfield[0],NormalizeString($$subfield[1]));
+                               push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd));
                         }
                         my $newfield=MARC::Field->new(
                                                         $field->tag(),
                         }
                         my $newfield=MARC::Field->new(
                                                         $field->tag(),
@@ -380,6 +383,40 @@ sub StripNonXmlChars {
      return $str;
  }
  
      return $str;
  }
  
+
+
+=head2 nsb_clean
+
+=over 4
+
+nsb_clean($string);
+
+=back
+
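+Returns a copy of $string with the Non Sorting Block delimiter characters
+(NSB / NSE: 0x88, 0x89, 0x98, 0x9C) and any 0xC2 character removed.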
+
+=cut
+sub nsb_clean {
+    # Returns a copy of the given string with the Non Sorting Block
+    # delimiters removed. The argument itself is never modified.
+    #
+    #   $string : text that may contain NSB / NSE markers
+    #   returns : the cleaned string, or undef when $string is undef
+    my ($string) = @_;
+
+    # Nothing to clean; also avoids "uninitialized value" warnings.
+    return $string unless defined $string;
+
+    # Work on the lexical copy rather than on the global $_ : assigning
+    # to $_ here would overwrite the caller's data whenever $_ is aliased
+    # (for / map / grep) and dies when that alias is read-only.
+    #
+    #   \x88 , \x98 : NSB, begin Non Sorting Block
+    #   \x89 , \x9C : NSE, Non Sorting Block end
+    #   \xC2        : lead byte used by UTF-8 to encode U+0080..U+00BF,
+    #                 i.e. what is left of an encoded NSB / NSE in an
+    #                 undecoded byte string once the marker byte is gone
+    #
+    # NOTE: \xC2 is also the code point of 'A with circumflex', so that
+    # letter is stripped from decoded strings too; kept as-is because
+    # existing callers may rely on it.
+    $string =~ s/[\x88\x89\x98\x9C\xC2]//g;
+
+    return $string;
+}
+
+
  =head1 INTERNAL FUNCTIONS
  
  =head2 _default_marc21_charconv_to_utf8
  =head1 INTERNAL FUNCTIONS
  
  =head2 _default_marc21_charconv_to_utf8