Followup bfffa5 TransformHTMLToXML bug fix

[srvgit] / C4 / Search.pm
diff --git a/C4/Search.pm b/C4/Search.pm

index 39de4df..010f59f 100644 (file)
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -27,6 +27,8 @@ use XML::Simple;
  use C4::Dates qw(format_date);
  use C4::XSLT;
  use C4::Branch;
+use C4::Debug;
+use YAML;
  use URI::Escape;
  
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
@@ -645,10 +647,12 @@ sub _remove_stopwords {
  #       we use IsAlpha unicode definition, to deal correctly with diacritics.
  #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
  #       is a stopword, we'd get "çon" and wouldn't find anything...
+#       
                 foreach ( keys %{ C4::Context->stopwords } ) {
                         next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
+                       $debug && warn "$_ Dump($operand)";
                         if ( my ($matched) = ($operand =~
-                               /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) )
+                               /([^\X\p{isAlnum}]\Q$_\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$_\E$|^\Q$_\E[^\X\p{isAlnum}])/gi))
                         {
                                 $operand =~ s/\Q$matched\E/ /gi;
                                 push @stopwords_removed, $_;
@@ -691,7 +695,7 @@ sub _detect_truncation {
  # STEMMING
  sub _build_stemmed_operand {
      my ($operand,$lang) = @_;
-    require Lingua::Stem::Snowball;
+    require Lingua::Stem::Snowball ;
      my $stemmed_operand;
  
      # If operand contains a digit, it is almost certainly an identifier, and should
@@ -702,17 +706,10 @@ sub _build_stemmed_operand {
      return $operand if $operand =~ /\d/;
  
  # FIXME: the locale should be set based on the user's language and/or search choice
+    warn "$lang";
      my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
                                                 encoding => "UTF-8" );
  
-# FIXME: these should be stored in the db so the librarian can modify the behavior
-    $stemmer->add_exceptions(
-        {
-            'and' => 'and',
-            'or'  => 'or',
-            'not' => 'not',
-        }
-    );
      my @words = split( / /, $operand );
      my @stems = $stemmer->stem(\@words);
      for my $stem (@stems) {
@@ -1233,26 +1230,49 @@ sub searchResults {
          if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
              my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
              my @fields  = $marcrecord->fields();
-            foreach my $field (@fields) {
-                my $tag      = $field->tag();
-                my $tagvalue = $field->as_string();
-                $summary =~
-                  s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
-                unless ( $tag < 10 ) {
-                    my @subf = $field->subfields;
-                    for my $i ( 0 .. $#subf ) {
-                        my $subfieldcode  = $subf[$i][0];
-                        my $subfieldvalue = $subf[$i][1];
-                        my $tagsubf       = $tag . $subfieldcode;
-                        $summary =~
-s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
+            
+            my $newsummary;
+            foreach my $line ( "$summary\n" =~ /(.*)\n/g ){
+                my $tags = {};
+                foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/ ) {
+                    $tag =~ /(.{3})(.)/;
+                    if($marcrecord->field($1)){
+                        my @abc = $marcrecord->field($1)->subfield($2);
+                        $tags->{$tag} = $#abc + 1 ;
+                    }
+                }
+                
+                # Work out how many times this line has to be repeated: the
+                # largest number of values found for any subfield it references
+                my $max = 0;
+                foreach my $tag (keys(%$tags)){
+                    $max = $tags->{$tag} if($tags->{$tag} > $max);
+                 }
+                
+                # Emit the line once per repetition, replacing each [tag+subfield]
+                # placeholder with the value for that repetition
+                for (my $i = 0 ; $i < $max ; $i++){
+                    my $newline = $line;
+
+                    foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
+                        $tag =~ /(.{3})(.)/;
+                        
+                        if($marcrecord->field($1)){
+                            my @repl = $marcrecord->field($1)->subfield($2);
+                            my $subfieldvalue = $repl[$i];
+                            
+                            if (! utf8::is_utf8($subfieldvalue)) {
+                                utf8::decode($subfieldvalue);
+                            }
+ 
+                             $newline =~ s/\[$tag\]/$subfieldvalue/g;
+                        }
                      }
+                    $newsummary .= "$newline\n";
                  }
              }
-            # FIXME: yuk
-            $summary =~ s/\[(.*?)]//g;
-            $summary =~ s/\n/<br\/>/g;
-            $oldbiblio->{summary} = $summary;
+
+            $newsummary =~ s/\[(.*?)]//g;
+            $newsummary =~ s/\n/<br\/>/g;
+            $oldbiblio->{summary} = $newsummary;
          }
  
          # Pull out the items fields