Followup bfffa5 TransformHTMLToXML bug fix
[srvgit] / C4 / Search.pm
index 39de4df..010f59f 100644 (file)
@@ -27,6 +27,8 @@ use XML::Simple;
 use C4::Dates qw(format_date);
 use C4::XSLT;
 use C4::Branch;
+use C4::Debug;
+use YAML;
 use URI::Escape;
 
 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
@@ -645,10 +647,12 @@ sub _remove_stopwords {
 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 #       otherwise, a French word like "leçon" would be split into "le" "çon", "le"
 #       is a stopword, we'd get "çon" and wouldn't find anything...
+#       
                foreach ( keys %{ C4::Context->stopwords } ) {
                        next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
+                       $debug && warn "$_ Dump($operand)";
                        if ( my ($matched) = ($operand =~
-                               /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) )
+                               /([^\X\p{isAlnum}]\Q$_\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$_\E$|^\Q$_\E[^\X\p{isAlnum}])/gi))
                        {
                                $operand =~ s/\Q$matched\E/ /gi;
                                push @stopwords_removed, $_;
@@ -691,7 +695,7 @@ sub _detect_truncation {
 # STEMMING
 sub _build_stemmed_operand {
     my ($operand,$lang) = @_;
-    require Lingua::Stem::Snowball;
+    require Lingua::Stem::Snowball ;
     my $stemmed_operand;
 
     # If operand contains a digit, it is almost certainly an identifier, and should
@@ -702,17 +706,10 @@ sub _build_stemmed_operand {
     return $operand if $operand =~ /\d/;
 
 # FIXME: the locale should be set based on the user's language and/or search choice
+    warn "$lang";
     my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
                                                encoding => "UTF-8" );
 
-# FIXME: these should be stored in the db so the librarian can modify the behavior
-    $stemmer->add_exceptions(
-        {
-            'and' => 'and',
-            'or'  => 'or',
-            'not' => 'not',
-        }
-    );
     my @words = split( / /, $operand );
     my @stems = $stemmer->stem(\@words);
     for my $stem (@stems) {
@@ -1233,26 +1230,49 @@ sub searchResults {
         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
             my @fields  = $marcrecord->fields();
-            foreach my $field (@fields) {
-                my $tag      = $field->tag();
-                my $tagvalue = $field->as_string();
-                $summary =~
-                  s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
-                unless ( $tag < 10 ) {
-                    my @subf = $field->subfields;
-                    for my $i ( 0 .. $#subf ) {
-                        my $subfieldcode  = $subf[$i][0];
-                        my $subfieldvalue = $subf[$i][1];
-                        my $tagsubf       = $tag . $subfieldcode;
-                        $summary =~
-s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
+            
+            my $newsummary;
+            foreach my $line ( "$summary\n" =~ /(.*)\n/g ){
+                my $tags = {};
+                foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/ ) {
+                    $tag =~ /(.{3})(.)/;
+                    if($marcrecord->field($1)){
+                        my @abc = $marcrecord->field($1)->subfield($2);
+                        $tags->{$tag} = $#abc + 1 ;
+                    }
+                }
+                
+                # Work out how many times this line has to be repeated
+                my $max = 0;
+                foreach my $tag (keys(%$tags)){
+                    $max = $tags->{$tag} if($tags->{$tag} > $max);
+                 }
+                
+                # Substitute the tags and emit the line once per repetition
+                for (my $i = 0 ; $i < $max ; $i++){
+                    my $newline = $line;
+
+                    foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
+                        $tag =~ /(.{3})(.)/;
+                        
+                        if($marcrecord->field($1)){
+                            my @repl = $marcrecord->field($1)->subfield($2);
+                            my $subfieldvalue = $repl[$i];
+                            
+                            if (! utf8::is_utf8($subfieldvalue)) {
+                                utf8::decode($subfieldvalue);
+                            }
+                             $newline =~ s/\[$tag\]/$subfieldvalue/g;
+                        }
                     }
+                    $newsummary .= "$newline\n";
                 }
             }
-            # FIXME: yuk
-            $summary =~ s/\[(.*?)]//g;
-            $summary =~ s/\n/<br\/>/g;
-            $oldbiblio->{summary} = $summary;
+
+            $newsummary =~ s/\[(.*?)]//g;
+            $newsummary =~ s/\n/<br\/>/g;
+            $oldbiblio->{summary} = $newsummary;
         }
 
         # Pull out the items fields