use C4::Dates qw(format_date);
use C4::XSLT;
use C4::Branch;
+use C4::Debug;
+use YAML;
use URI::Escape;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
# we use IsAlpha unicode definition, to deal correctly with diacritics.
# otherwise, a French word like "leçon" would be split into "le" and "çon"; "le"
# is a stopword, so we'd get "çon" and wouldn't find anything...
+#
foreach ( keys %{ C4::Context->stopwords } ) {
next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
+ $debug && warn "$_ Dump($operand)";
if ( my ($matched) = ($operand =~
- /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) )
+ /([^\X\p{isAlnum}]\Q$_\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$_\E$|^\Q$_\E[^\X\p{isAlnum}])/gi))
{
$operand =~ s/\Q$matched\E/ /gi;
push @stopwords_removed, $_;
# STEMMING
sub _build_stemmed_operand {
my ($operand,$lang) = @_;
- require Lingua::Stem::Snowball;
+ require Lingua::Stem::Snowball ;
my $stemmed_operand;
# If operand contains a digit, it is almost certainly an identifier, and should not be stemmed
return $operand if $operand =~ /\d/;
# FIXME: the locale should be set based on the user's language and/or search choice
+ warn "$lang";
my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
encoding => "UTF-8" );
-# FIXME: these should be stored in the db so the librarian can modify the behavior
- $stemmer->add_exceptions(
- {
- 'and' => 'and',
- 'or' => 'or',
- 'not' => 'not',
- }
- );
my @words = split( / /, $operand );
my @stems = $stemmer->stem(\@words);
for my $stem (@stems) {
if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
my @fields = $marcrecord->fields();
- foreach my $field (@fields) {
- my $tag = $field->tag();
- my $tagvalue = $field->as_string();
- $summary =~
- s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
- unless ( $tag < 10 ) {
- my @subf = $field->subfields;
- for my $i ( 0 .. $#subf ) {
- my $subfieldcode = $subf[$i][0];
- my $subfieldvalue = $subf[$i][1];
- my $tagsubf = $tag . $subfieldcode;
- $summary =~
-s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
+
+ my $newsummary;
+ foreach my $line ( "$summary\n" =~ /(.*)\n/g ){
+ my $tags = {};
+ foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/ ) {
+ $tag =~ /(.{3})(.)/;
+ if($marcrecord->field($1)){
+ my @abc = $marcrecord->field($1)->subfield($2);
+ $tags->{$tag} = $#abc + 1 ;
+ }
+ }
+
+ # Work out how many times to repeat this line (the largest subfield count found)
+ my $max = 0;
+ foreach my $tag (keys(%$tags)){
+ $max = $tags->{$tag} if($tags->{$tag} > $max);
+ }
+
+ # we replace, and repeat each line
+ for (my $i = 0 ; $i < $max ; $i++){
+ my $newline = $line;
+
+ foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
+ $tag =~ /(.{3})(.)/;
+
+ if($marcrecord->field($1)){
+ my @repl = $marcrecord->field($1)->subfield($2);
+ my $subfieldvalue = $repl[$i];
+
+ if (! utf8::is_utf8($subfieldvalue)) {
+ utf8::decode($subfieldvalue);
+ }
+
+ $newline =~ s/\[$tag\]/$subfieldvalue/g;
+ }
}
+ $newsummary .= "$newline\n";
}
}
- # FIXME: yuk
- $summary =~ s/\[(.*?)]//g;
- $summary =~ s/\n/<br\/>/g;
- $oldbiblio->{summary} = $summary;
+
+ $newsummary =~ s/\[(.*?)]//g;
+ $newsummary =~ s/\n/<br\/>/g;
+ $oldbiblio->{summary} = $newsummary;
}
# Pull out the items fields