use C4::Dates qw(format_date);
use C4::XSLT;
use C4::Branch;
+use C4::Debug;
+use YAML;
use URI::Escape;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
&getRecords
&buildQuery
&NZgetRecords
+ &AddSearchHistory
+ &GetDistinctValues
+ &BiblioAddAuthorities
);
+#FIXME: i had to add BiblioAddAuthorities here because in Biblios.pm it caused circular dependencies (C4::Search uses C4::Biblio, and BiblioAddAuthorities uses SimpleSearch from C4::Search)
# make all your functions, whether exported or not;
# if it's a branch, label by the name, not the code,
if ( $link_value =~ /branch/ ) {
- $facet_label_value =
- $branches->{$one_facet}->{'branchname'};
+ if (defined $branches
+ && ref($branches) eq "HASH"
+ && defined $branches->{$one_facet}
+ && ref ($branches->{$one_facet}) eq "HASH")
+ {
+ $facet_label_value =
+ $branches->{$one_facet}->{'branchname'};
+ }
+ else {
+ $facet_label_value = "*";
+ }
}
# but we're down with the whole label being in the link's title.
# we use IsAlpha unicode definition, to deal correctly with diacritics.
# otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
# is a stopword, we'd get "çon" and wouldn't find anything...
- foreach ( keys %{ C4::Context->stopwords } ) {
- next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
- if ( $operand =~
- /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
- {
- $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
- $operand =~ s/^$_\P{IsAlpha}/ /gi;
- $operand =~ s/\P{IsAlpha}$_$/ /gi;
- $operand =~ s/$1//gi;
- push @stopwords_removed, $_;
- }
- }
- }
+#
+ foreach ( keys %{ C4::Context->stopwords } ) {
+ next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
+ $debug && warn "$_ Dump($operand)";
+ if ( my ($matched) = ($operand =~
+ /([^\X\p{isAlnum}]\Q$_\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$_\E$|^\Q$_\E[^\X\p{isAlnum}])/gi))
+ {
+ $operand =~ s/\Q$matched\E/ /gi;
+ push @stopwords_removed, $_;
+ }
+ }
+ }
return ( $operand, \@stopwords_removed );
}
# STEMMING
sub _build_stemmed_operand {
- my ($operand) = @_;
+ my ($operand,$lang) = @_;
+ require Lingua::Stem::Snowball ;
my $stemmed_operand;
# If operand contains a digit, it is almost certainly an identifier, and should
return $operand if $operand =~ /\d/;
# FIXME: the locale should be set based on the user's language and/or search choice
- my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
+ warn "$lang";
+ my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
+ encoding => "UTF-8" );
-# FIXME: these should be stored in the db so the librarian can modify the behavior
- $stemmer->add_exceptions(
- {
- 'and' => 'and',
- 'or' => 'or',
- 'not' => 'not',
- }
- );
my @words = split( / /, $operand );
- my $stems = $stemmer->stem(@words);
- for my $stem (@$stems) {
+ my @stems = $stemmer->stem(\@words);
+ for my $stem (@stems) {
$stemmed_operand .= "$stem";
$stemmed_operand .= "?"
unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
$simple_query, $query_cgi,
$query_desc, $limit,
$limit_cgi, $limit_desc,
-$stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
+$stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
Build queries and limits in CCL, CGI, Human,
handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
=cut
sub buildQuery {
- my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
+ my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
warn "---------\nEnter buildQuery\n---------" if $DEBUG;
# Some helpful index variants
my $index_plus = $index . $struct_attr . ":" if $index;
my $index_plus_comma = $index . $struct_attr . "," if $index;
- if ($auto_truncation){
-# FIXME Auto Truncation is only valid for LTR languages
-# use C4::Output;
-# use C4::Languages qw(regex_lang_subtags get_bidi);
-# $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
-# my $current_lang = regex_lang_subtags($lang);
-# my $bidi;
-# $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
- $index_plus_comma .= "rtrn:";
- }
# Remove Stopwords
if ($remove_stopwords) {
if ( $stopwords_removed && $DEBUG );
}
+ if ($auto_truncation){
+ $operand=~join(" ",map{ "$_*" }split (/\s+/,$operand));
+ }
+
# Detect Truncation
my $truncated_operand;
my( $nontruncated, $righttruncated, $lefttruncated,
# Handle Stemming
my $stemmed_operand;
- $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
+ $stemmed_operand = _build_stemmed_operand($operand, $lang)
+ if $stemming;
warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
}
my $marcflavour = C4::Context->preference("marcflavour");
+ # We get the biblionumber position in MARC
+ my ($bibliotag,$bibliosubf)=GetMarcFromKohaField('biblio.biblionumber','');
+ my $fw;
+
# loop through all of the records we've retrieved
for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
- my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
- $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
+
+ if ($bibliotag<10){
+ $fw = GetFrameworkCode($marcrecord->field($bibliotag)->data);
+ }else{
+ $fw = GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf));
+ }
+
+ my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, $fw );
+ $oldbiblio->{subtitle} = GetRecordValue('subtitle', $marcrecord, $fw);
$oldbiblio->{result_number} = $i + 1;
# add imageurl to itemtype if there is one
$oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
$oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
- $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
- $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
+ $oldbiblio->{normalized_upc} = GetNormalizedUPC( $marcrecord,$marcflavour);
+ $oldbiblio->{normalized_ean} = GetNormalizedEAN( $marcrecord,$marcflavour);
$oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
$oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
$oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
my @fields = $marcrecord->fields();
- foreach my $field (@fields) {
- my $tag = $field->tag();
- my $tagvalue = $field->as_string();
- $summary =~
- s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
- unless ( $tag < 10 ) {
- my @subf = $field->subfields;
- for my $i ( 0 .. $#subf ) {
- my $subfieldcode = $subf[$i][0];
- my $subfieldvalue = $subf[$i][1];
- my $tagsubf = $tag . $subfieldcode;
- $summary =~
-s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
+
+ my $newsummary;
+ foreach my $line ( "$summary\n" =~ /(.*)\n/g ){
+ my $tags = {};
+ foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/ ) {
+ $tag =~ /(.{3})(.)/;
+ if($marcrecord->field($1)){
+ my @abc = $marcrecord->field($1)->subfield($2);
+ $tags->{$tag} = $#abc + 1 ;
}
}
+
+ # We catch how many times to repeat this line
+ my $max = 0;
+ foreach my $tag (keys(%$tags)){
+ $max = $tags->{$tag} if($tags->{$tag} > $max);
+ }
+
+ # we replace, and repeat each line
+ for (my $i = 0 ; $i < $max ; $i++){
+ my $newline = $line;
+
+ foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
+ $tag =~ /(.{3})(.)/;
+
+ if($marcrecord->field($1)){
+ my @repl = $marcrecord->field($1)->subfield($2);
+ my $subfieldvalue = $repl[$i];
+
+ if (! utf8::is_utf8($subfieldvalue)) {
+ utf8::decode($subfieldvalue);
+ }
+
+ $newline =~ s/\[$tag\]/$subfieldvalue/g;
+ }
+ }
+ $newsummary .= "$newline\n";
+ }
}
- # FIXME: yuk
- $summary =~ s/\[(.*?)]//g;
- $summary =~ s/\n/<br\/>/g;
- $oldbiblio->{summary} = $summary;
+
+ $newsummary =~ s/\[(.*?)]//g;
+ $newsummary =~ s/\n/<br\/>/g;
+ $oldbiblio->{summary} = $newsummary;
}
# Pull out the items fields
);
}
+sub AddSearchHistory{
+ my ($borrowernumber,$session,$query_desc,$query_cgi, $total)=@_;
+ my $dbh = C4::Context->dbh;
+
+ # Add the request the user just made
+ my $sql = "INSERT INTO search_history(userid, sessionid, query_desc, query_cgi, total, time) VALUES(?, ?, ?, ?, ?, NOW())";
+ my $sth = $dbh->prepare($sql);
+ $sth->execute($borrowernumber, $session, $query_desc, $query_cgi, $total);
+ return $dbh->last_insert_id(undef, 'search_history', undef,undef,undef);
+}
+
+sub GetSearchHistory{
+ my ($borrowernumber,$session)=@_;
+ my $dbh = C4::Context->dbh;
+
+ # Add the request the user just made
+ my $query = "SELECT FROM search_history WHERE (userid=? OR sessionid=?)";
+ my $sth = $dbh->prepare($query);
+ $sth->execute($borrowernumber, $session);
+ return $sth->fetchall_hashref({});
+}
=head2 z3950_search_args
return $array;
}
+=head2 BiblioAddAuthorities
+
+( $countlinked, $countcreated ) = BiblioAddAuthorities($record, $frameworkcode);
+
+this function finds the authorities linked to the biblio
+ * search in the authority DB for the same authid (in $9 of the biblio)
+ * search in the authority DB for the same 001 (in $3 of the biblio in UNIMARC)
+ * search in the authority DB for the same values (exactly) (in all subfields of the biblio)
+OR adds a new authority record
+
+=over 2
+
+=item C<input arg:>
+
+ * $record is the MARC record in question (marc blob)
+ * $frameworkcode is the bibliographic framework to use (if it is "" it uses the default framework)
+
+=item C<Output arg:>
+
+ * $countlinked is the number of authorities records that are linked to this authority
+ * $countcreated
+
+=item C<BUGS>
+ * I had to add this to Search.pm (instead of the logical Biblio.pm) because of a circular dependency (this sub uses SimpleSearch, and Search.pm uses Biblio.pm)
+=back
+
+=cut
+
+
+sub BiblioAddAuthorities{
+ my ( $record, $frameworkcode ) = @_;
+ my $dbh=C4::Context->dbh;
+ my $query=$dbh->prepare(qq|
+SELECT authtypecode,tagfield
+FROM marc_subfield_structure
+WHERE frameworkcode=?
+AND (authtypecode IS NOT NULL AND authtypecode<>\"\")|);
+# SELECT authtypecode,tagfield
+# FROM marc_subfield_structure
+# WHERE frameworkcode=?
+# AND (authtypecode IS NOT NULL OR authtypecode<>\"\")|);
+ $query->execute($frameworkcode);
+ my ($countcreated,$countlinked);
+ while (my $data=$query->fetchrow_hashref){
+ foreach my $field ($record->field($data->{tagfield})){
+ next if ($field->subfield('3')||$field->subfield('9'));
+ # No authorities id in the tag.
+ # Search if there is any authorities to link to.
+ my $query='at='.$data->{authtypecode}.' ';
+ map {$query.= ' and he,ext="'.$_->[1].'"' if ($_->[0]=~/[A-z]/)} $field->subfields();
+ my ($error, $results, $total_hits)=SimpleSearch( $query, undef, undef, [ "authorityserver" ] );
+ # there is only 1 result
+ if ( $error ) {
+ warn "BIBLIOADDSAUTHORITIES: $error";
+ return (0,0) ;
+ }
+ if ($results && scalar(@$results)==1) {
+ my $marcrecord = MARC::File::USMARC::decode($results->[0]);
+ $field->add_subfields('9'=>$marcrecord->field('001')->data);
+ $countlinked++;
+ } elsif (scalar(@$results)>1) {
+ #More than One result
+ #This can comes out of a lack of a subfield.
+# my $marcrecord = MARC::File::USMARC::decode($results->[0]);
+# $record->field($data->{tagfield})->add_subfields('9'=>$marcrecord->field('001')->data);
+ $countlinked++;
+ } else {
+ #There are no results, build authority record, add it to Authorities, get authid and add it to 9
+ ###NOTICE : This is only valid if a subfield is linked to one and only one authtypecode
+ ###NOTICE : This can be a problem. We should also look into other types and rejected forms.
+ my $authtypedata=C4::AuthoritiesMarc->GetAuthType($data->{authtypecode});
+ next unless $authtypedata;
+ my $marcrecordauth=MARC::Record->new();
+ my $authfield=MARC::Field->new($authtypedata->{auth_tag_to_report},'','',"a"=>"".$field->subfield('a'));
+ map { $authfield->add_subfields($_->[0]=>$_->[1]) if ($_->[0]=~/[A-z]/ && $_->[0] ne "a" )} $field->subfields();
+ $marcrecordauth->insert_fields_ordered($authfield);
+
+ # bug 2317: ensure new authority knows it's using UTF-8; currently
+ # only need to do this for MARC21, as MARC::Record->as_xml_record() handles
+ # automatically for UNIMARC (by not transcoding)
+ # FIXME: AddAuthority() instead should simply explicitly require that the MARC::Record
+ # use UTF-8, but as of 2008-08-05, did not want to introduce that kind
+ # of change to a core API just before the 3.0 release.
+ if (C4::Context->preference('marcflavour') eq 'MARC21') {
+ SetMarcUnicodeFlag($marcrecordauth, 'MARC21');
+ }
+
+# warn "AUTH RECORD ADDED : ".$marcrecordauth->as_formatted;
+
+ my $authid=AddAuthority($marcrecordauth,'',$data->{authtypecode});
+ $countcreated++;
+ $field->add_subfields('9'=>$authid);
+ }
+ }
+ }
+ return ($countlinked,$countcreated);
+}
+
+=head2 GetDistinctValues($field);
+
+C<$field> is a reference to the fields array
+
+=cut
+
+sub GetDistinctValues {
+ my ($fieldname,$string)=@_;
+ # returns a reference to a hash of references to branches...
+ if ($fieldname=~/\./){
+ my ($table,$column)=split /\./, $fieldname;
+ my $dbh = C4::Context->dbh;
+ warn "select DISTINCT($column) as value, count(*) as cnt from $table group by lib order by $column ";
+ my $sth = $dbh->prepare("select DISTINCT($column) as value, count(*) as cnt from $table ".($string?" where $column like \"$string%\"":"")."group by value order by $column ");
+ $sth->execute;
+ my $elements=$sth->fetchall_arrayref({});
+ return $elements;
+ }
+ else {
+ $string||= qq("");
+ my @servers=qw<biblioserver authorityserver>;
+ my (@zconns,@results);
+ for ( my $i = 0 ; $i < @servers ; $i++ ) {
+ $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
+ $results[$i] =
+ $zconns[$i]->scan(
+ ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconns[$i])
+ );
+ }
+ # The big moment: asynchronously retrieve results from all servers
+ my @elements;
+ while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
+ my $ev = $zconns[ $i - 1 ]->last_event();
+ if ( $ev == ZOOM::Event::ZEND ) {
+ next unless $results[ $i - 1 ];
+ my $size = $results[ $i - 1 ]->size();
+ if ( $size > 0 ) {
+ for (my $j=0;$j<$size;$j++){
+ my %hashscan;
+ @hashscan{qw(value cnt)}=$results[ $i - 1 ]->display_term($j);
+ push @elements, \%hashscan;
+ }
+ }
+ }
+ }
+ return \@elements;
+ }
+}
END { } # module clean-up code here (global destructor)