koha-dev.rot13.org Git - koha_gimpoz/blob - C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField
  22 use C4::Koha;      # getFacets
  23 use Lingua::Stem;
  24 use C4::Date;
  25
  26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  27
  28 # set the version for version checking
  29 $VERSION = 3.00;
  30
  31 =head1 NAME
  32
  33 C4::Search - Functions for searching the Koha catalog.
  34
  35 =head1 SYNOPSIS
  36
  37 see opac/opac-search.pl or catalogue/search.pl for example of usage
  38
  39 =head1 DESCRIPTION
  40
  41 This module provides the searching facilities for the Koha into a zebra catalog.
  42
  43 =head1 FUNCTIONS
  44
  45 =cut
  46
  47 @ISA    = qw(Exporter);
  48 @EXPORT = qw(
  49   &SimpleSearch
  50   &findseealso
  51   &FindDuplicate
  52   &searchResults
  53   &getRecords
  54   &buildQuery
  55   &NZgetRecords
  56   &ModBiblios
  57 );
  58
  59 # make all your functions, whether exported or not;
  60
  61 =head2 findseealso($dbh,$fields);
  62
  63 C<$dbh> is a link to the DB handler.
  64
  65 use C4::Context;
  66 my $dbh =C4::Context->dbh;
  67
  68 C<$fields> is a reference to the fields array
  69
  70 This function modify the @$fields array and add related fields to search on.
  71
  72 =cut
  73
  74 sub findseealso {
  75     my ( $dbh, $fields ) = @_;
  76     my $tagslib = GetMarcStructure( 1 );
  77     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  78         my ($tag)      = substr( @$fields[$i], 1, 3 );
  79         my ($subfield) = substr( @$fields[$i], 4, 1 );
  80         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  81           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  82     }
  83 }
  84
  85 =head2 FindDuplicate
  86
  87 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  88
  89 =cut
  90
  91 sub FindDuplicate {
  92     my ($record) = @_;
  93     my $dbh = C4::Context->dbh;
  94     my $result = TransformMarcToKoha( $dbh, $record, '' );
  95     my $sth;
  96     my $query;
  97     my $search;
  98     my $type;
  99     my ( $biblionumber, $title );
 100
 101     # search duplicate on ISBN, easy and fast..
 102     # ... normalize first
 103     if ( $result->{isbn} ) {
 104         $result->{isbn} =~ s/\(.*$//;
 105         $result->{isbn} =~ s/\s+$//;
 106     }
 107     #$search->{'avoidquerylog'}=1;
 108     if ( $result->{isbn} ) {
 109         $query = "isbn=$result->{isbn}";
 110     }
 111     else {
 112         $result->{title} =~ s /\\//g;
 113         $result->{title} =~ s /\"//g;
 114         $result->{title} =~ s /\(//g;
 115         $result->{title} =~ s /\)//g;
 116         # remove valid operators
 117         $result->{title} =~ s/(and|or|not)//g;
 118         $query = "ti,ext=$result->{title}";
 119         $query .= " and mt=$result->{itemtype}" if ($result->{itemtype});
 120         if ($result->{author}){
 121           $result->{author} =~ s /\\//g;
 122           $result->{author} =~ s /\"//g;
 123           $result->{author} =~ s /\(//g;
 124           $result->{author} =~ s /\)//g;
 125           # remove valid operators
 126           $result->{author} =~ s/(and|or|not)//g;
 127           $query .= " and au,ext=$result->{author}";
 128         }
 129     }
 130     my ($error,$searchresults) =
 131       SimpleSearch($query); # FIXME :: hardcoded !
 132     my @results;
 133     foreach my $possible_duplicate_record (@$searchresults) {
 134         my $marcrecord =
 135           MARC::Record->new_from_usmarc($possible_duplicate_record);
 136         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 137
 138         # FIXME :: why 2 $biblionumber ?
 139         if ($result){
 140           push @results, $result->{'biblionumber'};
 141           push @results, $result->{'title'};
 142         }
 143     }
 144     return @results;
 145 }
 146
 147 =head2 SimpleSearch
 148
 149 ($error,$results) = SimpleSearch($query,@servers);
 150
 151 this function performs a simple search on the catalog using zoom.
 152
 153 =over 2
 154
 155 =item C<input arg:>
 156
 157     * $query could be a simple keyword or a complete CCL query wich is depending on your ccl file.
 158     * @servers is optionnal. default one is read on koha.xml
 159
 160 =item C<Output arg:>
 161     * $error is a string which containt the description error if there is one. Else it's empty.
 162     * \@results is an array of marc record.
 163
 164 =item C<usage in the script:>
 165
 166 =back
 167
 168 my ($error, $marcresults) = SimpleSearch($query);
 169
 170 if (defined $error) {
 171     $template->param(query_error => $error);
 172     warn "error: ".$error;
 173     output_html_with_http_headers $input, $cookie, $template->output;
 174     exit;
 175 }
 176
 177 my $hits = scalar @$marcresults;
 178 my @results;
 179
 180 for(my $i=0;$i<$hits;$i++) {
 181     my %resultsloop;
 182     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 183     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 184
 185     #build the hash for the template.
 186     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 187     $resultsloop{title}           = $biblio->{'title'};
 188     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 189     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 190     $resultsloop{author}          = $biblio->{'author'};
 191     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 192     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 193
 194     push @results, \%resultsloop;
 195 }
 196 $template->param(result=>\@results);
 197
 198 =cut
 199
 200 sub SimpleSearch {
 201     my $query   = shift;
 202     if (C4::Context->preference('NoZebra')) {
 203         my $result = NZorder(NZanalyse($query))->{'biblioserver'}->{'RECORDS'};
 204         return (undef,$result);
 205     } else {
 206         my @servers = @_;
 207         my @results;
 208         my @tmpresults;
 209         my @zconns;
 210         return ( "No query entered", undef ) unless $query;
 211
 212         #@servers = (C4::Context->config("biblioserver")) unless @servers;
 213         @servers =
 214         ("biblioserver") unless @servers
 215         ;    # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 216
 217         # Connect & Search
 218         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 219             $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 220             $tmpresults[$i] =
 221             $zconns[$i]
 222             ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
 223
 224             # getting error message if one occured.
 225             my $error =
 226                 $zconns[$i]->errmsg() . " ("
 227             . $zconns[$i]->errcode() . ") "
 228             . $zconns[$i]->addinfo() . " "
 229             . $zconns[$i]->diagset();
 230
 231             return ( $error, undef ) if $zconns[$i]->errcode();
 232         }
 233         my $hits;
 234         my $ev;
 235         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 236             $ev = $zconns[ $i - 1 ]->last_event();
 237             if ( $ev == ZOOM::Event::ZEND ) {
 238                 $hits = $tmpresults[ $i - 1 ]->size();
 239             }
 240             if ( $hits > 0 ) {
 241                 for ( my $j = 0 ; $j < $hits ; $j++ ) {
 242                     my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
 243                     push @results, $record;
 244                 }
 245             }
 246         }
 247         return ( undef, \@results );
 248     }
 249 }
 250
 251 # performs the search
 252 sub getRecords {
 253     my (
 254         $koha_query,     $federated_query,  $sort_by_ref,
 255         $servers_ref,    $results_per_page, $offset,
 256         $expanded_facet, $branches,         $query_type,
 257         $scan
 258     ) = @_;
 259 #     warn "Query : $koha_query";
 260     my @servers = @$servers_ref;
 261     my @sort_by = @$sort_by_ref;
 262
 263     # create the zoom connection and query object
 264     my $zconn;
 265     my @zconns;
 266     my @results;
 267     my $results_hashref = ();
 268
 269     ### FACETED RESULTS
 270     my $facets_counter = ();
 271     my $facets_info    = ();
 272     my $facets         = getFacets();
 273
 274     #### INITIALIZE SOME VARS USED CREATE THE FACETED RESULTS
 275     my @facets_loop;    # stores the ref to array of hashes for template
 276     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 277         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 278
 279 # perform the search, create the results objects
 280 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 281         my $query_to_use;
 282         if ( $servers[$i] =~ /biblioserver/ ) {
 283             $query_to_use = $koha_query;
 284         }
 285         else {
 286             $query_to_use = $federated_query;
 287         }
 288
 289         # check if we've got a query_type defined
 290         eval {
 291             if ($query_type)
 292             {
 293                 if ( $query_type =~ /^ccl/ ) {
 294                     $query_to_use =~
 295                       s/\:/\=/g;    # change : to = last minute (FIXME)
 296
 297                     #                 warn "CCL : $query_to_use";
 298                     $results[$i] =
 299                       $zconns[$i]->search(
 300                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 301                       );
 302                 }
 303                 elsif ( $query_type =~ /^cql/ ) {
 304
 305                     #                 warn "CQL : $query_to_use";
 306                     $results[$i] =
 307                       $zconns[$i]->search(
 308                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 309                 }
 310                 elsif ( $query_type =~ /^pqf/ ) {
 311
 312                     #                 warn "PQF : $query_to_use";
 313                     $results[$i] =
 314                       $zconns[$i]->search(
 315                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 316                 }
 317             }
 318             else {
 319                 if ($scan) {
 320
 321                     #                 warn "preparing to scan";
 322                     $results[$i] =
 323                       $zconns[$i]->scan(
 324                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 325                       );
 326                 }
 327                 else {
 328
 329                     #             warn "LAST : $query_to_use";
 330                     $results[$i] =
 331                       $zconns[$i]->search(
 332                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 333                       );
 334                 }
 335             }
 336         };
 337         if ($@) {
 338             warn "WARNING: query problem with $query_to_use " . $@;
 339         }
 340
 341         # concatenate the sort_by limits and pass them to the results object
 342         my $sort_by;
 343         foreach my $sort (@sort_by) {
 344             if ($sort eq "author_az") {
 345                 $sort_by.="1=1003 <i ";
 346             }
 347             elsif ($sort eq "author_za") {
 348                 $sort_by.="1=1003 >i ";
 349             }
 350             elsif ($sort eq "popularity_asc") {
 351                 $sort_by.="1=9003 <i ";
 352             }
 353             elsif ($sort eq "popularity_dsc") {
 354                 $sort_by.="1=9003 >i ";
 355             }
 356             elsif ($sort eq "call_number_asc") {
 357                 $sort_by.="1=20  <i ";
 358             }
 359             elsif ($sort eq "call_number_dsc") {
 360                 $sort_by.="1=20 >i ";
 361             }
 362             elsif ($sort eq "pubdate_asc") {
 363                 $sort_by.="1=31 <i ";
 364             }
 365             elsif ($sort eq "pubdate_dsc") {
 366                 $sort_by.="1=31 >i ";
 367             }
 368             elsif ($sort eq "acqdate_asc") {
 369                 $sort_by.="1=32 <i ";
 370             }
 371             elsif ($sort eq "acqdate_dsc") {
 372                 $sort_by.="1=32 >i ";
 373             }
 374             elsif ($sort eq "title_az") {
 375                 $sort_by.="1=4 <i ";
 376             }
 377             elsif ($sort eq "title_za") {
 378                 $sort_by.="1=4 >i ";
 379             }
 380         }
 381         if ($sort_by) {
 382             if ( $results[$i]->sort( "yaz", $sort_by ) < 0) {
 383                 warn "WARNING sort $sort_by failed";
 384             }
 385         }
 386     }
 387     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 388         my $ev = $zconns[ $i - 1 ]->last_event();
 389         if ( $ev == ZOOM::Event::ZEND ) {
 390             my $size = $results[ $i - 1 ]->size();
 391             if ( $size > 0 ) {
 392                 my $results_hash;
 393                 #$results_hash->{'server'} = $servers[$i-1];
 394                 # loop through the results
 395                 $results_hash->{'hits'} = $size;
 396                 my $times;
 397                 if ( $offset + $results_per_page <= $size ) {
 398                     $times = $offset + $results_per_page;
 399                 }
 400                 else {
 401                     $times = $size;
 402                 }
 403                 for ( my $j = $offset ; $j < $times ; $j++ )
 404                 {   #(($offset+$count<=$size) ? ($offset+$count):$size) ; $j++){
 405                     my $records_hash;
 406                     my $record;
 407                     my $facet_record;
 408                     ## This is just an index scan
 409                     if ($scan) {
 410                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 411
 412                  # here we create a minimal MARC record and hand it off to the
 413                  # template just like a normal result ... perhaps not ideal, but
 414                  # it works for now
 415                         my $tmprecord = MARC::Record->new();
 416                         $tmprecord->encoding('UTF-8');
 417                         my $tmptitle;
 418
 419           # srote the minimal record in author/title (depending on MARC flavour)
 420                         if ( C4::Context->preference("marcflavour") eq
 421                             "UNIMARC" )
 422                         {
 423                             $tmptitle = MARC::Field->new(
 424                                 '200', ' ', ' ',
 425                                 a => $term,
 426                                 f => $occ
 427                             );
 428                         }
 429                         else {
 430                             $tmptitle = MARC::Field->new(
 431                                 '245', ' ', ' ',
 432                                 a => $term,
 433                                 b => $occ
 434                             );
 435                         }
 436                         $tmprecord->append_fields($tmptitle);
 437                         $results_hash->{'RECORDS'}[$j] =
 438                           $tmprecord->as_usmarc();
 439                     }
 440                     else {
 441                         $record = $results[ $i - 1 ]->record($j)->raw();
 442
 443                         #warn "RECORD $j:".$record;
 444                         $results_hash->{'RECORDS'}[$j] =
 445                           $record;    # making a reference to a hash
 446                                       # Fill the facets while we're looping
 447                         $facet_record = MARC::Record->new_from_usmarc($record);
 448
 449                         #warn $servers[$i-1].$facet_record->title();
 450                         for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 451                             if ( $facets->[$k] ) {
 452                                 my @fields;
 453                                 for my $tag ( @{ $facets->[$k]->{'tags'} } ) {
 454                                     push @fields, $facet_record->field($tag);
 455                                 }
 456                                 for my $field (@fields) {
 457                                     my @subfields = $field->subfields();
 458                                     for my $subfield (@subfields) {
 459                                         my ( $code, $data ) = @$subfield;
 460                                         if ( $code eq
 461                                             $facets->[$k]->{'subfield'} )
 462                                         {
 463                                             $facets_counter->{ $facets->[$k]
 464                                                   ->{'link_value'} }->{$data}++;
 465                                         }
 466                                     }
 467                                 }
 468                                 $facets_info->{ $facets->[$k]->{'link_value'} }
 469                                   ->{'label_value'} =
 470                                   $facets->[$k]->{'label_value'};
 471                                 $facets_info->{ $facets->[$k]->{'link_value'} }
 472                                   ->{'expanded'} = $facets->[$k]->{'expanded'};
 473                             }
 474                         }
 475                     }
 476                 }
 477                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 478             }
 479
 480             #print "connection ", $i-1, ": $size hits";
 481             #print $results[$i-1]->record(0)->render() if $size > 0;
 482             # BUILD FACETS
 483             for my $link_value (
 484                 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 485                 keys %$facets_counter
 486               )
 487             {
 488                 my $expandable;
 489                 my $number_of_facets;
 490                 my @this_facets_array;
 491                 for my $one_facet (
 492                     sort {
 493                         $facets_counter->{$link_value}
 494                           ->{$b} <=> $facets_counter->{$link_value}->{$a}
 495                     } keys %{ $facets_counter->{$link_value} }
 496                   )
 497                 {
 498                     $number_of_facets++;
 499                     if (   ( $number_of_facets < 6 )
 500                         || ( $expanded_facet eq $link_value )
 501                         || ( $facets_info->{$link_value}->{'expanded'} ) )
 502                     {
 503
 504                        # sanitize the link value ), ( will cause errors with CCL
 505                         my $facet_link_value = $one_facet;
 506                         $facet_link_value =~ s/(\(|\))/ /g;
 507
 508                         # fix the length that will display in the label
 509                         my $facet_label_value = $one_facet;
 510                         $facet_label_value = substr( $one_facet, 0, 20 ) . "..."
 511                           unless length($facet_label_value) <= 20;
 512
 513                        # well, if it's a branch, label by the name, not the code
 514                         if ( $link_value =~ /branch/ ) {
 515                             $facet_label_value =
 516                               $branches->{$one_facet}->{'branchname'};
 517                         }
 518
 519                  # but we're down with the whole label being in the link's title
 520                         my $facet_title_value = $one_facet;
 521
 522                         push @this_facets_array,
 523                           (
 524                             {
 525                                 facet_count =>
 526                                   $facets_counter->{$link_value}->{$one_facet},
 527                                 facet_label_value => $facet_label_value,
 528                                 facet_title_value => $facet_title_value,
 529                                 facet_link_value  => $facet_link_value,
 530                                 type_link_value   => $link_value,
 531                             },
 532                           );
 533                     }
 534                 }
 535                 unless ( $facets_info->{$link_value}->{'expanded'} ) {
 536                     $expandable = 1
 537                       if ( ( $number_of_facets > 6 )
 538                         && ( $expanded_facet ne $link_value ) );
 539                 }
 540                 push @facets_loop,
 541                   (
 542                     {
 543                         type_link_value => $link_value,
 544                         type_id         => $link_value . "_id",
 545                         type_label      =>
 546                           $facets_info->{$link_value}->{'label_value'},
 547                         facets     => \@this_facets_array,
 548                         expandable => $expandable,
 549                         expand     => $link_value,
 550                     }
 551                   );
 552             }
 553         }
 554     }
 555     return ( undef, $results_hashref, \@facets_loop );
 556 }
 557
 558 # STOPWORDS
 559 sub _remove_stopwords {
 560     my ($operand,$index) = @_;
 561     # phrase and exact-qualified indexes shoudln't have stopwords removed
 562     if ($index!~m/phr|ext/){
 563     # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 564     #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 565     #       otherwise, a french word like "leçon" woudl be split into "le" "çon", le
 566     #       is an empty word, we get "çon" and wouldn't find anything...
 567         foreach (keys %{C4::Context->stopwords}) {
 568             next if ($_ =~/(and|or|not)/); # don't remove operators
 569             $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i;
 570             $operand=~ s/^$_\P{IsAlpha}/ /i;
 571             $operand=~ s/\P{IsAlpha}$_$/ /i;
 572         }
 573     }
 574     return $operand;
 575 }
 576
 577 # TRUNCATION
 578 sub _detect_truncation {
 579     my ($operand,$index) = @_;
 580     my (@nontruncated,@righttruncated,@lefttruncated,@rightlefttruncated,@regexpr);
 581     $operand =~s/^ //g;
 582     my @wordlist= split (/\s/,$operand);
 583     foreach my $word (@wordlist){
 584         if ($word=~s/^\*([^\*]+)\*$/$1/){
 585             push @rightlefttruncated,$word;
 586         }
 587         elsif($word=~s/^\*([^\*]+)$/$1/){
 588             push @lefttruncated,$word;
 589         }
 590         elsif ($word=~s/^([^\*]+)\*$/$1/){
 591             push @righttruncated,$word;
 592         }
 593         elsif (index($word,"*")<0){
 594             push @nontruncated,$word;
 595         }
 596         else {
 597             push @regexpr,$word;
 598         }
 599     }
 600     return (\@nontruncated,\@righttruncated,\@lefttruncated,\@rightlefttruncated,\@regexpr);
 601 }
 602
 603 sub _build_stemmed_operand {
 604     my ($operand) = @_;
 605     my $stemmed_operand;
 606     # FIXME: the locale should be set based on the user's language and/or search choice
 607     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 608     # FIXME: these should be stored in the db so the librarian can modify the behavior
 609     $stemmer->add_exceptions(
 610             {
 611                 'and' => 'and',
 612                 'or'  => 'or',
 613                 'not' => 'not',
 614             }
 615
 616         );
 617     my @words = split( / /, $operand );
 618     my $stems = $stemmer->stem(@words);
 619     for my $stem (@$stems) {
 620             $stemmed_operand .= "$stem";
 621             $stemmed_operand .= "?" unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 622             $stemmed_operand .= " ";
 623     }
 624     #warn "STEMMED OPERAND: $stemmed_operand";
 625     return $stemmed_operand;
 626 }
 627
 628 sub _build_weighted_query {
 629     # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 630     # pretty well but will work much better when we have an actual query parser
 631     my ($operand,$stemmed_operand,$index) = @_;
 632     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 633     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 634     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
 635
 636     my $weighted_query .= "(rk=(";     # Specifies that we're applying rank
 637
 638     # Keyword, or, no index specified
 639     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 640         $weighted_query .= "Title-cover,ext,r1=\"$operand\"";       # exact title-cover
 641         $weighted_query .= " or ti,ext,r2=\"$operand\"";            # exact title
 642         $weighted_query .= " or ti,phr,r3=\"$operand\"";            # phrase title
 643        #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 644        #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 645         $weighted_query .= " or wrd,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy, word list
 646         $weighted_query .= " or wrd,right-Truncation,r9=\"$stemmed_operand\"" if ($stemming and $stemmed_operand); # add stemming, right truncation
 647        # embedded sorting: 0 a-z; 1 z-a
 648        # $weighted_query .= ") or (sort1,aut=1";
 649     }
 650     # if the index already has more than one qualifier, just wrap the operand
 651     # in quotes and pass it back
 652     elsif ($index =~ ',') {
 653         $weighted_query .=" $index=\"$operand\"";
 654     }
 655     #TODO: build better cases based on specific search indexes
 656     else {
 657        $weighted_query .= " $index,ext,r1=\"$operand\"";            # exact index
 658        #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 659        $weighted_query .= " or $index,phr,r3=\"$operand\"";         # phrase index
 660        $weighted_query .= " or $index,rt,wrd,r3=\"$operand\"";      # word list index
 661     }
 662     $weighted_query .= "))";    # close rank specification
 663     return $weighted_query;
 664 }
 665
 666 # build the query itself
 667 sub buildQuery {
 668     my ( $operators, $operands, $indexes, $limits, $sort_by ) = @_;
 669
 670     my @operators = @$operators if $operators;
 671     my @indexes   = @$indexes   if $indexes;
 672     my @operands  = @$operands  if $operands;
 673     my @limits    = @$limits    if $limits;
 674     my @sort_by   = @$sort_by   if $sort_by;
 675
 676     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 677
 678     # only turn on field weighting in simple searches
 679     my $weight_fields;
 680    # if (@operands==1) {
 681         $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 682     #}
 683     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
 684
 685     my $human_search_desc;      # a human-readable query
 686     my $machine_search_desc;    #a machine-readable query
 687
 688     my $query = $operands[0];
 689 # STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
 690 # pass it off to zebra directly)
 691
 692 # check if this is a known query language query, if it is, return immediately,
 693 # the user is responsible for constructing valid syntax:
 694     if ( $query =~ /^ccl=/ ) {
 695         return ( undef, $', $', $', 'ccl' );
 696     }
 697     if ( $query =~ /^cql=/ ) {
 698         return ( undef, $', $', $', 'cql' );
 699     }
 700     if ( $query =~ /^pqf=/ ) {
 701         return ( undef, $', $', $', 'pqf' );
 702     }
 703     if ( $query =~ /(\(|\)|:|=)/ ) {    # sorry, too complex, assume CCL
 704         return ( undef, $query, $query, $query, 'ccl' );
 705     }
 706
 707 # form-based queries are limited to non-nested at a specific depth, so we can easily
 708 # modify the incoming query operands and indexes to do stemming and field weighting
 709 # Once we do so, we'll end up with a value in $query, just like if we had an
 710 # incoming $query from the user
 711     else {
 712         $query = ""; # clear it out so we can populate properly with field-weighted stemmed query
 713         my $previous_operand;    # a flag used to keep track if there was a previous query
 714                                 # if there was, we can apply the current operator
 715         # for every operand
 716         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 717
 718             # COMBINE OPERANDS, INDEXES AND OPERATORS
 719             if ( $operands[$i] ) {
 720                 my $operand = $operands[$i];
 721                 my $index   = $indexes[$i];
 722
 723                 # if there's no index, don't use one, it will throw a CCL error
 724                 my $index_plus = "$index:" if $index;
 725                 my $index_plus_comma="$index," if $index;
 726
 727                 # Remove Stopwords
 728                 $operand = _remove_stopwords($operand,$index);
 729                 warn "OPERAND w/out STOPWORDS: >$operand<";
 730
 731                 my $indexes_set;
 732
 733                 # Detect Truncation
 734                 my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr);
 735                 my $truncated_operand;
 736                 ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index);
 737                 warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<";
 738                 # Apply Truncation
 739                 # Problem is when build_weights gets ahold if this is wraps in quotes which breaks the truncation :/
 740                 if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
 741                     $indexes_set = 1;
 742                     undef $weight_fields;
 743                     my $previous_truncation_operand;
 744                     if (scalar(@$nontruncated)>0) {
 745                         $truncated_operand.= "$index_plus @$nontruncated ";
 746                         $previous_truncation_operand = 1;
 747                     }
 748                     if (scalar(@$righttruncated)>0){
 749                         $truncated_operand .= "and " if $previous_truncation_operand;
 750                         $truncated_operand .= "$index_plus_comma"."rtrn:@$righttruncated ";
 751                         $previous_truncation_operand = 1;
 752                     }
 753                     if (scalar(@$lefttruncated)>0){
 754                         $truncated_operand .= "and " if $previous_truncation_operand;
 755                         $truncated_operand .= "$index_plus_comma"."ltrn:@$lefttruncated ";
 756                         $previous_truncation_operand = 1;
 757                     }
 758                     if (scalar(@$rightlefttruncated)>0){
 759                         $truncated_operand .= "and " if $previous_truncation_operand;
 760                         $truncated_operand .= "$index_plus_comma"."rltrn:@$rightlefttruncated ";
 761                         $previous_truncation_operand = 1;
 762                     }
 763                 }
 764                 $operand = $truncated_operand if $truncated_operand;
 765                 warn "TRUNCATED OPERAND: >$truncated_operand<";
 766
 767                 # Handle Stemming
 768                 my $stemmed_operand;
 769                 $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
 770                 warn "STEMMED OPERAND: >$stemmed_operand<";
 771
 772                 # Handle Field Weighting
 773                 my $weighted_operand;
 774                 $weighted_operand = _build_weighted_query($operand,$stemmed_operand,$index) if $weight_fields;
 775                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<";
 776                 $operand = $weighted_operand if $weight_fields;
 777                 $indexes_set = 1 if $weight_fields;
 778
 779                 # If there's a previous operand, we need to add an operator
 780                 if ($previous_operand) {
 781
 782                     # user-specified operator
 783                     if ( $operators[$i-1] ) {
 784                         $human_search_desc .=" $operators[$i-1] $index_plus $operands[$i]";
 785                         $query .= " $operators[$i-1] ";
 786                         $query .= " $index_plus " unless $indexes_set;
 787                         $query .= " $operand";
 788                     }
 789
 790                     # the default operator is and
 791                     else {
 792                         $query .= " and ";
 793                         $query .= "$index_plus " unless $indexes_set;
 794                         $query .= "$operand";
 795                         $human_search_desc .= " and $index_plus $operands[$i]";
 796                     }
 797                 }
 798
 799                 # There's no previous operand - FIXME: completely ignoring our $query, no field weighting, no stemming
 800                 # FIXME: also, doesn't preserve original order
 801                 else {
 802                     # if there are terms to fit with truncation
 803 #                    if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){
 804  #                       # add the non-truncated ones first
 805   #                      $query.= "$index_plus @$nontruncated " if (scalar(@$nontruncated)>0);
 806    #                     if (scalar(@$righttruncated)>0){
 807     #                        $query .= "and $index_plus_comma"."rtrn:@$righttruncated ";
 808      #                   }
 809       #                  if (scalar(@$lefttruncated)>0){
 810        #                     $query .= "and $index_plus_comma"."ltrn:@$lefttruncated ";
 811         #                }
 812          #               if (scalar(@$rightlefttruncated)>0){
 813           #                  $query .= "and $index_plus_comma"."rltrn:@$rightlefttruncated ";
 814            #             }
 815             #            $human_search_desc .= $query;
 816              #       } else {
 817                         # field-weighted queries already have indexes set
 818                         $query.=" $index_plus " unless $indexes_set;
 819                         $query             .= $operand;
 820                         $human_search_desc .= " $index_plus $operands[$i]";
 821               #      }
 822                     $previous_operand = 1;
 823                 }
 824             }    #/if $operands
 825         }    # /for
 826     }
 827     warn "QUERY BEFORE LIMITS: >$query<";
 828     # add limits
 829     my $limit_query;
 830     my $limit_search_desc;
 831     foreach my $limit (@limits) {
 832
 833         # FIXME: not quite right yet ... will work on this soon -- JF
 834         my $type = $1 if $limit =~ m/([^:]+):([^:]*)/;
 835         if ( $limit =~ /available/ ) {
 836             $limit_query .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))";
 837             #$limit_search_desc.=" and available";
 838         }
 839         elsif ( ($limit_query) && ( index( $limit_query, $type, 0 ) > 0 ) ) {
 840             if ( $limit_query !~ /\(/ ) {
 841                 $limit_query =
 842                     substr( $limit_query, 0, index( $limit_query, $type, 0 ) )
 843                   . "("
 844                   . substr( $limit_query, index( $limit_query, $type, 0 ) )
 845                   . " or $limit )"
 846                   if $limit;
 847                 $limit_search_desc =
 848                   substr( $limit_search_desc, 0,
 849                     index( $limit_search_desc, $type, 0 ) )
 850                   . "("
 851                   . substr( $limit_search_desc,
 852                     index( $limit_search_desc, $type, 0 ) )
 853                   . " or $limit )"
 854                   if $limit;
 855             }
 856             else {
 857                 chop $limit_query;
 858                 chop $limit_search_desc;
 859                 $limit_query       .= " or $limit )" if $limit;
 860                 $limit_search_desc .= " or $limit )" if $limit;
 861             }
 862         }
 863         elsif ( ($limit_query) && ( $limit =~ /mc/ ) ) {
 864             $limit_query       .= " or $limit" if $limit;
 865             $limit_search_desc .= " or $limit" if $limit;
 866         }
 867
 868         # these are treated as AND
 869         elsif ($limit_query) {
 870            if ($limit =~ /branch/){
 871                 $limit_query       .= " ) and ( $limit" if $limit;
 872             $limit_search_desc .= " ) and ( $limit" if $limit;
 873         }else{
 874             $limit_query       .= " or $limit" if $limit;
 875                     $limit_search_desc .= " or $limit" if $limit;
 876         }
 877         }
 878
 879         # otherwise, there is nothing but the limit
 880         else {
 881             $limit_query       .= "$limit" if $limit;
 882             $limit_search_desc .= "$limit" if $limit;
 883         }
 884     }
 885
 886     # if there's also a query, we need to AND the limits to it
 887     if ( ($limit_query) && ($query) ) {
 888         $limit_query       = " and (" . $limit_query . ")";
 889         $limit_search_desc = " and ($limit_search_desc)" if $limit_search_desc;
 890
 891     }
 892     #warn "LIMIT: $limit_query";
 893     $query             .= $limit_query;
 894     $human_search_desc .= $limit_search_desc;
 895
 896     # now normalize the strings
 897     $query =~ s/  / /g;    # remove extra spaces
 898     $query =~ s/^ //g;     # remove any beginning spaces
 899     $query =~ s/:/=/g;     # causes probs for server
 900     $query =~ s/==/=/g;    # remove double == from query
 901
 902     my $federated_query = $human_search_desc;
 903     $federated_query =~ s/  / /g;
 904     $federated_query =~ s/^ //g;
 905     $federated_query =~ s/:/=/g;
 906     my $federated_query_opensearch = $federated_query;
 907
 908 #     my $federated_query_RPN = new ZOOM::Query::CCL2RPN( $query , C4::Context->ZConn('biblioserver'));
 909
 910     $human_search_desc =~ s/  / /g;
 911     $human_search_desc =~ s/^ //g;
 912     my $koha_query = $query;
 913
 914     #warn "QUERY:".$koha_query;
 915     #warn "SEARCHDESC:".$human_search_desc;
 916     #warn "FEDERATED QUERY:".$federated_query;
 917     return ( undef, $human_search_desc, $koha_query, $federated_query );
 918 }
 919
 920 # IMO this subroutine is pretty messy still -- it's responsible for
 921 # building the HTML output for the template
 922 sub searchResults {
 923     my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
 924
 925     my $dbh = C4::Context->dbh;
 926     my $toggle;
 927     my $even = 1;
 928     my @newresults;
 929     my $span_terms_hashref;
 930     for my $span_term ( split( / /, $searchdesc ) ) {
 931         $span_term =~ s/(.*=|\)|\(|\+|\.)//g;
 932         $span_terms_hashref->{$span_term}++;
 933     }
 934
 935     #Build brancnames hash
 936     #find branchname
 937     #get branch information.....
 938     my %branches;
 939     my $bsth =
 940       $dbh->prepare("SELECT branchcode,branchname FROM branches")
 941       ;    # FIXME : use C4::Koha::GetBranches
 942     $bsth->execute();
 943     while ( my $bdata = $bsth->fetchrow_hashref ) {
 944         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
 945     }
 946
 947     #Build itemtype hash
 948     #find itemtype & itemtype image
 949     my %itemtypes;
 950     $bsth =
 951       $dbh->prepare("SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes");
 952     $bsth->execute();
 953     while ( my $bdata = $bsth->fetchrow_hashref ) {
 954         $itemtypes{ $bdata->{'itemtype'} }->{description} =
 955           $bdata->{'description'};
 956         $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
 957         $itemtypes{ $bdata->{'itemtype'} }->{summary} = $bdata->{'summary'};
 958         $itemtypes{ $bdata->{'itemtype'} }->{notforloan} = $bdata->{'notforloan'};
 959     }
 960
 961     #search item field code
 962     my $sth =
 963       $dbh->prepare(
 964 "select tagfield from marc_subfield_structure where kohafield like 'items.itemnumber'"
 965       );
 966     $sth->execute;
 967     my ($itemtag) = $sth->fetchrow;
 968
 969     ## find column names of items related to MARC
 970     my $sth2 = $dbh->prepare("SHOW COLUMNS from items");
 971     $sth2->execute;
 972     my %subfieldstosearch;
 973     while ( ( my $column ) = $sth2->fetchrow ) {
 974         my ( $tagfield, $tagsubfield ) =
 975           &GetMarcFromKohaField( "items." . $column, "" );
 976         $subfieldstosearch{$column} = $tagsubfield;
 977     }
 978     my $times;
 979
 980     if ( $hits && $offset + $results_per_page <= $hits ) {
 981         $times = $offset + $results_per_page;
 982     }
 983     else {
 984         $times = $hits;
 985     }
 986
 987     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
 988         my $marcrecord;
 989         $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
 990         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
 991         # add image url if there is one
 992         if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
 993             $oldbiblio->{imageurl} =
 994               $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
 995             $oldbiblio->{description} =
 996               $itemtypes{ $oldbiblio->{itemtype} }->{description};
 997         }
 998         else {
 999             $oldbiblio->{imageurl} =
1000               getitemtypeimagesrc() . "/"
1001               . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1002               if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1003             $oldbiblio->{description} =
1004               $itemtypes{ $oldbiblio->{itemtype} }->{description};
1005         }
1006         #
1007         # build summary if there is one (the summary is defined in itemtypes table
1008         #
1009         if ($itemtypes{ $oldbiblio->{itemtype} }->{summary}) {
1010             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1011             my @fields = $marcrecord->fields();
1012             foreach my $field (@fields) {
1013                 my $tag = $field->tag();
1014                 my $tagvalue = $field->as_string();
1015                 $summary =~ s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1016                 unless ($tag<10) {
1017                     my @subf = $field->subfields;
1018                     for my $i (0..$#subf) {
1019                         my $subfieldcode = $subf[$i][0];
1020                         my $subfieldvalue = $subf[$i][1];
1021                         my $tagsubf = $tag.$subfieldcode;
1022                         $summary =~ s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1023                     }
1024                 }
1025             }
1026             $summary =~ s/\[(.*?)]//g;
1027             $summary =~ s/\n/<br>/g;
1028             $oldbiblio->{summary} = $summary;
1029         }
1030         # add spans to search term in results for search term highlighting
1031         foreach my $term ( keys %$span_terms_hashref ) {
1032             my $old_term = $term;
1033             if ( length($term) > 3 ) {
1034                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\])//g;
1035                 $term =~ s/\\//g;
1036                 $term =~ s/\*//g;
1037
1038                 #FIXME: is there a better way to do this?
1039                 $oldbiblio->{'title'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1040                 $oldbiblio->{'subtitle'} =~
1041                   s/$term/<span class=\"term\">$&<\/span>/gi;
1042
1043                 $oldbiblio->{'author'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1044                 $oldbiblio->{'publishercode'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1045                 $oldbiblio->{'place'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1046                 $oldbiblio->{'pages'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1047                 $oldbiblio->{'notes'} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1048                 $oldbiblio->{'size'}  =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1049             }
1050         }
1051
1052         if ( $i % 2 ) {
1053             $toggle = "#ffffcc";
1054         }
1055         else {
1056             $toggle = "white";
1057         }
1058         $oldbiblio->{'toggle'} = $toggle;
1059         my @fields = $marcrecord->field($itemtag);
1060         my @items_loop;
1061         my $items;
1062         my $ordered_count     = 0;
1063         my $onloan_count      = 0;
1064         my $wthdrawn_count    = 0;
1065         my $itemlost_count    = 0;
1066         my $norequests        = 1;
1067
1068         #
1069         # check the loan status of the item :
1070         # it is not stored in the MARC record, for pref (zebra reindexing)
1071         # reason. Thus, we have to get the status from a specific SQL query
1072         #
1073         my $sth_issue = $dbh->prepare("
1074             SELECT date_due,returndate
1075             FROM issues
1076             WHERE itemnumber=? AND returndate IS NULL");
1077         my $items_count=scalar(@fields);
1078         foreach my $field (@fields) {
1079             my $item;
1080             foreach my $code ( keys %subfieldstosearch ) {
1081                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1082             }
1083             $sth_issue->execute($item->{itemnumber});
1084             $item->{due_date} = format_date($sth_issue->fetchrow);
1085             $item->{onloan} = 1 if $item->{due_date};
1086             # at least one item can be reserved : suppose no
1087             $norequests = 1;
1088             if ( $item->{wthdrawn} ) {
1089                 $wthdrawn_count++;
1090                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{unavailable}=1;
1091                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{wthdrawn}=1;
1092             }
1093             elsif ( $item->{itemlost} ) {
1094                 $itemlost_count++;
1095                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{unavailable}=1;
1096                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{itemlost}=1;
1097             }
1098             unless ( $item->{notforloan}) {
1099                 # OK, this one can be issued, so at least one can be reserved
1100                 $norequests = 0;
1101             }
1102             if ( ( $item->{onloan} ) && ( $item->{onloan} != '0000-00-00' ) )
1103             {
1104                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{unavailable}=1;
1105                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{onloancount} = 1;
1106                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{due_date} = $item->{due_date};
1107                 $onloan_count++;
1108             }
1109             if ( $item->{'homebranch'} ) {
1110                 $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{count}++;
1111             }
1112
1113             # Last resort
1114             elsif ( $item->{'holdingbranch'} ) {
1115                 $items->{ $item->{'holdingbranch'} }->{count}++;
1116             }
1117             $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{itemcallnumber} =                $item->{itemcallnumber};
1118             $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{location} =                $item->{location};
1119             $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{branchcode} =               $item->{homebranch};
1120         }    # notforloan, item level and biblioitem level
1121
1122         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1123         $norequests = 1 if $itemtypes{$oldbiblio->{itemtype}}->{notforloan};
1124
1125         for my $key ( sort keys %$items ) {
1126             my $this_item = {
1127                 branchname     => $branches{$items->{$key}->{branchcode}},
1128                 branchcode     => $items->{$key}->{branchcode},
1129                 count          => $items->{$key}->{count},
1130                 itemcallnumber => $items->{$key}->{itemcallnumber},
1131                 location => $items->{$key}->{location},
1132                 onloancount      => $items->{$key}->{onloancount},
1133                 due_date         => $items->{$key}->{due_date},
1134                 wthdrawn      => $items->{$key}->{wthdrawn},
1135                 lost         => $items->{$key}->{itemlost},
1136             };
1137             push @items_loop, $this_item;
1138         }
1139         $oldbiblio->{norequests}    = $norequests;
1140         $oldbiblio->{items_count}    = $items_count;
1141         $oldbiblio->{items_loop}    = \@items_loop;
1142         $oldbiblio->{onloancount}   = $onloan_count;
1143         $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1144         $oldbiblio->{itemlostcount} = $itemlost_count;
1145         $oldbiblio->{orderedcount}  = $ordered_count;
1146         $oldbiblio->{isbn}          =~ s/-//g; # deleting - in isbn to enable amazon content
1147         push( @newresults, $oldbiblio );
1148     }
1149     return @newresults;
1150 }
1151
1152
1153
1154 #----------------------------------------------------------------------
1155 #
1156 # Non-Zebra GetRecords#
1157 #----------------------------------------------------------------------
1158
1159 =head2 NZgetRecords
1160
1161   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1162
1163 =cut
1164
1165 sub NZgetRecords {
1166     my (
1167         $koha_query,     $federated_query,  $sort_by_ref,
1168         $servers_ref,    $results_per_page, $offset,
1169         $expanded_facet, $branches,         $query_type,
1170         $scan
1171     ) = @_;
1172     my $result = NZanalyse($koha_query);
1173     return (undef,NZorder($result,@$sort_by_ref[0],$results_per_page,$offset),undef);
1174 }
1175
1176 =head2 NZanalyse
1177
1178   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1179   the list is builded from inverted index in nozebra SQL table
1180   note that title is here only for convenience : the sorting will be very fast when requested on title
1181   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1182
1183 =cut
1184
1185 sub NZanalyse {
1186     my ($string,$server) = @_;
1187     # $server contains biblioserver or authorities, depending on what we search on.
1188     #warn "querying : $string on $server";
1189     $server='biblioserver' unless $server;
1190     # if we have a ", replace the content to discard temporarily any and/or/not inside
1191     my $commacontent;
1192     if ($string =~/"/) {
1193         $string =~ s/"(.*?)"/__X__/;
1194         $commacontent = $1;
1195 #         print "commacontent : $commacontent\n";
1196     }
1197     # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1198     # then, call again NZanalyse with $left and $right
1199     # (recursive until we find a leaf (=> something without and/or/not)
1200     $string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/;
1201     my $left = $1;
1202     my $right = $3;
1203     my $operand = lc($2);
1204     # it's not a leaf, we have a and/or/not
1205     if ($operand) {
1206         # reintroduce comma content if needed
1207         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1208         $left =~ s/__X__/"$commacontent"/ if $commacontent;
1209 #         warn "node : $left / $operand / $right\n";
1210         my $leftresult = NZanalyse($left,$server);
1211         my $rightresult = NZanalyse($right,$server);
1212         # OK, we have the results for right and left part of the query
1213         # depending of operand, intersect, union or exclude both lists
1214         # to get a result list
1215         if ($operand eq ' and ') {
1216             my @leftresult = split /;/, $leftresult;
1217 #             my @rightresult = split /;/,$leftresult;
1218             my $finalresult;
1219             # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1220             # the result is stored twice, to have the same weight for AND than OR.
1221             # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1222             # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1223             foreach (@leftresult) {
1224                 if ($rightresult =~ "$_;") {
1225                     $finalresult .= "$_;$_;";
1226                 }
1227             }
1228             return $finalresult;
1229         } elsif ($operand eq ' or ') {
1230             # just merge the 2 strings
1231             return $leftresult.$rightresult;
1232         } elsif ($operand eq ' not ') {
1233             my @leftresult = split /;/, $leftresult;
1234 #             my @rightresult = split /;/,$leftresult;
1235             my $finalresult;
1236             foreach (@leftresult) {
1237                 unless ($rightresult =~ "$_;") {
1238                     $finalresult .= "$_;";
1239                 }
1240             }
1241             return $finalresult;
1242         } else {
1243             # this error is impossible, because of the regexp that isolate the operand, but just in case...
1244             die "error : operand unknown : $operand for $string";
1245         }
1246     # it's a leaf, do the real SQL query and return the result
1247     } else {
1248         $string =~  s/__X__/"$commacontent"/ if $commacontent;
1249         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1250 #         warn "leaf : $string\n";
1251         # parse the string in in operator/operand/value again
1252         $string =~ /(.*)(>=|<=)(.*)/;
1253         my $left = $1;
1254         my $operator = $2;
1255         my $right = $3;
1256         unless ($operator) {
1257             $string =~ /(.*)(>|<|=)(.*)/;
1258             $left = $1;
1259             $operator = $2;
1260             $right = $3;
1261         }
1262         my $results;
1263         # automatic replace for short operators
1264         $left='title' if $left =~ '^ti';
1265         $left='author' if $left =~ '^au';
1266         $left='publisher' if $left =~ '^pb';
1267         $left='subject' if $left =~ '^su';
1268         $left='koha-Auth-Number' if $left =~ '^an';
1269         $left='keyword' if $left =~ '^kw';
1270         if ($operator) {
1271             #do a specific search
1272             my $dbh = C4::Context->dbh;
1273             $operator='LIKE' if $operator eq '=' and $right=~ /%/;
1274             my $sth = $dbh->prepare("SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?");
1275             warn "$left / $operator / $right\n";
1276             # split each word, query the DB and build the biblionumbers result
1277             foreach (split / /,$right) {
1278                 my ($biblionumbers,$value);
1279                 next unless $_;
1280                 warn "EXECUTE : $server, $left, $_";
1281                 $sth->execute($server, $left, $_);
1282                 while (my ($line,$value) = $sth->fetchrow) {
1283                     # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1284                     # otherwise, fill the result
1285                     $biblionumbers .= $line unless ($right =~ /\d/ && $value =~ /\D/);
1286                     warn "result : $value ". ($right =~ /\d/) . "==".(!$value =~ /\d/) ;#= $line";
1287                 }
1288                 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1289                 if ($results) {
1290                     my @leftresult = split /;/, $biblionumbers;
1291                     my $temp;
1292                     foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
1293                         # remove weight at the end
1294                         my $cleaned = $entry;
1295                         $cleaned =~ s/-\d*$//;
1296                         # if the entry already in the hash, take it & increase weight
1297 #                         warn "===== $cleaned =====";
1298                         if ($results =~ "$cleaned") {
1299                             $temp .= "$entry;$entry;";
1300 #                             warn "INCLUDING $entry";
1301                         }
1302                     }
1303                     $results = $temp;
1304                 } else {
1305                     $results = $biblionumbers;
1306                 }
1307             }
1308         } else {
1309             #do a complete search (all indexes)
1310             my $dbh = C4::Context->dbh;
1311             my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?");
1312             # split each word, query the DB and build the biblionumbers result
1313             foreach (split / /,$string) {
1314                 next if C4::Context->stopwords->{uc($_)}; # skip if stopword
1315                 #warn "search on all indexes on $_";
1316                 my $biblionumbers;
1317                 next unless $_;
1318                 $sth->execute($server, $_);
1319                 while (my $line = $sth->fetchrow) {
1320                     $biblionumbers .= $line;
1321                 }
1322                 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1323                 if ($results) {
1324 #                 warn "RES for $_ = $biblionumbers";
1325                     my @leftresult = split /;/, $biblionumbers;
1326                     my $temp;
1327                     foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
1328                         # remove weight at the end
1329                         my $cleaned = $entry;
1330                         $cleaned =~ s/-\d*$//;
1331                         # if the entry already in the hash, take it & increase weight
1332 #                         warn "===== $cleaned =====";
1333                         if ($results =~ "$cleaned") {
1334                             $temp .= "$entry;$entry;";
1335 #                             warn "INCLUDING $entry";
1336                         }
1337                     }
1338                     $results = $temp;
1339                 } else {
1340 #                 warn "NEW RES for $_ = $biblionumbers";
1341                     $results = $biblionumbers;
1342                 }
1343             }
1344         }
1345 #         warn "return : $results for LEAF : $string";
1346         return $results;
1347     }
1348 }
1349
1350 =head2 NZorder
1351
1352   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1353
1354   TODO :: Description
1355
1356 =cut
1357
1358
1359 sub NZorder {
1360     my ($biblionumbers, $ordering,$results_per_page,$offset) = @_;
1361     # order title asc by default
1362 #     $ordering = '1=36 <i' unless $ordering;
1363     $results_per_page=20 unless $results_per_page;
1364     $offset = 0 unless $offset;
1365     my $dbh = C4::Context->dbh;
1366     #
1367     # order by POPULARITY
1368     #
1369     if ($ordering =~ /popularity/) {
1370         my %result;
1371         my %popularity;
1372         # popularity is not in MARC record, it's builded from a specific query
1373         my $sth = $dbh->prepare("select sum(issues) from items where biblionumber=?");
1374         foreach (split /;/,$biblionumbers) {
1375             my ($biblionumber,$title) = split /,/,$_;
1376             $result{$biblionumber}=GetMarcBiblio($biblionumber);
1377             $sth->execute($biblionumber);
1378             my $popularity= $sth->fetchrow ||0;
1379             # hint : the key is popularity.title because we can have
1380             # many results with the same popularity. In this cas, sub-ordering is done by title
1381             # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1382             # (un-frequent, I agree, but we won't forget anything that way ;-)
1383             $popularity{sprintf("%10d",$popularity).$title.$biblionumber} = $biblionumber;
1384         }
1385         # sort the hash and return the same structure as GetRecords (Zebra querying)
1386         my $result_hash;
1387         my $numbers=0;
1388         if ($ordering eq 'popularity_dsc') { # sort popularity DESC
1389             foreach my $key (sort {$b cmp $a} (keys %popularity)) {
1390                 $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc();
1391             }
1392         } else { # sort popularity ASC
1393             foreach my $key (sort (keys %popularity)) {
1394                 $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc();
1395             }
1396         }
1397         my $finalresult=();
1398         $result_hash->{'hits'} = $numbers;
1399         $finalresult->{'biblioserver'} = $result_hash;
1400         return $finalresult;
1401     #
1402     # ORDER BY author
1403     #
1404     } elsif ($ordering =~/author/){
1405         my %result;
1406         foreach (split /;/,$biblionumbers) {
1407             my ($biblionumber,$title) = split /,/,$_;
1408             my $record=GetMarcBiblio($biblionumber);
1409             my $author;
1410             if (C4::Context->preference('marcflavour') eq 'UNIMARC') {
1411                 $author=$record->subfield('200','f');
1412                 $author=$record->subfield('700','a') unless $author;
1413             } else {
1414                 $author=$record->subfield('100','a');
1415             }
1416             # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1417             # and we don't want to get only 1 result for each of them !!!
1418             $result{$author.$biblionumber}=$record;
1419         }
1420         # sort the hash and return the same structure as GetRecords (Zebra querying)
1421         my $result_hash;
1422         my $numbers=0;
1423         if ($ordering eq 'author_za') { # sort by author desc
1424             foreach my $key (sort { $b cmp $a } (keys %result)) {
1425                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1426             }
1427         } else { # sort by author ASC
1428             foreach my $key (sort (keys %result)) {
1429                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1430             }
1431         }
1432         my $finalresult=();
1433         $result_hash->{'hits'} = $numbers;
1434         $finalresult->{'biblioserver'} = $result_hash;
1435         return $finalresult;
1436     #
1437     # ORDER BY callnumber
1438     #
1439     } elsif ($ordering =~/callnumber/){
1440         my %result;
1441         foreach (split /;/,$biblionumbers) {
1442             my ($biblionumber,$title) = split /,/,$_;
1443             my $record=GetMarcBiblio($biblionumber);
1444             my $callnumber;
1445             my ($callnumber_tag,$callnumber_subfield)=GetMarcFromKohaField($dbh,'items.itemcallnumber');
1446             ($callnumber_tag,$callnumber_subfield)= GetMarcFromKohaField('biblioitems.callnumber') unless $callnumber_tag;
1447             if (C4::Context->preference('marcflavour') eq 'UNIMARC') {
1448                 $callnumber=$record->subfield('200','f');
1449             } else {
1450                 $callnumber=$record->subfield('100','a');
1451             }
1452             # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1453             # and we don't want to get only 1 result for each of them !!!
1454             $result{$callnumber.$biblionumber}=$record;
1455         }
1456         # sort the hash and return the same structure as GetRecords (Zebra querying)
1457         my $result_hash;
1458         my $numbers=0;
1459         if ($ordering eq 'call_number_dsc') { # sort by title desc
1460             foreach my $key (sort { $b cmp $a } (keys %result)) {
1461                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1462             }
1463         } else { # sort by title ASC
1464             foreach my $key (sort { $a cmp $b } (keys %result)) {
1465                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1466             }
1467         }
1468         my $finalresult=();
1469         $result_hash->{'hits'} = $numbers;
1470         $finalresult->{'biblioserver'} = $result_hash;
1471         return $finalresult;
1472     } elsif ($ordering =~ /pubdate/){ #pub year
1473         my %result;
1474         foreach (split /;/,$biblionumbers) {
1475             my ($biblionumber,$title) = split /,/,$_;
1476             my $record=GetMarcBiblio($biblionumber);
1477             my ($publicationyear_tag,$publicationyear_subfield)=GetMarcFromKohaField('biblioitems.publicationyear','');
1478             my $publicationyear=$record->subfield($publicationyear_tag,$publicationyear_subfield);
1479             # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1480             # and we don't want to get only 1 result for each of them !!!
1481             $result{$publicationyear.$biblionumber}=$record;
1482         }
1483         # sort the hash and return the same structure as GetRecords (Zebra querying)
1484         my $result_hash;
1485         my $numbers=0;
1486         if ($ordering eq 'pubdate_dsc') { # sort by pubyear desc
1487             foreach my $key (sort { $b cmp $a } (keys %result)) {
1488                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1489             }
1490         } else { # sort by pub year ASC
1491             foreach my $key (sort (keys %result)) {
1492                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc();
1493             }
1494         }
1495         my $finalresult=();
1496         $result_hash->{'hits'} = $numbers;
1497         $finalresult->{'biblioserver'} = $result_hash;
1498         return $finalresult;
1499     #
1500     # ORDER BY title
1501     #
1502     } elsif ($ordering =~ /title/) {
1503         # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
1504         my %result;
1505         foreach (split /;/,$biblionumbers) {
1506             my ($biblionumber,$title) = split /,/,$_;
1507             # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1508             # and we don't want to get only 1 result for each of them !!!
1509             # hint & speed improvement : we can order without reading the record
1510             # so order, and read records only for the requested page !
1511             $result{$title.$biblionumber}=$biblionumber;
1512         }
1513         # sort the hash and return the same structure as GetRecords (Zebra querying)
1514         my $result_hash;
1515         my $numbers=0;
1516         if ($ordering eq 'title_az') { # sort by title desc
1517             foreach my $key (sort (keys %result)) {
1518                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
1519             }
1520         } else { # sort by title ASC
1521             foreach my $key (sort { $b cmp $a } (keys %result)) {
1522                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
1523             }
1524         }
1525         # limit the $results_per_page to result size if it's more
1526         $results_per_page = $numbers-1 if $numbers < $results_per_page;
1527         # for the requested page, replace biblionumber by the complete record
1528         # speed improvement : avoid reading too much things
1529         for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) {
1530             $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc;
1531         }
1532         my $finalresult=();
1533         $result_hash->{'hits'} = $numbers;
1534         $finalresult->{'biblioserver'} = $result_hash;
1535         return $finalresult;
1536     } else {
1537     #
1538     # order by ranking
1539     #
1540         # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
1541         my %result;
1542         my %count_ranking;
1543         foreach (split /;/,$biblionumbers) {
1544             my ($biblionumber,$title) = split /,/,$_;
1545             $title =~ /(.*)-(\d)/;
1546             # get weight
1547             my $ranking =$2;
1548             # note that we + the ranking because ranking is calculated on weight of EACH term requested.
1549             # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
1550             # biblio N has ranking = 6
1551             $count_ranking{$biblionumber} += $ranking;
1552         }
1553         # build the result by "inverting" the count_ranking hash
1554         # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
1555 #         warn "counting";
1556         foreach (keys %count_ranking) {
1557             $result{sprintf("%10d",$count_ranking{$_}).'-'.$_} = $_;
1558         }
1559         # sort the hash and return the same structure as GetRecords (Zebra querying)
1560         my $result_hash;
1561         my $numbers=0;
1562             foreach my $key (sort {$b cmp $a} (keys %result)) {
1563                 $result_hash->{'RECORDS'}[$numbers++] = $result{$key};
1564             }
1565         # limit the $results_per_page to result size if it's more
1566         $results_per_page = $numbers-1 if $numbers < $results_per_page;
1567         # for the requested page, replace biblionumber by the complete record
1568         # speed improvement : avoid reading too much things
1569         for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) {
1570             $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc if $result_hash->{'RECORDS'}[$counter];
1571         }
1572         my $finalresult=();
1573         $result_hash->{'hits'} = $numbers;
1574         $finalresult->{'biblioserver'} = $result_hash;
1575         return $finalresult;
1576     }
1577 }
1578 =head2 ModBiblios
1579
1580 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
1581
1582 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
1583 test parameter if set donot perform change to records in database.
1584
1585 =over 2
1586
1587 =item C<input arg:>
1588
1589     * $listbiblios is an array ref to marcrecords to be changed
1590     * $tagsubfield is the reference of the subfield to change.
1591     * $initvalue is the value to search the record for
1592     * $targetvalue is the value to set the subfield to
1593     * $test is to be set only not to perform changes in database.
1594
1595 =item C<Output arg:>
1596     * $countchanged counts all the changes performed.
1597     * $listunchanged contains the list of all the biblionumbers of records unchanged.
1598
1599 =item C<usage in the script:>
1600
1601 =back
1602
1603 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
1604 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
1605 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
1606
1607 =cut
1608
1609 sub ModBiblios{
1610   my ($listbiblios,$tagsubfield,$initvalue,$targetvalue,$test)=@_;
1611   my $countmatched;
1612   my @unmatched;
1613   my ($tag,$subfield)=($1,$2) if ($tagsubfield=~/^(\d{1,3})([a-z0-9A-Z@])?$/);
1614   if ((length($tag)<3)&& $subfield=~/0-9/){
1615     $tag=$tag.$subfield;
1616     undef $subfield;
1617   }
1618   my ($bntag,$bnsubf) = GetMarcFromKohaField('biblio.biblionumber');
1619   my ($itemtag,$itemsubf) = GetMarcFromKohaField('items.itemnumber');
1620   foreach my $usmarc (@$listbiblios){
1621     my $record;
1622     $record=eval{MARC::Record->new_from_usmarc($usmarc)};
1623     my $biblionumber;
1624     if ($@){
1625       # usmarc is not a valid usmarc May be a biblionumber
1626       if ($tag eq $itemtag){
1627         my $bib=GetBiblioFromItemNumber($usmarc);
1628         $record=GetMarcItem($bib->{'biblionumber'},$usmarc) ;
1629         $biblionumber=$bib->{'biblionumber'};
1630       } else {
1631         $record=GetMarcBiblio($usmarc);
1632         $biblionumber=$usmarc;
1633       }
1634     }  else {
1635       if ($bntag >= 010){
1636         $biblionumber = $record->subfield($bntag,$bnsubf);
1637       }else {
1638         $biblionumber=$record->field($bntag)->data;
1639       }
1640     }
1641     #GetBiblionumber is to be written.
1642     #Could be replaced by TransformMarcToKoha (But Would be longer)
1643     if ($record->field($tag)){
1644       my $modify=0;
1645       foreach my $field ($record->field($tag)){
1646         if ($subfield){
1647           if ($field->delete_subfield('code' =>$subfield,'match'=>qr($initvalue))){
1648             $countmatched++;
1649             $modify=1;
1650             $field->update($subfield,$targetvalue) if ($targetvalue);
1651           }
1652         } else {
1653           if ($tag >= 010){
1654             if ($field->delete_field($field)){
1655               $countmatched++;
1656               $modify=1;
1657             }
1658           } else {
1659             $field->data=$targetvalue if ($field->data=~qr($initvalue));
1660           }
1661         }
1662       }
1663 #       warn $record->as_formatted;
1664       if ($modify){
1665         ModBiblio($record,$biblionumber,GetFrameworkCode($biblionumber)) unless ($test);
1666       } else {
1667         push @unmatched, $biblionumber;
1668       }
1669     } else {
1670       push @unmatched, $biblionumber;
1671     }
1672   }
1673   return ($countmatched,\@unmatched);
1674 }
1675
1676 END { }    # module clean-up code here (global destructor)
1677
1678 1;
1679 __END__
1680
1681 =head1 AUTHOR
1682
1683 Koha Developement team <info@koha.org>
1684
1685 =cut