Bug 33159: Simplify ES handling and fix zebra handling
author Nick Clemens <nick@bywatersolutions.com>
Wed, 8 Mar 2023 18:03:05 +0000 (18:03 +0000)
committer Tomas Cohen Arazi <tomascohen@theke.io>
Fri, 31 Mar 2023 09:56:53 +0000 (11:56 +0200)
Before this patch we used two indexes for the thesaurus values. We can
simply index both needed fields into a single index and just form the
search correctly.

This patch also ensures we pass the 'thesaurus' value for the heading
directly to the query builder - for zebra it goes through, and for ES
we convert it to the expected code.

This patch also moves the necessary mappings out of the user-definable
mappings and hardcodes them. There is precedent for this with
'match-heading'; it ensures matching works as expected.

To test:
1 - Follow previous test plan in Zebra and ES

Signed-off-by: Phil Ringnalda <phil@chetcolibrary.org>
Signed-off-by: Frank Hansen <frank.hansen@ub.lu.se>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
C4/Heading.pm
Koha/SearchEngine/Elasticsearch.pm
Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
admin/searchengine/elasticsearch/mappings.yaml
t/db_dependent/Heading.t

index cce3abf..8b8ece8 100644 (file)
@@ -210,34 +210,11 @@ sub _search {
     }
 
     if ( $thesaurus ) {
-    # This is calculated in C4/Heading/MARC21.pm - not used for UNIMARC
-        if ($thesaurus eq 'lcsh') {
-            $subject_heading_thesaurus = 'a';
-        } elsif ($thesaurus eq 'lcac') {
-            $subject_heading_thesaurus = 'b';
-        } elsif ($thesaurus eq 'mesh') {
-            $subject_heading_thesaurus = 'c';
-        } elsif ($thesaurus eq 'nal') {
-            $subject_heading_thesaurus = 'd';
-        } elsif ($thesaurus eq 'notspecified') {
-            $subject_heading_thesaurus = 'n';
-        } elsif ($thesaurus eq 'cash') {
-            $subject_heading_thesaurus = 'k';
-        } elsif ($thesaurus eq 'rvm') {
-            $subject_heading_thesaurus = 'v';
-        } else { # We stored the value from $7 as the thesaurus if there was one
-            $subject_heading_thesaurus = 'z';
-            push @marclist, 'thesaurus-conventions';
-            push @and_or, 'and';
-            push @excluding, '';
-            push @operator, 'is';
-            push @value, $self->{'thesaurus'};
-        }
         push @marclist, 'thesaurus';
         push @and_or, 'and';
         push @excluding, '';
         push @operator, 'is';
-        push @value, $subject_heading_thesaurus;
+        push @value, $thesaurus;
     }
 
     require Koha::SearchEngine::QueryBuilder;
index 1fe6fb1..9463aed 100644 (file)
@@ -233,7 +233,10 @@ sub get_elasticsearch_mappings {
                 }
             }
         );
-        $mappings->{properties}{ 'match-heading' } = _get_elasticsearch_field_config('search', 'text') if $self->index eq 'authorities';
+        if( $self->index eq 'authorities' ){
+            $mappings->{properties}{ 'match-heading' } = _get_elasticsearch_field_config('search', 'text');
+            $mappings->{properties}{ 'subject-heading-thesaurus' } = _get_elasticsearch_field_config('search', 'text');
+        }
         $all_mappings{$self->index} = $mappings;
     }
     $self->sort_fields(\%{$sort_fields{$self->index}});
@@ -1166,6 +1169,11 @@ sub _get_marc_mapping_rules {
         }
     }
 
+    if( $self->index eq 'authorities' ){
+        push @{$rules->{control_fields}->{'008'}}, ['subject-heading-thesaurus', { 'substr' => [ 11, 1 ] } ];
+        push @{$rules->{data_fields}->{'040'}->{subfields}->{f}}, ['subject-heading-thesaurus', { } ];
+    }
+
     return $rules;
 }
 
index 53042b4..d5dbdee 100644 (file)
@@ -564,6 +564,16 @@ our $koha_to_index_name = {
     all             => ''
 };
 
+our $thesaurus_to_value = {
+   lcsh => 'a',
+   lcac => 'b',
+   mesh => 'c',
+   nal  => 'd',
+   notspecified => 'n',
+   cash => 'k',
+   rvm => 'v',
+};
+
 sub build_authorities_query_compat {
     my ( $self, $marclist, $and_or, $excluding, $operator, $value,
         $authtypecode, $orderby )
@@ -588,6 +598,8 @@ sub build_authorities_query_compat {
     }
     for ( my $i = 0 ; $i < @$value ; $i++ ) {
         next unless $value->[$i]; #clean empty form values, ES doesn't like undefined searches
+        $value->[$i] = $thesaurus_to_value->{ $value->[$i] }
+            if( defined $thesaurus_to_value->{ $value->[$i] } && $indexes[$i] eq 'subject-heading-thesaurus' );
         push @searches,
           {
             where    => $indexes[$i],
index e892f91..2644bea 100644 (file)
@@ -805,24 +805,6 @@ authorities:
         sort: ~
         suggestible: ''
     type: ''
-  Subject-heading-thesaurus:
-    label: Subject-heading-thesaurus
-    mappings:
-      - facet: ''
-        marc_field: 008_/11
-        marc_type: marc21
-        sort: ~
-        suggestible: ''
-    type: ''
-  Subject-heading-thesaurus-conventions:
-    label: Subject-heading-thesaurus-conventions
-    mappings:
-      - facet: ''
-        marc_field: 040f
-        marc_type: marc21
-        sort: ~
-        suggestible: ''
-    type: ''
   authtype:
     label: authtype
     mappings:
index 6cbb2ac..7aa14c9 100755 (executable)
@@ -104,8 +104,7 @@ subtest "_search tests" => sub {
     $terms = $search_query->{query}->{bool}->{must};
     $expected_terms = [
         { term => { 'match-heading.ci_raw' => 'Uncles generalsubdiv Fiction' } },
-        { term => { 'subject-heading-thesaurus-conventions.ci_raw' => 'special_sauce' } },
-        { term => { 'subject-heading-thesaurus.ci_raw' => 'z' } },
+        { term => { 'subject-heading-thesaurus.ci_raw' => 'special_sauce' } },
     ];
     is_deeply( $terms, $expected_terms, "Search formed as expected with second indicator 7 and subfield 2");