X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=blobdiff_plain;f=C4%2FMatcher.pm;h=9d1df67e585063ece52156298821d8039664f6d5;hb=3bb33fd666d4d0eea26114b373874189edc82459;hp=505ff27f3688a22d7892f662fa511c0a7dcb86be;hpb=b69facedc4f0c1c2194244df9d97460851b273a5;p=koha_gimpoz diff --git a/C4/Matcher.pm b/C4/Matcher.pm index 505ff27f36..9d1df67e58 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -13,11 +13,13 @@ package C4::Matcher; # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # -# You should have received a copy of the GNU General Public License along with -# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, -# Suite 330, Boston, MA 02111-1307 USA +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. use strict; +use warnings; + use C4::Context; use MARC::Record; use C4::Search; @@ -25,8 +27,10 @@ use C4::Biblio; use vars qw($VERSION); -# set the version for version checking -$VERSION = 3.00; +BEGIN { + # set the version for version checking + $VERSION = 3.01; +} =head1 NAME @@ -34,35 +38,33 @@ C4::Matcher - find MARC records matching another one =head1 SYNOPSIS -=over 4 + my @matchers = C4::Matcher::GetMatcherList(); -my @matchers = C4::Matcher::GetMatcherList(); + my $matcher = C4::Matcher->new($record_type); + $matcher->threshold($threshold); + $matcher->code($code); + $matcher->description($description); -my $matcher = C4::Matcher->new($record_type); -$matcher->threshold($threshold); -$matcher->code($code); -$matcher->description($description); + $matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, ''); + $matcher->add_simple_matchpoint('Date', 1000, '008', '', 7, 4, ''); + $matcher->add_matchpoint('isbn', 1000, [ { tag => '020', subfields => 'a', norms => [] } ]); -$matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, ''); -$matcher->add_simple_matchpoint('Date', 1000, '008', '', 7, 4, ''); -$matcher->add_matchpoint('isbn', 1000, [ { tag => '020', subfields => 'a', norms => [] } ]); + $matcher->add_simple_required_check('245', 'a', -1, 0, '', '245', 'a', -1, 0, ''); + $matcher->add_required_check([ { tag => '245', subfields => 'a', norms => [] } ], + [ { tag => '245', subfields => 'a', norms => [] } ]); -$matcher->add_simple_required_check('245', 'a', -1, 0, '', '245', 'a', -1, 0, ''); -$matcher->add_required_check([ { tag => '245', subfields => 'a', norms => [] } ], - [ { tag => '245', subfields => 'a', norms => [] } ]); + my @matches = $matcher->get_matches($marc_record, $max_matches); -my @matches = $matcher->get_matches($marc_record, $max_matches); + foreach $match (@matches) { -foreach $match (@matches) { + # matches already sorted in order of + # decreasing score + print "record ID: $match->{'record_id'}; + print "score: $match->{'score'}; - # matches already sorted in order of - # decreasing score - print "record ID: $match->{'record_id'}; - print "score: $match->{'score'}; + } -} - -=back + my $matcher_description = $matcher->dump(); =head1 FUNCTIONS @@ -70,18 +72,14 @@ foreach $match (@matches) { =head2 GetMatcherList -=over 4 - -my @matchers = C4::Matcher::GetMatcherList(); - -=back + my @matchers = C4::Matcher::GetMatcherList(); Returns an array of hashrefs list all matchers present in the database. Each hashref includes: -matcher_id -code -description + * matcher_id + * code + * description =cut @@ -99,15 +97,9 @@ sub GetMatcherList { =head1 METHODS -=cut - =head2 new -=over 4 - -my $matcher = C4::Matcher->new($record_type, $threshold); - -=back + my $matcher = C4::Matcher->new($record_type, $threshold); Creates a new Matcher. C<$record_type> indicates which search database to use, e.g., 'biblio' or 'authority' and defaults to @@ -146,11 +138,7 @@ sub new { =head2 fetch -=over 4 - -my $matcher = C4::Matcher->fetch($id); - -=back + my $matcher = C4::Matcher->fetch($id); Creates a matcher object from the version stored in the database. If a matcher with the given @@ -239,11 +227,7 @@ sub _fetch_matchpoint { =head2 store -=over 4 - -my $id = $matcher->store(); - -=back + my $id = $matcher->store(); Stores matcher in database. The return value is the ID of the marc_matchers row. If the matcher was @@ -365,14 +349,29 @@ sub _store_matchpoint { return $matchpoint_id; } -=head2 threshold -=over 4 +=head2 delete -$matcher->threshold(1000); -my $threshold = $matcher->threshold(); + C4::Matcher->delete($id); -=back +Deletes the matcher of the specified ID +from the database. + +=cut + +sub delete { + my $class = shift; + my $matcher_id = shift; + + my $dbh = C4::Context->dbh; + my $sth = $dbh->prepare("DELETE FROM marc_matchers WHERE matcher_id = ?"); + $sth->execute($matcher_id); # relying on cascading deletes to clean up everything +} + +=head2 threshold + + $matcher->threshold(1000); + my $threshold = $matcher->threshold(); Accessor method. @@ -383,14 +382,26 @@ sub threshold { @_ ? $self->{'threshold'} = shift : $self->{'threshold'}; } -=head2 code +=head2 _id -=over 4 + $matcher->_id(123); + my $id = $matcher->_id(); -$matcher->code('ISBN'); -my $code = $matcher->code(); +Accessor method. Note that using this method +to set the DB ID of the matcher should not be +done outside of the editing CGI. -=back +=cut + +sub _id { + my $self = shift; + @_ ? $self->{'id'} = shift : $self->{'id'}; +} + +=head2 code + + $matcher->code('ISBN'); + my $code = $matcher->code(); Accessor method. @@ -403,12 +414,8 @@ sub code { =head2 description -=over 4 - -$matcher->description('match on ISBN'); -my $description = $matcher->description(); - -=back + $matcher->description('match on ISBN'); + my $description = $matcher->description(); Accessor method. @@ -421,11 +428,7 @@ sub description { =head2 add_matchpoint -=over 4 - -$matcher->add_matchpoint($index, $score, $matchcomponents); - -=back + $matcher->add_matchpoint($index, $score, $matchcomponents); Adds a matchpoint that may include multiple components. The $index parameter identifies the index that will be searched, while $score @@ -463,13 +466,10 @@ sub add_matchpoint { =head2 add_simple_matchpoint -=over 4 + $matcher->add_simple_matchpoint($index, $score, $source_tag, + $source_subfields, $source_offset, + $source_length, $source_normalizer); -$matcher->add_simple_matchpoint($index, $score, $source_tag, $source_subfields, - $source_offset, $source_length, - $source_normalizer); - -=back Adds a simple matchpoint rule -- after composing a key based on the source tag and subfields, normalized per the normalization fuction, search the index. All records retrieved @@ -483,7 +483,7 @@ sub add_simple_matchpoint { $self->add_matchpoint($index, $score, [ { tag => $source_tag, subfields => $source_subfields, - offset => $source_offset, length => $source_length, + offset => $source_offset, 'length' => $source_length, norms => [ $source_normalizer ] } ]); @@ -491,11 +491,7 @@ sub add_simple_matchpoint { =head2 add_required_check -=over 4 - -$match->add_required_check($source_matchpoint, $target_matchpoint); - -=back + $match->add_required_check($source_matchpoint, $target_matchpoint); Adds a required check definition. A required check means that in order for a match to be considered valid, the key derived from the @@ -547,16 +543,14 @@ sub add_required_check { =head2 add_simple_required_check -$matcher->add_simple_required_check($source_tag, $source_subfields, $source_offset, $source_length, $source_normalizer, - $target_tag, $target_subfields, $target_offset, $target_length, $target_normalizer); - -=over 4 + $matcher->add_simple_required_check($source_tag, $source_subfields, + $source_offset, $source_length, $source_normalizer, + $target_tag, $target_subfields, $target_offset, + $target_length, $target_normalizer); Adds a required check, which requires that the normalized keys made from the source and targets must match for a match to be considered valid. -=back - =cut sub add_simple_required_check { @@ -565,26 +559,22 @@ sub add_simple_required_check { $target_tag, $target_subfields, $target_offset, $target_length, $target_normalizer) = @_; $self->add_required_check( - [ { tag => $source_tag, subfields => $source_subfields, offset => $source_offset, length => $source_length, + [ { tag => $source_tag, subfields => $source_subfields, offset => $source_offset, 'length' => $source_length, norms => [ $source_normalizer ] } ], - [ { tag => $target_tag, subfields => $target_subfields, offset => $target_offset, length => $target_length, + [ { tag => $target_tag, subfields => $target_subfields, offset => $target_offset, 'length' => $target_length, norms => [ $target_normalizer ] } ] ); } =head2 find_matches -=over 4 - -my @matches = $matcher->get_matches($marc_record, $max_matches); -foreach $match (@matches) { - # matches already sorted in order of - # decreasing score - print "record ID: $match->{'record_id'}; - print "score: $match->{'score'}; -} - -=back + my @matches = $matcher->get_matches($marc_record, $max_matches); + foreach $match (@matches) { + # matches already sorted in order of + # decreasing score + print "record ID: $match->{'record_id'}; + print "score: $match->{'score'}; + } Identifies all of the records matching the given MARC record. For a record already in the database to be considered a match, it must meet the following criteria: @@ -614,11 +604,14 @@ sub get_matches { # build query my $query = join(" or ", map { "$matchpoint->{'index'}=$_" } @source_keys); # FIXME only searching biblio index at the moment - my ($error, $searchresults) = SimpleSearch($query); + my ($error, $searchresults, $total_hits) = SimpleSearch($query, 0, $max_matches); - warn "search failed ($query) $error" if $error; - foreach my $matched (@$searchresults) { - $matches{$matched} += $matchpoint->{'score'}; + if (defined $error ) { + warn "search failed ($query) $error"; + } else { + foreach my $matched (@{$searchresults}) { + $matches{$matched} += $matchpoint->{'score'}; + } } } @@ -646,6 +639,37 @@ sub get_matches { } +=head2 dump + + $description = $matcher->dump(); + +Returns a reference to a structure containing all of the information +in the matcher object. This is mainly a convenience method to +aid setting up a HTML editing form. + +=cut + +sub dump { + my $self = shift; + + my $result = {}; + + $result->{'matcher_id'} = $self->{'id'}; + $result->{'code'} = $self->{'code'}; + $result->{'description'} = $self->{'description'}; + + $result->{'matchpoints'} = []; + foreach my $matchpoint (@{ $self->{'matchpoints'} }) { + push @{ $result->{'matchpoints'} }, $matchpoint; + } + $result->{'matchchecks'} = []; + foreach my $matchcheck (@{ $self->{'required_checks'} }) { + push @{ $result->{'matchchecks'} }, $matchcheck; + } + + return $result; +} + sub _passes_required_checks { my ($source_record, $target_blob, $matchchecks) = @_; my $target_record = MARC::Record->new_from_usmarc($target_blob); # FIXME -- need to avoid parsing record twice @@ -690,21 +714,23 @@ sub _get_match_keys { last FIELD if $j > 0 and $check_only_first_repeat; last FIELD if $i > 0 and $j > $#keys; my $key = ""; + my $string; if ($field->is_control_field()) { - if ($component->{'length'}) { - $key = _normalize(substr($field->data(), $component->{'offset'}, $component->{'length'})) - # FIXME normalize, substr - } else { - $key = _normalize($field->data()); - } + $string=$field->data(); } else { foreach my $subfield ($field->subfields()) { if (exists $component->{'subfields'}->{$subfield->[0]}) { - $key .= " " . $subfield->[1]; + $string .= " " . $subfield->[1]; } } - $key = _normalize($key); + } + if ($component->{'length'}>0) { + $string= substr($string, $component->{'offset'}, $component->{'length'}); + # FIXME normalize, substr + } elsif ($component->{'offset'}) { + $string= substr($string, $component->{'offset'}); } + $key = _normalize($string); if ($i == 0) { push @keys, $key if $key; } else { @@ -713,7 +739,6 @@ sub _get_match_keys { } } return @keys; - } @@ -733,18 +758,21 @@ sub _parse_match_component { # FIXME - default normalizer sub _normalize { my $value = uc shift; + $value =~ s/[.;:,\]\[\)\(\/'"]//g; $value =~ s/^\s+//; - $value =~ s/^\s+$//; + #$value =~ s/^\s+$//; + $value =~ s/\s+$//; $value =~ s/\s+/ /g; - $value =~ s/[.;,\]\[\)\(\/"']//g; + #$value =~ s/[.;,\]\[\)\(\/"']//g; return $value; } 1; +__END__ =head1 AUTHOR -Koha Development Team +Koha Development Team Galen Charlton