# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License along with
-# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
-# Suite 330, Boston, MA 02111-1307 USA
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use strict;
+use warnings;
+
use C4::Context;
use MARC::Record;
use C4::Search;
use vars qw($VERSION);
-# set the version for version checking
-$VERSION = 3.00;
+BEGIN {
+ # set the version for version checking
+ $VERSION = 3.01;
+}
=head1 NAME
=head1 SYNOPSIS
-=over 4
+ my @matchers = C4::Matcher::GetMatcherList();
-my @matchers = C4::Matcher::GetMatcherList();
+ my $matcher = C4::Matcher->new($record_type);
+ $matcher->threshold($threshold);
+ $matcher->code($code);
+ $matcher->description($description);
-my $matcher = C4::Matcher->new($record_type);
-$matcher->threshold($threshold);
-$matcher->code($code);
-$matcher->description($description);
+ $matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, '');
+ $matcher->add_simple_matchpoint('Date', 1000, '008', '', 7, 4, '');
+ $matcher->add_matchpoint('isbn', 1000, [ { tag => '020', subfields => 'a', norms => [] } ]);
-$matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, '');
-$matcher->add_simple_matchpoint('Date', 1000, '008', '', 7, 4, '');
-$matcher->add_matchpoint('isbn', 1000, [ { tag => '020', subfields => 'a', norms => [] } ]);
+ $matcher->add_simple_required_check('245', 'a', -1, 0, '', '245', 'a', -1, 0, '');
+ $matcher->add_required_check([ { tag => '245', subfields => 'a', norms => [] } ],
+ [ { tag => '245', subfields => 'a', norms => [] } ]);
-$matcher->add_simple_required_check('245', 'a', -1, 0, '', '245', 'a', -1, 0, '');
-$matcher->add_required_check([ { tag => '245', subfields => 'a', norms => [] } ],
- [ { tag => '245', subfields => 'a', norms => [] } ]);
+ my @matches = $matcher->get_matches($marc_record, $max_matches);
-my @matches = $matcher->get_matches($marc_record, $max_matches);
+ foreach $match (@matches) {
-foreach $match (@matches) {
+ # matches already sorted in order of
+ # decreasing score
+ print "record ID: $match->{'record_id'};
+ print "score: $match->{'score'};
- # matches already sorted in order of
- # decreasing score
- print "record ID: $match->{'record_id'};
- print "score: $match->{'score'};
+ }
-}
-
-=back
+ my $matcher_description = $matcher->dump();
=head1 FUNCTIONS
=head2 GetMatcherList
-=over 4
-
-my @matchers = C4::Matcher::GetMatcherList();
-
-=back
+ my @matchers = C4::Matcher::GetMatcherList();
Returns an array of hashrefs list all matchers
present in the database. Each hashref includes:
-matcher_id
-code
-description
+ * matcher_id
+ * code
+ * description
=cut
=head1 METHODS
-=cut
-
=head2 new
-=over 4
-
-my $matcher = C4::Matcher->new($record_type, $threshold);
-
-=back
+ my $matcher = C4::Matcher->new($record_type, $threshold);
Creates a new Matcher. C<$record_type> indicates which search
database to use, e.g., 'biblio' or 'authority' and defaults to
=head2 fetch
-=over 4
-
-my $matcher = C4::Matcher->fetch($id);
-
-=back
+ my $matcher = C4::Matcher->fetch($id);
Creates a matcher object from the version stored
in the database. If a matcher with the given
=head2 store
-=over 4
-
-my $id = $matcher->store();
-
-=back
+ my $id = $matcher->store();
Stores matcher in database. The return value is the ID
of the marc_matchers row. If the matcher was
return $matchpoint_id;
}
-=head2 threshold
-=over 4
+=head2 delete
-$matcher->threshold(1000);
-my $threshold = $matcher->threshold();
+ C4::Matcher->delete($id);
-=back
+Deletes the matcher of the specified ID
+from the database.
+
+=cut
+
+sub delete {
+ my $class = shift;
+ my $matcher_id = shift;
+
+ my $dbh = C4::Context->dbh;
+ my $sth = $dbh->prepare("DELETE FROM marc_matchers WHERE matcher_id = ?");
+ $sth->execute($matcher_id); # relying on cascading deletes to clean up everything
+}
+
+=head2 threshold
+
+ $matcher->threshold(1000);
+ my $threshold = $matcher->threshold();
Accessor method.
@_ ? $self->{'threshold'} = shift : $self->{'threshold'};
}
-=head2 code
+=head2 _id
-=over 4
+ $matcher->_id(123);
+ my $id = $matcher->_id();
-$matcher->code('ISBN');
-my $code = $matcher->code();
+Accessor method. Note that using this method
+to set the DB ID of the matcher should not be
+done outside of the editing CGI.
-=back
+=cut
+
+sub _id {
+ my $self = shift;
+ @_ ? $self->{'id'} = shift : $self->{'id'};
+}
+
+=head2 code
+
+ $matcher->code('ISBN');
+ my $code = $matcher->code();
Accessor method.
=head2 description
-=over 4
-
-$matcher->description('match on ISBN');
-my $description = $matcher->description();
-
-=back
+ $matcher->description('match on ISBN');
+ my $description = $matcher->description();
Accessor method.
=head2 add_matchpoint
-=over 4
-
-$matcher->add_matchpoint($index, $score, $matchcomponents);
-
-=back
+ $matcher->add_matchpoint($index, $score, $matchcomponents);
Adds a matchpoint that may include multiple components. The $index
parameter identifies the index that will be searched, while $score
=head2 add_simple_matchpoint
-=over 4
+ $matcher->add_simple_matchpoint($index, $score, $source_tag,
+ $source_subfields, $source_offset,
+ $source_length, $source_normalizer);
-$matcher->add_simple_matchpoint($index, $score, $source_tag, $source_subfields,
- $source_offset, $source_length,
- $source_normalizer);
-
-=back
Adds a simple matchpoint rule -- after composing a key based on the source tag and subfields,
normalized per the normalization fuction, search the index. All records retrieved
$self->add_matchpoint($index, $score, [
{ tag => $source_tag, subfields => $source_subfields,
- offset => $source_offset, length => $source_length,
+ offset => $source_offset, 'length' => $source_length,
norms => [ $source_normalizer ]
}
]);
=head2 add_required_check
-=over 4
-
-$match->add_required_check($source_matchpoint, $target_matchpoint);
-
-=back
+ $match->add_required_check($source_matchpoint, $target_matchpoint);
Adds a required check definition. A required check means that in
order for a match to be considered valid, the key derived from the
=head2 add_simple_required_check
-$matcher->add_simple_required_check($source_tag, $source_subfields, $source_offset, $source_length, $source_normalizer,
- $target_tag, $target_subfields, $target_offset, $target_length, $target_normalizer);
-
-=over 4
+ $matcher->add_simple_required_check($source_tag, $source_subfields,
+ $source_offset, $source_length, $source_normalizer,
+ $target_tag, $target_subfields, $target_offset,
+ $target_length, $target_normalizer);
Adds a required check, which requires that the normalized keys made from the source and targets
must match for a match to be considered valid.
-=back
-
=cut
sub add_simple_required_check {
$target_tag, $target_subfields, $target_offset, $target_length, $target_normalizer) = @_;
$self->add_required_check(
- [ { tag => $source_tag, subfields => $source_subfields, offset => $source_offset, length => $source_length,
+ [ { tag => $source_tag, subfields => $source_subfields, offset => $source_offset, 'length' => $source_length,
norms => [ $source_normalizer ] } ],
- [ { tag => $target_tag, subfields => $target_subfields, offset => $target_offset, length => $target_length,
+ [ { tag => $target_tag, subfields => $target_subfields, offset => $target_offset, 'length' => $target_length,
norms => [ $target_normalizer ] } ]
);
}
=head2 find_matches
-=over 4
-
-my @matches = $matcher->get_matches($marc_record, $max_matches);
-foreach $match (@matches) {
- # matches already sorted in order of
- # decreasing score
- print "record ID: $match->{'record_id'};
- print "score: $match->{'score'};
-}
-
-=back
+ my @matches = $matcher->get_matches($marc_record, $max_matches);
+ foreach $match (@matches) {
+ # matches already sorted in order of
+ # decreasing score
+ print "record ID: $match->{'record_id'};
+ print "score: $match->{'score'};
+ }
Identifies all of the records matching the given MARC record. For a record already
in the database to be considered a match, it must meet the following criteria:
# build query
my $query = join(" or ", map { "$matchpoint->{'index'}=$_" } @source_keys);
# FIXME only searching biblio index at the moment
- my ($error, $searchresults) = SimpleSearch($query);
+ my ($error, $searchresults, $total_hits) = SimpleSearch($query, 0, $max_matches);
- warn "search failed ($query) $error" if $error;
- foreach my $matched (@$searchresults) {
- $matches{$matched} += $matchpoint->{'score'};
+ if (defined $error ) {
+ warn "search failed ($query) $error";
+ } else {
+ foreach my $matched (@{$searchresults}) {
+ $matches{$matched} += $matchpoint->{'score'};
+ }
}
}
}
+=head2 dump
+
+ $description = $matcher->dump();
+
+Returns a reference to a structure containing all of the information
+in the matcher object. This is mainly a convenience method to
+aid setting up a HTML editing form.
+
+=cut
+
+sub dump {
+ my $self = shift;
+
+ my $result = {};
+
+ $result->{'matcher_id'} = $self->{'id'};
+ $result->{'code'} = $self->{'code'};
+ $result->{'description'} = $self->{'description'};
+
+ $result->{'matchpoints'} = [];
+ foreach my $matchpoint (@{ $self->{'matchpoints'} }) {
+ push @{ $result->{'matchpoints'} }, $matchpoint;
+ }
+ $result->{'matchchecks'} = [];
+ foreach my $matchcheck (@{ $self->{'required_checks'} }) {
+ push @{ $result->{'matchchecks'} }, $matchcheck;
+ }
+
+ return $result;
+}
+
sub _passes_required_checks {
my ($source_record, $target_blob, $matchchecks) = @_;
my $target_record = MARC::Record->new_from_usmarc($target_blob); # FIXME -- need to avoid parsing record twice
last FIELD if $j > 0 and $check_only_first_repeat;
last FIELD if $i > 0 and $j > $#keys;
my $key = "";
+ my $string;
if ($field->is_control_field()) {
- if ($component->{'length'}) {
- $key = _normalize(substr($field->data(), $component->{'offset'}, $component->{'length'}))
- # FIXME normalize, substr
- } else {
- $key = _normalize($field->data());
- }
+ $string=$field->data();
} else {
foreach my $subfield ($field->subfields()) {
if (exists $component->{'subfields'}->{$subfield->[0]}) {
- $key .= " " . $subfield->[1];
+ $string .= " " . $subfield->[1];
}
}
- $key = _normalize($key);
+ }
+ if ($component->{'length'}>0) {
+ $string= substr($string, $component->{'offset'}, $component->{'length'});
+ # FIXME normalize, substr
+ } elsif ($component->{'offset'}) {
+ $string= substr($string, $component->{'offset'});
}
+ $key = _normalize($string);
if ($i == 0) {
push @keys, $key if $key;
} else {
}
}
return @keys;
-
}
# FIXME - default normalizer
sub _normalize {
my $value = uc shift;
+ $value =~ s/[.;:,\]\[\)\(\/'"]//g;
$value =~ s/^\s+//;
- $value =~ s/^\s+$//;
+ #$value =~ s/^\s+$//;
+ $value =~ s/\s+$//;
$value =~ s/\s+/ /g;
- $value =~ s/[.;,\]\[\)\(\/"']//g;
+ #$value =~ s/[.;,\]\[\)\(\/"']//g;
return $value;
}
1;
+__END__
=head1 AUTHOR
-Koha Development Team <info@koha.org>
+Koha Development Team <http://koha-community.org/>
Galen Charlton <galen.charlton@liblime.com>