use vars qw($ext_dict $select_all @fields);
BEGIN {
- $VERSION = 0.01;
+ $VERSION = 0.03;
@ISA = qw(Exporter);
@EXPORT_OK = qw(
&get_tag &get_tags &get_tag_rows
&remove_tag
&delete_tag_rows_by_ids
&rectify_weights
+ &get_approval_rows
+ &blacklist
+ &whitelist
+ &is_approved
+ &approval_counts
+ &get_filters
);
# %EXPORT_TAGS = ();
$ext_dict = C4::Context->preference('TagsExternalDictionary');
}
if ($ext_dict) {
require Lingua::Ispell;
- import Lingua::Ispell qw(spellcheck);
+ import Lingua::Ispell qw(spellcheck add_word_lc save_dictionary);
}
}
$select_all = "SELECT " . join(',',@fields) . "\n FROM tags_all\n";
}
-sub remove_tag ($) {
- my $tag_id = shift;
- my $rows = get_tag_rows({tag_id=>$tag_id}) or return 0;
- (scalar(@$rows) == 1) or return undef;
+sub get_filters (;$) {
+ my $query = "SELECT * FROM tags_filters ";
+ my ($sth);
+ if (@_) {
+ $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
+ $sth->execute(shift);
+ } else {
+ $sth = C4::Context->dbh->prepare($query);
+ $sth->execute;
+ }
+ return $sth->fetchall_arrayref({});
+}
+
+# (SELECT count(*) FROM tags_all ) as tags_all,
+# (SELECT count(*) FROM tags_index ) as tags_index,
+
+sub approval_counts () {
+ my $query = "SELECT
+ (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
+ (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
+ (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
+ ";
+ my $sth = C4::Context->dbh->prepare($query);
+ $sth->execute;
+ my $result = $sth->fetchrow_hashref();
+ $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
+ $debug and warn "counts returned: " . Dumper $result;
+ return $result;
+}
+
+sub remove_tag ($;$) {
+ my $tag_id = shift or return undef;
+ my $user_id = (@_) ? shift : undef;
+ my $rows = (defined $user_id) ?
+ get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
+ get_tag_rows({tag_id=>$tag_id}) ;
+ $rows or return 0;
+ (scalar(@$rows) == 1) or return undef; # should never happen (duplicate ids)
my $row = shift(@$rows);
($tag_id == $row->{tag_id}) or return 0;
my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
carp "Empty argument key to get_tag_rows: ignoring!";
next;
}
- unless (1 == scalar grep {/^ $key $/xi} @ok_fields) {
+ unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
carp "get_tag_rows received unreconized argument key '$key'.";
next;
}
- if ($key =~ /^limit$/i) {
+ if ($key eq 'limit') {
my $val = $hash->{$key};
- unless ($val =~ /^\d+$/) {
+ unless ($val =~ /^(\d+,)?\d+$/) {
carp "Non-nuerical limit value '$val' ignored!";
next;
}
}
sub get_tags (;$) { # i.e., from tags_index
- # my $self = shift;
my $hash = shift || {};
- my @ok_fields = qw(term biblionumber weight limit sort);
+ my @ok_fields = qw(term biblionumber weight limit sort approved);
my $wheres;
my $limit = "";
my $order = "";
carp "Empty argument key to get_tags: ignoring!";
next;
}
- unless (1 == scalar grep {/^ $key $/xi} @ok_fields) {
+ unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
carp "get_tags received unreconized argument key '$key'.";
next;
}
- if ($key =~ /^limit$/i) {
+ if ($key eq 'limit') {
my $val = $hash->{$key};
- unless ($val =~ /^\d+$/) {
+ unless ($val =~ /^(\d+,)?\d+$/) {
carp "Non-nuerical limit value '$val' ignored!";
next;
}
$limit = " LIMIT $val\n";
- } elsif ($key =~ /^sort$/i) {
+ } elsif ($key eq 'sort') {
foreach my $by (split /\,/, $hash->{$key}) {
unless (
$by =~ /^([-+])?(term)/ or
carp "get_tags received illegal sort order '$by'";
next;
}
- $order .= " ORDER BY $2 " . ($1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
+ if ($order) {
+ $order .= ", ";
+ } else {
+ $order = " ORDER BY ";
+ }
+ $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
}
} else {
- my $whereval = $key;
- ($key =~ /^term$/i) and $whereval = 'tags_index.term';
- $wheres .= ($wheres) ? " AND $whereval = ?\n" : " WHERE $whereval = ?\n";
- push @exe_args, $hash->{$key};
+ my $whereval = $hash->{$key};
+ my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
+ ($key eq 'approved') ? 'tags_approval.approved' : $key;
+ my $op = ($whereval =~ s/^(>=|<=)// or
+ $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
+ $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
+ push @exe_args, $whereval;
}
}
my $query = "
return $sth->fetchall_arrayref({});
}
+sub get_approval_rows (;$) { # i.e., from tags_approval
+ my $hash = shift || {};
+ my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
+ my $wheres;
+ my $limit = "";
+ my $order = "";
+ my @exe_args = ();
+ foreach my $key (keys %$hash) {
+ $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
+ unless (length $key) {
+ carp "Empty argument key to get_approval_rows: ignoring!";
+ next;
+ }
+ unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
+ carp "get_approval_rows received unreconized argument key '$key'.";
+ next;
+ }
+ if ($key eq 'limit') {
+ my $val = $hash->{$key};
+ unless ($val =~ /^(\d+,)?\d+$/) {
+ carp "Non-numerical limit value '$val' ignored!";
+ next;
+ }
+ $limit = " LIMIT $val\n";
+ } elsif ($key eq 'sort') {
+ foreach my $by (split /\,/, $hash->{$key}) {
+ unless (
+ $by =~ /^([-+])?(term)/ or
+ $by =~ /^([-+])?(biblionumber)/ or
+ $by =~ /^([-+])?(borrowernumber)/ or
+ $by =~ /^([-+])?(weight_total)/ or
+ $by =~ /^([-+])?(approved(_by)?)/ or
+ $by =~ /^([-+])?(date_approved)/
+ ) {
+ carp "get_approval_rows received illegal sort order '$by'";
+ next;
+ }
+ if ($order) {
+ $order .= ", ";
+ } else {
+ $order = " ORDER BY " unless $order;
+ }
+ $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
+ }
+
+ } else {
+ my $whereval = $hash->{$key};
+ my $op = ($whereval =~ s/^(>=|<=)// or
+ $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
+ $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
+ push @exe_args, $whereval;
+ }
+ }
+ my $query = "
+ SELECT tags_approval.term AS term,
+ tags_approval.approved AS approved,
+ tags_approval.date_approved AS date_approved,
+ tags_approval.approved_by AS approved_by,
+ tags_approval.weight_total AS weight_total,
+ CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
+ FROM tags_approval
+ LEFT JOIN borrowers
+ ON tags_approval.approved_by = borrowers.borrowernumber ";
+ $query .= ($wheres||'') . $order . $limit;
+ $debug and print STDERR "get_approval_rows query:\n $query\n",
+ "get_approval_rows query args: ", join(',', @exe_args), "\n";
+ my $sth = C4::Context->dbh->prepare($query);
+ if (@exe_args) {
+ $sth->execute(@exe_args);
+ } else {
+ $sth->execute;
+ }
+ return $sth->fetchall_arrayref({});
+}
+
sub is_approved ($) {
my $term = shift or return undef;
- if ($ext_dict) {
- return (spellcheck($term) ? 0 : 1);
- }
my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
$sth->execute($term);
- $sth->rows or return undef;
- return $sth->fetch;
+ unless ($sth->rows) {
+ $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
+ return 0;
+ }
+ return $sth->fetchrow;
}
sub get_tag_index ($;$) {
return $sth->fetchrow_hashref;
}
-sub add_tag_approval ($;$) {
+sub whitelist {
+ my $operator = shift;
+ defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+ if ($ext_dict) {
+ foreach (@_) {
+ spellcheck($_) or next;
+ add_word_lc($_);
+ }
+ }
+ foreach (@_) {
+ my $aref = get_approval_rows({term=>$_});
+ if ($aref and scalar @$aref) {
+ mod_tag_approval($operator,$_,1);
+ } else {
+ add_tag_approval($_,$operator);
+ }
+ }
+ return scalar @_;
+}
+# note: there is no "unwhitelist" operation because there is no remove for Ispell.
+# The blacklist regexps should operate "in front of" the whitelist, so if you approve
+# a term mistakenly, you can still reverse it. But there is no going back to "neutral".
+sub blacklist {
+ my $operator = shift;
+ defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+ foreach (@_) {
+ my $aref = get_approval_rows({term=>$_});
+ if ($aref and scalar @$aref) {
+ mod_tag_approval($operator,$_,-1);
+ } else {
+ add_tag_approval($_,$operator,-1);
+ }
+ }
+ return scalar @_;
+}
+sub add_filter {
+ my $operator = shift;
+ defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+ my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
+ # my $sth = C4::Context->dbh->prepare($query);
+ return scalar @_;
+}
+sub remove_filter {
+ my $operator = shift;
+ defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+ my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
+ # my $sth = C4::Context->dbh->prepare($query);
+ # $sth->execute($term);
+ return scalar @_;
+}
+
+sub add_tag_approval ($;$$) { # or disapproval
+ $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ? $_ : 'UNDEF'} @_) . ")";
my $term = shift or return undef;
my $query = "SELECT * FROM tags_approval WHERE term = ?";
my $sth = C4::Context->dbh->prepare($query);
$sth->execute($term);
($sth->rows) and return increment_weight_total($term);
- my $ok = (@_ ? shift : 0);
- if ($ok) {
- $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,1,NOW())";
- $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term,$ok)\n";
- $sth = C4::Context->dbh->prepare($query);
- $sth->execute($term,$ok);
+ my $operator = shift || 0;
+ my $approval = (@_ ? shift : 0); # default is unapproved
+ my @exe_args = ($term); # all 3 queries will use this argument
+ if ($operator) {
+ $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
+ push @exe_args, $operator, $approval;
+ } elsif ($approval) {
+ $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
+ push @exe_args, $approval;
} else {
$query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
- $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term)\n";
- $sth = C4::Context->dbh->prepare($query);
- $sth->execute($term);
}
+ $debug and print STDERR "add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
+ $sth = C4::Context->dbh->prepare($query);
+ $sth->execute(@exe_args);
return $sth->rows;
}
+sub mod_tag_approval ($$$) {
+ my $operator = shift;
+ defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
+ my $term = shift or return undef;
+ my $approval = (scalar @_ ? shift : 1); # default is to approve
+ my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
+ $debug and print STDERR "mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
+ my $sth = C4::Context->dbh->prepare($query);
+ $sth->execute($operator,$approval,$term);
+}
+
sub add_tag_index ($$;$) {
my $term = shift or return undef;
my $biblionumber = shift or return undef;
$sth->execute($term,$biblionumber);
($sth->rows) and return increment_weight($term,$biblionumber);
$query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
- $debug and print "add_tag_index query:\n$query\nadd_tag_index args: ($term,$biblionumber)\n";
+ $debug and print STDERR "add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
$sth = C4::Context->dbh->prepare($query);
$sth->execute($term,$biblionumber);
return $sth->rows;
}
sub decrement_weights ($$) {
decrement_weight(@_);
- derement_weight_total(shift);
+ decrement_weight_total(shift);
}
sub increment_weight_total ($) {
_set_weight_total('weight_total+1',shift);
UPDATE tags_approval
SET weight_total=" . (shift) . "
WHERE term=?
- ");
+ "); # note: CANNOT use "?" for weight_total (see the args above).
$sth->execute(shift); # just the term
}
sub _set_weight ($$$) {
my $biblionumber = shift or return undef;
my $term = shift or return undef;
my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
-
- # first, add to tags regardless of approaval
+ $term =~ s/^\s+//;
+ $term =~ s/\s+$//;
+ ($term) or return undef; # must be more than whitespace
+ my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
my $query = "INSERT INTO tags_all
(borrowernumber,biblionumber,term,date_created)
VALUES (?,?,?,NOW())";
- $debug and print STDERR "add_tag query:\n $query\n",
+ $debug and print STDERR "add_tag query: $query\n",
"add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
+ if (scalar @$rows) {
+ $debug and carp "Duplicate tag detected. Tag not added.";
+ return undef;
+ }
+ # add to tags_all regardless of approaval
my $sth = C4::Context->dbh->prepare($query);
$sth->execute($borrowernumber,$biblionumber,$term);
# then
- if (@_) { # if an arg remains, it is the borrowernumber of the approver: tag is pre-approved.
+ if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
my $approver = shift;
- add_tag_approval($term,$approver);
+ $debug and print STDERR "term '$term' pre-approved by borrower #$approver\n";
+ add_tag_approval($term,$approver,1);
add_tag_index($term,$biblionumber,$approver);
- } elsif (is_approved($term)) {
- add_tag_approval($term,1);
+ } elsif (is_approved($term) >= 1) {
+ $debug and print STDERR "term '$term' approved by whitelist\n";
+ add_tag_approval($term,0,1);
add_tag_index($term,$biblionumber,1);
} else {
+ $debug and print STDERR "term '$term' NOT approved (yet)\n";
add_tag_approval($term);
add_tag_index($term,$biblionumber);
}
=head3 TO DO: Add real perldoc
+=cut
+
+=head2 External Dictionary (Ispell) [Recommended]
+
+An external dictionary can be used as a means of "pre-populating" and tracking
+allowed terms based on the widely available Ispell dictionary. This can be the system
+dictionary or a personal version, but in order to support whitelisting, it must be
+editable to the process running Koha.
+
+To enable, enter the absolute path to the ispell dictionary in the system
+preference "TagsExternalDictionary".
+
+Using external Ispell is recommended for both ease of use and performance. Note that any
+language version of Ispell can be installed. It is also possible to modify the dictionary
+at the command line to affect the desired content.
+
+WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
+should build their own wordlist and recompile Ispell based on it. See man ispell for
+instructions.
+
+=head2 Table Structure
+
+The tables used by tags are:
+ tags_all
+ tags_index
+ tags_approval
+ tags_blacklist
+
+Your first thought may be that this looks a little complicated. It is, but only because
+it has to be. I'll try to explain.
+
+tags_all - This table would be all we really need if we didn't care about moderation or
+performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
+though, it contains all the relevant info about a given tag:
+ tag_id - unique id number for it
+ borrowernumber - user that entered it
+ biblionumber - book record it is attached to
+ term - tag "term" itself
+ language - perhaps used later to influence weighting
+ date_created - date and time it was created
+
+tags_approval - Since we need to provide moderation, this table is used to track it. If no
+external dictionary is used, this table is the sole reference for approval and rejection.
+With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
+This could be called an "approved terms" table. See above regarding the External Dictionary.
+ term - tag "term" itself
+ approved - Negative, 0 or positive if tag is rejected, pending or approved.
+ date_approved - date of last action
+ approved_by - staffer performing the last action
+ weight_total - total occurance of term in any biblio by any users
+
+tags_index - This table is for performance, because by far the most common operation will
+be fetching tags for a list of search results. We will have a set of biblios, and we will
+want ONLY their approved tags and overall weighting. While we could implement a query that
+would traverse tags_all filtered against tags_approval, the performance implications of
+trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
+ term - approved term as it appears in tags_approval
+ biblionumber - book record it is attached to
+ weight - number of times tag applied by any user
+
+tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
+compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
+blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
+if you do not understand their operation and interaction. It is quite easy to define too
+simple or too complex a regexp and effectively block all terms. The blacklist operation is
+fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
+It is recommended that tags_blacklist be used minimally, and only by an administrator with an
+understanding of regular expression syntax and performance.
+
+So the best way to think about the different tables is that they are each tailored to a certain
+use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
+the tag population can continue to grow even if a user (along with their corresponding
+rows in tags_all) is removed.
+
=head2 Tricks
If you want to auto-populate some tags for debugging, do something like this:
+--------------+
26 rows in set (0.00 sec)
-Then, take those numbers and type them into this perl command line:
+Then, take those numbers and type/pipe them into this perl command line:
perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
+Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.
+
=cut