# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License along with
-# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
-# Suite 330, Boston, MA 02111-1307 USA
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use XML::Simple;
+use XML::LibXML;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request::Common;
# Compile-time setup: load Exporter, set $VERSION, and declare @ISA/@EXPORT.
BEGIN {
require Exporter;
# NOTE(review): the new value 3.07.00.049 is a bare literal with more than
# one dot, which Perl parses as a v-string rather than a decimal number --
# confirm that is the intended form for $VERSION.
- $VERSION = 0.03;
+ $VERSION = 3.07.00.049;
@ISA = qw(Exporter);
@EXPORT = qw(
&get_syndetics_index
);
}
+# package-level XML::LibXML parser, created once and reused by the subs that parse Syndetics XML
+my $parser = XML::LibXML->new();
+
=head1 NAME
C4::External::Syndetics - Functions for retrieving Syndetics content in Koha
=head2 get_syndetics_summary
-=over 4
-
-my $syndetics_summary= &get_syndetics_summary( $isbn );
-
-=back
+ my $syndetics_summary = get_syndetics_summary( $isbn, $upc, $oclc, $syndetics_elements );
Get Summary data from Syndetics
sub get_syndetics_index {
my ( $isbn,$upc,$oclc ) = @_;
- # grab the AWSAccessKeyId: mine is '0V5RRRRJZ3HR2RQFNHR2'
- my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
-
- my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/INDEX.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
-
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->env_proxy;
- my $response = $ua->get($url);
- unless ($response->content_type =~ /xml/) {
- return;
- }
+ my $response = _fetch_syndetics_content('INDEX.XML', $isbn, $upc, $oclc);
my $content = $response->content;
- warn "could not retrieve $url" unless $content;
my $xmlsimple = XML::Simple->new();
$response = $xmlsimple->XMLin(
$content,
) unless !$content;
my $syndetics_elements;
- for my $available_type ('SUMMARY','TOC','FICTION','AWARDS1','SERIES1','SPSUMMARY','SPREVIEW','AVSUMMARY','DBCHAPTER','LJREVIEW','PWREVIEW','SLJREVIEW','CHREVIEW','BLREVIEW','HBREVIEW','KIREVIEW','CRITICASREVIEW','ANOTES') {
+ for my $available_type ('SUMMARY','TOC','FICTION','AWARDS1','SERIES1','SPSUMMARY','SPREVIEW', 'AVPROFILE', 'AVSUMMARY','DBCHAPTER','LJREVIEW','PWREVIEW','SLJREVIEW','CHREVIEW','BLREVIEW','HBREVIEW','KIREVIEW','CRITICASREVIEW','ANOTES') {
if (exists $response->{$available_type} && $response->{$available_type} =~ /$available_type/) {
$syndetics_elements->{$available_type} = $available_type;
#warn "RESPONSE: $available_type : $response->{$available_type}";
}
# Fetch the Syndetics summary document for a title and return the string
# value of its //Fld520 XPath match.
#
# Arguments: $isbn, $upc and $oclc are passed through to
# _fetch_syndetics_content; $syndetics_elements is dereferenced as a hash and
# only its 'AVSUMMARY' key is consulted here.
#
# Returns the summary string when it is true. Returns early with no value
# when the response content type does not match /xml/.
sub get_syndetics_summary {
- my ( $isbn,$upc,$oclc ) = @_;
+ my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;
- # grab the AWSAccessKeyId: mine is '0V5RRRRJZ3HR2RQFNHR2'
- my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
-
- my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/SUMMARY.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->env_proxy;
- my $response = $ua->get($url);
# Request AVSUMMARY.XML when the caller's element hash has an 'AVSUMMARY'
# key; otherwise fall back to SUMMARY.XML.
+ my $summary_type = exists($syndetics_elements->{'AVSUMMARY'}) ? 'AVSUMMARY.XML' : 'SUMMARY.XML';
+ my $response = _fetch_syndetics_content($summary_type, $isbn, $upc, $oclc);
# Anything that is not served with an XML content type is ignored.
unless ($response->content_type =~ /xml/) {
return;
}
my $content = $response->content;
- warn "could not retrieve $url" unless $content;
- my $xmlsimple = XML::Simple->new();
- $response = $xmlsimple->XMLin(
- $content,
- forcearray => [ qw(Fld520) ],
- ) unless !$content;
- # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a
my $summary;
- $summary = \@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}} if $response;
# Parse inside eval so that any exception raised while parsing or querying
# the document is caught and reported by the warn below instead of
# propagating to the caller.
+ eval {
+ my $doc = $parser->parse_string($content);
+ $summary = $doc->findvalue('//Fld520');
+ };
+ if ($@) {
+ warn "Error parsing Syndetics $summary_type";
+ }
# Only a true (non-empty) summary is returned explicitly.
return $summary if $summary;
}
sub get_syndetics_toc {
my ( $isbn,$upc,$oclc ) = @_;
- # grab the AWSAccessKeyId: mine is '0V5RRRRJZ3HR2RQFNHR2'
- my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
-
- my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/TOC.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->env_proxy;
-
- my $response = $ua->get($url);
+ my $response = _fetch_syndetics_content('TOC.XML', $isbn, $upc, $oclc);
unless ($response->content_type =~ /xml/) {
return;
}
my $content = $response->content;
- warn "could not retrieve $url" unless $content;
my $xmlsimple = XML::Simple->new();
$response = $xmlsimple->XMLin(
$content,
sub get_syndetics_excerpt {
my ( $isbn,$upc,$oclc ) = @_;
- # grab the AWSAccessKeyId: mine is '0V5RRRRJZ3HR2RQFNHR2'
- my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
-
- my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/DBCHAPTER.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->env_proxy;
- my $response = $ua->get($url);
+ my $response = _fetch_syndetics_content('DBCHAPTER.XML', $isbn, $upc, $oclc);
unless ($response->content_type =~ /xml/) {
return;
}
my $content = $response->content;
- warn "could not retrieve $url" unless $content;
my $xmlsimple = XML::Simple->new();
$response = $xmlsimple->XMLin(
$content,
sub get_syndetics_reviews {
my ( $isbn,$upc,$oclc,$syndetics_elements ) = @_;
- # grab the AWSAccessKeyId: mine is '0V5RRRRJZ3HR2RQFNHR2'
- my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
my @reviews;
my $review_sources = [
{title => 'Library Journal Review', file => 'LJREVIEW.XML', element => 'LJREVIEW'},
#warn "Skipping $source->{element} doesn't match $syndetics_elements->{$source->{element}} \n";
next;
}
- my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$source->{file}&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
-
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->env_proxy;
-
- my $response = $ua->get($url);
+ my $response = _fetch_syndetics_content($source->{file}, $isbn, $upc, $oclc);
unless ($response->content_type =~ /xml/) {
next;
}
my $content = $response->content;
- warn "could not retrieve $url" unless $content;
- my $xmlsimple = XML::Simple->new();
- eval {
- $response = $xmlsimple->XMLin(
- $content,
- ForceContent => 1,
- forcearray => [ qw(Fld520) ]
- ) unless !$content;
+
+ eval {
+ my $doc = $parser->parse_string($content);
+
+ # note that using findvalue strips any HTML elements embedded
+ # in that review. That helps us handle slight differences
+ # in the output provided by Syndetics 'old' and 'new' versions
+ # of their service and cleans any questionable HTML that
+ # may be present in the reviews, but does mean that any
+ # <B> and <I> tags used to format the review are also gone.
+ my $result = $doc->findvalue('//Fld520');
+ push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result;
};
-
- for my $subfield_a (@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}}) {
- my @content;
- # this is absurd, but sometimes this data serializes differently
- if(ref($subfield_a->{a}->{content}) eq 'ARRAY') {
- for my $content (@{$subfield_a->{a}->{content}}) {
- push @content, {content => $content};
- }
- }
- else {
- push @content, {content => $subfield_a->{a}->{content}};
- }
- push @reviews, {title => $source->{title}, reviews => \@content};
+ if ($@) {
+ warn "Error parsing Syndetics $source->{title} review";
}
}
return \@reviews;
sub get_syndetics_editions {
my ( $isbn,$upc,$oclc ) = @_;
- # grab the AWSAccessKeyId: mine is '0V5RRRRJZ3HR2RQFNHR2'
- my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
-
- my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/FICTION.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->env_proxy;
-
- my $response = $ua->get($url);
+ my $response = _fetch_syndetics_content('FICTION.XML', $isbn, $upc, $oclc);
unless ($response->content_type =~ /xml/) {
return;
}
my $content = $response->content;
- warn "could not retrieve $url" unless $content;
my $xmlsimple = XML::Simple->new();
$response = $xmlsimple->XMLin(
$content,
sub get_syndetics_anotes {
my ( $isbn,$upc,$oclc) = @_;
- # grab the AWSAccessKeyId: mine is '0V5RRRRJZ3HR2RQFNHR2'
- my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
-
- my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/ANOTES.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->env_proxy;
-
- my $response = $ua->get($url);
+ my $response = _fetch_syndetics_content('ANOTES.XML', $isbn, $upc, $oclc);
unless ($response->content_type =~ /xml/) {
return;
}
my $content = $response->content;
- warn "could not retrieve $url" unless $content;
my $xmlsimple = XML::Simple->new();
$response = $xmlsimple->XMLin(
$content,
return \@anotes;
}
# Build the Syndetics request URL for one content file and fetch it over HTTP.
#
# Arguments: $element is the file name placed after the ISBN in the query
# string (callers visible in this file pass names such as 'INDEX.XML' and
# 'TOC.XML'); $isbn, $upc and $oclc are the title identifiers.
#
# Returns the response object produced by LWP::UserAgent's get(), unchanged.
# Callers inspect its content_type and content themselves. A warning is
# emitted when the response content is false (e.g. empty).
+sub _fetch_syndetics_content {
+ my ( $element, $isbn, $upc, $oclc ) = @_;
+
# Replace undefined identifiers with empty strings before they are
# interpolated into the URL below.
+ $isbn = '' unless defined $isbn;
+ $upc = '' unless defined $upc;
+ $oclc = '' unless defined $oclc;
+
# The client code is read from the SyndeticsClientCode preference.
+ my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
+
# NOTE(review): the values are interpolated into the query string without
# URI escaping -- confirm callers only pass URL-safe identifiers.
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$element&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $ua = LWP::UserAgent->new;
# Set the user agent's timeout to 10 and load proxy settings from the
# environment before issuing the request.
+ $ua->timeout(10);
+ $ua->env_proxy;
+ my $response = $ua->get($url);
+
+ warn "could not retrieve $url" unless $response->content;
+ return $response;
+
+}
1;
__END__
=head1 NOTES
+=cut
+
=head1 AUTHOR
Joshua Ferraro <jmf@liblime.com>