# Suite 330, Boston, MA 02111-1307 USA
use XML::Simple;
+use XML::LibXML;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request::Common;
);
}
+# package-level XML::LibXML parser instance
+my $parser = XML::LibXML->new();
+
=head1 NAME
C4::External::Syndetics - Functions for retrieving Syndetics content in Koha
# grab the Syndetics client code from the SyndeticsClientCode preference
my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
- my $url = "http://syndetics.com/index.aspx?isbn=$isbn/INDEX.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/INDEX.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
# grab the Syndetics client code from the SyndeticsClientCode preference
my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
- my $url = "http://syndetics.com/index.aspx?isbn=$isbn/SUMMARY.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/SUMMARY.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
# grab the Syndetics client code from the SyndeticsClientCode preference
my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
- my $url = "http://syndetics.com/index.aspx?isbn=$isbn/TOC.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/TOC.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
# grab the Syndetics client code from the SyndeticsClientCode preference
my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
- my $url = "http://syndetics.com/index.aspx?isbn=$isbn/DBCHAPTER.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/DBCHAPTER.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
#warn "Skipping $source->{element} doesn't match $syndetics_elements->{$source->{element}} \n";
next;
}
- my $url = "http://syndetics.com/index.aspx?isbn=$isbn/$source->{file}&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$source->{file}&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
my $content = $response->content;
warn "could not retrieve $url" unless $content;
- my $xmlsimple = XML::Simple->new();
- eval {
- $response = $xmlsimple->XMLin(
- $content,
- ForceContent => 1,
- forcearray => [ qw(Fld520) ]
- ) unless !$content;
+
+ eval {
+ my $doc = $parser->parse_string($content);
+
+ # Note that using findvalue strips any HTML elements embedded
+ # in the review. That helps us handle slight differences
+ # in the output provided by the 'old' and 'new' versions of the
+ # Syndetics service and cleans any questionable HTML that
+ # may be present in the reviews, but it does mean that any
+ # <B> and <I> tags used to format the review are also gone.
+ my $result = $doc->findvalue('//Fld520');
+ push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result;
};
-
- for my $subfield_a (@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}}) {
- my @content;
- # this is absurd, but sometimes this data serializes differently
- if(ref($subfield_a->{a}->{content}) eq 'ARRAY') {
- for my $content (@{$subfield_a->{a}->{content}}) {
- push @content, {content => $content};
- }
- }
- else {
- push @content, {content => $subfield_a->{a}->{content}};
- }
- push @reviews, {title => $source->{title}, reviews => \@content};
+ if ($@) {
+ warn "Error parsing response from $url";
}
}
return \@reviews;
# grab the Syndetics client code from the SyndeticsClientCode preference
my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
- my $url = "http://syndetics.com/index.aspx?isbn=$isbn/FICTION.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/FICTION.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
# grab the Syndetics client code from the SyndeticsClientCode preference
my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
- my $url = "http://syndetics.com/index.aspx?isbn=$isbn/ANOTES.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
+ my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/ANOTES.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;