1 package C4::External::Syndetics;
2 # Copyright (C) 2006 LibLime
3 # <jmf at liblime dot com>
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along with
17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
18 # Suite 330, Boston, MA 02111-1307 USA
24 use HTTP::Request::Common;
29 use vars qw($VERSION @ISA @EXPORT);
37 &get_syndetics_summary
39 &get_syndetics_editions
40 &get_syndetics_excerpt
41 &get_syndetics_reviews
# package-level XML::LibXML parser, shared by get_syndetics_summary and get_syndetics_reviews
47 my $parser = XML::LibXML->new();
51 C4::External::Syndetics - Functions for retrieving Syndetics content in Koha
55 This module provides facilities for retrieving Syndetics.com content in Koha
57 =head2 get_syndetics_summary
my $syndetics_summary = get_syndetics_summary( $isbn, $upc, $oclc, $syndetics_elements );
65 Get Summary data from Syndetics
=head2 get_syndetics_index

    my $syndetics_elements = get_syndetics_index( $isbn, $upc, $oclc );

Requests C<INDEX.XML> from Syndetics for the given identifiers and returns a
hashref keyed by every known content type (C<SUMMARY>, C<TOC>, ...) that the
index lists. Each value is the type name itself.

Returns nothing when the response is not XML, is empty, or cannot be parsed,
and undef when none of the known types is listed.

=cut

sub get_syndetics_index {
    my ( $isbn, $upc, $oclc ) = @_;

    # Missing identifiers become empty query values instead of triggering
    # "uninitialized value" warnings when the URL is built.
    ( $isbn, $upc, $oclc ) = map { defined $_ ? $_ : '' } ( $isbn, $upc, $oclc );

    # Syndetics client code, taken from the SyndeticsClientCode preference.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/INDEX.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);    # bound the wait so a slow upstream cannot stall the caller
    $ua->env_proxy;      # pick up proxy settings from the environment
    my $response = $ua->get($url);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    unless ($content) {
        warn "could not retrieve $url";
        return;
    }

    # XMLin dies on malformed XML; trap that so a bad upstream reply is
    # logged rather than aborting the caller.
    my $index;
    my $parsed_ok = eval {
        $index = XML::Simple->new()->XMLin($content);
        1;
    };
    unless ($parsed_ok) {
        warn "Error parsing response from $url";
        return;
    }
    return unless ref $index eq 'HASH';

    my $syndetics_elements;
    for my $available_type ('SUMMARY','TOC','FICTION','AWARDS1','SERIES1','SPSUMMARY','SPREVIEW', 'AVPROFILE', 'AVSUMMARY','DBCHAPTER','LJREVIEW','PWREVIEW','SLJREVIEW','CHREVIEW','BLREVIEW','HBREVIEW','KIREVIEW','CRITICASREVIEW','ANOTES') {
        my $listed = $index->{$available_type};
        next unless defined $listed && $listed =~ /$available_type/;
        $syndetics_elements->{$available_type} = $available_type;
    }

    # Stays undef when the index lists none of the known types.
    return $syndetics_elements;
}
=head2 get_syndetics_summary

    my $summary = get_syndetics_summary( $isbn, $upc, $oclc, $syndetics_elements );

Fetches the summary record from Syndetics and returns the text of its
C<Fld520> element(s). C<AVSUMMARY.XML> is requested when
C<$syndetics_elements> has an C<AVSUMMARY> key, otherwise C<SUMMARY.XML>.

Returns nothing when the response is not XML or is empty, and a false value
when no summary text could be extracted.

=cut

sub get_syndetics_summary {
    my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;

    # Missing identifiers become empty query values instead of triggering
    # "uninitialized value" warnings when the URL is built.
    ( $isbn, $upc, $oclc ) = map { defined $_ ? $_ : '' } ( $isbn, $upc, $oclc );

    # Syndetics client code, taken from the SyndeticsClientCode preference.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    # Only consult the index when the caller actually passed a hashref.
    my $has_av_summary = ref $syndetics_elements eq 'HASH'
        && exists $syndetics_elements->{'AVSUMMARY'};
    my $summary_type = $has_av_summary ? 'AVSUMMARY' : 'SUMMARY';

    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$summary_type.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);    # bound the wait so a slow upstream cannot stall the caller
    $ua->env_proxy;      # pick up proxy settings from the environment
    my $response = $ua->get($url);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    unless ($content) {
        # Nothing to parse; one warning is enough.
        warn "could not retrieve $url";
        return;
    }

    my $summary;
    my $parsed_ok = eval {
        my $doc = $parser->parse_string($content);
        $summary = $doc->findvalue('//Fld520');
        1;
    };
    warn "Error parsing response from $url" unless $parsed_ok;

    return $summary;
}
=head2 get_syndetics_toc

    my $toc = get_syndetics_toc( $isbn, $upc, $oclc );

Fetches C<TOC.XML> from Syndetics and returns an arrayref of the C<Fld970>
entries found under C<VarFlds/VarDFlds/SSIFlds>. The arrayref is empty when
the record has no such entries.

Returns nothing when the response is not XML, is empty, or cannot be parsed.

=cut

sub get_syndetics_toc {
    my ( $isbn, $upc, $oclc ) = @_;

    # Missing identifiers become empty query values instead of triggering
    # "uninitialized value" warnings when the URL is built.
    ( $isbn, $upc, $oclc ) = map { defined $_ ? $_ : '' } ( $isbn, $upc, $oclc );

    # Syndetics client code, taken from the SyndeticsClientCode preference.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/TOC.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);    # bound the wait so a slow upstream cannot stall the caller
    $ua->env_proxy;      # pick up proxy settings from the environment
    my $response = $ua->get($url);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    unless ($content) {
        warn "could not retrieve $url";
        return;
    }

    # XMLin dies on malformed XML; trap that so a bad upstream reply is
    # logged rather than aborting the caller.
    my $record;
    my $parsed_ok = eval {
        $record = XML::Simple->new()->XMLin(
            $content,
            forcearray => [ qw(Fld970) ],
        );
        1;
    };
    unless ($parsed_ok) {
        warn "Error parsing response from $url";
        return;
    }

    # Walk VarFlds -> VarDFlds -> SSIFlds one level at a time so that a
    # missing or non-hash level neither autovivifies nor dies.
    my $node = $record;
    for my $level (qw(VarFlds VarDFlds SSIFlds)) {
        $node = ref $node eq 'HASH' ? $node->{$level} : undef;
    }

    my $toc = ( ref $node eq 'HASH' && ref $node->{Fld970} eq 'ARRAY' )
        ? $node->{Fld970}
        : [];

    return $toc;
}
=head2 get_syndetics_excerpt

    my $excerpt_xml = get_syndetics_excerpt( $isbn, $upc, $oclc );

Fetches C<DBCHAPTER.XML> from Syndetics and returns the C<Fld520> entries
found under C<VarFlds/VarDFlds/Notes>, serialized back to a string with
C<XMLout> (C<NoEscape> enabled). As before, a record without C<Fld520>
entries is serialized as an empty list.

Returns nothing when the response is not XML, is empty, or cannot be parsed.

=cut

sub get_syndetics_excerpt {
    my ( $isbn, $upc, $oclc ) = @_;

    # Missing identifiers become empty query values instead of triggering
    # "uninitialized value" warnings when the URL is built.
    ( $isbn, $upc, $oclc ) = map { defined $_ ? $_ : '' } ( $isbn, $upc, $oclc );

    # Syndetics client code, taken from the SyndeticsClientCode preference.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/DBCHAPTER.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);    # bound the wait so a slow upstream cannot stall the caller
    $ua->env_proxy;      # pick up proxy settings from the environment
    my $response = $ua->get($url);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    unless ($content) {
        warn "could not retrieve $url";
        return;
    }

    # XMLin dies on malformed XML; trap that so a bad upstream reply is
    # logged rather than aborting the caller.
    my $record;
    my $parsed_ok = eval {
        $record = XML::Simple->new()->XMLin(
            $content,
            forcearray => [ qw(Fld520) ],
        );
        1;
    };
    unless ($parsed_ok) {
        warn "Error parsing response from $url";
        return;
    }

    # Walk VarFlds -> VarDFlds -> Notes one level at a time so that a
    # missing or non-hash level neither autovivifies nor dies.
    my $node = $record;
    for my $level (qw(VarFlds VarDFlds Notes)) {
        $node = ref $node eq 'HASH' ? $node->{$level} : undef;
    }

    my $excerpt = ( ref $node eq 'HASH' && ref $node->{Fld520} eq 'ARRAY' )
        ? $node->{Fld520}
        : [];

    return XMLout($excerpt, NoEscape => 1);
}
=head2 get_syndetics_reviews

    my $reviews = get_syndetics_reviews( $isbn, $upc, $oclc, $syndetics_elements );

For every known review source that C<$syndetics_elements> lists, fetches the
matching review file from Syndetics and extracts the text of its C<Fld520>
element(s).

Returns an arrayref of hashrefs of the form
C<< { title => ..., reviews => [ { content => ... } ] } >>. Sources that are
not listed, return non-XML or empty content, fail to parse, or have no
review text are skipped.

=cut

sub get_syndetics_reviews {
    my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;

    # Missing identifiers become empty query values instead of triggering
    # "uninitialized value" warnings when the URLs are built.
    ( $isbn, $upc, $oclc ) = map { defined $_ ? $_ : '' } ( $isbn, $upc, $oclc );

    # Syndetics client code, taken from the SyndeticsClientCode preference.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    # Treat anything other than a hashref as "no review types available".
    my $available = ref $syndetics_elements eq 'HASH' ? $syndetics_elements : {};

    my $review_sources = [
        {title => 'Library Journal Review', file => 'LJREVIEW.XML', element => 'LJREVIEW'},
        {title => 'Publishers Weekly Review', file => 'PWREVIEW.XML', element => 'PWREVIEW'},
        {title => 'School Library Journal Review', file => 'SLJREVIEW.XML', element => 'SLJREVIEW'},
        {title => 'CHOICE Review', file => 'CHREVIEW.XML', element => 'CHREVIEW'},
        {title => 'Booklist Review', file => 'BLREVIEW.XML', element => 'BLREVIEW'},
        {title => 'Horn Book Review', file => 'HBREVIEW.XML', element => 'HBREVIEW'},
        {title => 'Kirkus Book Review', file => 'KIREVIEW.XML', element => 'KIREVIEW'},
        {title => 'Criticas Review', file => 'CRITICASREVIEW.XML', element => 'CRITICASREVIEW'},
        {title => 'Spanish Review', file => 'SPREVIEW.XML', element => 'SPREVIEW'},
    ];

    # One user agent serves every request; it does not depend on the source.
    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);    # bound the wait so a slow upstream cannot stall the caller
    $ua->env_proxy;      # pick up proxy settings from the environment

    my @reviews;
    for my $source (@$review_sources) {
        my $element = $source->{element};
        my $listed  = $available->{$element};

        # Only request reviews that the index reported for this record.
        next unless $listed and $element =~ $listed;

        my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$source->{file}&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

        my $response = $ua->get($url);
        next unless $response->content_type =~ /xml/;

        my $content = $response->content;
        unless ($content) {
            # Nothing to parse; one warning is enough.
            warn "could not retrieve $url";
            next;
        }

        my $parsed_ok = eval {
            my $doc = $parser->parse_string($content);

            # note that using findvalue strips any HTML elements embedded
            # in that review. That helps us handle slight differences
            # in the output provided by Syndetics 'old' and 'new' versions
            # of their service and cleans any questionable HTML that
            # may be present in the reviews, but does mean that any
            # <B> and <I> tags used to format the review are also gone.
            my $result = $doc->findvalue('//Fld520');
            push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result;
            1;
        };
        warn "Error parsing response from $url" unless $parsed_ok;
    }

    return \@reviews;
}
=head2 get_syndetics_editions

    my $similar_items = get_syndetics_editions( $isbn, $upc, $oclc );

Fetches C<FICTION.XML> from Syndetics and returns an arrayref of the
C<Fld020> entries found under C<VarFlds/VarDFlds/NumbCode>. The arrayref is
empty when the record has no such entries.

Returns nothing when the response is not XML, is empty, or cannot be parsed.

=cut

sub get_syndetics_editions {
    my ( $isbn, $upc, $oclc ) = @_;

    # Missing identifiers become empty query values instead of triggering
    # "uninitialized value" warnings when the URL is built.
    ( $isbn, $upc, $oclc ) = map { defined $_ ? $_ : '' } ( $isbn, $upc, $oclc );

    # Syndetics client code, taken from the SyndeticsClientCode preference.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/FICTION.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);    # bound the wait so a slow upstream cannot stall the caller
    $ua->env_proxy;      # pick up proxy settings from the environment
    my $response = $ua->get($url);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    unless ($content) {
        warn "could not retrieve $url";
        return;
    }

    # XMLin dies on malformed XML; trap that so a bad upstream reply is
    # logged rather than aborting the caller.
    my $record;
    my $parsed_ok = eval {
        $record = XML::Simple->new()->XMLin(
            $content,
            forcearray => [ qw(Fld020) ],
        );
        1;
    };
    unless ($parsed_ok) {
        warn "Error parsing response from $url";
        return;
    }

    # Walk VarFlds -> VarDFlds -> NumbCode one level at a time so that a
    # missing or non-hash level neither autovivifies nor dies.
    my $node = $record;
    for my $level (qw(VarFlds VarDFlds NumbCode)) {
        $node = ref $node eq 'HASH' ? $node->{$level} : undef;
    }

    my $similar_items = ( ref $node eq 'HASH' && ref $node->{Fld020} eq 'ARRAY' )
        ? $node->{Fld020}
        : [];

    return $similar_items;
}
=head2 get_syndetics_anotes

    my $anotes = get_syndetics_anotes( $isbn, $upc, $oclc );

Fetches C<ANOTES.XML> from Syndetics and collects the C<a> subfield content
of every C<Fld980> entry found under C<VarFlds/VarDFlds/SSIFlds>.

Returns an arrayref of C<< { content => ... } >> hashrefs, empty when the
record has no such entries. Returns nothing when the response is not XML,
is empty, or cannot be parsed.

=cut

sub get_syndetics_anotes {
    my ( $isbn, $upc, $oclc ) = @_;

    # Missing identifiers become empty query values instead of triggering
    # "uninitialized value" warnings when the URL is built.
    ( $isbn, $upc, $oclc ) = map { defined $_ ? $_ : '' } ( $isbn, $upc, $oclc );

    # Syndetics client code, taken from the SyndeticsClientCode preference.
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
    $syndetics_client_code = '' unless defined $syndetics_client_code;

    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/ANOTES.XML&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);    # bound the wait so a slow upstream cannot stall the caller
    $ua->env_proxy;      # pick up proxy settings from the environment
    my $response = $ua->get($url);
    return unless $response->content_type =~ /xml/;

    my $content = $response->content;
    unless ($content) {
        warn "could not retrieve $url";
        return;
    }

    # XMLin dies on malformed XML; trap that so a bad upstream reply is
    # logged rather than aborting the caller.
    my $record;
    my $parsed_ok = eval {
        $record = XML::Simple->new()->XMLin(
            $content,
            forcearray => [ qw(Fld980) ],
        );
        1;
    };
    unless ($parsed_ok) {
        warn "Error parsing response from $url";
        return;
    }

    # Walk VarFlds -> VarDFlds -> SSIFlds one level at a time so that a
    # missing or non-hash level neither autovivifies nor dies.
    my $node = $record;
    for my $level (qw(VarFlds VarDFlds SSIFlds)) {
        $node = ref $node eq 'HASH' ? $node->{$level} : undef;
    }
    my $fields = ( ref $node eq 'HASH' && ref $node->{Fld980} eq 'ARRAY' )
        ? $node->{Fld980}
        : [];

    my @anotes;
    for my $fld980 (@$fields) {
        # Entries without a structured "a" subfield carry no usable content.
        next unless ref $fld980 eq 'HASH' && ref $fld980->{a} eq 'HASH';

        # The content is sometimes a single value and sometimes a list,
        # depending on how the data was serialized; flatten both shapes.
        my $note = $fld980->{a}->{content};
        push @anotes, map { +{ content => $_ } }
            ( ref $note eq 'ARRAY' ? @$note : $note );
    }

    return \@anotes;
}
325 Joshua Ferraro <jmf@liblime.com>