From: Galen Charlton Date: Thu, 30 Apr 2009 19:10:34 +0000 (-0500) Subject: Syndetics: start switch to XML::LibXML to parse results X-Git-Tag: new_acq_a_porter~166 X-Git-Url: http://koha-dev.rot13.org:8081/gitweb/?a=commitdiff_plain;h=050f9115de3ede3ced8c6132a5cec5264d0acb3b;p=koha_gimpoz Syndetics: start switch to XML::LibXML to parse results XML::Simple doesn't handle mixed content, which is a problem because reviews provided by Syndetics sometimes contain HTML tags. Furthermore, it's often easier to write XPath to extract bits of an XML document than trying to play with XML::Simple's options. Signed-off-by: Galen Charlton --- diff --git a/C4/External/Syndetics.pm b/C4/External/Syndetics.pm index 5275cc90c0..f61a31c7aa 100644 --- a/C4/External/Syndetics.pm +++ b/C4/External/Syndetics.pm @@ -18,6 +18,7 @@ package C4::External::Syndetics; # Suite 330, Boston, MA 02111-1307 USA use XML::Simple; +use XML::LibXML; use LWP::Simple; use LWP::UserAgent; use HTTP::Request::Common; @@ -42,6 +43,9 @@ BEGIN { ); } +# package-level variable +my $parser = XML::LibXML->new(); + =head1 NAME C4::External::Syndetics - Functions for retrieving Syndetics content in Koha @@ -219,36 +223,21 @@ sub get_syndetics_reviews { my $content = $response->content; warn "could not retrieve $url" unless $content; - my $xmlsimple = XML::Simple->new(); - eval { - $response = $xmlsimple->XMLin( - $content, - ForceContent => 1, - forcearray => [ qw(Fld520) ] - ) unless !$content; + + eval { + my $doc = $parser->parse_string($content); + + # note that using findvalue strips any HTML elements embedded + # in that review. That helps us handle slight differences + # in the output provided by Syndetics 'old' and 'new' versions + # of their service and cleans any questionable HTML that + # may be present in the reviews, but does mean that any + # <B> and <I> tags used to format the review are also gone. 
+ my $result = $doc->findvalue('//Fld520'); + push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result; }; - - for my $subfield_a (@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}}) { - my @content; - # this is absurd, but sometimes this data serializes differently - if (exists $subfield_a->{content}) { - if (ref($subfield_a->{content} eq 'ARRAY')) { - for my $content (@{$subfield_a->{content}}) { - push @content, {content => $content}; - } - } else { - push @content, {content => $subfield_a->{content}}; - } - } - elsif(ref($subfield_a->{a}->{content}) eq 'ARRAY') { - for my $content (@{$subfield_a->{a}->{content}}) { - push @content, {content => $content}; - } - } - else { - push @content, {content => $subfield_a->{a}->{content}}; - } - push @reviews, {title => $source->{title}, reviews => \@content}; + if ($@) { + warn "Error parsing response from $url"; } } return \@reviews;