#
# Copyright 2009 Tamil s.a.r.l.
#
-# This software is placed under the gnu General Public License, v2
-# (http://www.gnu.org/licenses/gpl.html)
+# This file is part of Koha.
#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"is_success: ", $url->{ is_success }, "\n",
"status: ", $url->{ status }, "\n";
}
-
+
=head1 FUNCTIONS
=head2 new
=cut
+use strict;
+use warnings;
use LWP::UserAgent;
use HTTP::Request;
use C4::Biblio;
sub new {
my $self = {};
- my $class = shift;
+ my ($class, $timeout, $agent) = @_;
- $self->{ user_agent } = new LWP::UserAgent;
+ my $uagent = new LWP::UserAgent;
+ $uagent->agent( $agent ) if $agent;
+ $uagent->timeout( $timeout) if $timeout;
+ $self->{ user_agent } = $uagent;
+ $self->{ bad_url } = { };
bless $self, $class;
return $self;
my $biblionumber = shift;
my $uagent = $self->{ user_agent };
my $host = $self->{ host_default };
+ my $bad_url = $self->{ bad_url };
my $record = GetMarcBiblio( $biblionumber );
- return undef unless $record->field('856');
+ return unless $record->field('856');
my @urls = ();
foreach my $field ( $record->field('856') ) {
next unless $url;
$url = "$host/$url" unless $url =~ /^http/;
my $check = { url => $url };
- my $req = HTTP::Request->new( GET => $url );
- my $res = $uagent->request( $req, sub { die }, 1 );
- if ( $res->is_success ) {
+ if ( $bad_url->{ $url } ) {
$check->{ is_success } = 1;
- $check->{ status } = 'ok';
+ $check->{ status } = '500 Site already checked';
}
else {
- $check->{ is_success } = 0;
- $check->{ status } = $res->status_line;
+ my $req = HTTP::Request->new( GET => $url );
+ my $res = $uagent->request( $req, sub { die }, 1 );
+ if ( $res->is_success ) {
+ $check->{ is_success } = 1;
+ $check->{ status } = 'ok';
+ }
+ else {
+ $check->{ is_success } = 0;
+ $check->{ status } = $res->status_line;
+ $bad_url->{ $url } = 1;
+ }
}
- push( @urls, $check );
+ push @urls, $check;
}
return \@urls;
}
use diagnostics;
use Carp;
-use YAML::XS;
-
use Pod::Usage;
use Getopt::Long;
use C4::Context;
my $host_pro = '';
my $html = 0;
my $uriedit = "/cgi-bin/koha/cataloguing/addbiblio.pl?biblionumber=";
+my $agent = '';
+my $timeout = 15;
GetOptions(
'verbose' => \$verbose,
'html' => \$html,
'help' => \$help,
'host=s' => \$host,
'host-pro=s' => \$host_pro,
+ 'agent=s' => \$agent;
+ 'timeout=i', => \$timeout,
);
# Check all URLs from all current Koha biblio records
#
sub check_all_url {
- my $checker = C4::URL::Checker->new();
+ my $checker = C4::URL::Checker->new($timeout,$agent);
$checker->{ host_default } = $host;
my $context = new C4::Context( );
my $sth = $dbh->prepare(
"SELECT biblionumber FROM biblioitems WHERE url <> ''" );
$sth->execute;
- print "<html>\n<body>\n<table>\n" if $html;
+ if ( $html ) {
+ print <<EOS;
+<html>
+<body>
+<table>
+EOS
+ }
while ( my ($biblionumber) = $sth->fetchrow ) {
my $result = $checker->check_biblio( $biblionumber );
next unless $result; # No URL
$host_pro = $host;
}
else {
- print "Error: host_pro parameter or host must be provided in html mode\n";
+ print "Error: host-pro parameter or host must be provided in html mode\n";
exit;
}
}
=over
-=item check-url.pl [--verbose|--help] [--host=http://default.tld]
+=item check-url.pl [--verbose|--help] [--agent=agent-string] [--host=http://default.tld]
-Scan all URL found in 856$u and display if ressources are available or not.
+Scan all URLs found in 856$u of bib records
+and display if resources are available or not.
=back
=item B<--verbose|-v>
-Outputs succeed URL checks with failed ones.
+Outputs both successful and failed URLs.
=item B<--html>
Server host used to link to biblio record editing page.
+=item B<--agent=agent-string>
+
+Change default libwww user-agent string to custom. Some sites do
+not like libwww user-agent and return false 40x failure codes,
+so this allows Koha to report itself as Koha, or a browser.
+
+=item B<--timeout=15>
+
+Timeout for fetching URLs. By default 15 seconds.
+
=item B<--help|-h>
Print this help page.