3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use CGI qw( :standard -oldstyle_urls -utf8 );
# Main script body: probe for gzip support, refuse service when OAI-PMH is
# disabled, choose the output encoding, then build the repository object.
#
# PerlIO::gzip is loaded inside eval so that a missing module is not fatal.
30 eval { require PerlIO::gzip };
# Service guard: taken when the 'OAI-PMH' system preference is false. The
# response described below is plain text with a 404 status line.
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
# Content negotiation: the gzip branch is taken only when $GZIP is true and one
# of the values returned for HTTP_ACCEPT_ENCODING is exactly the string gzip.
# NOTE(review): this is an exact match per list element - confirm that a
# combined header value such as "gzip, deflate" is handled as intended.
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
# Add the gzip layer to STDOUT for the gzip branch.
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
# Encode output written to STDOUT as UTF-8.
61 binmode STDOUT, ':encoding(UTF-8)';
# Build the repository object; its constructor (package C4::OAI::Repository in
# this file) validates the CGI request and dispatches on the OAI verb.
62 my $repository = C4::OAI::Repository->new();
# C4::OAI::ResumptionToken
# The constructor accepts named arguments. When a resumptionToken string is
# given it is split on "/" into metadataPrefix, offset, from, until and set.
# Otherwise the metadataPrefix, from, until and offset arguments are read.
# The values are stored on the object (keys metadata_prefix, offset, from,
# until, set, from_arg, until_arg), joined with "/" into the token string, and
# the offset is used as the cursor.
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
75 package C4::OAI::ResumptionToken;
81 use base ("HTTP::OAI::ResumptionToken");
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
# Resuming: all five fields come from the token string.
# NOTE(review): the fields are taken from the request as-is; offset is later
# interpolated into SQL by the list packages - confirm it is validated.
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
# First request: fields come from the individual arguments.
95 $metadata_prefix = $args{ metadataPrefix };
# A missing or false "from" falls back to 1970-01-01.
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
# Current UTC date formatted as YYYY-MM-DD and assigned to $until.
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
# A 10-character value is treated as a bare date: start of day is appended to
# "from" and end of day to "until".
103 $from .= 'T00:00:00Z' if length($from) == 10;
104 $until .= 'T23:59:59Z' if length($until) == 10;
# A missing or false offset becomes 0.
105 $offset = $args{ offset } || 0;
109 $self->{ metadata_prefix } = $metadata_prefix;
110 $self->{ offset } = $offset;
111 $self->{ from } = $from;
112 $self->{ until } = $until;
113 $self->{ set } = $set;
# from_arg and until_arg hold the same bounds after _strip_UTC_designators
# has replaced T with a space and removed Z.
114 $self->{ from_arg } = _strip_UTC_designators($from);
115 $self->{ until_arg } = _strip_UTC_designators($until);
# Serialise the state back into the token string.
# NOTE(review): "/" is the field separator, so a field value containing "/"
# would not survive the split above - confirm set specs cannot contain it.
117 $self->resumptionToken(
118 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
119 $self->cursor( $offset );
# _strip_UTC_designators( $timestamp )
# Replaces every T in the given string with a space and deletes every Z.
124 sub _strip_UTC_designators {
125 my ( $timestamp ) = @_;
126 $timestamp =~ s/T/ /g;
127 $timestamp =~ s/Z//g;
131 # __END__ C4::OAI::ResumptionToken
# C4::OAI::Identify - response object for the OAI-PMH Identify verb.
# Subclass of HTTP::OAI::Identify. The constructor receives the repository
# object and fills the response from Koha system preferences (LibraryName,
# KohaAdminEmailAddress, OAI-PMH:MaxCount, OAI-PMH:DeletedRecord).
135 package C4::OAI::Identify;
142 use base ("HTTP::OAI::Identify");
145 my ($class, $repository) = @_;
# Capture everything in front of the query string of the request URL.
# NOTE(review): the pattern requires a "?"; for a URL without one $baseURL
# stays undefined - confirm that case cannot occur.
147 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
148 my $self = $class->SUPER::new(
150 repositoryName => C4::Context->preference("LibraryName"),
151 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
152 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
153 granularity => 'YYYY-MM-DD',
154 earliestDatestamp => '0001-01-01',
155 deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no',
158 # FIXME - alas, the description element is not so simple; to validate
159 # against the OAI-PMH schema, it cannot contain just a string,
160 # but one or more elements that validate against another XML schema.
161 # For now, simply omitting it.
162 # $self->description( "Koha OAI Repository" );
# Declare gzip as a supported compression in the Identify response.
# NOTE(review): this is declared unconditionally, while the main script only
# compresses when $GZIP is true - confirm this is acceptable.
164 $self->compression( 'gzip' );
169 # __END__ C4::OAI::Identify
# C4::OAI::ListMetadataFormats - response object for the ListMetadataFormats
# verb. Subclass of HTTP::OAI::ListMetadataFormats.
# When the repository carries a loaded configuration ($repository->{conf}),
# one HTTP::OAI::MetadataFormat is registered per name listed in
# $repository->{koha_metadata_format}, using the metadataPrefix, schema and
# metadataNamespace of the matching entry under conf->{format}.
# The remaining calls register two hard-coded formats, oai_dc and marcxml
# (presumably the fallback when no configuration is loaded - TODO confirm).
173 package C4::OAI::ListMetadataFormats;
179 use base ("HTTP::OAI::ListMetadataFormats");
182 my ($class, $repository) = @_;
184 my $self = $class->SUPER::new();
# Configured formats.
186 if ( $repository->{ conf } ) {
187 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
188 my $format = $repository->{ conf }->{ format }->{ $name };
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => $format->{metadataPrefix},
191 schema => $format->{schema},
192 metadataNamespace => $format->{metadataNamespace}, ) );
# Hard-coded Dublin Core format.
196 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
197 metadataPrefix => 'oai_dc',
198 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
199 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
# Hard-coded MARCXML format.
# NOTE(review): both URL strings below contain a blank between
# "http://www.loc.gov/" and "standards/..." - this looks like a typo in text
# that is emitted at runtime; confirm before correcting.
201 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
202 metadataPrefix => 'marcxml',
203 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
204 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
211 # __END__ C4::OAI::ListMetadataFormats
# C4::OAI::Record - an HTTP::OAI::Record that carries metadata.
# Constructor arguments, in order: the repository object, the MARCXML string
# of the record, its timestamp, an array reference of setSpec strings, then
# named arguments. The named arguments read here are identifier and
# metadataPrefix; all of them are also passed to the parent constructor.
215 package C4::OAI::Record;
220 use HTTP::OAI::Metadata::OAI_DC;
222 use base ("HTTP::OAI::Record");
225 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
227 my $self = $class->SUPER::new(%args);
# Replace the first blank in the timestamp with T and append Z.
229 $timestamp =~ s/ /T/, $timestamp .= 'Z';
# Record header: identifier and datestamp, then one setSpec per set.
230 $self->header( new HTTP::OAI::Header(
231 identifier => $args{identifier},
232 datestamp => $timestamp,
235 foreach my $setSpec (@$setSpecs) {
236 $self->header->setSpec($setSpec);
# Parse the MARCXML string into a DOM document.
239 my $parser = XML::LibXML->new();
240 my $record_dom = $parser->parse_string( $marcxml );
241 my $format = $args{metadataPrefix};
# Any format other than marcxml is produced by running the DOM through the
# stylesheet the repository returns for that format.
# NOTE(review): the OPACBaseURL value is wrapped in single quotes, presumably
# because stylesheet parameters are evaluated as XPath expressions - confirm.
242 if ( $format ne 'marcxml' ) {
244 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
246 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
# Attach the (possibly transformed) DOM as the metadata of the record.
248 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
253 # __END__ C4::OAI::Record
# C4::OAI::DeletedRecord - an HTTP::OAI::Record that consists of a header only.
# Constructor arguments, in order: the timestamp, an array reference of
# setSpec strings, then named arguments (identifier is read here).
255 package C4::OAI::DeletedRecord;
260 use HTTP::OAI::Metadata::OAI_DC;
262 use base ("HTTP::OAI::Record");
265 my ($class, $timestamp, $setSpecs, %args) = @_;
267 my $self = $class->SUPER::new(%args);
# Replace the first blank in the timestamp with T and append Z.
269 $timestamp =~ s/ /T/, $timestamp .= 'Z';
# Record header: identifier and datestamp, then one setSpec per set.
270 $self->header( new HTTP::OAI::Header(
272 identifier => $args{identifier},
273 datestamp => $timestamp,
276 foreach my $setSpec (@$setSpecs) {
277 $self->header->setSpec($setSpec);
283 # __END__ C4::OAI::DeletedRecord
# C4::OAI::GetRecord - response object for the GetRecord verb.
# The constructor takes the repository object and the request arguments.
# It extracts the biblionumber from the identifier argument, runs a first
# query for marcxml and timestamp, and when that returns no row runs a second
# query for biblionumber and timestamp. When neither query returns a row, an
# HTTP::OAI::Response carrying an idDoesNotExist error is returned instead.
# Otherwise the set specs of the biblio are collected and the response gets
# either a C4::OAI::DeletedRecord or a C4::OAI::Record, depending on $deleted.
287 package C4::OAI::GetRecord;
294 use base ("HTTP::OAI::GetRecord");
298 my ($class, $repository, %args) = @_;
300 my $self = HTTP::OAI::GetRecord->new(%args);
# First lookup: the record with its MARCXML.
302 my $dbh = C4::Context->dbh;
303 my $sth = $dbh->prepare("
304 SELECT marcxml, timestamp
306 WHERE biblionumber=? " );
# The identifier is expected to be the archive id, a colon, then the
# biblionumber; the part after the prefix is captured.
# NOTE(review): $prefix is interpolated into the pattern without \Q...\E, so
# regex metacharacters in the archive id (a dot, for example) act as pattern
# syntax - consider quoting. The capture is also not checked to be numeric.
307 my $prefix = $repository->{koha_identifier} . ':';
308 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
309 $sth->execute( $biblionumber );
310 my ($marcxml, $timestamp);
# Second lookup when the first returned nothing. Its first column is the
# biblionumber, which lands in $marcxml; only $timestamp is meaningful here.
312 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
313 $sth = $dbh->prepare("
314 SELECT biblionumber, timestamp
316 WHERE biblionumber=? " );
317 $sth->execute( $biblionumber );
# Unknown in both lookups: answer with an OAI idDoesNotExist error.
319 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
321 return HTTP::OAI::Response->new(
322 requestURL => $repository->self_url(),
323 errors => [ new HTTP::OAI::Error(
324 code => 'idDoesNotExist',
325 message => "There is no biblio record with this identifier",
# Collect the spec of every OAI set returned for this biblio.
332 my $oai_sets = GetOAISetsBiblio($biblionumber);
334 foreach (@$oai_sets) {
335 push @setSpecs, $_->{spec};
338 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
# Header-only record when $deleted is 1, full record otherwise.
339 ($deleted == 1) ? $self->record( C4::OAI::DeletedRecord->new(
340 $timestamp, \@setSpecs, %args ) )
341 : $self->record( C4::OAI::Record->new(
342 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
346 # __END__ C4::OAI::GetRecord
# C4::OAI::ListIdentifiers - response object for the ListIdentifiers verb.
# The constructor takes the repository object and the request arguments,
# builds a C4::OAI::ResumptionToken from them, and assembles one SQL statement
# from two parenthesised SELECTs (biblioitems and deletedbiblio) ordered by
# biblionumber. Both SELECTs are filtered by a timestamp range and, when a
# set was resolved, joined to oai_sets_biblios and filtered by set id.
# Each fetched row becomes an HTTP::OAI::Header; when more than
# koha_max_count rows come back a new resumption token is attached.
350 package C4::OAI::ListIdentifiers;
357 use base ("HTTP::OAI::ListIdentifiers");
361 my ($class, $repository, %args) = @_;
363 my $self = HTTP::OAI::ListIdentifiers->new(%args);
365 my $token = new C4::OAI::ResumptionToken( %args );
366 my $dbh = C4::Context->dbh;
# Resolve the set spec carried by the token, if any.
368 if(defined $token->{'set'}) {
369 $set = GetOAISetBySpec($token->{'set'});
# Page size. The statement below asks for one row more than this (LIMIT) so
# that the fetch loop can tell whether another page exists.
# NOTE(review): the OFFSET value is interpolated into the SQL text from the
# token, which can originate from a client-supplied resumptionToken - confirm
# it is validated as an integer somewhere, or bind/validate it here.
371 my $max = $repository->{koha_max_count};
373 (SELECT biblioitems.biblionumber, timestamp
376 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
377 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
378 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
380 (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio";
381 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
382 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
383 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
385 $sql .= ") ORDER BY biblionumber
386 LIMIT " . ($max+1) . "
387 OFFSET $token->{offset}
389 my $sth = $dbh->prepare( $sql );
# Placeholders in statement order: range (and set id) for the first SELECT,
# then range (and set id) for the second.
# NOTE(review): the first SELECT receives from_arg/until_arg (T and Z
# removed) while the second, which compares DATE(timestamp), receives the raw
# from/until values including T and Z - confirm the database compares these
# as intended.
390 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
391 push @bind_params, $set->{'id'} if defined $set;
392 push @bind_params, ($token->{'from'}, $token->{'until'});
393 push @bind_params, $set->{'id'} if defined $set;
394 $sth->execute( @bind_params );
# One header per row; a row count above $max triggers the next-page token,
# whose offset advances by $max.
397 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
399 if ( $count > $max ) {
400 $self->resumptionToken(
401 new C4::OAI::ResumptionToken(
402 metadataPrefix => $token->{metadata_prefix},
403 from => $token->{from},
404 until => $token->{until},
405 offset => $token->{offset} + $max,
# Replace the first blank in the timestamp with T and append Z.
411 $timestamp =~ s/ /T/, $timestamp .= 'Z';
# The identifier is the archive id, a colon, then the biblionumber.
412 $self->identifier( new HTTP::OAI::Header(
413 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
414 datestamp => $timestamp,
421 # __END__ C4::OAI::ListIdentifiers
# C4::OAI::Description - holder for one setDescription value.
# The constructor stores the optional setDescription and handler arguments on
# the object. A second routine stores a handler when one is passed, and the
# final statement writes the stored value through g_data_element as a
# setDescription element in the OAI 2.0 namespace.
423 package C4::OAI::Description;
428 use HTTP::OAI::SAXHandler qw/ :SAX /;
431 my ( $class, %args ) = @_;
# Both arguments are optional; only true values are stored.
435 if(my $setDescription = $args{setDescription}) {
436 $self->{setDescription} = $setDescription;
438 if(my $handler = $args{handler}) {
439 $self->{handler} = $handler;
# Handler setter: replaces the stored handler only when a true value is given.
447 my ( $self, $handler ) = @_;
449 $self->{handler} = $handler if $handler;
# Emit the description through the stored handler.
457 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
462 # __END__ C4::OAI::Description
# C4::OAI::ListSets - response object for the ListSets verb.
# The constructor takes the repository object and the request arguments,
# builds a C4::OAI::ResumptionToken from them and walks the list returned by
# GetOAISets. Positions below the token offset are handled by the first
# branch of the loop; for the others each entry of the descriptions list is
# wrapped in a C4::OAI::Description and passed on together with the spec and
# name of the set. The loop ends once the number of positions past the offset
# exceeds koha_max_count, and a resumption token is attached when $pos is
# greater than the token offset.
464 package C4::OAI::ListSets;
471 use base ("HTTP::OAI::ListSets");
474 my ( $class, $repository, %args ) = @_;
476 my $self = HTTP::OAI::ListSets->new(%args);
478 my $token = C4::OAI::ResumptionToken->new(%args);
479 my $sets = GetOAISets;
481 foreach my $set (@$sets) {
# Positions before the offset belong to earlier pages.
482 if ($pos < $token->{offset}) {
# Wrap every description of the set.
487 foreach my $desc (@{$set->{'descriptions'}}) {
488 push @descriptions, C4::OAI::Description->new(
489 setDescription => $desc,
494 setSpec => $set->{'spec'},
495 setName => $set->{'name'},
496 setDescription => \@descriptions,
# Page limit check.
500 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
# Token for the next page, attached only under the trailing condition.
503 $self->resumptionToken(
504 new C4::OAI::ResumptionToken(
505 metadataPrefix => $token->{metadata_prefix},
508 ) if ( $pos > $token->{offset} );
513 # __END__ C4::OAI::ListSets;
# C4::OAI::ListRecords - response object for the ListRecords verb.
# The constructor takes the repository object and the request arguments,
# builds a C4::OAI::ResumptionToken from them, and assembles one SQL statement
# from two parenthesised SELECTs (biblioitems and deletedbiblio) ordered by
# biblionumber. The deletedbiblio SELECT returns null in the marcxml column.
# Both SELECTs are filtered by a timestamp range and, when a set was
# resolved, joined to oai_sets_biblios and filtered by set id.
# Each fetched row is added as a C4::OAI::Record or a C4::OAI::DeletedRecord;
# when more than koha_max_count rows come back a new resumption token is
# attached.
515 package C4::OAI::ListRecords;
522 use base ("HTTP::OAI::ListRecords");
526 my ($class, $repository, %args) = @_;
528 my $self = HTTP::OAI::ListRecords->new(%args);
530 my $token = new C4::OAI::ResumptionToken( %args );
531 my $dbh = C4::Context->dbh;
# Resolve the set spec carried by the token, if any.
533 if(defined $token->{'set'}) {
534 $set = GetOAISetBySpec($token->{'set'});
# Page size. The statement below asks for one row more than this (LIMIT) so
# that the fetch loop can tell whether another page exists.
# NOTE(review): the OFFSET value is interpolated into the SQL text from the
# token, which can originate from a client-supplied resumptionToken - confirm
# it is validated as an integer somewhere, or bind/validate it here.
536 my $max = $repository->{koha_max_count};
538 (SELECT biblioitems.biblionumber, marcxml, timestamp
541 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
542 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
543 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
545 (SELECT deletedbiblio.biblionumber, null as marcxml, timestamp FROM deletedbiblio";
546 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
547 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
548 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
550 $sql .= ") ORDER BY biblionumber
551 LIMIT " . ($max + 1) . "
552 OFFSET $token->{offset}
554 my $sth = $dbh->prepare( $sql );
# Placeholders in statement order: range (and set id) for the first SELECT,
# then range (and set id) for the second.
# NOTE(review): the first SELECT receives from_arg/until_arg (T and Z
# removed) while the second, which compares DATE(timestamp), receives the raw
# from/until values including T and Z - confirm the database compares these
# as intended.
555 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
556 push @bind_params, $set->{'id'} if defined $set;
557 push @bind_params, ($token->{'from'}, $token->{'until'});
558 push @bind_params, $set->{'id'} if defined $set;
559 $sth->execute( @bind_params );
# One record per row; a row count above $max triggers the next-page token,
# whose offset advances by $max.
562 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
564 if ( $count > $max ) {
565 $self->resumptionToken(
566 new C4::OAI::ResumptionToken(
567 metadataPrefix => $token->{metadata_prefix},
568 from => $token->{from},
569 until => $token->{until},
570 offset => $token->{offset} + $max,
# Collect the spec of every OAI set returned for this biblio.
576 my $oai_sets = GetOAISetsBiblio($biblionumber);
578 foreach (@$oai_sets) {
579 push @setSpecs, $_->{spec};
# Full record with metadata in the format named by the token.
582 $self->record( C4::OAI::Record->new(
583 $repository, $marcxml, $timestamp, \@setSpecs,
584 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
585 metadataPrefix => $token->{metadata_prefix}
# Header-only record.
588 $self->record( C4::OAI::DeletedRecord->new(
589 $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) );
596 # __END__ C4::OAI::ListRecords
# C4::OAI::Repository - request handling for the OAI-PMH service.
# Subclass of HTTP::OAI::Repository. The constructor reads the archive id and
# page size from system preferences, optionally loads a YAML configuration
# file that replaces the list of supported metadata formats, validates the
# CGI request, and builds a response object: an error response, or the
# response class matching the verb parameter. The response is then given an
# XML::SAX::Writer that writes to STDOUT.
# The stylesheet routine returns the XSLT stylesheet object for a format and
# caches it per format name.
600 package C4::OAI::Repository;
602 use base ("HTTP::OAI::Repository");
608 use HTTP::OAI::Repository qw/:validate/;
610 use XML::SAX::Writer;
613 use YAML::Syck qw( LoadFile );
614 use CGI qw/:standard -oldstyle_urls/;
621 my ($class, %args) = @_;
622 my $self = $class->SUPER::new(%args);
# Defaults: settings from system preferences and the two built-in formats.
624 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
625 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
626 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
627 $self->{ koha_stylesheet } = { }; # Build when needed
629 # Load configuration file if defined in OAI-PMH:ConfFile syspref
# The format names become the keys found under "format" in that file.
# NOTE(review): LoadFile is called on the configured path without an
# existence or error check visible here - confirm failure handling.
630 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
631 $self->{ conf } = LoadFile( $file );
632 my @formats = keys %{ $self->{conf}->{format} };
633 $self->{ koha_metadata_format } = \@formats;
636 # Check for grammatical errors in the request
637 my @errs = validate_request( CGI::Vars() );
639 # Is metadataPrefix supported by the repository?
640 my $mdp = param('metadataPrefix') || '';
641 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
642 push @errs, new HTTP::OAI::Error(
643 code => 'cannotDisseminateFormat',
644 message => "Dissemination as '$mdp' is not supported",
# Error response for the collected errors.
650 $response = HTTP::OAI::Response->new(
651 requestURL => self_url(),
# Dispatch on the verb; the remaining request parameters are passed to the
# response classes that take arguments.
656 my %attr = CGI::Vars();
657 my $verb = delete( $attr{verb} );
658 if ( $verb eq 'ListSets' ) {
659 $response = C4::OAI::ListSets->new($self, %attr);
661 elsif ( $verb eq 'Identify' ) {
662 $response = C4::OAI::Identify->new( $self );
664 elsif ( $verb eq 'ListMetadataFormats' ) {
665 $response = C4::OAI::ListMetadataFormats->new( $self );
667 elsif ( $verb eq 'GetRecord' ) {
668 $response = C4::OAI::GetRecord->new( $self, %attr );
670 elsif ( $verb eq 'ListRecords' ) {
671 $response = C4::OAI::ListRecords->new( $self, %attr );
673 elsif ( $verb eq 'ListIdentifiers' ) {
674 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
# Direct the XML of the response to STDOUT.
678 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
# stylesheet( $format )
# Looks up the cached stylesheet for $format. On a cache miss the XSL file
# path is taken from the loaded configuration (xsl_file of that format) or,
# without a configuration, built from the intrahtdocs config value and the
# marcflavour preference. The file is parsed, compiled and cached.
687 my ( $self, $format ) = @_;
689 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
690 unless ( $stylesheet ) {
691 my $xsl_file = $self->{ conf }
692 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
693 : ( C4::Context->config('intrahtdocs') .
695 C4::Context->preference('marcflavour') .
697 my $parser = XML::LibXML->new();
698 my $xslt = XML::LibXSLT->new();
699 my $style_doc = $parser->parse_file( $xsl_file );
700 $stylesheet = $xslt->parse_stylesheet( $style_doc );
701 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
711 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
715 use C4::OAI::Repository;
717 my $repository = C4::OAI::Repository->new();
721 This object extends the HTTP::OAI::Repository object.
722 It accepts OAI-PMH HTTP requests and returns the result.
724 This OAI-PMH server can operate in a simple mode and an extended one.
726 In simple mode, the repository configuration comes entirely from Koha system
727 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
728 records in marcxml or Dublin Core format. Dublin Core records are created from
729 Koha marcxml records transformed with XSLT. The XSL file used is located in the
730 koha-tmpl/intranet-tmpl/prog/en/xslt directory and is chosen based on marcflavour,
731 respectively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
734 In extended mode, it is possible to configure formats other than marcxml or Dublin
735 Core. A new syspref, OAI-PMH:ConfFile, specifies a YAML configuration file which
736 lists the available metadata formats and the XSL files used to create them from marcxml
737 records. If this syspref isn't set, the Koha OAI server works in simple mode. A
738 configuration file koha-oai.conf can look like this:
744 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
745 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
746 xsl_file: /usr/local/koha/xslt/vs.xsl
748 metadataPrefix: marcxml
749 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
750 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
752 metadataPrefix: oai_dc
753 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
754 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
755 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl