3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use CGI qw( :standard -oldstyle_urls -utf8 );
30 eval { require PerlIO::gzip };
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
61 binmode STDOUT, ':encoding(UTF-8)';
62 my $repository = C4::OAI::Repository->new();
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
75 package C4::OAI::ResumptionToken;
81 use base ("HTTP::OAI::ResumptionToken");
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
103 $from .= 'T00:00:00Z' if length($from) == 10;
104 $until .= 'T23:59:59Z' if length($until) == 10;
105 $offset = $args{ offset } || 0;
109 $self->{ metadata_prefix } = $metadata_prefix;
110 $self->{ offset } = $offset;
111 $self->{ from } = $from;
112 $self->{ until } = $until;
113 $self->{ set } = $set;
114 $self->{ from_arg } = _strip_UTC_designators($from);
115 $self->{ until_arg } = _strip_UTC_designators($until);
117 $self->resumptionToken(
118 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
119 $self->cursor( $offset );
124 sub _strip_UTC_designators {
125 my ( $timestamp ) = @_;
126 $timestamp =~ s/T/ /g;
127 $timestamp =~ s/Z//g;
131 # __END__ C4::OAI::ResumptionToken
135 package C4::OAI::Identify;
142 use base ("HTTP::OAI::Identify");
145 my ($class, $repository) = @_;
147 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
148 my $self = $class->SUPER::new(
150 repositoryName => C4::Context->preference("LibraryName"),
151 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
152 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
153 granularity => 'YYYY-MM-DD',
154 earliestDatestamp => '0001-01-01',
155 deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no',
158 # FIXME - alas, the description element is not so simple; to validate
159 # against the OAI-PMH schema, it cannot contain just a string,
160 # but one or more elements that validate against another XML schema.
161 # For now, simply omitting it.
162 # $self->description( "Koha OAI Repository" );
164 $self->compression( 'gzip' );
169 # __END__ C4::OAI::Identify
173 package C4::OAI::ListMetadataFormats;
179 use base ("HTTP::OAI::ListMetadataFormats");
182 my ($class, $repository) = @_;
184 my $self = $class->SUPER::new();
186 if ( $repository->{ conf } ) {
187 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
188 my $format = $repository->{ conf }->{ format }->{ $name };
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => $format->{metadataPrefix},
191 schema => $format->{schema},
192 metadataNamespace => $format->{metadataNamespace}, ) );
196 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
197 metadataPrefix => 'oai_dc',
198 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
199 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
201 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
202 metadataPrefix => 'marcxml',
203 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
204 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
211 # __END__ C4::OAI::ListMetadataFormats
215 package C4::OAI::Record;
220 use HTTP::OAI::Metadata::OAI_DC;
222 use base ("HTTP::OAI::Record");
225 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
227 my $self = $class->SUPER::new(%args);
229 $timestamp =~ s/ /T/, $timestamp .= 'Z';
230 $self->header( new HTTP::OAI::Header(
231 identifier => $args{identifier},
232 datestamp => $timestamp,
235 foreach my $setSpec (@$setSpecs) {
236 $self->header->setSpec($setSpec);
239 my $parser = XML::LibXML->new();
240 my $record_dom = $parser->parse_string( $marcxml );
241 my $format = $args{metadataPrefix};
242 if ( $format ne 'marcxml' ) {
244 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
246 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
248 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
253 # __END__ C4::OAI::Record
255 package C4::OAI::DeletedRecord;
259 use HTTP::OAI::Metadata::OAI_DC;
261 use base ("HTTP::OAI::Record");
264 my ($class, $timestamp, $setSpecs, %args) = @_;
266 my $self = $class->SUPER::new(%args);
268 $timestamp =~ s/ /T/, $timestamp .= 'Z';
269 $self->header( new HTTP::OAI::Header(
271 identifier => $args{identifier},
272 datestamp => $timestamp,
275 foreach my $setSpec (@$setSpecs) {
276 $self->header->setSpec($setSpec);
282 # __END__ C4::OAI::DeletedRecord
286 package C4::OAI::GetRecord;
293 use base ("HTTP::OAI::GetRecord");
297 my ($class, $repository, %args) = @_;
299 my $self = HTTP::OAI::GetRecord->new(%args);
301 my $dbh = C4::Context->dbh;
302 my $prefix = $repository->{koha_identifier} . ':';
303 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
304 my ($marcxml, $timestamp);
306 unless ( ($marcxml, $timestamp) = $dbh->selectrow_array(q/
307 SELECT marcxml, timestamp
309 WHERE biblionumber=? /, undef, $biblionumber)) {
311 unless ( ($marcxml, $timestamp) = $dbh->selectrow_array(q/
312 SELECT biblionumber, timestamp
314 WHERE biblionumber=? /, undef, $biblionumber )) {
317 return HTTP::OAI::Response->new(
318 requestURL => $repository->self_url(),
319 errors => [ new HTTP::OAI::Error(
320 code => 'idDoesNotExist',
321 message => "There is no biblio record with this identifier",
328 my $oai_sets = GetOAISetsBiblio($biblionumber);
330 foreach (@$oai_sets) {
331 push @setSpecs, $_->{spec};
334 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
335 ($deleted == 1) ? $self->record( C4::OAI::DeletedRecord->new(
336 $timestamp, \@setSpecs, %args ) )
337 : $self->record( C4::OAI::Record->new(
338 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
342 # __END__ C4::OAI::GetRecord
346 package C4::OAI::ListIdentifiers;
353 use base ("HTTP::OAI::ListIdentifiers");
357 my ($class, $repository, %args) = @_;
359 my $self = HTTP::OAI::ListIdentifiers->new(%args);
361 my $token = new C4::OAI::ResumptionToken( %args );
362 my $dbh = C4::Context->dbh;
364 if(defined $token->{'set'}) {
365 $set = GetOAISetBySpec($token->{'set'});
367 my $max = $repository->{koha_max_count};
369 (SELECT biblioitems.biblionumber, biblioitems.timestamp
372 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
373 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
374 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
376 (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio";
377 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
378 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
379 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
381 $sql .= ") ORDER BY biblionumber
382 LIMIT " . ($max+1) . "
383 OFFSET $token->{offset}
385 my $sth = $dbh->prepare( $sql );
386 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
387 push @bind_params, $set->{'id'} if defined $set;
388 push @bind_params, ($token->{'from'}, $token->{'until'});
389 push @bind_params, $set->{'id'} if defined $set;
390 $sth->execute( @bind_params );
393 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
395 if ( $count > $max ) {
396 $self->resumptionToken(
397 new C4::OAI::ResumptionToken(
398 metadataPrefix => $token->{metadata_prefix},
399 from => $token->{from},
400 until => $token->{until},
401 offset => $token->{offset} + $max,
407 $timestamp =~ s/ /T/, $timestamp .= 'Z';
408 $self->identifier( new HTTP::OAI::Header(
409 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
410 datestamp => $timestamp,
414 # Return error if no results
416 return HTTP::OAI::Response->new(
417 requestURL => $repository->self_url(),
418 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
425 # __END__ C4::OAI::ListIdentifiers
427 package C4::OAI::Description;
432 use HTTP::OAI::SAXHandler qw/ :SAX /;
435 my ( $class, %args ) = @_;
439 if(my $setDescription = $args{setDescription}) {
440 $self->{setDescription} = $setDescription;
442 if(my $handler = $args{handler}) {
443 $self->{handler} = $handler;
451 my ( $self, $handler ) = @_;
453 $self->{handler} = $handler if $handler;
461 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
466 # __END__ C4::OAI::Description
468 package C4::OAI::ListSets;
475 use base ("HTTP::OAI::ListSets");
478 my ( $class, $repository, %args ) = @_;
480 my $self = HTTP::OAI::ListSets->new(%args);
482 my $token = C4::OAI::ResumptionToken->new(%args);
483 my $sets = GetOAISets;
485 foreach my $set (@$sets) {
486 if ($pos < $token->{offset}) {
491 foreach my $desc (@{$set->{'descriptions'}}) {
492 push @descriptions, C4::OAI::Description->new(
493 setDescription => $desc,
498 setSpec => $set->{'spec'},
499 setName => $set->{'name'},
500 setDescription => \@descriptions,
504 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
507 $self->resumptionToken(
508 new C4::OAI::ResumptionToken(
509 metadataPrefix => $token->{metadata_prefix},
512 ) if ( $pos > $token->{offset} );
517 # __END__ C4::OAI::ListSets;
519 package C4::OAI::ListRecords;
526 use base ("HTTP::OAI::ListRecords");
530 my ($class, $repository, %args) = @_;
532 my $self = HTTP::OAI::ListRecords->new(%args);
534 my $token = new C4::OAI::ResumptionToken( %args );
535 my $dbh = C4::Context->dbh;
537 if(defined $token->{'set'}) {
538 $set = GetOAISetBySpec($token->{'set'});
540 my $max = $repository->{koha_max_count};
542 (SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
545 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
546 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
547 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
549 (SELECT deletedbiblio.biblionumber, null as marcxml, timestamp FROM deletedbiblio";
550 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
551 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
552 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
554 $sql .= ") ORDER BY biblionumber
555 LIMIT " . ($max + 1) . "
556 OFFSET $token->{offset}
558 my $sth = $dbh->prepare( $sql );
559 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
560 push @bind_params, $set->{'id'} if defined $set;
561 push @bind_params, ($token->{'from'}, $token->{'until'});
562 push @bind_params, $set->{'id'} if defined $set;
563 $sth->execute( @bind_params );
566 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
568 if ( $count > $max ) {
569 $self->resumptionToken(
570 new C4::OAI::ResumptionToken(
571 metadataPrefix => $token->{metadata_prefix},
572 from => $token->{from},
573 until => $token->{until},
574 offset => $token->{offset} + $max,
580 my $oai_sets = GetOAISetsBiblio($biblionumber);
582 foreach (@$oai_sets) {
583 push @setSpecs, $_->{spec};
586 $self->record( C4::OAI::Record->new(
587 $repository, $marcxml, $timestamp, \@setSpecs,
588 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
589 metadataPrefix => $token->{metadata_prefix}
592 $self->record( C4::OAI::DeletedRecord->new(
593 $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) );
597 # Return error if no results
599 return HTTP::OAI::Response->new(
600 requestURL => $repository->self_url(),
601 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
608 # __END__ C4::OAI::ListRecords
612 package C4::OAI::Repository;
614 use base ("HTTP::OAI::Repository");
620 use HTTP::OAI::Repository qw/:validate/;
622 use XML::SAX::Writer;
625 use YAML::Syck qw( LoadFile );
626 use CGI qw/:standard -oldstyle_urls/;
633 my ($class, %args) = @_;
634 my $self = $class->SUPER::new(%args);
636 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
637 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
638 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
639 $self->{ koha_stylesheet } = { }; # Build when needed
641 # Load configuration file if defined in OAI-PMH:ConfFile syspref
642 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
643 $self->{ conf } = LoadFile( $file );
644 my @formats = keys %{ $self->{conf}->{format} };
645 $self->{ koha_metadata_format } = \@formats;
648 # Check for grammatical errors in the request
649 my @errs = validate_request( CGI::Vars() );
651 # Is metadataPrefix supported by the respository?
652 my $mdp = param('metadataPrefix') || '';
653 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
654 push @errs, new HTTP::OAI::Error(
655 code => 'cannotDisseminateFormat',
656 message => "Dissemination as '$mdp' is not supported",
662 $response = HTTP::OAI::Response->new(
663 requestURL => self_url(),
668 my %attr = CGI::Vars();
669 my $verb = delete( $attr{verb} );
670 if ( $verb eq 'ListSets' ) {
671 $response = C4::OAI::ListSets->new($self, %attr);
673 elsif ( $verb eq 'Identify' ) {
674 $response = C4::OAI::Identify->new( $self );
676 elsif ( $verb eq 'ListMetadataFormats' ) {
677 $response = C4::OAI::ListMetadataFormats->new( $self );
679 elsif ( $verb eq 'GetRecord' ) {
680 $response = C4::OAI::GetRecord->new( $self, %attr );
682 elsif ( $verb eq 'ListRecords' ) {
683 $response = C4::OAI::ListRecords->new( $self, %attr );
685 elsif ( $verb eq 'ListIdentifiers' ) {
686 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
690 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
699 my ( $self, $format ) = @_;
701 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
702 unless ( $stylesheet ) {
703 my $xsl_file = $self->{ conf }
704 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
705 : ( C4::Context->config('intrahtdocs') .
707 C4::Context->preference('marcflavour') .
709 my $parser = XML::LibXML->new();
710 my $xslt = XML::LibXSLT->new();
711 my $style_doc = $parser->parse_file( $xsl_file );
712 $stylesheet = $xslt->parse_stylesheet( $style_doc );
713 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
723 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
727 use C4::OAI::Repository;
729 my $repository = C4::OAI::Repository->new();
733 This object extend HTTP::OAI::Repository object.
734 It accepts OAI-PMH HTTP requests and returns result.
736 This OAI-PMH server can operate in a simple mode and extended one.
738 In simple mode, repository configuration comes entirely from Koha system
739 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
740 records in marcxml or dublin core format. Dublin core records are created from
741 koha marcxml records tranformed with XSLT. Used XSL file is located in
742 koha-tmpl/intranet-tmpl/prog/en/xslt directory and choosed based on marcflavour,
743 respecively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
746 In extende mode, it's possible to parameter other format than marcxml or Dublin
747 Core. A new syspref OAI-PMH:ConfFile specify a YAML configuration file which
748 list available metadata formats and XSL file used to create them from marcxml
749 records. If this syspref isn't set, Koha OAI server works in simple mode. A
750 configuration file koha-oai.conf can look like that:
756 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
757 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
758 xsl_file: /usr/local/koha/xslt/vs.xsl
760 metadataPrefix: marxml
761 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
762 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
764 metadataPrefix: oai_dc
765 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
766 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
767 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl