experiment: use PazPar2 to group related works
author Galen Charlton <galen.charlton@liblime.com>
Thu, 7 Feb 2008 06:11:47 +0000 (00:11 -0600)
committer Joshua Ferraro <jmf@liblime.com>
Fri, 8 Feb 2008 12:01:39 +0000 (06:01 -0600)
The approach is to use PazPar2 to search just one
target, the biblio Zebra database.  The results
of each set are merged by PazPar2 to generate a
hitlist that combines related bibs together; as an
example, if a library has the first Harry Potter
book in three languages and an audiobook format,
the hitlist should ideally return one result
for the work that includes links to the individual
bibs.

The new module C4::Search::PazPar2 implements a
simple client for PazPar2's XML-over-HTTP API.  It is
designed to be generic, and thus may end up getting
moved out of Koha to become a stand-alone CPAN module.

Signed-off-by: Chris Cormack <crc@liblime.com>
Signed-off-by: Joshua Ferraro <jmf@liblime.com>
C4/Search.pm
C4/Search/PazPar2.pm [new file with mode: 0644]
etc/pazpar2/koha-biblios.xml [new file with mode: 0644]
etc/pazpar2/marc21.xsl [new file with mode: 0644]
etc/pazpar2/pazpar2.xml [new file with mode: 0644]
etc/pazpar2/pz2-ourl-base.xsl [new file with mode: 0644]
etc/pazpar2/pz2-ourl-marc21.xsl [new file with mode: 0644]

index 43b9d46..4027011 100644 (file)
@@ -611,6 +611,70 @@ sub getRecords {
     return ( undef, $results_hashref, \@facets_loop );
 }
 
+use C4::Search::PazPar2;
+use XML::Simple;
+use Data::Dumper;
+# pazGetRecords - run a search through a local PazPar2 daemon.
+#
+# Only $simple_query, $offset and $results_per_page are read; the remaining
+# parameters are accepted but not used by this implementation.
+#
+# Returns a three-element list: undef, a hashref holding the hit count and the
+# raw records under the 'biblioserver' key, and an arrayref of facet groups.
+sub pazGetRecords {
+    my (
+        $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
+        $results_per_page, $offset,       $expanded_facet, $branches,
+        $query_type,       $scan
+    ) = @_;
+
+    my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
+    $paz->init();
+    $paz->search($simple_query);
+    # Presumably gives PazPar2 time to collect hits before we ask for them -
+    # TODO confirm whether this pause is still needed.
+    sleep 1;
+
+    my $results_hashref = {};
+
+    # The client methods return nothing when the HTTP request fails, so only
+    # hand a document to XMLin when there is one to parse.
+    my $stat_xml = $paz->stat;
+    my $stats = defined $stat_xml ? XMLin($stat_xml) : {};
+    $results_hashref->{'biblioserver'}->{'hits'} = $stats->{'hits'};
+
+    my $show_xml = $paz->show($offset, $results_per_page);
+    my $results = defined $show_xml ? XMLin($show_xml, forcearray => 1) : {};
+
+    HIT: foreach my $hit (@{ $results->{'hit'} || [] }) {
+        my $recid = $hit->{recid}->[0];
+
+        # NOTE(review): processing stops at the first record ID containing
+        # "sodot". This looks like a leftover from investigating problem
+        # record IDs (the original comment mentions non-ASCII IDs) - confirm.
+        last HIT if $recid =~ /sodot/;
+
+        # A merged hit reports how many underlying records it groups.
+        my $count = exists $hit->{count} ? $hit->{count}->[0] : 1;
+
+        for my $i (0 .. $count - 1) {
+            # Fetch each record exactly once and skip failed fetches.
+            my $rec = $paz->record($recid, $i);
+            next unless defined $rec;
+            push @{ $results_hashref->{'biblioserver'}->{'RECORDS'} }, $rec;
+        }
+    }
+
+    # pass through facets
+    my $termlist_xml = $paz->termlist('author,subject');
+    my $terms = defined $termlist_xml ? XMLin($termlist_xml, forcearray => 1) : {};
+    my @facets_loop = ();
+    foreach my $list (sort keys %{ $terms->{'list'} || {} }) {
+        # Sort on the label text; sorting the term hashrefs themselves would
+        # order them by memory address.
+        my @labels = map { $_->{'name'}->[0] }
+                     @{ $terms->{'list'}->{$list}->{'term'} || [] };
+        my @facets = map { +{ facet_label_value => $_ } } sort @labels;
+        push @facets_loop, {
+            type_label => $list,
+            facets     => \@facets,
+        };
+    }
+
+    return ( undef, $results_hashref, \@facets_loop );
+}
+
 # STOPWORDS
 sub _remove_stopwords {
     my ( $operand, $index ) = @_;
diff --git a/C4/Search/PazPar2.pm b/C4/Search/PazPar2.pm
new file mode 100644 (file)
index 0000000..f543c96
--- /dev/null
@@ -0,0 +1,168 @@
+package C4::Search::PazPar2;
+
+# Copyright (C) 2007 LibLime
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA  02111-1307 USA
+
+use strict;
+
+use LWP::UserAgent;
+use URI;
+use URI::QueryParam;
+use XML::Simple;
+
+=head1 NAME
+
+C4::Search::PazPar2 - implement client for PazPar2
+
+[Note: this module may be renamed to Net::PazPar2 or similar if it is released
+ on CPAN separately from Koha.]
+
+=head1 SYNOPSIS
+
+=head1 DESCRIPTION
+
+=cut
+
+# Constructor: takes the PazPar2 endpoint URL and returns a client object
+# with an empty session ID and its own LWP user agent.
+sub new {
+    my ($class, $endpoint) = @_;
+
+    my $self = bless {
+        'endpoint' => $endpoint,
+        'session'  => '',
+        'ua'       => LWP::UserAgent->new,
+    }, $class;
+
+    return $self;
+}
+
+# Send the 'init' command and remember the session ID from the reply.
+#
+# Returns the session ID on success. On any failure (HTTP error, unparseable
+# reply, or a status other than 'OK') a warning is issued, the stored session
+# is left unchanged, and nothing is returned.
+sub init {
+    my $self = shift;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'init');
+    my $response = $self->{'ua'}->get($uri);
+    unless ($response->is_success) {
+        warn $response->status_line;
+        return;
+    }
+
+    # XMLin dies on malformed XML and returns a plain string for a document
+    # with no child elements, so check both before dereferencing.
+    my $message = eval { XMLin($response->content) };
+    unless (ref $message eq 'HASH') {
+        warn "PazPar2 init: could not parse response";
+        return;
+    }
+
+    my $status = defined $message->{'status'} ? $message->{'status'} : '';
+    unless ($status eq 'OK') {
+        warn "PazPar2 init failed: status '$status'";
+        return;
+    }
+
+    $self->{'session'} = $message->{'session'};
+    return $self->{'session'};
+}
+
+# Send the 'search' command for $query in the current session.
+# The response body is not used; a failed request only produces a warning.
+sub search {
+    my ($self, $query) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'search');
+    $uri->query_param(session => $self->{'session'});
+    $uri->query_param(query => $query);
+
+    my $response = $self->{'ua'}->get($uri);
+    warn $response->status_line unless $response->is_success;
+
+}
+
+# Send the 'stat' command for the current session.
+# Returns the raw response body, or nothing (after a warning) on HTTP failure.
+sub stat {
+    my ($self) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'stat');
+    $uri->query_param(session => $self->{'session'});
+
+    my $response = $self->{'ua'}->get($uri);
+    return $response->content if $response->is_success;
+
+    warn $response->status_line;
+    return;
+}
+
+# Send the 'show' command, asking for $count hits beginning at $start, with
+# the 'block' parameter set to 1.
+# Returns the raw response body, or nothing (after a warning) on HTTP failure.
+sub show {
+    my ($self, $start, $count) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+
+    # Parameters are applied one pair at a time, in this order.
+    my @params = (
+        command => 'show',
+        start   => $start,
+        num     => $count,
+        block   => 1,
+        session => $self->{'session'},
+    );
+    while (my ($name, $value) = splice @params, 0, 2) {
+        $uri->query_param($name => $value);
+    }
+
+    my $response = $self->{'ua'}->get($uri);
+    unless ($response->is_success) {
+        warn $response->status_line;
+        return;
+    }
+    return $response->content;
+
+}
+
+# Send the 'record' command for record $id at position $offset, with the
+# 'binary' parameter set to 1.
+# Returns the raw response body, or nothing (after a warning) on HTTP failure.
+sub record {
+    my ($self, $id, $offset) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    $uri->query_param(command => 'record');
+    $uri->query_param(id => $id);
+    $uri->query_param(offset => $offset);
+    $uri->query_param(binary => 1);
+    $uri->query_param(session => $self->{'session'});
+
+    my $response = $self->{'ua'}->get($uri);
+    unless ($response->is_success) {
+        warn $response->status_line;
+        return;
+    }
+
+    return $response->content;
+}
+
+# Send the 'termlist' command for the term list name(s) given in $name.
+# Returns the raw response body, or nothing (after a warning) on HTTP failure.
+sub termlist {
+    my ($self, $name) = @_;
+
+    my $uri = URI->new($self->{'endpoint'});
+    my @params = (
+        command => 'termlist',
+        name    => $name,
+        session => $self->{'session'},
+    );
+    while (my ($key, $value) = splice @params, 0, 2) {
+        $uri->query_param($key => $value);
+    }
+
+    my $response = $self->{'ua'}->get($uri);
+    return $response->content if $response->is_success;
+
+    warn $response->status_line;
+    return;
+
+}
+
+1;
+
+=head1 AUTHOR
+
+Koha Development Team <info@koha.org>
+
+Galen Charlton <galen.charlton@liblime.com>
+
+=cut
diff --git a/etc/pazpar2/koha-biblios.xml b/etc/pazpar2/koha-biblios.xml
new file mode 100644 (file)
index 0000000..2872b2a
--- /dev/null
@@ -0,0 +1,30 @@
+<settings target="localhost:10005/biblios">
+
+  <set name="pz:name" value="Koha bib database"/>
+
+  <!-- This file introduces default settings for pazpar2 -->
+  <!-- $Id: loc.xml,v 1.2 2007-07-10 13:43:07 adam Exp $ -->
+
+  <!-- mapping for unqualified search -->
+  <set name="pz:cclmap:term" value="u=1016 t=l,r s=al"/>
+
+  <!-- field-specific mappings -->
+  
+  <set name="pz:cclmap:au" value="u=1004 s=al"/>
+  <set name="pz:cclmap:ti" value="u=4 s=al"/>
+  <set name="pz:cclmap:su" value="u=21 s=al"/>
+  <set name="pz:cclmap:isbn" value="u=7"/>
+  <set name="pz:cclmap:issn" value="u=8"/>
+  <set name="pz:cclmap:date" value="u=30 r=r"/>
+
+  <!-- Retrieval settings -->
+
+  <set name="pz:requestsyntax" value="marc21"/>
+  <set name="pz:elements" value="F"/>
+
+  <!-- Result normalization settings -->
+
+  <set name="pz:nativesyntax" value="iso2709"/>
+  <set name="pz:xslt" value="marc21.xsl"/>
+
+</settings>
diff --git a/etc/pazpar2/marc21.xsl b/etc/pazpar2/marc21.xsl
new file mode 100644 (file)
index 0000000..f47642c
--- /dev/null
@@ -0,0 +1,288 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- $Id: marc21.xsl,v 1.22 2007-10-04 12:01:15 adam Exp $ -->
+<xsl:stylesheet
+    version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:pz="http://www.indexdata.com/pazpar2/1.0"
+    xmlns:marc="http://www.loc.gov/MARC21/slim">
+
+  
+  <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>
+
+<!-- Extract metadata from MARC21/USMARC 
+      http://www.loc.gov/marc/bibliographic/ecbdhome.html
+-->  
+  <xsl:include href="pz2-ourl-marc21.xsl" />
+  
+  <xsl:template match="/marc:record">
+    <xsl:variable name="title_medium" select="marc:datafield[@tag='245']/marc:subfield[@code='h']"/>
+    <xsl:variable name="journal_title" select="marc:datafield[@tag='773']/marc:subfield[@code='t']"/>
+    <xsl:variable name="electronic_location_url" select="marc:datafield[@tag='856']/marc:subfield[@code='u']"/>
+    <xsl:variable name="fulltext_a" select="marc:datafield[@tag='900']/marc:subfield[@code='a']"/>
+    <xsl:variable name="fulltext_b" select="marc:datafield[@tag='900']/marc:subfield[@code='b']"/>
+    <xsl:variable name="medium">
+      <xsl:choose>
+       <xsl:when test="$title_medium">
+         <xsl:value-of select="substring-after(substring-before($title_medium,']'),'[')"/>
+       </xsl:when>
+       <xsl:when test="$fulltext_a">
+         <xsl:text>electronic resource</xsl:text>
+       </xsl:when>
+       <xsl:when test="$fulltext_b">
+         <xsl:text>electronic resource</xsl:text>
+       </xsl:when>
+       <xsl:when test="$electronic_location_url">
+         <xsl:text>electronic resource</xsl:text>
+       </xsl:when>
+       <xsl:when test="$journal_title">
+         <xsl:text>article</xsl:text>
+       </xsl:when>
+       <xsl:otherwise>
+         <xsl:text>book</xsl:text>
+       </xsl:otherwise>
+      </xsl:choose>
+    </xsl:variable>
+
+    <xsl:variable name="mergekey">
+        <xsl:text>title </xsl:text>
+        <xsl:choose>
+          <xsl:when test="marc:datafield[@tag='240']">
+             <xsl:value-of select="marc:datafield[@tag='240']/marc:subfield[@code='a']"/>
+          </xsl:when>
+          <xsl:otherwise>
+             <xsl:value-of select="marc:datafield[@tag='245']/marc:subfield[@code='a']"/>
+          </xsl:otherwise>
+        </xsl:choose>
+        <xsl:text> author </xsl:text>
+        <xsl:value-of select="marc:datafield[@tag='100']/marc:subfield[@code='a']"/>
+<!--
+        <xsl:text> medium </xsl:text>
+        <xsl:value-of select="$medium"/>
+-->
+    </xsl:variable>
+
+    <pz:record>
+      <xsl:attribute name="mergekey">
+        <xsl:value-of select="$mergekey"/>
+      </xsl:attribute>
+
+      
+      <xsl:for-each select="marc:controlfield[@tag='001']">
+        <pz:metadata type="id">
+          <xsl:value-of select="."/>
+        </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='010']">
+        <pz:metadata type="lccn">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='999']">
+        <pz:metadata type="kohaid">
+         <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+
+      <xsl:for-each select="marc:datafield[@tag='020']">
+        <pz:metadata type="isbn">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='022']">
+        <pz:metadata type="issn">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='027']">
+        <pz:metadata type="tech-rep-nr">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='100']">
+       <pz:metadata type="author">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+       <pz:metadata type="author-title">
+         <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="author-date">
+         <xsl:value-of select="marc:subfield[@code='d']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='110']">
+       <pz:metadata type="corporate-name">
+           <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+       <pz:metadata type="corporate-location">
+           <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="corporate-date">
+           <xsl:value-of select="marc:subfield[@code='d']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='111']">
+       <pz:metadata type="meeting-name">
+           <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+       <pz:metadata type="meeting-location">
+           <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="meeting-date">
+           <xsl:value-of select="marc:subfield[@code='d']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='260']">
+       <pz:metadata type="date">
+           <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='245']">
+        <pz:metadata type="title">
+          <xsl:value-of select="marc:subfield[@code='a']"/>
+        </pz:metadata>
+        <pz:metadata type="title-remainder">
+          <xsl:value-of select="marc:subfield[@code='b']"/>
+        </pz:metadata>
+        <pz:metadata type="title-responsibility">
+          <xsl:value-of select="marc:subfield[@code='c']"/>
+        </pz:metadata>
+        <pz:metadata type="title-dates">
+          <xsl:value-of select="marc:subfield[@code='f']"/>
+        </pz:metadata>
+        <pz:metadata type="title-medium">
+          <xsl:value-of select="marc:subfield[@code='h']"/>
+        </pz:metadata>
+        <pz:metadata type="title-number-section">
+          <xsl:value-of select="marc:subfield[@code='n']"/>
+        </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='250']">
+       <pz:metadata type="edition">
+           <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='260']">
+        <pz:metadata type="publication-place">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+        <pz:metadata type="publication-name">
+         <xsl:value-of select="marc:subfield[@code='b']"/>
+       </pz:metadata>
+        <pz:metadata type="publication-date">
+         <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='300']">
+       <pz:metadata type="physical-extent">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-format">
+         <xsl:value-of select="marc:subfield[@code='b']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-dimensions">
+         <xsl:value-of select="marc:subfield[@code='c']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-accomp">
+         <xsl:value-of select="marc:subfield[@code='e']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-unittype">
+         <xsl:value-of select="marc:subfield[@code='f']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-unitsize">
+         <xsl:value-of select="marc:subfield[@code='g']"/>
+       </pz:metadata>
+       <pz:metadata type="physical-specified">
+         <xsl:value-of select="marc:subfield[@code='3']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='440']">
+       <pz:metadata type="series-title">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag &gt;= 500 and @tag &lt;= 599]
+                           [@tag != '506' and @tag != '530' and
+                           @tag != '540' and @tag != '546'
+                            and @tag != '522']">
+       <pz:metadata type="description">
+            <xsl:value-of select="*/text()"/>
+        </pz:metadata>
+      </xsl:for-each>
+      
+      <xsl:for-each select="marc:datafield[@tag='650' or @tag='653']">
+        <pz:metadata type="subject">
+         <xsl:value-of select="marc:subfield[@code='a']"/>
+       </pz:metadata>
+       <pz:metadata type="subject-long">
+         <xsl:for-each select="marc:subfield">
+           <xsl:if test="position() > 1">
+             <xsl:text>, </xsl:text>
+           </xsl:if>
+           <xsl:value-of select="."/>
+         </xsl:for-each>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='856']">
+       <pz:metadata type="electronic-url">
+         <xsl:value-of select="marc:subfield[@code='u']"/>
+       </pz:metadata>
+       <pz:metadata type="electronic-text">
+         <xsl:value-of select="marc:subfield[@code='y']"/>
+       </pz:metadata>
+       <pz:metadata type="electronic-note">
+         <xsl:value-of select="marc:subfield[@code='z']"/>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <xsl:for-each select="marc:datafield[@tag='773']">
+       <pz:metadata type="citation">
+         <xsl:for-each select="*">
+           <xsl:value-of select="normalize-space(.)"/>
+           <xsl:text> </xsl:text>
+         </xsl:for-each>
+       </pz:metadata>
+      </xsl:for-each>
+
+      <pz:metadata type="medium">
+       <xsl:value-of select="$medium"/>
+      </pz:metadata>
+      
+      <xsl:if test="$fulltext_a">
+       <pz:metadata type="fulltext">
+         <xsl:value-of select="$fulltext_a"/>
+       </pz:metadata>
+      </xsl:if>
+
+      <xsl:if test="$fulltext_b">
+       <pz:metadata type="fulltext">
+         <xsl:value-of select="$fulltext_b"/>
+       </pz:metadata>
+      </xsl:if>
+
+      <xsl:if test="$open_url_resolver">
+        <pz:metadata type="open-url">
+            <xsl:call-template name="insert-md-openurl" />
+        </pz:metadata>
+      </xsl:if>
+
+    </pz:record>
+
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/etc/pazpar2/pazpar2.xml b/etc/pazpar2/pazpar2.xml
new file mode 100644 (file)
index 0000000..81a887b
--- /dev/null
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- $Id: pazpar2.cfg.dist,v 1.21 2007-10-04 12:01:15 adam Exp $ -->
+<pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
+  
+  <server>
+    <listen port="10006"/>
+    <settings src="/home/gmc/koha/dev/etc/pazpar2/koha-biblios.xml"/>
+
+    <relevance>
+      <icu_chain id="relevance" locale="el">
+       <normalize rule="[:Control:] Any-Remove"/>
+       <tokenize rule="l"/>
+       <normalize rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
+       <casemap rule="l"/>
+       <index/>
+      </icu_chain>
+    </relevance>
+
+    <sort>
+      <icu_chain id="sort" locale="el">
+       <normalize rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+       <casemap rule="l"/>
+       <sortkey/>
+      </icu_chain>
+    </sort>
+    
+    <mergekey>
+      <icu_chain id="mergekey" locale="el">
+       <tokenize rule="l"/>
+       <normalize rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+       <casemap rule="l"/>
+       <index/>
+      </icu_chain>
+    </mergekey>
+    
+    <service>
+      <!-- we try to keep same order as in marc21.xsl -->
+      <metadata name="id"/>
+      <metadata name="lccn" merge="unique"/>
+      <metadata name="kohaid" merge="unique" brief="yes" />
+      <metadata name="isbn"/>
+      <metadata name="issn"/>
+      <metadata name="tech-rep-nr"/>
+      <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2"/>
+      <metadata name="author-title"/>
+      <metadata name="author-date"/>
+      <metadata name="corporate-name"/>
+      <metadata name="corporate-location"/>
+      <metadata name="corporate-date"/>
+      <metadata name="meeting-name"/>
+      <metadata name="meeting-location"/>
+      <metadata name="meeting-date"/>
+      <metadata name="date" brief="yes" sortkey="numeric" type="year"
+               merge="range" termlist="yes"/>
+      <metadata name="title" brief="yes" sortkey="skiparticle"
+               merge="longest" rank="6"/>
+      <metadata name="title-remainder" brief="yes" merge="longest" rank="5"/>
+      <metadata name="title-responsibility" brief="yes" />
+      <metadata name="title-dates" brief="yes" merge="longest"/>
+      <metadata name="title-medium" brief="yes" merge="longest"/>
+      <metadata name="title-number-section" brief="yes" merge="longest"/>
+      <metadata name="medium" brief="yes" merge="longest"/>
+      <metadata name="edition"/>
+      <metadata name="publication-place"/>
+      <metadata name="publication-name"/>
+      <metadata name="publication-date"/>
+      <metadata name="physical-extent"/>       
+      <metadata name="physical-format"/>       
+      <metadata name="physical-dimensions"/>   
+      <metadata name="physical-accomp"/>       
+      <metadata name="physical-unittype"/>     
+      <metadata name="physical-unitsize"/>     
+      <metadata name="physical-specified"/>    
+
+      <metadata name="series-title"/>  
+
+      <metadata name="description" merge="longest" rank="3"/>
+      <metadata name="subject-long" rank="3"/>
+      <metadata name="subject" termlist="yes" rank="0"/>
+      <metadata name="electronic-url"/>
+      <metadata name="electronic-text"/>
+      <metadata name="electronic-note"/>
+      <metadata name="citation"/>
+      <metadata name="fulltext"/>
+
+      <metadata name="url_recipe" setting="postproc"/>
+      <metadata name="open_url_resolver" setting="parameter"/>
+      <metadata name="open-url" merge="longest"/>
+    </service>
+  </server>
+  
+</pazpar2>
+<!-- Keep this comment at the end of the file
+     Local variables:
+     mode: nxml
+     End:
+-->
+
diff --git a/etc/pazpar2/pz2-ourl-base.xsl b/etc/pazpar2/pz2-ourl-base.xsl
new file mode 100644 (file)
index 0000000..260dc9f
--- /dev/null
@@ -0,0 +1,169 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet
+    version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:pz="http://www.indexdata.com/pazpar2/1.0"
+    xmlns:marc="http://www.loc.gov/MARC21/slim"
+    xmlns:str="http://exslt.org/strings"
+    extension-element-prefixes="str">
+
+  <xsl:param name="open_url_resolver"/>
+  <!--<xsl:variable name="resolver">http://zeus.lib.uoc.gr:3210/sfxtst3</xsl:variable>-->
+  <xsl:template name="insert-md-openurl">
+    <!-- Emit the resolver base URL with a fixed generatedby marker, then
+         append the author, date, volume, isbn, issn, title and atitle
+         parameters produced by the parsing templates, in that order. -->
+    <xsl:value-of select="concat($open_url_resolver, '?generatedby=pz2')" />
+    <xsl:call-template name="ou-parse-author" />
+    <xsl:call-template name="ou-parse-date" />
+    <xsl:call-template name="ou-parse-volume" />
+    <xsl:call-template name="ou-parse-any">
+      <xsl:with-param name="field_name" select="'isbn'" />
+    </xsl:call-template>
+    <xsl:call-template name="ou-parse-any">
+      <xsl:with-param name="field_name" select="'issn'" />
+    </xsl:call-template>
+    <xsl:call-template name="ou-parse-any">
+      <xsl:with-param name="field_name" select="'title'" />
+    </xsl:call-template>
+    <xsl:call-template name="ou-parse-any">
+      <xsl:with-param name="field_name" select="'atitle'" />
+    </xsl:call-template>
+
+  </xsl:template>
+  <!-- parsing raw string data -->
+  
+  <xsl:template name="ou-parse-author" >
+    <!-- Split the output of ou-author at the first comma into a last name
+         (text before the comma) and a first name (first word after it),
+         and append them as aulast / aufirst parameters. -->
+    <xsl:variable name="author">
+      <xsl:call-template name="ou-author" />
+    </xsl:variable>
+    
+    <xsl:variable name="aulast" select="normalize-space(substring-before($author, ','))"/>
+
+    <xsl:variable name="given" select="normalize-space(substring-after($author, ','))"/>
+
+    <!-- A trailing space is appended before cutting at the first space so
+         that a single-word given name is kept rather than yielding ''. -->
+    <xsl:variable name="aufirst" 
+      select="substring-before(concat($given, ' '), ' ')"/>
+
+    <xsl:if test="$aulast != ''">
+      <xsl:text>&amp;aulast=</xsl:text>
+      <xsl:value-of select="$aulast" />
+    </xsl:if>
+
+    <!-- Skip first names that are a single character once dots are removed
+         (i.e. bare initials). -->
+    <xsl:if test="string-length( translate($aufirst, '.', '') ) &gt; 1" >
+      <xsl:text>&amp;aufirst=</xsl:text>
+      <xsl:value-of select="$aufirst" />
+    </xsl:if>
+
+  </xsl:template>
+
+  <xsl:template name="ou-parse-volume">
+    <!-- Append a volume parameter taken from the text that follows 'Vol' in
+         the output of ou-volume. The issue and spage variables are fixed to
+         false(), so their parameters are currently never emitted. -->
+    <xsl:variable name="volume">
+      <xsl:call-template name="ou-volume" />
+    </xsl:variable>
+
+    <xsl:variable name="vol" select="substring-after($volume, 'Vol')"/>
+    <xsl:variable name="issue" select="false()" />
+    <xsl:variable name="spage" select="false()" />
+
+    <xsl:if test="$vol">
+      <xsl:text>&amp;volume=</xsl:text>
+      <xsl:value-of select="$vol" />
+    </xsl:if>
+
+    <xsl:if test="$issue">
+      <xsl:text>&amp;issue=</xsl:text>
+      <xsl:value-of select="$issue" />
+    </xsl:if>
+    
+    <!-- The start page parameter carries $spage, not the volume. -->
+    <xsl:if test="$spage">
+      <xsl:text>&amp;spage=</xsl:text>
+      <xsl:value-of select="$spage" />
+    </xsl:if>
+
+  </xsl:template>
+
+
+  <xsl:template name="ou-parse-date">
+    <!-- Take the output of ou-date, delete the characters . [ ] c ; from it,
+         and append what is left as the date parameter when it is not empty. -->
+    <xsl:variable name="raw_date">
+      <xsl:call-template name="ou-date" />
+    </xsl:variable>
+
+    <xsl:variable name="clean_date" select="translate($raw_date, '.[]c;', '')"/>
+
+    <xsl:if test="string-length($clean_date) &gt; 0">
+      <xsl:value-of select="concat('&amp;date=', $clean_date)" />
+    </xsl:if>
+
+  </xsl:template>
+
+  
+  <xsl:template name="ou-parse-any">
+    <!-- Append one name=value parameter for the field named by field_name
+         (isbn, issn, title or atitle). The raw value comes from the matching
+         ou-* template and is cleaned up before being emitted; nothing is
+         emitted when the cleaned value is empty. -->
+    <xsl:param name="field_name" />
+
+    <!-- Raw value from the format-specific template. -->
+    <xsl:variable name="field_value">
+      <xsl:choose>
+        <xsl:when test="$field_name = 'isbn'">
+          <xsl:call-template name="ou-isbn"/>
+        </xsl:when>
+        <xsl:when test="$field_name = 'issn'">
+          <xsl:call-template name="ou-issn"/>
+        </xsl:when>
+        <xsl:when test="$field_name = 'atitle'">
+          <xsl:call-template name="ou-atitle"/>
+        </xsl:when>
+        <xsl:when test="$field_name = 'title'">
+          <xsl:call-template name="ou-title"/>
+        </xsl:when>
+      </xsl:choose>
+    </xsl:variable>
+
+    <xsl:variable name="parsed_value">
+      <xsl:choose>
+
+        <!-- Keep only digits and X: first collect every character that is
+             not allowed, then delete those characters from the value. -->
+        <xsl:when test="$field_name = 'isbn'">
+          <xsl:variable name="unwanted" select="translate($field_value, '1234567890X', '')"/>
+          <xsl:value-of select="translate($field_value, $unwanted, '')"/>
+        </xsl:when>
+
+        <!-- Same filtering, with the hyphen also allowed. -->
+        <xsl:when test="$field_name = 'issn'">
+          <xsl:variable name="unwanted" select="translate($field_value, '1234567890-X', '')"/>
+          <xsl:value-of select="translate($field_value, $unwanted, '')"/>
+        </xsl:when>
+
+        <!-- Titles: collapse whitespace and drop full stops. -->
+        <xsl:when test="$field_name = 'atitle' or $field_name = 'title'">
+          <xsl:value-of select="translate(normalize-space($field_value), '.', '')"/>
+        </xsl:when>
+
+      </xsl:choose>
+    </xsl:variable>
+
+    <xsl:if test="$parsed_value != ''">
+      <xsl:value-of select="concat('&amp;', $field_name, '=', $parsed_value)" />
+    </xsl:if>
+
+  </xsl:template>
+
+
+</xsl:stylesheet>
+<!--
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=2 tabstop=4 expandtab
+ */
+-->
diff --git a/etc/pazpar2/pz2-ourl-marc21.xsl b/etc/pazpar2/pz2-ourl-marc21.xsl
new file mode 100644 (file)
index 0000000..1c8efde
--- /dev/null
@@ -0,0 +1,97 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet
+    version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:pz="http://www.indexdata.com/pazpar2/1.0"
+    xmlns:marc="http://www.loc.gov/MARC21/slim"
+    xmlns:str="http://exslt.org/strings"
+    extension-element-prefixes="str">  
+  
+  <xsl:import href="pz2-ourl-base.xsl"/>
+
+  <xsl:template name="ou-author" >
+    <!-- Output the first subfield a of every 100 and 700 field, run together
+         with no separator. How multiple authors should be handled is still
+         an open question. -->
+    <xsl:for-each select="marc:datafield[@tag='100' or @tag='700']/marc:subfield[@code='a'][1]">
+      <xsl:value-of select="."/>
+    </xsl:for-each>
+  </xsl:template>
+
+  <xsl:template name="ou-title" >
+    <!-- A 773 subfield t marks a journal/article record, so it is used as
+         the title when present; otherwise fall back to 245 subfield a. -->
+    <xsl:variable name="host_title" select="marc:datafield[@tag='773']/marc:subfield[@code='t']"/>
+    <xsl:choose>
+      <xsl:when test="$host_title">
+        <xsl:value-of select="$host_title"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="marc:datafield[@tag='245']/marc:subfield[@code='a']"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  
+  <xsl:template name="ou-atitle" >
+    <!-- The article title (245 subfield a) is only output when the record
+         has a 773 field, i.e. when it is an article or journal. -->
+    <xsl:variable name="host_entry" select="marc:datafield[@tag='773']"/>
+    <xsl:if test="$host_entry">
+      <xsl:value-of select="marc:datafield[@tag='245']/marc:subfield[@code='a']"/>
+    </xsl:if>
+  </xsl:template>
+
+
+  <xsl:template name="ou-date" >
+    <!-- Output the first subfield c of every 260 field, run together. -->
+    <xsl:for-each select="marc:datafield[@tag='260']/marc:subfield[@code='c'][1]">
+      <xsl:value-of select="."/>
+    </xsl:for-each>
+  </xsl:template>
+
+  
+  <xsl:template name="ou-isbn" >
+    <!-- Prefer the ISBN from 773 subfield z (present on journal/article
+         records); otherwise use 020 subfield a. -->
+    <xsl:variable name="host_isbn" select="marc:datafield[@tag='773']/marc:subfield[@code='z']"/>
+    <xsl:choose>
+      <xsl:when test="$host_isbn">
+        <xsl:value-of select="$host_isbn"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="marc:datafield[@tag='020']/marc:subfield[@code='a']"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  
+  <xsl:template name="ou-issn" >
+    <!-- Prefer the ISSN from 773 subfield x (present on journal/article
+         records); otherwise use 022 subfield a. -->
+    <xsl:variable name="host_issn" select="marc:datafield[@tag='773']/marc:subfield[@code='x']"/>
+    <xsl:choose>
+      <xsl:when test="$host_issn">
+        <xsl:value-of select="$host_issn"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="marc:datafield[@tag='022']/marc:subfield[@code='a']"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  
+  <xsl:template name="ou-volume" >
+    <!-- Output 773 subfield g; selecting it yields nothing when the record
+         has no 773 field, so no separate existence test is needed. -->
+    <xsl:value-of select="marc:datafield[@tag='773']/marc:subfield[@code='g']"/>
+  </xsl:template>
+
+</xsl:stylesheet>
+<!--
+/*
+ * Local variables:
+ * c-basic-offset: 2
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=2 tabstop=4 expandtab
+ */
+-->