This script builds a new marc_word

author joshferraro <joshferraro>

Fri, 11 Jun 2004 15:07:48 +0000 (15:07 +0000)

committer joshferraro <joshferraro>

Fri, 11 Jun 2004 15:07:48 +0000 (15:07 +0000)
author joshferraro <joshferraro>
Fri, 11 Jun 2004 15:07:48 +0000 (15:07 +0000)
committer joshferraro <joshferraro>
Fri, 11 Jun 2004 15:07:48 +0000 (15:07 +0000)
diff --git a/misc/build_marc_word.pl b/misc/build_marc_word.pl

new file mode 100755 (executable)

index 0000000..d17df6f
--- /dev/null
+++ b/misc/build_marc_word.pl
@@ -0,0 +1,114 @@
+#!/usr/bin/perl -w
+#-----------------------------------
+# Script Name: build_marc_word.pl
+# Script Version: 0.1.0
+# Date:  2004/06/05
+# Author:  Joshua Ferraro [jmf at kados dot org]
+# Description: This script builds a new marc_word
+#  table with a reduced number of tags (only those
+#  tags that should be searched) allowing for
+#  faster and more accurate searching when used
+#  with the SearchMarc routines.  Make sure that
+#  the MARCaddword routine in Biblio.pm will index
+#  words of length >= 1 char; otherwise, searches like
+#  "O'brian, Patrick" will fail as the search
+#  routines will separate that query into "o",
+#  "brian", and "patrick".  (If "o" is not in the
+#  database the search will fail)
+# Usage: build_marc_word.pl
+# Revision History:
+#    0.1.0  2004/06/11:  first working version.
+#                       Thanks to Chris Cormack
+#                       for helping with the $data object
+#                       and Stephen Hedges for providing
+#                       the list of MARC tags.
+# FixMe:
+#   *Should add a few parameters like 'delete from
+#    marc_word' or make script ask user whether to
+#    perform that task ...
+#   *Add a 'status' report as the data is loaded ... 
+#-----------------------------------
+use lib '/usr/local/koha/intranet/modules/';
+use strict;
+use C4::Context;
+use C4::Biblio;
+my $dbh=C4::Context->dbh; # handle obtained from C4::Context; used below for the marc_subfield_table query and passed to MARCaddword
+
+#Here is where you name the tags that you wish to index.  If you
+# are using MARC21 this set of default tags should be fine but you
+# may need to add holdings tags specific to your library (e.g., holding
+# branch for Nelsonville is 942k but that may not be the case for your
+# library).
+my @tags = (
+
+    # Labels below come from http://lcweb.loc.gov/marc/bibliographic/ecbdhome.html
+
+    '020a',  # International Standard Book Number
+    '022a',  # International Standard Serial Number
+    '100a',  # Main entry: personal name
+    '110a',  # Main entry: corporate name
+    '110b',  #   - subordinate unit
+    '110c',  #   - location of meeting
+    '111a',  # Main entry: meeting name
+    '111c',  #   - location of meeting
+    '130a',  # Main entry: uniform title
+    '240a',  # Uniform title
+    '245a',  # Title statement
+    '245b',  #   - remainder of title
+    '245c',  #   - statement of responsibility, etc.
+    '245p',  #   - name of part/section of a work
+    '246a',  # Varying form of title
+    '246b',  #   - remainder of title
+    '260b',  # Publication, distribution, etc. (imprint)
+    '440a',  # Series statement/added entry: title
+    '440p',  #   - name of part/section of a work
+    '500a',  # General note
+    '505t',  # Formatted contents note (t is the title)
+    '511a',  # Participant or performer note
+    '520a',  # Summary, etc.
+    '534a',  # Original version note
+    '534k',  #   - key title of original
+    '534t',  #   - title statement of original
+    '586a',  # Awards note
+    '600a',  # Subject added entry: personal name
+    '610a',  # Subject added entry: corporate name
+    '611a',  # Subject added entry: meeting name
+    '630a',  # Subject added entry: uniform title
+    '650a',  # Subject added entry: topical term
+    '651a',  # Subject added entry: geographic name
+    '700a',  # Added entry: personal name
+    '710a',  # Added entry: corporate name
+    '711a',  # Added entry: meeting name
+    '720a',  # Added entry: uncontrolled name
+    '730a',  # Added entry: uniform title
+    '740a',  # Added entry: uncontrolled related/analytical title
+    '752a',  # Added entry: hierarchical place name
+    '800a',  # Series added entry: personal name
+    '810a',  # Series added entry: corporate name
+    '811a',  # Series added entry: meeting name
+    '830a',  # Series added entry: uniform title
+    '942k',  # Holdings branch (possibly unique to NPL; adjust for your library)
+);
+
+#note that subfieldcode in marc_subfield_table is subfieldid in marc_word ... even
+#though there is another subfieldid in marc_subfield_table--very confusing naming conventions!
+
+#For each tag we run a search to find the necessary data for building the marc_word table
+# Prepare the lookup once (it is loop-invariant); each pass below only binds a new tag/subfield pair.
+my $sth=$dbh->prepare("SELECT bibid,tag,tagorder,subfieldcode,subfieldorder,subfieldvalue FROM marc_subfield_table WHERE tag=? AND subfieldcode=?");
+foreach my $this_tagid (@tags) {
+       # Split e.g. "245a" into tag "245" and subfield code "a" with captures, so the
+       # aliased @tags element is left untouched; entries that are not digits followed
+       # by non-digits are skipped instead of being queried with undefined bind values.
+       my ($tag, $subfieldid) = $this_tagid =~ /\A(\d+)(\D+)\z/;
+       unless (defined $tag) {
+               warn "Skipping malformed tag entry '$this_tagid'\n";
+               next;
+       }
+       # Pass every matching row on to MARCaddword in Biblio.pm to actually perform the import into marc_word.
+       $sth->execute($tag, $subfieldid);
+       while (my $data=$sth->fetchrow_hashref()) {
+               MARCaddword($dbh, @{$data}{qw(bibid tag tagorder subfieldcode subfieldorder subfieldvalue)});
+       }
+}
+$dbh->disconnect();
author	joshferraro <joshferraro>
	Fri, 11 Jun 2004 15:07:48 +0000 (15:07 +0000)
committer	joshferraro <joshferraro>
	Fri, 11 Jun 2004 15:07:48 +0000 (15:07 +0000)