Continuing work on Z39.50 search tool. Daemon now forks up to 12 processes
author tonnesen <tonnesen>
Tue, 6 Nov 2001 18:13:59 +0000 (18:13 +0000)
committer tonnesen <tonnesen>
Tue, 6 Nov 2001 18:13:59 +0000 (18:13 +0000)
to do Z39.50 searches.  Daemon will also wait to see if the user looks
beyond the first couple of pages of results, and will download more results
if necessary.

acqui.simple/marcimport.pl
acqui.simple/processz3950queue

index 7aae1e9..de3fea6 100755 (executable)
@@ -133,8 +133,14 @@ if ($input->param('z3950queue')) {
        }
        chop $serverlist;
        my $q_serverlist=$dbh->quote($serverlist);
-       my $sth=$dbh->prepare("insert into z3950queue (term,type,servers) values ($q_term, '$type', $q_serverlist)");
+       my $rand=$input->param('rand');
+       my $sth=$dbh->prepare("select identifier from z3950queue where
+       identifier=$rand");
        $sth->execute;
+       unless ($sth->rows) {
+           $sth=$dbh->prepare("insert into z3950queue (term,type,servers, identifier) values ($q_term, '$type', $q_serverlist, '$rand')");
+           $sth->execute;
+       }
     }
 }
 
@@ -695,26 +701,56 @@ EOF
            $sth->execute;
            my ($servers) = $sth->fetchrow;
            my $serverstring;
+           my $starttimer=time();
            foreach $serverstring (split(/\s+/, $servers)) {
                my ($name, $server, $database, $auth) = split(/\//, $serverstring, 4);
                if ($name eq 'MAN') {
                    print "$server/$database<br>\n";
-               } elsif ($name eq 'LOC') {
-                   print "Library of Congress<br>\n";
-               } elsif ($name eq 'NLC') {
-                   print "National Library of Canada<br>\n";
                } else {
                    my $sti=$dbh->prepare("select name from
                    z3950servers where id=$name");
                    $sti->execute;
                    my ($longname)=$sti->fetchrow;
-                   print "$longname<br>\n";
+                   print "<a name=SERVER-$name></a>\n";
+                   if ($longname) {
+                       print "$longname \n";
+                   } else {
+                       print "$server/$database \n";
+                   }
                }
-               print "<ul>\n";
                my $q_server=$dbh->quote($serverstring);
+               my $startrecord=$input->param("ST-$name");
+               ($startrecord) || ($startrecord='0');
                my $sti=$dbh->prepare("select numrecords,id,results,startdate,enddate from z3950results where queryid=$id and server=$q_server");
                $sti->execute;
                ($numrecords,$resultsid,$data,$startdate,$enddate) = $sti->fetchrow;
+               my $serverplaceholder='';
+               foreach ($input->param) {
+                   (next) unless (/ST-(.+)/);
+                   my $serverid=$1;
+                   (next) if ($serverid eq $name);
+                   my $place=$input->param("ST-$serverid");
+                   $serverplaceholder.="\&ST-$serverid=$place";
+               }
+               if ($numrecords) {
+                   my $previous='';
+                   my $next='';
+                   if ($startrecord>0) {
+                       $previous="<a href=".$ENV{'SCRIPT_NAME'}."?file=Z-$id&menu=z3950$serverplaceholder\&ST-$name=".($startrecord-10)."#SERVER-$name>Previous</a>";
+                   }
+                   my $highest;
+                   $highest=$startrecord+10;
+                   ($highest>$numrecords) && ($highest=$numrecords);
+                   if ($numrecords>$startrecord+10) {
+                       $next="<a href=".$ENV{'SCRIPT_NAME'}."?file=Z-$id&menu=z3950$serverplaceholder\&ST-$name=$highest#SERVER-$name>Next</a>";
+                   }
+                   print "<font size=-1>[Viewing ".($startrecord+1)." to ".$highest." of $numrecords records]  $previous | $next </font><br>\n";
+               } else {
+                   print "<br>\n";
+               }
+               print "<ul>\n";
+               my $stj=$dbh->prepare("update z3950results set highestseen=".($startrecord+10)." where id=$resultsid");
+               $stj->execute;
                if ($sti->rows == 0) {
                    print "pending...";
                } elsif ($enddate == 0) {
@@ -728,8 +764,17 @@ EOF
                    }
                    print "<font color=red>processing... ($elapsedtime)</font>";
                } elsif ($numrecords) {
-                   my @records=parsemarcdata($data);
+                   my $splitchar=chr(29);
+                   my @records=split(/$splitchar/, $data);
+                   $data='';
+                   for ($i=$startrecord; $i<$startrecord+10; $i++) {
+                       $data.=$records[$i].$splitchar;
+                   }
+                   @records=parsemarcdata($data);
+                   my $counter=0;
                    foreach $record (@records) {
+                       $counter++;
+                       #(next) unless ($counter>=$startrecord && $counter<=$startrecord+10);
                        my ($lccn, $isbn, $issn, $dewey, $author, $title, $place, $publisher, $publicationyear, $volume, $number, @subjects, $note, $controlnumber);
                        foreach $field (@$record) {
                            if ($field->{'tag'} eq '001') {
@@ -800,6 +845,8 @@ EOF
                }
                print "</ul>\n";
            }
+           my $elapsed=time()-$starttimer;
+           print "<hr>It took $elapsed seconds to process this page.\n";
        } else {
            my $sth=$dbh->prepare("select marc,name from uploadedmarc where id=$file");
            $sth->execute;
@@ -907,30 +954,54 @@ sub z3950 {
        $type=uc($type);
        $term=~s/</&lt;/g;
        $term=~s/>/&gt;/g;
-       if ($done == 1) {
-           my $elapsed=$enddate-$startdate;
-           my $elapsedtime='';
-           if ($elapsed>60) {
-               $elapsedtime=sprintf "%d minutes",($elapsed/60);
-           } else {
-               $elapsedtime=sprintf "%d seconds",$elapsed;
-           }
-           if ($numrecords) {
-               print "<li><a href=$ENV{'SCRIPT_NAME'}?file=Z-$id&menu=$menu>$type=$term</a> <font size=-1>Done. $numrecords records found in $elapsedtime.</font><br>\n";
-           } else {
-               print "<li><a href=$ENV{'SCRIPT_NAME'}?file=Z-$id&menu=$menu>$type=$term</a> <font size=-1>Done.  No records found.  Search took $elapsedtime.</font><br>\n";
+       my $sti=$dbh->prepare("select id,server,startdate,enddate,numrecords from z3950results where queryid=$id");
+       $sti->execute;
+       if ($sti->rows) {
+           my $processing=0;
+           my $realenddate=0;
+           my $totalrecords=0;
+           while (my ($r_id,$r_server,$r_startdate,$r_enddate,$r_numrecords) = $sti->fetchrow) {
+               if ($r_enddate==0) {
+                   $processing=1;
+               } else {
+                   if ($r_enddate>$realenddate) {
+                       $realenddate=$r_enddate;
+                   }
+               }
+
+               $totalrecords+=$r_numrecords;
            }
-       } elsif ($done == -1) {
-           my $elapsed=time()-$startdate;
-           my $elapsedtime='';
-           if ($elapsed>60) {
-               $elapsedtime=sprintf "%d minutes",($elapsed/60);
+           if ($processing) {
+               my $elapsed=time()-$startdate;
+               my $elapsedtime='';
+               if ($elapsed>60) {
+                   $elapsedtime=sprintf "%d minutes",($elapsed/60);
+               } else {
+                   $elapsedtime=sprintf "%d seconds",$elapsed;
+               }
+               if ($totalrecords) {
+                   $totalrecords="$totalrecords found.";
+               } else {
+                   $totalrecords='';
+               }
+               print "<li><a href=$ENV{'SCRIPT_NAME'}?file=Z-$id&menu=$menu>$type=$term</a> <font size=-1 color=red>Processing... $totalrecords ($elapsedtime)</font><br>\n";
            } else {
-               $elapsedtime=sprintf "%d seconds",$elapsed;
+               my $elapsed=$realenddate-$startdate;
+               my $elapsedtime='';
+               if ($elapsed>60) {
+                   $elapsedtime=sprintf "%d minutes",($elapsed/60);
+               } else {
+                   $elapsedtime=sprintf "%d seconds",$elapsed;
+               }
+               if ($totalrecords) {
+                   $totalrecords="$totalrecords found.";
+               } else {
+                   $totalrecords='';
+               }
+               print "<li><a href=$ENV{'SCRIPT_NAME'}?file=Z-$id&menu=$menu>$type=$term</a> <font size=-1>Done. $totalrecords ($elapsedtime)</font><br>\n";
            }
-           print "<li><a href=$ENV{'SCRIPT_NAME'}?file=Z-$id&menu=$menu>$type=$term</a> <font color=red size=-1>Processing ($elapsedtime)</font><br>\n";
        } else {
-           print "<li><a href=$ENV{'SCRIPT_NAME'}?file=Z-$id&menu=$menu>$type=$term</a> $done <font size=-1>Pending</font><br>\n";
+           print "<li><a href=$ENV{'SCRIPT_NAME'}?file=Z-$id&menu=$menu>$type=$term</a> <font size=-1>Pending</font><br>\n";
        }
     }
     print "</ul>\n";
@@ -944,15 +1015,17 @@ sub z3950 {
     }
     $serverlist.="<input type=checkbox name=S-MAN> <input name=manualz3950server size=25 value=otherserver:210/DATABASE>\n";
     
+    my $rand=rand(1000000000);
 print << "EOF";
     <form action=$ENV{'SCRIPT_NAME'} method=GET>
     <input type=hidden name=z3950queue value=1>
     <input type=hidden name=menu value=$menu>
     <p>
     <input type=hidden name=test value=testvalue>
+    <input type=hidden name=rand value=$rand>
     <table border=1 bgcolor=#dddddd><tr><th bgcolor=#bbbbbb colspan=2>Search for MARC records</th></tr>
     <tr><td>Query Term</td><td><input name=query></td></tr>
-    <tr><td colspan=2 align=center><input type=radio name=type value=isbn checked> ISBN <input type=radio name=type value=lccn> LCCN <input type=radio name=type value=title> Title</td></tr>
+    <tr><td colspan=2 align=center><input type=radio name=type value=isbn checked>&nbsp;ISBN <input type=radio name=type value=lccn>&nbsp;LCCN<br><input type=radio name=type value=author>&nbsp;Author <input type=radio name=type value=title>&nbsp;Title <input type=radio name=type value=keyword>&nbsp;Keyword</td></tr>
     <tr><td colspan=2>
     $serverlist
     </td></tr>
index 51b3fea..aad5e4b 100755 (executable)
@@ -4,56 +4,206 @@ use DBI;
 #use strict;
 use C4::Acquisitions;
 use C4::Output;
+use Net::Z3950;
 my $dbh=C4Connect;
 
+my $sth=$dbh->prepare("update z3950results set active=0");
+$sth->execute;
+$sth->finish;
+$SIG{CHLD}='reap';
 
-
+my $reapcounter=0;
+my $forkcounter=0;
+my $pid=$$;
+my $lastrun=0;
 while (1) {
-    my $sth=$dbh->prepare("select id,term,type,servers from z3950queue where
-    isnull(done) || done=-1");
-    $sth->execute;
-    while (my ($id, $term, $type, $servers) = $sth->fetchrow) {
-       my $now=time();
-       my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=$now where id=$id");
-       $sti->execute;
-       my $attr='';
-       if ($type eq 'isbn') {
-           $attr='1=7';
-       } elsif ($type eq 'title') {
-           $attr='1=4';
-       } elsif ($type eq 'lccn') {
-           $attr='1=9';
-       }
-       $term='"'.$term.'"';
-       $query="f \@attr $attr $term";
-       my $totalrecords=0;
-       my $serverinfo;
-       foreach $serverinfo (split(/\s+/, $servers)) {
-           my ($name, $server, $database, $auth) = split(/\//, $serverinfo, 4);
-           ($auth eq '/') && ($auth='');
-           print "Processing $type=$term at $name $server $database $auth\n";
-           $now=time();
-           my $q_serverinfo=$dbh->quote($serverinfo);
-           my $sti=$dbh->prepare("insert into z3950results (server, queryid, startdate) values ($q_serverinfo, $id, $now)");
-           $sti->execute;
-           my $resultsid=$dbh->{'mysql_insertid'};
-           getrecord($server, $database, $query, $auth);
-           my $result=`cat yaz.mrc`;
-           unlink ('yaz.mrc');
-           my $splitchar=chr(29);
-           my @records=split(/$splitchar/, $result);
-           my $numrecords=$#records+1;
-           $totalrecords+=$numrecords;
-           my $q_result=$dbh->quote($result);
-           ($q_result) || ($q_result='""');
-           $now=time();
-           $sti=$dbh->prepare("update z3950results set numrecords=$numrecords,results=$q_result,enddate=$now where id=$resultsid");
-           $sti->execute;
+    if ((time-$lastrun)>5) {
+       my $sth=$dbh->prepare("select id,term,type,servers from z3950queue order by id");
+       $sth->execute;
+       while (my ($id, $term, $type, $servers) = $sth->fetchrow) {
+           if ($forkcounter<12) {
+               my $now=time();
+               $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=$id");
+               $stk->execute;
+               my %serverdone;
+               unless ($stk->rows) {
+                   my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=$now where id=$id");
+                   $sti->execute;
+               }
+               while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
+                   if ($r_enddate >0) {
+                       $serverdone{$r_server}=1;
+                   } elsif ($active) {
+                       $serverdone{$r_server}=1;
+                   } else {
+                       $serverdone{$r_server}=-1;
+                   }
+               }
+
+               $stk->finish;
+               my $attr='';
+               if ($type eq 'isbn') {
+                   $attr='1=7';
+               } elsif ($type eq 'title') {
+                   $attr='1=4';
+               } elsif ($type eq 'author') {
+                   $attr='1=1003';
+               } elsif ($type eq 'lccn') {
+                   $attr='1=9';
+               } elsif ($type eq 'keyword') {
+                   $attr='1=1016';
+               }
+               $term='"'.$term.'"';
+               $query="\@attr $attr $term";
+               my $totalrecords=0;
+               my $serverinfo;
+               my $stillprocessing=0;
+               foreach $serverinfo (split(/\s+/, $servers)) {
+                   (next) if ($serverdone{$serverinfo} == 1);
+                   my $stillprocessing=1;
+                   if (my $pid=fork()) {
+                       $forkcounter++;
+                   } else {
+                       #$sth->finish;
+                       #$sti->finish;
+                       #$dbh->disconnect;
+                       my $dbi=C4Connect;
+                       my ($name, $server, $database, $user, $password) = split(/\//, $serverinfo, 5);
+                       $server=~/(.*)\:(\d+)/;
+                       my $servername=$1;
+                       my $port=$2;
+                       print "Processing $type=$term at $name $server $database (".($forkcounter+1)." forks)\n";
+                       $now=time();
+                       my $q_serverinfo=$dbi->quote($serverinfo);
+                       my $resultsid;
+                       if ($serverdone{$serverinfo}==-1) {
+                           my $stj=$dbi->prepare("select id from z3950results where server=$q_serverinfo and queryid=$id");
+                           $stj->execute;
+                           ($resultsid) = $stj->fetchrow;
+                       } else {
+                           my $stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values ($q_serverinfo, $id, $now)");
+                           $stj->execute;
+                           $resultsid=$dbi->{'mysql_insertid'};
+                       }
+                       my $stj=$dbh->prepare("update z3950results set active=1 where id=$resultsid");
+                       $stj->execute;
+                       my $conn;
+                       my $noconnection=0;
+                       if ($user) {
+                           eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password); };
+                           if ($@) {
+                               $noconnection=1;
+                           }
+                           pe();
+                       } else {
+                           eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database); };
+                           if ($@) {
+                               $noconnection=1;
+                           }
+                           pe();
+                       }
+                       if ($noconnection) {
+                       } else {
+                           my $rs=$conn->search($query);
+                           pe();
+                           $rs->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC);
+                           pe();
+                           my $numresults=$rs->size();
+                           pe();
+                           my $i;
+                           my $result='';
+                           my $scantimerstart=time();
+                           for ($i=1; $i<=(($numresults<80) ? ($numresults) : (80)); $i++) {
+                               my $rec=$rs->record($i);
+                               my $marcdata=$rec->rawdata();
+                               $result.=$marcdata;
+                           }
+                           my $scantimerend=time();
+                           my $numrecords;
+                           ($numresults<80) ? ($numrecords=$numresults) : ($numrecords=80);
+                           my $elapsed=$scantimerend-$scantimerstart;
+                           if ($elapsed) {
+                               my $speed=int($numresults/$elapsed*100)/100;
+                               print "  SPEED: $speed  $server done $numrecords\n";
+                           }
+
+                           my $q_result=$dbi->quote($result);
+                           ($q_result) || ($q_result='""');
+                           $now=time();
+                           my $task="update z3950results set numrecords=$numresults,numdownloaded=$numrecords,highestseen=0,results=$q_result,enddate=$now where id=$resultsid";
+                           my $stj=$dbi->prepare($task);
+                           $stj->execute;
+                           my $counter=0;
+                           while ($counter<60 && $numrecords<$numresults) {
+                               $counter++;
+                               my $stj=$dbi->prepare("select highestseen from z3950results where id=$resultsid");
+                               $stj->execute;
+                               my ($highestseen) = $stj->fetchrow;
+                               if ($highestseen>($numrecords-30)) {
+                                   $counter=0;
+                                   print "   $server rescanning\n";
+                                   my $scantimerstart=time();
+                                   for ($i=$numrecords+1; $i<=(($numresults<($numrecords+40)) ? ($numresults) : ($numrecords+40)); $i++) {
+                                       my $rec=$rs->record($i);
+                                       my $marcdata=$rec->rawdata();
+                                       $result.=$marcdata;
+                                   }
+                                   my $scantimerend=time();
+                                   ($numresults<$numrecords+40) ? ($numrecords=$numresults) : ($numrecords=$numrecords+40);
+                                   my $elapsed=$scantimerend-$scantimerstart;
+                                   if ($elapsed) {
+                                       my $speed=int($numresults/$elapsed*100)/100;
+                                       print "  SPEED: $speed  $server done $numrecords\n";
+                                   }
+
+                                   my $q_result=$dbi->quote($result);
+                                   ($q_result) || ($q_result='""');
+                                   $now=time();
+                                   my $task="update z3950results set numdownloaded=$numrecords,results=$q_result where id=$resultsid";
+                                   my $stj=$dbi->prepare($task);
+                                   $stj->execute;
+                               }
+                               sleep 5;
+                           }
+                       }
+                       my $stj=$dbi->prepare("update z3950results set active=0 where id=$resultsid");
+                       $stj->execute;
+                       eval {$stj->finish};
+                       $dbi->disconnect;
+                       print "    $server done.\n";
+                       exit;
+                       sub pe {
+                           (return) unless ($code);
+                           my $code=$conn->errcode();
+                           my $msg=$conn->errmsg();
+                           my $ai=$conn->addinfo();
+                           print << "EOF";
+                       CODE:  $code
+                       MSG:   $msg
+                       ADDTL: $ai
+
+EOF
+                       }
+                   }
+               } unless ($stillprocessing) {
+                   #my $sti=$dbh->prepare("select enddate from z3950queue where id=$id");
+                   #$sti->execute;
+                   #my ($enddate) = $sti->fetchrow;
+                   #unless ($enddate) {
+       #               my $now=time;
+#                      $sti=$dbh->prepare("update z3950queue set done=1,numrecords=$totalrecords,enddate=$now where id=$id");
+#                      $sti->execute;
+#                  }
+               }
+           } else {
+#          my $q_serverinfo=$dbh->quote($serverinfo);
+#          my $stj=$dbh->prepare("insert into z3950results (server, queryid, startdate) values ($q_serverinfo, $id, 0)");
+#          $stj->execute;
+           }
        }
-       $sti=$dbh->prepare("update z3950queue set done=1,numrecords=$totalrecords,enddate=$now where id=$id");
-       $sti->execute;
+       $lastrun=time();
     }
-    sleep 15;
+    sleep 1;
 }
 
 sub getrecord {
@@ -61,30 +211,16 @@ sub getrecord {
     my $base=shift;
     my $query=shift;
     my $auth=shift;
-    open  (M, "|yaz-client -m yaz.mrc >>yaz.out 2>>yaz.err");
+    my $id=shift;
+    open  (M, "|yaz-client -m yaz-$id.mrc >>yaz.out 2>>yaz.err");
     select M;
     $|=1;
     select STDOUT;
     ($auth) && ($auth="authentication $auth\n");
-    print << "EOF";
-$auth\open $server
-base $base
-$query
-s
-s
-s
-s
-s
-s
-s
-s
-s
-s
-quit
-EOF
     print M << "EOF";
 $auth\open $server
 base $base
+setnames
 $query
 s
 s
@@ -100,3 +236,9 @@ quit
 EOF
     close M;
 }
+sub reap {
+    $forkcounter--;
+}
+
+
+