Skip to main content.
home | support | download

Back to List Archive

Perl Scripts

From: <Jerry.Porter(at)>
Date: Wed Aug 04 1999 - 18:45:37 GMT
Hello All,

Thanks to David Norris I have a Windows executable that allows me to index files
with colons.
It seems I can now get my results back with colons embedded in words.
But I only get the results back if I execute the search at the command line. If
I execute the search via my Perl script I get nothing back for the words that
have colons. Searches for words that do not have colons work fine. I do not know
much about Perl. Do you have any pointers?


Perl script is below:

# Change the above line to reflect the location of your installation of PERL
# -------------------------------------------------------------------
# Program:
# Author : Jerry E. Porter
# Purpose: A gateway interface (CGI) to the SWISH Searcher/indexer
# Instructions: 
# 1. Install and configure SWISH -- 
#    Available from Enterprise Integration Technologies at 
# 2. Index your site so that SWISH returns the url for each file
#    ie.  Swish should return
#    See the SWISH documentation about REPLACE_RULES to see how.
# 3. Customize the User-Defined variables below to reflect your site.
# 4. Install this file in your cgi directory.  This may vary 
#    from site to site, but is usually in a directory like cgi-bin
# 5. Create a link from your pages to the cgi
#    ex.
#    Running the cgi as a URL will generate a blank query form on the fly.
# Note: if you don't like the initial form that comes up, you can modify
#       the print_form subroutine
#       To change the format of the returned results, you may modify the 
#       print_results subroutine

# -------- User defined configuration variables -----------
# Every value below is a plain literal, so non-interpolating quotes are used.

# Swish searcher: absolute path of the executable to run
$swish = 'D:/swish/swishe.exe';

# URL under which this cgi is published (used as the form ACTION and as the
# target of the "New Search" link)
$swishcgi = '';

# Extra parameters for the Swish searcher (optional)
$params = ' ';

# Swish index files to search: absolute path and filename of each
$index1 = 'P:/htdocs/ARManswers.WOF/ARManswers.index';
$index2 = 'P:/htdocs/Source/Source.index';

# Organization name, in full -- printed in the page trailer
$organization = 'Targetbase Marketing';

# Department name, in full -- printed in the page trailer
$department = 'ARManswers Search Engine';

# ------ End of Configuration Variables ------------

# read_form (inline, runs at start-up)
# Reads the POSTed form body from STDIN and decodes it into:
#   %FORM        - one decoded value per field name
#   @search_tags - every value of the repeatable "search_tags" field
#   $query       - the "query" field (search words)
#   $results     - the "results" field (maximum number of hits)

# CONTENT_LENGTH is absent when the script is requested without a POST body
# (for example as a plain URL), so only read when it is a usable number.
$buffer = '';
if (defined $ENV{'CONTENT_LENGTH'} && $ENV{'CONTENT_LENGTH'} =~ /^\d+$/) {
   read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
}

my ($form_ref, $tags_ref) = _parse_form_data($buffer);
%FORM = %$form_ref;
@search_tags = @$tags_ref;

$query = $FORM{'query'};
$results = $FORM{'results'};

# _parse_form_data($raw)
# Decodes a URL-encoded "name=value&name=value" string.
# Returns two references: a hash of field name => decoded value, and an array
# holding every decoded value submitted under the name "search_tags".
# An undefined or empty $raw yields an empty hash and an empty array.
sub _parse_form_data {
   my ($raw) = @_;
   my (%fields, @tags);
   $raw = '' unless defined $raw;

   foreach my $pair (split(/&/, $raw)) {
      # The limit of 2 keeps any literal '=' that is part of the value.
      my ($name, $value) = split(/=/, $pair, 2);
      next unless defined $name && length $name;
      $value = '' unless defined $value;

      # Un-Webify plus signs and %-encoding (so "%3A" becomes ":").
      foreach my $part ($name, $value) {
         $part =~ tr/+/ /;
         $part =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
      }

      if ($name eq 'search_tags') {
         push(@tags, $value);
         next;
      }
      $fields{$name} = $value;
   }

   return (\%fields, \@tags);
}

# Build the optional "-t" switch for SWISH from the checked tag boxes.
# Without the else branch the switch was always overwritten with "".
if (@search_tags) {
   $tags = join("", @search_tags);
   # The form only submits single letters (t, h, c, e); drop anything else
   # because this value ends up on the SWISH command line.
   $tags =~ tr/a-zA-Z//cd;
   $search_tags = length($tags) ? "-t $tags" : "";
}
else {
   $search_tags = "";
}

# With search words: run the search and print the results.
# Without: print the blank query form (the behaviour the instructions at the
# top of the script describe for running the cgi as a plain URL).
if (defined $query && $query =~ /\S/) {
   search_parse();
}
else {
   print_form();
}

sub print_form {
   # Prints the blank query form as a complete HTML page.
   # Takes no arguments. Reads the global $swishcgi for the form's ACTION.
   # The here-document must end with EOF at the start of a line; without it
   # the rest of the file is swallowed into the string.
   html_header("ARManswers Search Engine");
   # To change the form that gets generated on the fly, edit the HTML below.
   print <<EOF;

<FORM ACTION="$swishcgi" method=post>
Enter word(s). You can connect terms with <b>and</b> or <b>or</b><p>
    <b>and</b> will find items that contain both terms<br>
    <b>or</b> will find items that contain either word, but not necessarily both<br>
   Example: NSDictionary <B>and</B> NSString 
   <INPUT NAME="query" SIZE=60>
<P>
<B>Maximum # of Items</B>
<SELECT name=results>
   <OPTION value=0> No Limit
   <OPTION value=10> 10
   <OPTION value=20> 20
   <OPTION value=30> 30
   <OPTION value=40> 40
   <OPTION value=50> 50
   <OPTION value=100> 100
</SELECT>
<!-- <TR><TH COLSPAN=4>
Search In the following Tags:<BR>
Leave Blank to search everything
   <td><input type=checkbox name=search_tags value="t">Title Tags</td>
   <td><input type=checkbox name=search_tags value="h">Heading Tags</td>
   <td><input type=checkbox name=search_tags value="c">Comment Tags</td>
   <td><input type=checkbox name=search_tags value="e">Emphasized Text</td>
</tr> -->
<P>
<INPUT TYPE="submit" VALUE="Start Search">
<INPUT TYPE="reset" VALUE="Clear Form">
</FORM>
EOF
   # Close the page that html_header opened.
   html_trailer();
}

sub search_parse {
   # Runs SWISH for the current query and prints the hits as an HTML page.
   # Takes no arguments.
   # Reads the globals $swish, $query, $results, $search_tags, $index1,
   # $index2 and $swishcgi.
   # Sets the globals @results (raw hit lines) and $count (number of hits).
   # On a SWISH error, or if SWISH cannot be started, it prints an error page
   # through search_error and returns without printing results.

   # Initialize the hit list and the counter for number of results
   @results = ();
   $count = 0;

   # Build the command as a list so that no shell ever parses the user's
   # query. Words starting with "-" are dropped so they cannot be read as
   # SWISH switches.
   # NOTE(review): list-form pipe open on Windows needs Perl 5.22 or later.
   my @words = grep { length && !/^-/ } split(' ', defined $query ? $query : '');
   my @command = ($swish, '-w', @words);
   push(@command, '-m', $results) if defined $results && $results =~ /^\d+$/;
   push(@command, split(' ', $search_tags)) if defined $search_tags;
   push(@command, '-f', $index1, $index2);

   my $banner_open  = "<CENTER><FONT COLOR=RED SIZE=5><B>";
   my $banner_close = "</B></FONT></CENTER>";

   my $swish_out;
   unless (open($swish_out, '-|', @command)) {
      search_error($banner_open . "Could not run the search program." . $banner_close);
      return;
   }

   # SWISH error lines and the message shown for each.
   my @known_errors = (
      [ 'err: no results',
        "There were no items that\nmatched your search request." ],
      [ 'err: could not open index file',
        "Could not open Index File." ],
      [ 'err: no search words specified',
        "Please Enter at least one\nSearch Word." ],
      [ 'err: a word is too common',
        "One of your search terms is\ntoo common, please try again." ],
   );

   my $failure;
   LINE: while (my $line = <$swish_out>) {
      # Strip the line ending; an exact comparison against the raw line
      # could never match because of the trailing newline.
      $line =~ s/\r?\n\z//;

      # First, check to see if search produced an error
      foreach my $known (@known_errors) {
         my ($prefix, $message) = @$known;
         if (index($line, $prefix) == 0) {
            $failure = $banner_open . $message . $banner_close;
            last LINE;
         }
      }

      # Hit lines start with the numeric rank; ignore everything else
      next LINE unless $line =~ /^\d/;
      push(@results, $line);
   }
   close($swish_out);

   if (defined $failure) {
      # search_error prints its own page, so stop here.
      search_error($failure);
      return;
   }
   $count = scalar @results;

   # The query comes from the browser: escape it before echoing it back.
   my %entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');
   my $shown_query = defined $query ? $query : '';
   $shown_query =~ s/([&<>"])/$entity{$1}/g;

   html_header("ARManswers Search Results");
   # NOTE(review): the word "items" is a reconstruction; the original line
   # was cut off after "found the following".
   print "<FONT SIZE=3><B>The ARManswers search engine found the following items\n";
   print " that might be relevant to your search topic.<BR>\n";
   print "A higher relevancy score means the item is more\n";
   print " likely to be what you are looking for.</B></FONT><BR><BR>\n";
   print "<CENTER>\n";
   print "Your Search for <B>$shown_query</B>, returned \n";
   print "<BLINK><FONT SIZE=5 COLOR=BLUE><B>$count</B> </BLINK></FONT>Items.\n";
   print "</CENTER>\n";
   print "<hr>\n";
   print "<A href=\"$swishcgi\"><img alt=\"New Search\" border=0 height=17\n"
       . "src=../ARManswers/Images/search_button.gif width=73></A>\n";
   print "<TABLE>\n";
   print "<TR><TH WIDTH=*>Question</TH><TH WIDTH=1%>Relevancy<BR>Score</TH><TH\n"
       . "WIDTH=1%>Size of Document<BR>in bytes</TH></TR>\n";

   foreach my $hit (@results) {
      # A hit line looks like:  rank url "title" filesize
      my ($stringone, $title, $filesize) = split(/\"/, $hit);
      # Limit of 2 keeps a url that itself contains spaces in one piece.
      my ($rank, $url) = split(/ /, defined $stringone ? $stringone : '', 2);
      foreach my $field ($rank, $url, $title, $filesize) {
         $field = '' unless defined $field;
         $field =~ s/^\s+//;
         $field =~ s/\s+$//;
      }
      # NOTE(review): the link and rank cells are reconstructed to match the
      # three column headings above; the original row markup was cut off.
      print "<TR><TD BGCOLOR=\"#CCCCCC\"><B><a href=\"$url\">$title</a></B></TD>"
          . "<TD BGCOLOR=\"#CCCCCC\">$rank</TD><TD\n"
          . "BGCOLOR=\"#CCCCCC\">$filesize Bytes</TD></TR>\n";
   }

   print "</TABLE>\n";
   # Close the page that html_header opened.
   html_trailer();
}

sub search_error {
   # Prints a complete HTML error page.
   # Argument: the error message, as ready-to-print HTML.
   # Reads the global $swishcgi for the "New Search" link.
   my ($error_message) = @_;
   $error_message = '' unless defined $error_message;

   html_header("ARManswers Search Results: ERROR");
   print "$error_message\n";
   print "<HR>\n";
   # CENTER and TABLE are opened here so the closing tags below are balanced.
   print "<CENTER>\n";
   print "<TABLE>\n";
   print "<TR><TD>\n";
   print "<A href=\"$swishcgi\"><img alt=\"New Search\" border=0 height=17\n"
       . "src=../ARManswers/Images/search_button.gif width=73></A>\n";
   print "</TD></TR>\n";
   print "</TABLE>\n";
   print "</CENTER>\n";
   # Close the page that html_header opened.
   html_trailer();
}

sub html_header {
   # Prints the CGI content-type header and the top of the HTML document.
   # Argument: the document title, used for both <TITLE> and the page heading.
   my ($document_title) = @_;
   $document_title = '' unless defined $document_title;

   # The blank line after the header ends the CGI header section.
   print "Content-type: text/html\n\n";
   print "<HTML>\n";
   print "<HEAD>\n";
   print "<TITLE>$document_title</TITLE>\n";
   print "</HEAD>\n";
   # Open the body that html_trailer closes.
   print "<BODY>\n";
   print "<CENTER><H2>$document_title</H2></CENTER>\n";
   print "<HR>\n";
}

sub html_trailer {
   # Prints the HTML trailer: a rule, the organization and department names,
   # and the closing body and html tags.
   # Takes no arguments. Reads the globals $organization and $department.
   print "<HR>\n";
   print "$organization<br>\n";
   # The body is closed once, on the next line, not here as well.
   print "$department<P>\n";
   print "</body>\n</html>\n";
}
Received on Wed Aug 4 11:43:52 1999