Hello All,
Thanks to David Norris I have a Windows executable that allows me to index files
with colons.
It seems I can now get my results back with colons embedded in words.
But I only get the results back if I execute the search at the command line. If
I execute the search via my Perl script, I get nothing back for the words that
have colons. Searches for words that do not have colons work fine. I do not know
much about Perl. Do you have any pointers?
Jerry
The Perl script is below:
#!D:/perl5/bin/perl
# Change the above line to reflect the location of your installation of PERL
#
# -------------------------------------------------------------------
# Program: ARManswers-cgi.pl
# Author : Jerry E. Porter
#
# Purpose: A gateway interface (CGI) to the SWISH Searcher/indexer
#
# Instructions:
#
# 1. Install and configure SWISH --
# Available from Enterprise Integration Technologies at
# http://www.eit.com/goodies/software/swish/swish.html
#
# 2. Index your site so that SWISH returns the url for each file
# ie. Swish should return http://www.yoursite.edu/file_name.html
# See the SWISH documentation about REPLACE_RULES to see how.
#
# 3. Customize the User-Defined variables below to reflect your site.
#
# 4. Install this file in your cgi directory. This may vary
# from site to site, but is usually in a directory like cgi-bin
#
# 5. Create a link from your pages to the cgi
# ex. http://www.yoursite.edu/cgi-bin/swish-cgi
# Running the cgi as a URL will generate a blank query form on the fly.
#
#
# Note: if you don't like the initial form that comes up, you can modify
# the print_form subroutine
#
# To change the format of the returned results, you may modify the
# print_results subroutine
# ======== Site-specific settings (edit these for your installation) ========

# Full path of the SWISH executable that performs the search.
$swish = 'D:/swish/swishe.exe';

# Public URL of this CGI script; used as the form's ACTION and for the
# "New Search" links.
$swishcgi = 'http://armanswers.marcgroup.com/cgi-bin/ARManswers-cgi.pl';

# Extra command-line parameters for the SWISH searcher (optional).
$params = ' ';

# Full paths of the SWISH index files to search.
($index1, $index2) = (
    'P:/htdocs/ARManswers.WOF/ARManswers.index',
    'P:/htdocs/Source/Source.index',
);

# Organization and department names, printed at the bottom of every page.
$organization = 'Targetbase Marketing';
$department   = 'ARManswers Search Engine';

# ======== End of site-specific settings ========
# ---- Main program: decode the submitted form, then dispatch ----
#
# Reads the URL-encoded body POSTed by the query form from STDIN, decodes it
# into %FORM (repeated "search_tags" fields are collected in @search_tags),
# and then either runs the search or, when no query was sent, prints the
# blank query form.
#
# Sets the package globals $query, $results and $search_tags, which
# search_parse reads.

# CONTENT_LENGTH is absent when the script is requested as a plain URL,
# so only read from STDIN when the server supplied a numeric length.
$buffer = '';
if (defined($ENV{'CONTENT_LENGTH'}) && $ENV{'CONTENT_LENGTH'} =~ /^\d+$/)
{
    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
}

# Split the name=value pairs
@pairs = split(/&/, $buffer);
foreach $pair (@pairs)
{
    # A limit of 2 keeps any literal "=" inside the value intact.
    ($name, $value) = split(/=/, $pair, 2);
    $name  = '' unless defined($name);
    $value = '' unless defined($value);

    # Un-Webify plus signs and %-encoding, in the field name as well as in
    # the value (foreach aliases its loop variable to $name and $value).
    foreach my $field ($name, $value)
    {
        $field =~ tr/+/ /;
        $field =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    }

    if ($name eq 'search_tags')
    {
        push(@search_tags, $value);
        next;
    }
    $FORM{$name} = $value;
}

$query   = $FORM{'query'};
$results = $FORM{'results'};

# Build the "-t" option from the selected tag letters.  Only letters are
# kept because $search_tags is interpolated into the SWISH command line by
# search_parse, and these values come straight from the browser.
$tags = join("", @search_tags);
$tags =~ tr/a-zA-Z//cd;
if ($tags ne '')
{
    $search_tags = "-t $tags";
}
else
{
    $search_tags = "";
}

# Compare against the empty string rather than testing truth, so that a
# query of "0" is still treated as a query.
if (defined($query) && $query ne '')
{
    search_parse();
}
else
{
    print_form();
}
sub print_form
# Prints the blank query form as a complete HTML page and exits.
#
# Takes no arguments.  Reads the global $swishcgi for the form's ACTION URL.
# The page header comes from html_header and the footer from html_trailer,
# which also terminates the script.
{
    html_header("ARManswers Search Engine");

    # To change the form that gets generated on the fly, edit the HTML below.
    # The closing </BODY> and </HTML> tags are deliberately not part of this
    # text: html_trailer prints the footer and then closes the document, so
    # closing it here would leave the footer outside the page.
    print <<EOF;
<CENTER>
<FORM ACTION="$swishcgi" method=post>
<table BORDER=1 CELLPADDING=1 CELLSPACING=2>
<CAPTION>
Enter word(s). You can connect terms with <b>and</b> or <b>or</b><p>
<b>and</b> will find items that contain both terms<br>
<b>or</b> will find items that contain either word, but not necessarily
both<p>
Example: NSDictionary <B>and</B> NSString
</CAPTION>
<TR><TD ALIGN=CENTER COLSPAN=4>
<INPUT NAME="query" SIZE=60>
</TD></TR>
<TR><TD ALIGN=CENTER COLSPAN=4>
<B>Maximum # of Items</B>
<SELECT name=results>
<OPTION value=0> No Limit
<OPTION value=10> 10
<OPTION value=20> 20
<OPTION value=30> 30
<OPTION value=40> 40
<OPTION value=50> 50
<OPTION value=100> 100
</SELECT>
</TD></TR>
<!-- <TR><TH COLSPAN=4>
Search In the following Tags:<BR>
Leave Blank to search everything
</TH></TR>
<tr>
<td><input type=checkbox name=search_tags value="t">Title Tags</td>
<td><input type=checkbox name=search_tags value="h">Heading Tags</td>
<td><input type=checkbox name=search_tags value="c">Comment Tags</td>
<td><input type=checkbox name=search_tags value="e">Emphasized Text</td>
</tr> -->
</table>
<P>
<INPUT TYPE="submit" VALUE="Start Search">
<INPUT TYPE="reset" VALUE="Clear Form">
</FORM>
</CENTER>
EOF

    html_trailer();
}
sub search_parse
# Runs the SWISH searcher for the submitted query, parses its output and
# prints the results page (or an error page), then exits via html_trailer
# or search_error.
#
# Takes no arguments.  Reads the globals $query, $results, $search_tags,
# $params, $swish, $index1, $index2 and $swishcgi.  Sets the globals $count
# (number of hits) and @results (the raw hit lines).
#
# The query, result limit and tag option all originate from the browser and
# end up on a shell command line, so each is reduced to a known-safe form
# before the command is built.
{
    # Status lines printed by SWISH, mapped to the page shown to the user.
    my %message_for = (
        "err: no results" =>
            "<CENTER><FONT COLOR=RED SIZE=5><B>There were no items that\nmatched your search request.</B></FONT></CENTER>",
        "err: could not open index file" =>
            "<CENTER><FONT COLOR=RED SIZE=5><B>Could not open Index File\n$index1.</B></FONT></CENTER>",
        "err: no search words specified" =>
            "<CENTER><FONT COLOR=RED SIZE=5><B>Please Enter at least one\nSearch Word.</B></FONT></CENTER>",
        "err: a word is too common" =>
            "<CENTER><FONT COLOR=RED SIZE=5><B>One of your search terms is\ntoo common, please try again.</B></FONT></CENTER>",
    );

    # Keep only word characters, high-bit bytes and the punctuation
    # : . * = - in the query; everything else (quotes, pipes, redirection,
    # %VAR% expansion, parentheses, ...) becomes a space.  Extend the
    # character class if more query syntax is needed.
    my $safe_query = defined($query) ? $query : '';
    $safe_query =~ s/[^\w:.*=\x80-\xff-]+/ /g;

    my @words = ();
    foreach my $word (split(' ', $safe_query))
    {
        # A leading dash would be read by SWISH as a command-line option.
        $word =~ s/^-+//;
        push(@words, $word) if $word ne '';
    }
    search_error($message_for{"err: no search words specified"}) unless @words;

    # The result limit must be a plain non-negative integer; the form uses
    # 0 for "No Limit", so that is also the fallback.  Taking $1 rather
    # than $results drops a trailing newline that "$" would tolerate.
    my $max_hits = 0;
    if (defined($results) && $results =~ /^(\d+)$/)
    {
        $max_hits = $1;
    }

    # Accept the tag option only in the exact "-t letters" form.
    my $tag_option = "";
    if (defined($search_tags) && $search_tags =~ /^-t ([A-Za-z]+)$/)
    {
        $tag_option = "-t $1";
    }

    # $params is site configuration (trusted), documented as optional
    # parameters for the searcher.
    my $extra = defined($params) ? $params : "";

    my $command = "$swish $extra -w " . join(' ', @words)
                . " -m $max_hits $tag_option -f $index1 $index2";

    unless (open(SWISH, "$command |"))
    {
        search_error("<CENTER><FONT COLOR=RED SIZE=5><B>Could not run the search program.</B></FONT></CENTER>");
    }

    # Initialize counter variable and list of raw result lines
    $count   = 0;
    @results = ();
    while (defined(my $line = <SWISH>))
    {
        # Strip the line ending (LF or CR/LF) without eating a real
        # character when the last line has no newline.
        $line =~ s/\s+$//;

        # First, check to see if the search produced an error.  A trailing
        # period on the status line is tolerated.
        (my $status = $line) =~ s/\.$//;
        if (exists($message_for{$status}))
        {
            search_error($message_for{$status});
        }

        # Result lines start with the numeric rank; skip everything else,
        # including blank lines.
        next unless $line =~ /^\d/;
        $count++;
        push(@results, $line);
    }
    close(SWISH);

    # The query is echoed back into the page, so HTML-escape it.
    my $shown_query = defined($query) ? $query : '';
    $shown_query =~ s/&/&amp;/g;
    $shown_query =~ s/</&lt;/g;
    $shown_query =~ s/>/&gt;/g;
    $shown_query =~ s/"/&quot;/g;

    html_header("ARManswers Search Results");
    print "<FONT SIZE=3><B>The ARManswers search engine found the following\nitems\n";
    print " that might be relevant to your search topic.<BR>\n";
    print "A higher relevancy score means the item is more\n";
    print " likely to be what you are looking for.</B></FONT><BR><BR>\n";
    print "<CENTER>\n";
    print "Your Search for <B>$shown_query</B>, returned \n";
    print "<BLINK><FONT SIZE=5 COLOR=BLUE><B>$count</B> </FONT></BLINK>Items.\n";
    print "</CENTER>\n";
    print "<hr>\n";
    print "<A href=\"$swishcgi\"><img alt=\"New Search\" border=0 height=17\nsrc=../ARManswers/Images/search_button.gif width=73></A>\n";
    print "<TABLE WIDTH=100% BORDER=1 CELLPADDING=1 CELLSPACING=2>\n";
    print "<TR><TH WIDTH=*>Question</TH><TH WIDTH=1%>Relevancy<BR>Score</TH><TH\nWIDTH=1%>Size of Document<BR>in bytes</TH></TR>\n";

    foreach my $hit (@results)
    {
        # The title is the double-quoted part of the line: the rank and URL
        # come before it, the file size after it.
        my ($rank_and_url, $title, $filesize) = split(/\"/, $hit);
        my ($rank, $url) = split(/ /, $rank_and_url);
        print "<TR><TD BGCOLOR=\"#CCCCCC\"><B><a\nhref=\"$url\">$title</a></B></TD>\n";
        print "<TD ALIGN=RIGHT BGCOLOR=\"#CCCCCC\">$rank</TD><TD ALIGN=RIGHT\nBGCOLOR=\"#CCCCCC\">$filesize Bytes</TD></TR>\n";
    }
    print "</TABLE>\n";
    html_trailer();
}
sub search_error
# Prints a complete error page and exits.
#
# Argument: the error message, as ready-to-print HTML.  It is stored in the
# global $error_message and printed unchanged, so the caller is responsible
# for any markup around it (the callers in this script wrap their messages
# in their own <CENTER>...</CENTER>).
#
# Does not return: html_trailer ends the script.
{
    html_header("ARManswers Search Results: ERROR");
    $error_message = $_[0];
    print "$error_message\n";
    print "<HR>\n";
    # No </CENTER> is printed here: the header and the message each close
    # the <CENTER> they open, so one more would be unmatched.
    print "<TABLE BORDER=0 CELLPADDING=2 CELLSPACING=2>\n";
    print "<TR><TD>\n";
    print "<A href=\"$swishcgi\"><img alt=\"New Search\" border=0 height=17\nsrc=../ARManswers/Images/search_button.gif width=73></A>\n";
    print "</TD></TR>\n";
    print "</TABLE>\n";
    html_trailer();
}
sub html_header
# Starts a CGI response: prints the Content-type header, the HTML <HEAD>
# with the given title, the opening <BODY> tag, and the title again as a
# centered heading followed by a rule.
#
# Argument: the document title (also stored in the global $document_title).
{
    ($document_title) = @_;

    print <<"END_OF_HEADER";
Content-type: text/html

<HTML>
<HEAD>
<TITLE>$document_title</TITLE>
</HEAD>
<BODY BGCOLOR=#EEEEEE
background=../ARManswers/Images/background_main.gif>
<CENTER><H2>$document_title</H2></CENTER>
<HR>
END_OF_HEADER
}
sub html_trailer
# Prints the page footer (a rule, then the global $organization and
# $department names), closes the HTML document, and ends the script.
#
# Takes no arguments and does not return: it calls exit.
{
    print "<HR>\n";
    print "$organization<br>\n";
    print "$department<P>\n";
    # Close the body exactly once.
    print "</body>\n</html>\n";
    exit;
}
Received on Wed Aug 4 11:43:52 1999