Skip to main content.
home | support | download

Back to List Archive

Simple bug fix...

From: Peter Bergner <bergner(at)not-real.lcse.umn.edu>
Date: Wed Nov 05 1997 - 19:37:21 GMT
I've been a fairly long-time user of SWISH, but was rather disappointed
that it wasn't being maintained.  I ended up making quite a few bug
fixes, which I see have also been made in SWISH-E.  It's nice to see that
the SWISH-E people have taken it back up!!!

Anyway, back to the point.  There's a small bug in how swish-e
handles IGNORELASTCHAR characters.  Specifically, after it removes the
last character of a word because it's a member of IGNORELASTCHAR, it
doesn't check whether the new last character is also a member of
IGNORELASTCHAR.  The following diff contains the changes to index.c
needed to iteratively remove all trailing IGNORELASTCHAR characters.

For example, the word:
    Briggs'.
should be stripped down to:
    Briggs
-- 

Peter




*** src/index.c	Mon Sep 29 16:50:07 1997
--- index.c	Wed Nov  5 13:17:49 1997
***************
*** 16,21 ****
--- 16,25 ----
  **
  ** Changed removestops to support printing of stop words
  ** G. Hill 4/7/97
+ **
+ ** Added stripIgnoreChars and isIgnoreChar routines which iteratively
+ ** remove all ignore characters from the end of each word.
+ ** P. Bergner  11/5/97  bergner@lcse.umn.edu
  */
  
  #include "swish.h"
***************
*** 22,27 ****
--- 26,36 ----
  #include "index.h"
  #include "hash.h"
  
+ #ifdef IGNORELAST
+ void stripIgnoreChars(char *word);
+ int isIgnoreChar(char c);
+ #endif
+ 
   /* Have we already indexed a file or directory?
   ** This function is used to avoid multiple index entries
   ** or endless looping due to symbolic links.
***************
*** 556,573 ****
  					strcpy(word, (char *)
  					convertentities(word));
  
  				/* Get rid of specified last char's */
! 				if (IGNORELAST) {
! 				  for (k=0; word[k] != '\0'; k++)
! 				    ;
! 				  /* Move back one to find the last char */
! 				  k--;
! 				  for (q=0; IGNORELASTCHAR[q] != '\0'; q++)
! 				    if (word[k] == IGNORELASTCHAR[q]){
! 				      word[k] = '\0';
! 				      break;
! 				    }
! 				}
  /* Sorry, have to do isokword() twice to filter out converted strings! */
  
                                  if (hasokchars(word) && isokword(word)) {
--- 565,575 ----
  					strcpy(word, (char *)
  					convertentities(word));
  
+ #ifdef IGNORELAST
  				/* Get rid of specified last char's */
! 				stripIgnoreChars( word );
! #endif
! 
  /* Sorry, have to do isokword() twice to filter out converted strings! */
  
                                  if (hasokchars(word) && isokword(word)) {
***************
*** 681,699 ****
  					strcpy(word, (char *)
  					convertentities(word));
   
!                                 /* Get rid of specified last char's */
!                                 if (IGNORELAST) {
!                                   for (k=0; word[k] != '\0'; k++)
!                                     ;
!                                   /* Move back one to find the last char */
!                                   k--;
!                                   for (q=0; IGNORELASTCHAR[q] != '\0'; q++)
!                                     if (word[k] == IGNORELASTCHAR[q]){
!                                       word[k] = '\0';
!                                       break;
!                                     }
!                                 }
! 
                                  if (hasokchars(word) && isokword(word))
                                          entrylist = (struct entry *)
                                          addentry(entrylist, word,
--- 683,692 ----
  					strcpy(word, (char *)
  					convertentities(word));
   
! #ifdef IGNORELAST
! 				/* Get rid of specified last char's */
! 				stripIgnoreChars( word );
! #endif
                                  if (hasokchars(word) && isokword(word))
                                          entrylist = (struct entry *)
                                          addentry(entrylist, word,
***************
*** 1274,1291 ****
  	    if (isokword(word))
  	      strcpy(word, (char *)
  		     convertentities(word));
  	    /* Get rid of specified last char's */
! 	    if (IGNORELAST) {
! 	      for (k=0; word[k] != '\0'; k++)
! 		;
! 	      /* Move back one to find the last char */
! 	      k--;
! 	      for (q=0; IGNORELASTCHAR[q] != '\0'; q++)
! 		if (word[k] == IGNORELASTCHAR[q]){
! 		  word[k] = '\0';
! 		  break;
! 		}
! 	    }
  	    if (hasokchars(word) && isokword(word))
  	      entrylist = (struct entry *)
  		addentry(entrylist, word,
--- 1267,1276 ----
  	    if (isokword(word))
  	      strcpy(word, (char *)
  		     convertentities(word));
+ #ifdef IGNORELAST
  	    /* Get rid of specified last char's */
! 	    stripIgnoreChars( word );
! #endif
  	    if (hasokchars(word) && isokword(word))
  	      entrylist = (struct entry *)
  		addentry(entrylist, word,
***************
*** 1305,1307 ****
--- 1290,1330 ----
      }
    return wordcount;
  }
+ 
+ #ifdef IGNORELAST
+ 
+ /*  These 2 routines fix the problem when a word ends with multiple
+ **  IGNORELASTCHAR's (eg, qwerty'. ).  The old code correctly deleted
+ **  the ".", but didn't check if the new last character ("'") is also 
+ **  an ignore character.
+  */
+ 
+ void stripIgnoreChars(char *word)
+ {
+   int i;
+   
+   /* Find the index of the terminating NUL (i.e. the length of word) */
+   for (i=0; word[i] != '\0'; i++)
+     ;
+   /* Strip trailing ignore chars in place; i > 0 keeps us off word[-1] */
+   while ( i > 0 && isIgnoreChar(word[i-1]) )
+     word[--i] = '\0';
+ }
+ 
+ int isIgnoreChar(char c)
+ {
+   const char *p;
+ 
+   /*  Walk IGNORELASTCHAR up to its terminating NUL; answer TRUE as
+   **  soon as c matches one of its characters, FALSE if none match.
+    */
+   for (p = IGNORELASTCHAR; *p != '\0'; p++)
+     if (*p == c)
+       return TRUE;
+ 
+   return FALSE;
+ }
+ #endif
+ 
+ 
+ 
Received on Wed Nov 5 11:46:55 1997