Skip to main content.
home | support | download

Back to List Archive

Re: FileRules ... not

From: Earl Fogel <fogel(at)not-real.duke.usask.ca>
Date: Mon Jan 11 1999 - 15:25:19 GMT
Hi,

On Sat, 9 Jan 1999, Andres Cvitkovich wrote:

>a few days ago, I asked about FileRules and excluding files NOT containing
>a certain substring...

Yes, this would be useful, and I implemented something similar back in
October, although I don't believe I explained it as well as you did.  Would 
this do what you need?

I submitted my patches to the nice folks who develop swish-e, and I believe
they will be included in a future release.

Here's what I wrote back then:

-------- Included Message --------
Date: Mon, 26 Oct 1998 11:39:01 -0600 (CST)
From: Earl Fogel <fogel@duke.usask.ca>
To: swish-e@sunsite.berkeley.edu
Subject: using regexes with IndexOnly

Hello,

I needed a way to tell swish-e to only index files named index.html,
something that is not possible with the current version.

Since the Swish-E folks added regex support to the FileRules directive,
it seemed that the best way to solve my problem was to add regex support
to the IndexOnly directive as well.  For example:

	IndexOnly ^index\.html$
	IndexOnly \.html$

I'll append the diffs below.  I hope they can be incorporated into a future
release.  This is based on swish-e 1.2.4.

Earl Fogel
Computing Services              phone: (306) 966-4861
University of Saskatchewan      email: earl.fogel@usask.ca
--

*** fs.c.orig	Wed Oct 21 12:12:36 1998
--- fs.c	Mon Oct 26 11:29:09 1998
***************
*** 29,35 ****
  static void indexafile(char *path);
  static void printfiles(struct sortentry *e);
  static void printdirs(struct sortentry *e);
! static int isoksuffix(char *filename, struct swline *rulelist);
  static int ishtml(char *filename);
  static int isoktitle(char *title);
  
--- 29,35 ----
  static void indexafile(char *path);
  static void printfiles(struct sortentry *e);
  static void printdirs(struct sortentry *e);
! static int isokfname(char *filename, struct swline *rulelist);
  static int ishtml(char *filename);
  static int isoktitle(char *title);
  
***************
*** 41,47 ****
  static struct swline *fileconlist = 0;
  static struct swline *titconlist = 0;
  static struct swline *fileislist = 0;
! static struct swline *suffixlist = 0;
  static struct swline *nocontentslist = 0;
  
  
--- 41,47 ----
  static struct swline *fileconlist = 0;
  static struct swline *titconlist = 0;
  static struct swline *fileislist = 0;
! static struct swline *fnamelist = 0;
  static struct swline *nocontentslist = 0;
  
  
***************
*** 210,216 ****
  			if ( already_indexed(s) )
  				continue;
  			
! 			if (!isoksuffix(dp->d_name, suffixlist))
  				continue;
  			
  			if (ishtml(s)) {
--- 210,216 ----
  			if ( already_indexed(s) )
  				continue;
  			
! 			if (!isokfname(dp->d_name, fnamelist))
  				continue;
  			
  			if (ishtml(s)) {
***************
*** 294,300 ****
  	if (badfile)
  		return;
  	
! 	if (!isoksuffix(path, suffixlist))
  		return;
  	
  	if (ishtml(path)) {
--- 294,300 ----
  	if (badfile)
  		return;
  	
! 	if (!isokfname(path, fnamelist))
  		return;
  	
  	if (ishtml(path)) {
***************
*** 336,342 ****
  		}
  		if ((fp = fopen(e->filename, "r" )) != NULL ) {
  			wordcount = countwords(fp, e->filename, e->title,
! 				isoksuffix(e->filename, nocontentslist) && nocontentslist != NULL);
  			fclose(fp);
  		}
  		if (verbose == 3) {
--- 336,342 ----
  		}
  		if ((fp = fopen(e->filename, "r" )) != NULL ) {
  			wordcount = countwords(fp, e->filename, e->title,
! 				isokfname(e->filename, nocontentslist) && nocontentslist != NULL);
  			fclose(fp);
  		}
  		if (verbose == 3) {
***************
*** 418,424 ****
  				break;
  			else {
  				c += skiplen;
! 				suffixlist = (struct swline *)addswline(suffixlist, value);
  			}
  		}
      } else if ((c = (char *) lstrstr(line, "directory contains")) &&
--- 418,424 ----
  				break;
  			else {
  				c += skiplen;
! 				fnamelist = (struct swline *)addswline(fnamelist, value);
  			}
  		}
      } else if ((c = (char *) lstrstr(line, "directory contains")) &&
***************
*** 499,536 ****
  
  
  
! /* Check if a file with a particular suffix should be indexed
  ** according to the settings in the configuration file.
  */
  
! int isoksuffix(filename, rulelist)
  char *filename;
  struct swline *rulelist;
  {
! 	int badfile;
! 	char *c, suffix[MAXSUFFIXLEN], checksuffix[MAXSUFFIXLEN];
! 	struct swline *tmplist;
! 	
! 	tmplist = rulelist;
! 	if (tmplist == NULL)
  		return 1;
! 	if ((c = (char *) strrchr(filename, '.')) == NULL)
! 		return 0;
! 	
! 	badfile = 1;
! 	strcpy(checksuffix, c + 1);
! 	while (tmplist != NULL) {
! 		if ((c = (char *) strrchr(tmplist->line, '.')) == NULL)
! 			strcpy(suffix, tmplist->line);
! 		else
! 			strcpy(suffix, c + 1);
! 		if (lstrstr(suffix, checksuffix) && strlen(suffix) ==
! 			strlen(checksuffix))
! 			badfile = 0;
! 		tmplist = tmplist->next;
! 	}
! 	return !(badfile);
  }
  
  /* This checks is a filename has one of the following suffixes:
  ** "htm", "HTM", "html", "HTML", "shtml", "SHTML".
--- 499,530 ----
  
  
  
! /* Check if a file with a particular name should be indexed
  ** according to the settings in the configuration file.
+ ** Uses regular expressions.
+ ** Returns 1 when rulelist is empty or when filename matches at
+ ** least one of its patterns, and 0 when no pattern matches.
  */
  
! int isokfname(filename, rulelist)
  char *filename;
  struct swline *rulelist;
  {
! 	if (rulelist == NULL)
  		return 1;
! 
!         while (rulelist != NULL) {
!                 if (matchARegex(filename, rulelist->line)) {
!                         return 1;
!                 }
!                 rulelist = rulelist->next;
!         }
! 
! 	/* No rule matched: reject the file explicitly instead of
! 	** falling off the end of a non-void function. */
! 	return 0;
  }
+ 
  
  /* This checks is a filename has one of the following suffixes:
  ** "htm", "HTM", "html", "HTML", "shtml", "SHTML".


--
Earl Fogel
Computing Services              phone: (306) 966-4861
University of Saskatchewan      email: earl.fogel@usask.ca
Received on Mon Jan 11 07:25:24 1999