Skip to main content.
home | support | download

Back to List Archive

Re: [swish-e] Parsing plain text emails to use the subject line as the title

From: Troy Wical <troy(at)not-real.wical.com>
Date: Fri Mar 19 2010 - 23:33:17 GMT
On Mar 19, 2010, at 9:11 AM, Peter Karman wrote:

> try the -d and -v options to turn on some debugging.

##########################################################################
##########################################################################
[/home/mail-archive/search]# swish3 -i -d -v 3 /home/mail-archive/test/ -S mail -c test.conf
{
   Debug           => 0,
   Format          => "native",
   Headers         => 1,
   Limit           => [],
   Merge           => undef,
   Source          => "fs",
   Version         => 0,
   Warnings        => 2,
   aggregator      => "mail",
   begin           => 0,
   config          => "test.conf",
   debug           => 1,
   extended_output => undef,
   folder          => "index.swish3",
   help            => 0,
   indexer         => "native",
   input           => 1,
   invindex        => "index.swish3",
   links           => 0,
   max             => undef,
   newer_than      => undef,
   query           => "",
   sort_order      => "",
   test_mode       => 0,
   verbose         => 3,
}
creating indexer: SWISH::Prog::Native::Indexer at /usr/local/lib/perl5/site_perl/5.8.8/SWISH/Prog.pm line 113.
creating aggregator: SWISH::Prog::Aggregator::Mail at /usr/local/lib/perl5/site_perl/5.8.8/SWISH/Prog.pm line 138.
do {
   my $a = bless({
     _start     => 1269041300,
     aggregator => bless({
                     _start           => 1269041300,
                     debug            => 1,
                     doc_class        => "SWISH::Prog::Doc",
                     indexer          => bless({
                                           _start    => 1269041300,
                                           config    => bless({
                                                           
DefaultContents                   => ["TXT*"],
                                                           
"IgnoreTotalWordCountWhenRanking" => [0],
                                                           
IndexDir                          => ["/home/mail-archive/test"],
                                                           
IndexFile                         => ["/home/mail-archive/search/test.index"],
                                                           
IndexReport                       => [1],
                                                           
MetaNameAlias                     => ["swishdefault mail"],
                                                           
MetaNames                         => { url => 1 },
                                                           
PropertyNames                     => { url => 1 },
                                                           
ReplaceRules                      => [
                                                                                                 "replace 
  \"/home/mail-archive/\" \"http://type2.com/mail-archives/\"",
                                                                                               ],
                                                           
StoreDescription                  => ["XML* <body>"],
                                                           
_start                            => 1269041300,
                                                           
debug                             => 0,
                                                           
verbose                           => 0,
                                                        },  
"SWISH::Prog::Config"),
                                           debug     => 1,
                                           exe       => "swish-e",
                                           invindex  => bless({
                                                          _start  =>  
1269041300,
                                                          clobber => 0,
                                                          debug   => 0,
                                                          file    =>  
bless({
                                                                        
dir => bless({ dirs => ["index.swish3"], file_spec_class => undef,  
volume => "" }, "Path::Class::Dir"),
                                                                        
file => "index.swish-e",
                                                                        
file_spec_class => undef,
                                                                     }, "Path::Class::File"),
                                                          path    =>  
bless({ dirs => ["index.swish3"], file_spec_class => undef, volume =>  
"" }, "Path::Class::Dir"),
                                                          verbose => 0,
                                                        },  
"SWISH::Prog::Native::InvIndex"),
                                           test_mode => 0,
                                           verbose   => 3,
                                         },  
"SWISH::Prog::Native::Indexer"),
                     progress_size    => 1000,
                     swish_filter_obj => bless({
                                           doc_class    =>  
"SWISH::Filter::Document",
                                           filters      => [
                                                             bless({
                                                                
_mimetypes => bless({}, "SWISH::Filter::MIMETypes"),
                                                               gz =>  
{ perl => 1 },
                                                                
mimetypes => [qr|application/x-gzip|],
                                                               type =>  
1,
                                                             },  
"SWISH::Filters::Decompress"),
                                                           ],
                                           mimetypes    => bless({},  
"SWISH::Filter::MIMETypes"),
                                           skip_filters => {},
                                         }, "SWISH::Filter"),
                     test_mode        => 0,
                     verbose          => 3,
                   }, "SWISH::Prog::Aggregator::Mail"),
     config     => "test.conf",
     debug      => 1,
     indexer    => 'fix',
     invindex   => "index.swish3",
     test_mode  => 0,
     verbose    => 3,
   }, "SWISH::Prog");
   $a->{indexer} = $a->{aggregator}{indexer};
   $a;
} at /usr/local/bin/swish3 line 186
opening: swish-e  -f index.swish3/index.swish-e -v3 -W0 -S prog -i stdin -c /tmp/qpXYkqnBB9 at /usr/local/lib/perl5/site_perl/5.8.8/SWISH/Prog.pm line 196
Parsing config file '/tmp/qpXYkqnBB9'
Indexing Data Source: "External-Program"
Indexing "stdin"

Removing very common words...
no words removed.
Writing main index...
err: No unique words indexed!
.
error : can't close indexer ($?: 256):
  at /usr/local/lib/perl5/site_perl/5.8.8/SWISH/Prog.pm line 198
0 documents in 00:00:00
##########################################################################
##########################################################################

>
> what kind of mail is in /home/mail-archive/test? maildir? or mbox?

Format is maildir. You can see an example @ http://type2.com/mail-archives/type2/747/05

Thanks, Troy


_______________________________________________
Users mailing list
Users@lists.swish-e.org
http://lists.swish-e.org/listinfo/users
Received on Fri Mar 19 19:33:28 2010