[interchange-cvs] interchange - racke modified 2 files
interchange-cvs at icdevgroup.org
interchange-cvs at icdevgroup.org
Fri Jun 23 06:26:02 EDT 2006
User: racke
Date: 2006-06-23 10:26:02 GMT
Modified: . MANIFEST
Added: lib/Vend Swish2.pm
Log:
added Vend::Swish2 module provided by Brian Miller <brian at endpoint.com>
this is superior to Vend::Swish, because instead of the awkward
way to interface with Swish-e by running the binary it uses the Perl API
note: this module will replace Vend::Swish after we ensure as much backward
compatibility as possible
Revision Changes Path
2.196 +1 -0 interchange/MANIFEST
rev 2.196, prev_rev 2.195
Index: MANIFEST
===================================================================
RCS file: /var/cvs/interchange/MANIFEST,v
retrieving revision 2.195
retrieving revision 2.196
diff -u -r2.195 -r2.196
--- MANIFEST 11 Apr 2006 14:43:57 -0000 2.195
+++ MANIFEST 23 Jun 2006 10:26:01 -0000 2.196
@@ -1110,6 +1110,7 @@
lib/Vend/SQL_Parser.pm
lib/Vend/Subs.pm
lib/Vend/Swish.pm
+lib/Vend/Swish2.pm
lib/Vend/Table/Common.pm
lib/Vend/Table/DB_File.pm
lib/Vend/Table/DBI.pm
1.1 interchange/lib/Vend/Swish2.pm
rev 1.1, prev_rev 1.0
Index: Swish2.pm
===================================================================
# Vend::Swish2 - Search indexes with Swish-e's new SWISH::API
#
# $Id: Swish2.pm,v 1.1 2006/06/23 10:26:02 racke Exp $
#
# Adapted from Vend::Swish by Brian Miller <brian at endpoint.com>
#
# Copyright (C) 2005-2006 Interchange Development Group
# Copyright (C) 2002 Mike Heins <mikeh at perusion.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307 USA.
package Vend::Swish2;
require Vend::Search;
@ISA = qw(Vend::Search);
$VERSION = substr(q$Revision: 1.1 $, 10);
use strict;
use lib qw( /usr/local/lib/swish-e/perl );
use SWISH::API;
use SWISH::ParseQuery;
use SWISH::PhraseHighlight;
# Module-wide caches, shared by every search object in this process.
# init() stores one SWISH::API handle per index list in $_swish and one
# SWISH::PhraseHighlight object per index file in $_swish_highlighters,
# so that later searches can reuse them instead of rebuilding them.
my ($_swish, $_swish_highlighters) = ({}, {});

# Defaults copied onto every search object at the start of init().
my %Default = (
    matches         => 0,
    mv_head_skip    => 0,
    mv_index_delim  => "\t",
    mv_record_delim => "\n",
    mv_matchlimit   => 50,
    mv_max_matches  => 2000,
    mv_min_string   => 1,
);

# Result column name => Swish-e property name used to fetch its value.
# init() adds a 'context' entry when context highlighting is configured.
my %fmap = (
    code        => 'swishreccount',
    score       => 'swishrank',
    url         => 'swishdocpath',
    title       => 'swishtitle',
    filesize    => 'swishdocsize',
    mod_date    => 'swishlastmodified',
    description => 'swishdescription',
);

# Settings handed to the SWISH::PhraseHighlight constructor in init().
my %highlight_settings = (
    show_words    => 8,
    occurrences   => 5,
    max_words     => 100,
    highlight_on  => '<span class="highlight">',
    highlight_off => '</span>',
);
# Run a search through Vend::Scan::perform_search using this object.
#
# Sets mv_list_only on the object first (per the original note, that makes
# perform_search return only the results array) and leaves any existing
# mv_return_reference setting untouched.
#
# Arguments: the search object, then the options passed on as the first
# argument of perform_search.
# Returns whatever perform_search returns.
sub array {
    my $self        = shift;
    my $search_opts = shift;

    $self->{mv_list_only} = 1;

    my @scan_args = ($search_opts, undef, $self);
    return Vend::Scan::perform_search(@scan_args);
}
# Run a search through Vend::Scan::perform_search, asking search() for its
# hash-shaped result (mv_return_reference is set to 'HASH').
#
# Also sets mv_list_only (per the original note, that makes perform_search
# return only the results).
#
# Arguments: the search object, then the options passed on as the first
# argument of perform_search.
# Returns whatever perform_search returns.
sub hash {
    my $self        = shift;
    my $search_opts = shift;

    @{$self}{qw(mv_return_reference mv_list_only)} = ('HASH', 1);

    my @scan_args = ($search_opts, undef, $self);
    return Vend::Scan::perform_search(@scan_args);
}
# Run a search through Vend::Scan::perform_search, asking search() for its
# delimiter-joined string result (mv_return_reference is set to 'LIST').
#
# Also sets mv_list_only (per the original note, that makes perform_search
# return only the results).
#
# Arguments: the search object, then the options passed on as the first
# argument of perform_search.
# Returns whatever perform_search returns.
sub list {
    my $self        = shift;
    my $search_opts = shift;

    @{$self}{qw(mv_return_reference mv_list_only)} = ('LIST', 1);

    my @scan_args = ($search_opts, undef, $self);
    return Vend::Scan::perform_search(@scan_args);
}
# Prepare a search object: load the defaults, overlay the caller's options,
# and make sure a SWISH::API handle exists for the configured index list.
#
# Arguments:
#   $s       - the search object (blessed hash) being initialised
#   $options - hash reference; every key/value pair is copied onto $s after
#              the defaults have been set, so options take precedence
#
# Side effects:
#   - $Vend::Cfg->{Swish2} is created as an empty hash if it is unset
#   - $_swish gains one SWISH::API handle per space-joined index list; the
#     handle is cached only once it has been created without error
#   - when $Vend::Cfg->{Swish2}{highlight_context} is true, a 'context'
#     column is appended to mv_field_names and mv_return_fields, %fmap
#     gains a 'context' entry, and $_swish_highlighters receives one
#     SWISH::PhraseHighlight object per index file
#
# Dies if the freshly created SWISH::API handle reports an error.
# Returns nothing.
sub init {
    my ($s, $options) = @_;

#::logDebug("initing Swish search, Swish=" . Vend::Util::uneval($Vend::Cfg->{Swish2}));
    $Vend::Cfg->{Swish2} ||= {};

    @{$s}{keys %Default} = (values %Default);

    $s->{mv_base_directory}  = undef;
    $s->{mv_begin_string}    = [];
    $s->{mv_all_chars}       = [1];
    $s->{mv_case}            = [];
    $s->{mv_column_op}       = [];
    $s->{mv_negate}          = [];
    $s->{mv_numeric}         = [];
    $s->{mv_orsearch}        = [];
    $s->{mv_searchspec}      = [];
    $s->{mv_search_group}    = [];
    $s->{mv_search_field}    = [];
    $s->{mv_search_file}     = [];
    push @{$s->{mv_search_file}}, $Vend::Cfg->{Swish2}{index}
        if $Vend::Cfg->{Swish2}{index};
    $s->{mv_sort_option}     = [];
    $s->{mv_substring_match} = [];
    $s->{mv_field_names}     = [qw/code score url title filesize mod_date description/];
    $s->{mv_return_fields}   = [qw/code score url title filesize mod_date description/];

    # Caller-supplied options override everything set above.
    for (keys %$options) {
        $s->{$_} = $options->{$_};
    }

    # can create the base Swish object once and run
    # multiple queries off of it
    #
    # NOTE(review): the previous revision also built a list of index paths
    # qualified with mv_base_directory at this point but never used it.
    # The index names are still handed to Swish-e exactly as configured;
    # the highlighter table below is keyed by those same names.
    my $from_index = join ' ', @{ $s->{'mv_search_file'} };
    $s->{'swish_index'} = $from_index;

    unless ($_swish->{$from_index}) {
        my $engine = SWISH::API->new( $from_index );
        if ($engine->Error) {
            # Do not cache a handle that failed to open: a cached failure
            # would bypass this check on every later init().
            die "Can't create swish engine: " . $engine->ErrorString;
        }
        $_swish->{$from_index} = $engine;
    }

    if ($Vend::Cfg->{Swish2}{highlight_context}) {
        push @{ $s->{mv_field_names} }, 'context';
        push @{ $s->{mv_return_fields} }, 'context';

        # The context column is read from the same property as description.
        $fmap{'context'} = 'swishdescription';

        my $swish = $_swish->{$from_index};
        foreach my $index (@{ $s->{'mv_search_file'} }) {
            my %headers = map { lc $_ => ($swish->HeaderValue( $index, $_ ) || '') } $swish->HeaderNames;

            $_swish_highlighters->{$index} = SWISH::PhraseHighlight->new( \%highlight_settings, \%headers, { swish => $swish } );
        }
    }

    return;
}
# Constructor.
#
# Arguments: the class name followed by a flat list of option key/value
# pairs. The pairs are passed to init() as a hash reference.
#
# Builds a Vend::Search object, re-blesses it into $class, runs init() on
# it and returns it.
sub new {
    my ($class, %options) = @_;

    # Explicit method-call syntax: the indirect-object form is ambiguous
    # in a package that defines its own new().
    my $s = Vend::Search->new;
    bless $s, $class;

    $s->init(\%options);
    return $s;
}
# Run the query held in the search object against the cached Swish-e
# handle and store the matches on the object.
#
# Arguments: the search object, then a flat list of key/value pairs that
# are copied onto the object before the search runs.
#
# Each match becomes an array reference with one element per name in
# mv_field_names, fetched through %fmap. A field whose name matches
# /context/ is passed through the phrase highlighter when the
# highlight_context setting is on and the text is non-empty.
#
# Result, depending on mv_return_reference:
#   unset  - mv_results is a reference to the array of rows
#   'LIST' - mv_results is one string: columns joined with
#            mv_return_delim, rows joined with mv_record_delim
#   other  - mv_results is a hash reference keyed by the first column,
#            each value a hash of field name => column value
#
# Returns the search object on success, nothing when the query has no
# hits (matches is set to 0), and the result of search_error() when the
# search string is too short or Swish-e reports an error (matches is set
# to -1 in the latter case).
sub search {
    my ($s, %options) = @_;

    while (my ($key, $val) = each %options) {
        $s->{$key} = $val;
    }

    $s->{mv_return_delim} = $s->{mv_index_delim}
        unless defined $s->{mv_return_delim};

    my @specs = @{$s->{mv_searchspec}};
    my @pats = $s->spec_check(@specs);
    $s->save_specs();

    my $search_string = join ' ', @pats;
    if (length($search_string) < $s->{mv_min_string}) {
        my $msg = ::errmsg(
            "Swish search string less than minimum %s characters: %s",
            $s->{mv_min_string},
            $search_string,
        );
        return $s->search_error($msg);
    }

    my $engine = $_swish->{ $s->{'swish_index'} };
    my $results = $engine->Query( $search_string );
    if ($engine->Error) {
        $s->{matches} = -1;
        return $s->search_error("Can't run swish query: " . $engine->ErrorString);
    }

    # no matches, can return now
    unless ($results->Hits) {
        $s->{matches} = 0;
        return;
    }

    my @out;
    while (my $result = $results->NextResult) {
        # One row per result. (The previous revision fetched every
        # property twice and threw the first copy away.)
        my $out_ref = [];
        foreach my $field (@{ $s->{'mv_field_names'} }) {
            if ($field =~ /context/) {
                my $text = $result->Property( $fmap{$field} );
                if ($Vend::Cfg->{'Swish2'}{'highlight_context'} and defined $text and $text ne '') {
                    my $index = $result->Property('swishdbfile');

                    # Highlight only when a highlighter was registered for
                    # this index; otherwise return the text unmodified
                    # rather than dying on an undefined value.
                    if (my $highlighter = $_swish_highlighters->{$index}) {
                        my $parsed_query = parse_query( join ' ', $results->ParsedWords( $index ) );
#::logDebug("parsed query: " . Vend::Util::uneval($parsed_query));
                        $highlighter->highlight( \$text, $parsed_query->{'swishdefault'}, undef, $result );
                    }
                }
                push @$out_ref, $text;
            }
            else {
                push @$out_ref, $result->Property( $fmap{$field} );
            }
        }

        push @out, $out_ref;
    }

    {
        # Hand the field names to hash_fields so that the listed
        # parameters can be processed against them.
        my $field_names = join "\t", @{$s->{mv_field_names}};
        $field_names =~ s/^\s+//;
        my @laundry = (qw/mv_search_field mv_range_look mv_return_fields/);
        $s->hash_fields(
            [ split /\Q$s->{mv_index_delim}/, $field_names ],
            @laundry,
        );
    }

    if ($s->{mv_unique}) {
        # Keep only the first row for each distinct first-column value.
        my %seen;
        @out = grep ! $seen{$_->[0]}++, @out;
    }

    if ($s->{mv_sort_field} and @{$s->{mv_sort_field}}) {
        $s->hash_fields( $s->{mv_field_names}, qw/mv_sort_field/ );
        @out = $s->sort_search_return(\@out);
    }

    $s->{matches} = @out;

    if ($s->{matches} > $s->{mv_matchlimit} and $s->{mv_matchlimit} > 0) {
        $s->save_more(\@out)
            or ::logError("Error saving matches: $!");
        if ($s->{mv_first_match}) {
            splice @out, 0, $s->{mv_first_match};
            $s->{mv_next_pointer} = $s->{mv_first_match} + $s->{mv_matchlimit};
            $s->{mv_next_pointer} = 0
                if $s->{mv_next_pointer} > $s->{matches};
        }

        # Truncate to one page. Assigning to $#out unconditionally would
        # pad a short final page with undef rows.
        $#out = $s->{mv_matchlimit} - 1
            if @out > $s->{mv_matchlimit};
    }

    if (! $s->{mv_return_reference}) {
        $s->{mv_results} = \@out;
#::logDebug("returning search: " . Vend::Util::uneval($s));
        return $s;
    }
    elsif ($s->{mv_return_reference} eq 'LIST') {
        @out = map { join $s->{mv_return_delim}, @$_ } @out;
        $s->{mv_results} = join $s->{mv_record_delim}, @out;
    }
    else {
        my @names = @{ $s->{mv_field_names} };
        $names[0] eq '0' and $names[0] = 'code';
        my %hash;
        for my $row (@out) {
            # Rows are array references, so take the columns directly
            # instead of splitting a string on the return delimiter.
            my @col = @$row;
            $hash{ $col[0] } = {};
            @{ $hash{$col[0]} } {@names} = @col;
        }
        $s->{mv_results} = \%hash;
    }
#::logDebug("returning search: " . Vend::Util::uneval($s));
    return $s;
}
# Unfortunate hack needed for Safe searches: alias the Vend::Search
# methods used by this class directly into this package instead of
# relying on @ISA lookup alone. Each name is aliased exactly once.
*escape             = \&Vend::Search::escape;
*spec_check         = \&Vend::Search::spec_check;
*get_scalar         = \&Vend::Search::get_scalar;
*more_matches       = \&Vend::Search::more_matches;
*get_return         = \&Vend::Search::get_return;
*map_ops            = \&Vend::Search::map_ops;
*get_limit          = \&Vend::Search::get_limit;
*saved_params       = \&Vend::Search::saved_params;
*range_check        = \&Vend::Search::range_check;
*create_search_and  = \&Vend::Search::create_search_and;
*create_search_or   = \&Vend::Search::create_search_or;
*save_context       = \&Vend::Search::save_context;
*dump_options       = \&Vend::Search::dump_options;
*save_more          = \&Vend::Search::save_more;
*sort_search_return = \&Vend::Search::sort_search_return;
*hash_fields        = \&Vend::Search::hash_fields;
*save_specs         = \&Vend::Search::save_specs;
*restore_specs      = \&Vend::Search::restore_specs;
*splice_specs       = \&Vend::Search::splice_specs;
*search_error       = \&Vend::Search::search_error;
1;
__END__
More information about the interchange-cvs
mailing list