Skip to content

KinoSearch support #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ if ( $plucene_inst ) {
$Wiki::Toolkit::TestConfig::config{plucene} = undef;
}

# If we have KinoSearch installed, we can test that without asking questions.
# The eval block only yields a true value when the require succeeds, so the
# result is taken from eval's return value rather than from $@.  KinoSearch
# gets its own flag instead of re-declaring the Plucene one.
my $kinosearch_inst = eval { require KinoSearch; 1 } ? 1 : 0;
if ( $kinosearch_inst ) {
    print "You have KinoSearch installed, so will test with that...\n\n";
    $Wiki::Toolkit::TestConfig::config{kinosearch} = 1;
} else {
    print "KinoSearch not installed; skipping test...\n\n";
    $Wiki::Toolkit::TestConfig::config{kinosearch} = undef;
}

# Write out the config for next run.
open OUT, ">lib/Wiki/Toolkit/TestConfig.pm"
or die "Couldn't open lib/Wiki/Toolkit/TestConfig.pm for writing: $!";
Expand Down
142 changes: 142 additions & 0 deletions lib/Wiki/Toolkit/Search/KinoSearch.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package Wiki::Toolkit::Search::KinoSearch;
use strict;
our $VERSION = '0.01';

use base 'Wiki::Toolkit::Search::Base';

use File::Spec;
use KinoSearch::InvIndexer;
use KinoSearch::Analysis::PolyAnalyzer;

=head1 NAME

Wiki::Toolkit::Search::KinoSearch - Use KinoSearch to search your Wiki::Toolkit wiki.

=head1 SYNOPSIS

my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" );
my %wombat_nodes = $search->search_nodes("wombat");

Provides search-related methods for L<Wiki::Toolkit>.

=cut

=head1 METHODS

=over 4

=item B<new>

my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" );

Takes only one parameter, which is mandatory. C<path> must be a directory
for storing the indexed data. It should exist and be writeable.

=cut

# Initialiser: remembers the C<path> argument as the index directory and
# hands the object back.
sub _init {
    my $self   = shift;
    my %params = @_;

    $self->{_dir} = $params{path};

    return $self;
}

# Accessor for the index directory stored by _init.
sub _dir {
    my ($self) = @_;
    return $self->{_dir};
}

# Construct the analyzer shared by indexing and searching: a PolyAnalyzer
# configured for English ('en').
sub _analyzer {
    my %options = ( language => 'en' );
    return KinoSearch::Analysis::PolyAnalyzer->new(%options);
}

# Build a KinoSearch::InvIndexer over the directory returned by _dir, using
# the analyzer from _analyzer, and declare the two fields this module
# writes: 'title' and 'body_text' (the latter vectorized).
#
# KinoSearch's "create" option clobbers an existing invindex, so it is only
# switched on when the directory does not hold one yet (detected by the
# absence of the invindex's "segments" file).  Passing create => 1
# unconditionally would wipe everything indexed so far each time
# index_node or delete_node asks for an indexer.
#
# Returns the configured indexer; the caller is responsible for calling
# finish() on it.
sub _indexer {
    my ($self) = @_;

    my $dir           = $self->_dir;
    my $segments_file = File::Spec->catfile( $dir, 'segments' );

    my $indexer = KinoSearch::InvIndexer->new(
        analyzer => $self->_analyzer,
        invindex => $dir,
        create   => ( -e $segments_file ? 0 : 1 ),
    );
    $indexer->spec_field( name => 'title' );
    $indexer->spec_field(
        name       => 'body_text',
        vectorized => 1,
    );
    return $indexer;
}

# Add one wiki node to the index.
#
#   $node    - stored in the 'title' field
#   $content - stored in the 'body_text' field
#
# A new indexer is obtained for the call, the document is added, and the
# indexer is finished with the optimize setting reported by optimize().
sub index_node {
    my $self = shift;
    my ( $node, $content ) = @_;

    my $indexer  = $self->_indexer;
    my $document = $indexer->new_doc;

    my %value_for = ( title => $node, body_text => $content );
    for my $field (qw( title body_text )) {
        $document->set_value( $field => $value_for{$field} );
    }

    $indexer->add_doc($document);
    return $indexer->finish( optimize => $self->optimize );
}

# Return a new KinoSearch::Searcher reading the invindex in _dir, using the
# same analyzer as the indexer so queries are processed the same way as the
# indexed text.
sub _searcher {
    my ($self) = @_;

    # Nothing else in this module loads the Searcher class, so load it on
    # demand; without this the ->new call below dies unless some caller
    # happened to load KinoSearch::Searcher first.
    require KinoSearch::Searcher;

    return KinoSearch::Searcher->new(
        invindex => $self->_dir,
        analyzer => $self->_analyzer,
    );
}

# Run $query through a freshly built searcher and return whatever its
# search() method returns (the hits object consumed by search_nodes).
sub _search_nodes {
    my $self  = shift;
    my $query = shift;

    my $searcher = $self->_searcher;
    return $searcher->search($query);
}

# Search the index and return a flat hash (list of key/value pairs) mapping
# each matching node's title to its score.
#
# All arguments after the invocant are passed to _search_nodes unchanged.
# Hits are read one by one with fetch_hit_hashref until it returns a false
# value; if two hits share a title, the later one wins.
sub search_nodes {
    my ( $self, @args ) = @_;

    my $hits = $self->_search_nodes(@args);

    my %score_for;

    # $hit has to be declared: this file runs under "use strict", so an
    # undeclared $hit stops the whole module from compiling.
    while ( my $hit = $hits->fetch_hit_hashref ) {
        $score_for{ $hit->{title} } = $hit->{score};
    }

    return %score_for;
}

# sub _fuzzy_match {
# my ( $self, $string, $canonical ) = @_;
# return
# map { $_ => ( $_ eq $string ? 2 : 1 ) }
# $self->_search_nodes("fuzzy:$canonical");
# }

# sub indexed {
# my ( $self, $id ) = @_;
# my $term = Plucene::Index::Term->new( { field => 'id', text => $id } );
# return $self->_reader->doc_freq($term);
# }

# Value passed as the "optimize" option whenever an indexer is finished;
# always true here.
sub optimize {
    return 1;
}

# Remove the documents whose 'title' field matches $id from the index.
#
# Builds a Term on the 'title' field, asks a new indexer to delete every
# document carrying that term, then finishes the indexer with the optimize
# setting from optimize().
#
# NOTE(review): 'title' is declared in _indexer without disabling analysis,
# so the stored terms have presumably been through the PolyAnalyzer; a raw
# $id may then fail to match multi-word or mixed-case titles.  Confirm
# against the KinoSearch field/Term documentation.
sub delete_node {
    my ( $self, $id ) = @_;

    # The Term class is not loaded explicitly anywhere in this module;
    # require it here rather than relying on another KinoSearch module
    # having pulled it in.
    require KinoSearch::Index::Term;

    my $term    = KinoSearch::Index::Term->new( title => $id );
    my $indexer = $self->_indexer;

    $indexer->delete_docs_by_term($term);
    return $indexer->finish( optimize => $self->optimize );
}

# Capability flag: this backend reports no phrase-search support.
sub supports_phrase_searches { 0 }
# Capability flag: this backend reports no fuzzy-search support.
sub supports_fuzzy_searches { 0 }

1;
__END__

=back

=head1 TODO

=over 4

=item Phrase Searching

=item Fuzzy Matching

=back

=head1 SEE ALSO

L<KinoSearch>, L<Wiki::Toolkit>, L<Wiki::Toolkit::Search::Base>.

=cut

10 changes: 10 additions & 0 deletions lib/Wiki/Toolkit/TestLib.pm
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ if ( $configured{plucene} ) {
$plucene_path = "t/plucene";
}

# Test with KinoSearch if possible: the index directory is only set when the
# test configuration has KinoSearch enabled, and is left undefined otherwise.
my $kino_path = $configured{kinosearch} ? "t/kinosearch" : undef;

# @wiki_info describes which searches work with which stores.

# Database-specific searchers.
Expand All @@ -169,6 +175,10 @@ foreach my $dbtype ( qw( MySQL Pg SQLite ) ) {
if ( $datastore_info{$dbtype} and $plucene_path );
push @wiki_info, { datastore_info => $datastore_info{$dbtype} }
if $datastore_info{$dbtype};

push @wiki_info, { datastore_info => $datastore_info{$dbtype},
plucene_path => $kino_path }
if ( $datastore_info{$dbtype} and $kino_path );
}

=head1 METHODS
Expand Down