Skip to content

Commit

Permalink
在 SDK 上开放 addQueryString 及 setScwsMulti 相关接口功能
Browse files Browse the repository at this point in the history
  • Loading branch information
hightman committed Apr 1, 2013
1 parent d84ffa7 commit 2b5f888
Show file tree
Hide file tree
Showing 18 changed files with 198 additions and 16 deletions.
34 changes: 34 additions & 0 deletions sdk/php/lib/XSIndex.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,40 @@ public function delSynonym($raw, $synonym = null)
return $this;
}

/**
 * Set the scws "multi" (compound segmentation) level of the current index database.
 *
 * The multi level is a key parameter controlling scws segmentation granularity and
 * decides how long words are further subdivided. Default is 3, valid range is 0~15.
 * Note: the setting only applies to the current index database; when called several
 * times only the last value is effective, and it only affects index data submitted
 * after the call. Rebuild the index to apply it to previously indexed data.
 * A level outside 0~15 (after integer conversion) is ignored and no command is sent.
 *
 * @param int $level the multi level to set
 * @return XSIndex the object itself, to allow call chaining
 * @since 1.4.7
 * @throws XSException on failure
 */
public function setScwsMulti($level)
{
    $multi = intval($level);
    // Out-of-range values are silently dropped; still return $this for chaining.
    if ($multi < 0 || $multi > 15)
    {
        return $this;
    }
    $this->execCommand(array(
        'cmd' => CMD_SEARCH_SCWS_SET,
        'arg1' => CMD_SCWS_SET_MULTI,
        'arg2' => $multi,
    ));
    return $this;
}

/**
 * Get the scws "multi" (compound segmentation) level of the current index database.
 *
 * Sends CMD_SEARCH_SCWS_GET with CMD_SCWS_GET_MULTI, expects a CMD_OK_INFO response
 * and converts the response buffer to an integer.
 *
 * @return int the multi level currently set for this database
 * @see setScwsMulti
 * @since 1.4.7
 */
public function getScwsMulti()
{
    $request = array(
        'cmd' => CMD_SEARCH_SCWS_GET,
        'arg1' => CMD_SCWS_GET_MULTI,
    );
    return intval($this->execCommand($request, CMD_OK_INFO)->buf);
}

/**
* 开启索引命令提交缓冲区
* 为优化网络性能, 有必要先将本地提交的 add/update/del 等索引变动指令缓存下来,
Expand Down
22 changes: 21 additions & 1 deletion sdk/php/lib/XSSearch.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,26 @@ public function getFacets($field = null)
return isset($this->_facets[$field]) ? $this->_facets[$field] : array();
}

/**
 * Set the scws "multi" (compound segmentation) level used for the current search query.
 *
 * The multi level is a key parameter controlling scws segmentation granularity and
 * decides how long words are further subdivided. Default is 3, valid range is 0~15.
 * Note: the setting only applies to this search and only affects {@link setQuery}
 * calls made after it. Because of how queries are designed, splitting the query into
 * single characters is currently not supported, but you may set the level to 0 to
 * turn compound segmentation off for fuzzy searches.
 * A level outside 0~15 (after integer conversion) is ignored and no command is sent.
 *
 * @param int $level the multi level to set
 * @return XSSearch the object itself, to allow call chaining
 * @since 1.4.7
 */
public function setScwsMulti($level)
{
    $multi = intval($level);
    // Out-of-range values are silently dropped; still return $this for chaining.
    if ($multi < 0 || $multi > 15)
    {
        return $this;
    }
    $this->execCommand(array(
        'cmd' => CMD_SEARCH_SCWS_SET,
        'arg1' => CMD_SCWS_SET_MULTI,
        'arg2' => $multi,
    ));
    return $this;
}

/**
* 设置搜索结果的数量和偏移
* 用于搜索结果分页, 每次调用 {@link search} 后会还原这2个变量到初始值
Expand Down Expand Up @@ -838,7 +858,7 @@ private function clearQuery()
* @param float $scale 权重计算缩放比例, 默认为 1表示不缩放, 其它值范围 0.xx ~ 655.35
* @return string 修正后的搜索语句
*/
private function addQueryString($query, $addOp = CMD_QUERY_OP_AND, $scale = 1)
public function addQueryString($query, $addOp = CMD_QUERY_OP_AND, $scale = 1)
{
$query = $this->preQueryString($query);
$bscale = ($scale > 0 && $scale != 1) ? pack('n', intval($scale * 100)) : '';
Expand Down
3 changes: 2 additions & 1 deletion sdk/php/lib/xs_cmd.inc.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?php
/* Automatically generated at 2013/03/29 11:33 */
/* Automatically generated at 2013/04/01 14:09 */
define('CMD_NONE', 0);
define('CMD_DEFAULT', CMD_NONE);
define('CMD_PROTOCOL', 20110707);
Expand Down Expand Up @@ -103,6 +103,7 @@
define('CMD_SCWS_GET_RESULT', 2);
define('CMD_SCWS_GET_TOPS', 3);
define('CMD_SCWS_HAS_WORD', 4);
define('CMD_SCWS_GET_MULTI', 5);
define('CMD_SCWS_SET_IGNORE', 50);
define('CMD_SCWS_SET_MULTI', 51);
define('CMD_SCWS_SET_DUALITY', 52);
Expand Down
1 change: 1 addition & 0 deletions sdk/php/tests/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// $Id$
$prefix = trim(file_get_contents(getenv('HOME') . '/.xs_installed'));
shell_exec($prefix . '/bin/xs-ctl.sh restart');
sleep(1);

// global temp ini files
$GLOBALS['fixIniData'] = array(
Expand Down
56 changes: 56 additions & 0 deletions sdk/php/tests/lib/XSIndexTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -251,4 +251,60 @@ public function testCustomDict()
$this->assertEquals(1, $search->count('subject:测看'));
$this->assertEquals(0, $search->count('subject:看'));
}

/**
 * Count the documents matched by a single term on the "subject" field.
 *
 * Reopens the search object, switches it to utf-8, resets the query with
 * setQuery(null), adds the subject term and returns the result of count().
 *
 * @param string $term term to look up in the subject field
 * @return int value returned by count()
 */
private function countSubjectTerm($term)
{
    $reopened = $this->object->xs->search->reopen(true);
    $searcher = $reopened->setCharset('utf-8');
    // Start from an empty query so only the subject term is counted.
    $blank = $searcher->setQuery(null);
    return $blank->addQueryTerm('subject', $term)->count();
}

/**
 * Exercises getScwsMulti()/setScwsMulti() on the index and checks, for several
 * multi levels, which subject terms of a re-indexed document can be found.
 *
 * NOTE(review): every second pair of countSubjectTerm() calls below passes an
 * empty string literal. The two calls in a pair are identical yet are expected to
 * return different counts (e.g. 1 and 0 under "multi = 5"), so the original term
 * literals were most likely lost when this text was extracted. Restore them from
 * the repository before relying on this test.
 */
public function testScwsMulti()
{
// set up the index object and a document to (re)index
$index = $this->object;
$doc = new XSDocument('utf-8');
$doc->pid = 7788;
$doc->subject = '管理制度';
$doc->message = '中华人民共和国';
// default level is expected to be 3; out-of-range values (16, -1) must leave it unchanged
$this->assertEquals(3, $index->getScwsMulti());
$index->setScwsMulti(16);
$this->assertEquals(3, $index->getScwsMulti());
$index->setScwsMulti(-1);
$this->assertEquals(3, $index->getScwsMulti());
$index->update($doc);
$index->flushIndex();
// presumably waits for the server to apply the flushed data before searching; TODO confirm
sleep(2);
$this->assertEquals(1, $this->countSubjectTerm('管理制度'));
$this->assertEquals(1, $this->countSubjectTerm('管理'));
$this->assertEquals(0, $this->countSubjectTerm(''));
$this->assertEquals(0, $this->countSubjectTerm(''));
// multi = 0: only the full word is expected to match
$index->setScwsMulti(0);
$index->update($doc);
$index->flushIndex();
sleep(2);
$this->assertEquals(1, $this->countSubjectTerm('管理制度'));
$this->assertEquals(0, $this->countSubjectTerm('管理'));
$this->assertEquals(0, $this->countSubjectTerm(''));
$this->assertEquals(0, $this->countSubjectTerm(''));
// multi = 5
$index->setScwsMulti(5);
$index->update($doc);
$index->flushIndex();
sleep(2);
$this->assertEquals(1, $this->countSubjectTerm('管理制度'));
$this->assertEquals(1, $this->countSubjectTerm('管理'));
$this->assertEquals(1, $this->countSubjectTerm(''));
$this->assertEquals(0, $this->countSubjectTerm(''));
// multi = 15 (highest accepted level): all probed terms are expected to match
$index->setScwsMulti(15);
$index->update($doc);
$index->flushIndex();
sleep(2);
$this->assertEquals(1, $this->countSubjectTerm('管理制度'));
$this->assertEquals(1, $this->countSubjectTerm('管理'));
$this->assertEquals(1, $this->countSubjectTerm(''));
$this->assertEquals(1, $this->countSubjectTerm(''));
}
}
17 changes: 15 additions & 2 deletions sdk/php/tests/lib/XSSearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -547,12 +547,12 @@ public function testSetDb()
$this->assertInstanceOf('XSException', $e2);
$this->assertEquals(CMD_ERR_XAPIAN, $e2->getCode());
}

public function testCustomDict()
{
$index = self::$xs->index;
$search = self::$xs->search;

// without custom dict
$index->setCustomDict('');
$query = $search->reopen(true)->getQuery('去测测看');
Expand All @@ -567,4 +567,17 @@ public function testCustomDict()
$query = $search->reopen(true)->getQuery('去测测看');
$this->assertEquals('Xapian::Query((去:(pos=1) AND (测测看:(pos=2) SYNONYM (测测:(pos=90) AND 测看:(pos=91)))))', $query);
}

/**
 * Checks the terms produced by XSSearch::terms() for one fixed input
 * under the default multi level and after setScwsMulti(0) and setScwsMulti(2).
 */
public function testScwsMulti()
{
    $text = '管理制度';
    $searcher = self::$xs->search;

    // default level: full word plus its two-character halves
    $expected = array('管理制度', '管理', '制度');
    $this->assertEquals($expected, $searcher->terms($text));

    // level 0: only the full word
    $searcher->setScwsMulti(0);
    $expected = array('管理制度');
    $this->assertEquals($expected, $searcher->terms($text));

    // level 2: the overlapping middle pair is expected as well
    $searcher->setScwsMulti(2);
    $expected = array('管理制度', '管理', '理制', '制度');
    $this->assertEquals($expected, $searcher->terms($text));
}
}
20 changes: 18 additions & 2 deletions sdk/php/util/Indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,12 @@
// file & database
$file = XSUtil::getOpt(null, 'file', true);
$db = XSUtil::getOpt('d', 'db');
$scws_multi = XSUtil::getOpt(null, 'scws-multi');

// help message
if (XSUtil::getOpt('h', 'help') !== null || !is_string($project)
|| (!$custom_dict && !$stop_rebuild && !$flush && !$flush_log
&& !$info && !$clean && !$source && !$add_synonym && !$del_synonym))
|| (!$custom_dict && !$stop_rebuild && !$flush && !$flush_log
&& !$info && !$clean && !$source && !$add_synonym && !$del_synonym && !$scws_multi))
{
$version = PACKAGE_NAME . '/' . PACKAGE_VERSION;
echo <<<EOF
Expand Down Expand Up @@ -79,6 +80,8 @@
--del-synonym=<raw1[:synonym1[,raw2[:synonym2]]]...>
删除一个或多个同义词, 多个之间用半角逗号分隔, 原词和同义词之间用冒号分隔
省略同义词则表示删除该原词的所有同义词
--scws-multi[=level]
查看或设置搜索语句的 scws 复合分词等级(值:0-15,默认为 3)
--rebuild 使用平滑重建方式导入数据,必须与 --source 配合使用
--stop-rebuild 强制中止没未完成的索引重建状态 (慎用)
--clean 清空库内当前的索引数据
Expand Down Expand Up @@ -161,6 +164,14 @@
if ($db !== null)
$index->setDb($db);

// scws multi
if ($scws_multi !== null && $scws_multi !== true)
{
$index->setScwsMulti($scws_multi);
if (!empty($source))
$scws_multi = null;
}

// special actions
if ($info !== null)
{
Expand Down Expand Up @@ -218,6 +229,11 @@
}
}
}
else if ($scws_multi !== null)
{
$level = $index->getScwsMulti();
echo "当前索引库的 scws 复合分词等级为:$level\n";
}
else
{
// clean
Expand Down
9 changes: 7 additions & 2 deletions sdk/php/util/Quest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
require_once dirname(__FILE__) . '/XSUtil.class.php';

// check arguments
XSUtil::parseOpt(array('p', 'q', 'c', 'd', 's', 'project', 'query', 'db', 'limit', 'charset', 'sort', 'add-weight'));
XSUtil::parseOpt(array('p', 'q', 'c', 'd', 's', 'project', 'query', 'db', 'limit', 'charset', 'sort', 'add-weight', 'scws-multi'));
$project = XSUtil::getOpt('p', 'project', true);
$query = XSUtil::getOpt('q', 'query', true);
$hot = XSUtil::getOpt(null, 'hot');
$synonyms = XSUtil::getOpt(null, 'list-synonyms');
$terms = XSUtil::getOpt(null, 'terms');
$weights = XSUtil::getOpt(null, 'add-weight');
$info = XSUtil::getopt(null, 'info');
$info = XSUtil::getOpt(null, 'info');
$scws_multi = XSUtil::getOpt(null, 'scws-multi');

// magick output charset
$charset = XSUtil::getOpt('c', 'charset');
Expand Down Expand Up @@ -55,6 +56,8 @@
-s <field1[,field2[,...]] 指定排序字段,在字段前加上 ~ 符号表示逆序
--fuzzy 将搜索默认设为模糊搜索
--synonym 开启自动同义词搜索功能
--scws-multi=<level>
查看或设置搜索语句的 scws 复合分词等级(值:0-15,默认为 3)
--add-weight=<[field1:]word1[:weight1][,[field2:]word2[:weight2]]>
添加搜索权重词汇,词与次数之间用半角冒号分隔
--hot[=total|last|cur]
Expand Down Expand Up @@ -114,6 +117,8 @@
$search->addDb(trim($dbs[$i]));
}
}
if ($scws_multi !== null)
$search->setScwsMulti($scws_multi);

if ($hot !== null)
{
Expand Down
3 changes: 2 additions & 1 deletion src/conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ int conn_quit(XS_CONN *conn, int res)
close(CONN_FD());

debug_free(conn);
conn_server.num_burst--;
if (conn_server.num_burst > 0)
conn_server.num_burst--;
return CMD_RES_QUIT;
}

Expand Down
1 change: 1 addition & 0 deletions src/global.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define SEARCH_LOG_DB "log_db"
#define DEFAULT_BACKLOG 63 // default backlog for listen()
#define MAX_EXPAND_LEN 15
#define DEFAULT_SCWS_MULTI 3 // default scws multi level

#ifdef HAVE_MM

Expand Down
2 changes: 1 addition & 1 deletion src/import.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ static scws_t load_user_scws(int multi, char *dbpath)
scws_set_dict(s, SCWS_ETCDIR "/dict.utf8.xdb", SCWS_XDICT_MEM);
scws_add_dict(s, SCWS_ETCDIR "/" CUSTOM_DICT_FILE, SCWS_XDICT_TXT);
scws_add_dict(s, ptr, SCWS_XDICT_TXT);
scws_set_multi(s, multi << 12);
scws_set_multi(s, (multi << 12) & SCWS_MULTI_MASK);
return s;
}

Expand Down
1 change: 0 additions & 1 deletion src/import.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#define DEFAULT_STEMMER "english" // default stemmer
#define DEFAULT_COMMIT_NUMBER 10000 // document numbers
#define DEFAULT_COMMIT_SIZE 256 // MB
#define DEFAULT_SCWS_MULTI 3 // default scws multi level

#define DEFAULT_ARCHIVE_THRESHOLD 100000 // default threshold value to archive

Expand Down
19 changes: 18 additions & 1 deletion src/indexd.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ static void db_import_call(XS_DB *db, XS_USER *user)
// fork child process to run the import
if ((pid = fork()) == 0)
{
EXTERNAL_CALL(xs_import, "xs-import", "-Q", dbpath, sndfile);
char arg[16];
sprintf(arg, "-m%d", db->scws_multi);
EXTERNAL_CALL(xs_import, "xs-import", "-Q", arg, dbpath, sndfile);
}
else if (pid > 0)
{
Expand Down Expand Up @@ -1222,6 +1224,21 @@ static int index_zcmd_exec(XS_CONN *conn)
case CMD_DOC_INDEX:
rc = CMD_RES_CONT | CMD_RES_SAVE;
break;
// scws multi
case CMD_SEARCH_SCWS_SET:
if (cmd->arg1 == CMD_SCWS_SET_MULTI && get_conn_wdb(conn) != NULL)
conn->wdb->scws_multi = (short) cmd->arg2;
break;
case CMD_SEARCH_SCWS_GET:
if (cmd->arg1 != CMD_SCWS_GET_MULTI || get_conn_wdb(conn) == NULL)
rc = CMD_RES_UNIMP;
else
{
char buf[8];
sprintf(buf, "%d", conn->wdb->scws_multi);
rc = CONN_RES_OK2(INFO, buf);
}
break;
// others, passed to next handler
default:
rc = CMD_RES_NEXT;
Expand Down
2 changes: 1 addition & 1 deletion src/logging.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ static scws_t load_user_scws(int multi, char *dbpath)
scws_set_dict(s, SCWS_ETCDIR "/dict.utf8.xdb", SCWS_XDICT_MEM);
scws_add_dict(s, SCWS_ETCDIR "/" CUSTOM_DICT_FILE, SCWS_XDICT_TXT);
scws_add_dict(s, ptr, SCWS_XDICT_TXT);
scws_set_multi(s, multi << 12);
scws_set_multi(s, (multi << 12) & SCWS_MULTI_MASK);
return s;
}

Expand Down
19 changes: 17 additions & 2 deletions src/task.cc
Original file line number Diff line number Diff line change
Expand Up @@ -645,9 +645,18 @@ static int zcmd_task_default(XS_CONN *conn)
}
break;
case CMD_SEARCH_SCWS_SET:
if (cmd->arg1 == CMD_SCWS_SET_MULTI)
{
scws_t scws = (scws_t) zarg->qp->get_scws();
if (scws != NULL)
{
scws_set_multi(scws, (cmd->arg2 << 12) & SCWS_MULTI_MASK);
log_debug_conn("change scws multi level (MODE:%d)", cmd->arg2);
}
}
break;
case CMD_SEARCH_SCWS_GET:
rc = CMD_RES_UNIMP;
break;
default:
rc = CMD_RES_NEXT;
break; // passed to next
Expand Down Expand Up @@ -1887,7 +1896,7 @@ static int zcmd_scws_set(XS_CONN *conn)
}
else if (cmd->arg1 == CMD_SCWS_SET_MULTI)
{
scws_set_multi(scws, (cmd->arg2 << 12));
scws_set_multi(scws, (cmd->arg2 << 12) & SCWS_MULTI_MASK);
}
else if (cmd->arg1 == CMD_SCWS_SET_IGNORE)
{
Expand All @@ -1909,6 +1918,12 @@ static int zcmd_scws_get(XS_CONN *conn)
{
return CONN_RES_OK2(INFO, SCWS_VERSION);
}
else if (cmd->arg1 == CMD_SCWS_GET_MULTI)
{
char buf[8];
sprintf(buf, "%d", scws->mode & SCWS_MULTI_MASK);
return CONN_RES_OK2(INFO, buf);
}
else if (cmd->arg1 == CMD_SCWS_HAS_WORD)
{
int count;
Expand Down
Loading

0 comments on commit 2b5f888

Please sign in to comment.