Skip to content

Commit

Permalink
让同义词权重可通过 XSSearch::setSynonymScale 调整,默认不调整
Browse files Browse the repository at this point in the history
  • Loading branch information
hightman committed May 15, 2013
1 parent c07070a commit 9e592d3
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 58 deletions.
16 changes: 16 additions & 0 deletions sdk/php/lib/XSSearch.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,22 @@ public function setAutoSynonyms($value = true)
return $this;
}

/**
 * Set the weight scale applied to synonym matches.
 *
 * The scale is converted to an integer number of hundredths and sent as
 * arg2 of a CMD_SEARCH_SET_MISC command, with arg1 = 1.
 * Values outside the representable range are clamped to 0..255 (0.00-2.55)
 * instead of being wrapped around.
 *
 * @param float $value scale factor, valid range 0.01-2.55; 1 means no adjustment
 * @return XSSearch the object itself, to allow method chaining
 * @notice compound words produced by scws are also presented as synonyms
 * @since 1.4.7
 */
public function setSynonymScale($value)
{
    // Round instead of truncating: 0.29 * 100 evaluates to 28.999..., which
    // intval() would silently turn into 28.
    $hundredths = round($value * 100);
    // Clamp instead of masking with "& 255": the mask wrapped out-of-range
    // input (2.56 became 0, -1 became 156) rather than limiting it.
    // NOTE(review): the 255 ceiling presumably reflects a one-byte arg2 - confirm against XSCommand.
    $arg2 = (int) max(0, min(255, $hundredths));
    $cmd = new XSCommand(CMD_SEARCH_SET_MISC, 1, $arg2);
    $this->execCommand($cmd);
    return $this;
}

/**
* 获取当前库内的全部同义词列表
* @param int $limit 数量上限, 若设为 0 则启用默认值 100 个
Expand Down
3 changes: 2 additions & 1 deletion sdk/php/lib/xs_cmd.inc.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?php
/* Automatically generated at 2013/04/08 14:45 */
/* Automatically generated at 2013/05/15 12:27 */
define('CMD_NONE', 0);
define('CMD_DEFAULT', CMD_NONE);
define('CMD_PROTOCOL', 20110707);
Expand Down Expand Up @@ -53,6 +53,7 @@
define('CMD_SEARCH_SET_FACETS', 197);
define('CMD_SEARCH_SCWS_SET', 198);
define('CMD_SEARCH_SET_CUTOFF', 199);
define('CMD_SEARCH_SET_MISC', 200);
define('CMD_QUERY_INIT', 224);
define('CMD_QUERY_PARSE', 225);
define('CMD_QUERY_TERM', 226);
Expand Down
18 changes: 9 additions & 9 deletions sdk/php/tests/lib/XSSearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ public function queryProvider()
array('subject:项目测试', 'Xapian::Query((B项目:(pos=1) AND B测试:(pos=2)))'),
array('subject2:测试', 'Xapian::Query((Zsubject2:(pos=1) AND 测试:(pos=2)))'),
array('subject2:Hello', 'Xapian::Query((subject2:(pos=1) PHRASE 2 hello:(pos=2)))'),
array('项目管理制度', 'Xapian::Query((项目:(pos=1) AND (管理制度:(pos=2) SYNONYM 0.9000000000000000222 * (管理:(pos=90) AND 制度:(pos=91)))))'),
array('subject:项目管理制度', 'Xapian::Query((B项目:(pos=1) AND (B管理制度:(pos=2) SYNONYM 0.9000000000000000222 * (B管理:(pos=90) AND B制度:(pos=91)))))'),
array('项目管理制度', 'Xapian::Query((项目:(pos=1) AND (管理制度:(pos=2) SYNONYM (管理:(pos=90) AND 制度:(pos=91)))))'),
array('subject:项目管理制度', 'Xapian::Query((B项目:(pos=1) AND (B管理制度:(pos=2) SYNONYM (B管理:(pos=90) AND B制度:(pos=91)))))'),
array('几句说明', 'Xapian::Query((几句:(pos=1) AND 说明:(pos=2)))'),
array('说明几句', 'Xapian::Query((说明:(pos=1) AND 几句:(pos=2)))'),
array('pid:1 AND pid:2', 'Xapian::Query((0 * A1 AND 0 * A2))'),
Expand Down Expand Up @@ -466,7 +466,7 @@ public function testSearchSynonyms()

// test fuzzy multi query
$search->setFuzzy();
$this->testQuery('中华人民共和国', 'Xapian::Query((中华人民共和国:(pos=1) SYNONYM 0.9000000000000000222 * (中华:(pos=89) OR 人民:(pos=90) OR 共和国:(pos=91))))');
$this->testQuery('中华人民共和国', 'Xapian::Query((中华人民共和国:(pos=1) SYNONYM (中华:(pos=89) OR 人民:(pos=90) OR 共和国:(pos=91))))');
$this->testQuery('"中华人民共和国"', 'Xapian::Query(中华人民共和国:(pos=1))');
$search->setFuzzy(false);

Expand All @@ -487,12 +487,12 @@ public function testSearchSynonyms()
// test synonym query
$search->setAutoSynonyms();
$queries = array(
'项目test' => 'Xapian::Query((项目:(pos=1) AND (Ztest:(pos=2) SYNONYM 0.9000000000000000222 * quiz:(pos=79) SYNONYM 0.9000000000000000222 * 测试:(pos=80))))',
'俗话 subject:(项目 test)' => 'Xapian::Query((俗话:(pos=1) AND B项目:(pos=2) AND (ZBtest:(pos=3) SYNONYM 0.9000000000000000222 * Bquiz:(pos=80) SYNONYM 0.9000000000000000222 * B测试:(pos=81))))',
'爱写hello world' => 'Xapian::Query((爱写:(pos=1) AND ((Zhello:(pos=2) AND Zworld:(pos=3)) SYNONYM 0.9000000000000000222 * 有意思:(pos=68))))',
'demo 迅搜' => 'Xapian::Query((Zdemo:(pos=1) AND (迅搜:(pos=2) SYNONYM 0.9000000000000000222 * xunsearch:(pos=90))))',
'项目test' => 'Xapian::Query((项目:(pos=1) AND (Ztest:(pos=2) SYNONYM quiz:(pos=79) SYNONYM 测试:(pos=80))))',
'俗话 subject:(项目 test)' => 'Xapian::Query((俗话:(pos=1) AND B项目:(pos=2) AND (ZBtest:(pos=3) SYNONYM Bquiz:(pos=80) SYNONYM B测试:(pos=81))))',
'爱写hello world' => 'Xapian::Query((爱写:(pos=1) AND ((Zhello:(pos=2) AND Zworld:(pos=3)) SYNONYM 有意思:(pos=68))))',
'demo 迅搜' => 'Xapian::Query((Zdemo:(pos=1) AND (迅搜:(pos=2) SYNONYM xunsearch:(pos=90))))',
'"demo 迅搜"' => 'Xapian::Query((demo:(pos=1) PHRASE 2 迅搜:(pos=2)))',
'testing' => 'Xapian::Query((Ztest:(pos=1) SYNONYM 0.9000000000000000222 * Zquiz:(pos=78) SYNONYM 0.9000000000000000222 * 测试:(pos=79)))',
'testing' => 'Xapian::Query((Ztest:(pos=1) SYNONYM Zquiz:(pos=78) SYNONYM 测试:(pos=79)))',
);
foreach ($queries as $raw => $expect)
{
Expand Down Expand Up @@ -567,7 +567,7 @@ public function testCustomDict()
EOF;
$index->setCustomDict($dict);
$query = $search->reopen(true)->getQuery('去测测看');
$this->assertEquals('Xapian::Query((去:(pos=1) AND (测测看:(pos=2) SYNONYM 0.9000000000000000222 * (测测:(pos=90) AND 测看:(pos=91)))))', $query);
$this->assertEquals('Xapian::Query((去:(pos=1) AND (测测看:(pos=2) SYNONYM (测测:(pos=90) AND 测看:(pos=91)))))', $query);
}

public function testScwsMulti()
Expand Down
10 changes: 8 additions & 2 deletions sdk/php/util/Quest.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@
--sort=<field1[,field2[,...]]
-s <field1[,field2[,...]] 指定排序字段,在字段前加上 ~ 符号表示逆序
--fuzzy 将搜索默认设为模糊搜索
--synonym 开启自动同义词搜索功能
--synonym[=scale]
开启自动同义词搜索功能,可选择设置同义词权重调整(0.01~2.55)
--scws-multi=<level>
查看或设置搜索语句的 scws 复合分词等级(值:0-15,默认为 3)
--add-weight=<[field1:]word1[:weight1][,[field2:]word2[:weight2]]>
Expand Down Expand Up @@ -236,8 +237,13 @@
// fuzzy search
if (XSUtil::getOpt(null, 'fuzzy') !== null)
$search->setFuzzy();
if (XSUtil::getOpt(null, 'synonym') !== null)
$syn = XSUtil::getOpt(null, 'synonym');
if ($syn !== null)
{
$search->setAutoSynonyms();
if ($syn !== true)
$search->setSynonymScale(floatval($syn));
}

if (($pos = strpos($limit, ',')) === false)
$offset = 0;
Expand Down
1 change: 1 addition & 0 deletions src/searchd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ static int worker_zcmd_exec(XS_CONN *conn)
case CMD_SEARCH_SET_COLLAPSE:
case CMD_SEARCH_SET_FACETS:
case CMD_SEARCH_SET_CUTOFF:
case CMD_SEARCH_SET_MISC:
case CMD_QUERY_INIT:
case CMD_QUERY_PARSE:
case CMD_QUERY_TERM:
Expand Down
4 changes: 4 additions & 0 deletions src/task.cc
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,10 @@ static int zcmd_task_default(XS_CONN *conn)
case CMD_SEARCH_SET_CUTOFF:
zarg->eq->set_cutoff(cmd->arg1 > 100 ? 100 : cmd->arg1, (double) cmd->arg2 / 10.0);
break;
case CMD_SEARCH_SET_MISC:
if (cmd->arg1 == 1)
zarg->qp->set_syn_scale((double) cmd->arg2 / 100.0);
break;
case CMD_QUERY_INIT:
if (!zarg->qq->empty())
{
Expand Down
9 changes: 8 additions & 1 deletion src/xs_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ struct xs_import_hdr

/**
* Register value slot for facets searching
* arg1: 0/1(exact or not), blen: field number, buf: vno list
* arg1:0/1(exact or not), blen: field number, buf: vno list
*/
#define CMD_SEARCH_SET_FACETS 197

Expand All @@ -415,6 +415,13 @@ struct xs_import_hdr
*/
#define CMD_SEARCH_SET_CUTOFF 199

/**
* Set misc options of search
* arg1:type(1=syn_scale|...)
* arg2:scale*100
*/
#define CMD_SEARCH_SET_MISC 200

/**
* ----------------------------------
* Commands for search query: 224~255
Expand Down
Loading

0 comments on commit 9e592d3

Please sign in to comment.