Skip to content

Commit 486383d

Browse files
authored
PCBC-987: Fix consistency vector encoding for FTS (#163)
1 parent 19703d7 commit 486383d

File tree

7 files changed

+141
-39
lines changed

7 files changed

+141
-39
lines changed

Couchbase/MutationState.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ public function export(): array
6060
foreach ($this->tokens as $token) {
6161
$state[] = [
6262
"partitionId" => $token->partitionId(),
63-
"partitionUuid" => hexdec($token->partitionUuid()),
64-
"sequenceNumber" => hexdec($token->sequenceNumber()),
63+
"partitionUuid" => $token->partitionUuid(),
64+
"sequenceNumber" => $token->sequenceNumber(),
6565
"bucketName" => $token->bucketName(),
6666
];
6767
}

Couchbase/SearchOptions.php

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class SearchOptions implements JsonSerializable
2929
private ?int $skip = null;
3030
private ?bool $explain = null;
3131
private ?bool $disableScoring = null;
32-
private ?array $consistentWith = null;
32+
private ?MutationState $consistentWith = null;
3333
private ?array $fields = null;
3434
private ?array $facets = null;
3535
private ?array $sort = null;
@@ -132,16 +132,7 @@ public function disableScoring(bool $disabled): SearchOptions
132132
*/
133133
public function consistentWith(string $index, MutationState $state): SearchOptions
134134
{
135-
$vectors = [];
136-
foreach ($state->tokens() as $token) {
137-
$vectors[] = [
138-
'partitionId' => $token->partitionId(),
139-
'partitionUuid' => $token->partitionUuid(),
140-
'sequenceNumber' => $token->sequenceNumber(),
141-
'bucketName' => $token->bucketName(),
142-
];
143-
}
144-
$this->consistentWith = $vectors;
135+
$this->consistentWith = $state;
145136
return $this;
146137
}
147138

@@ -322,7 +313,7 @@ public static function export(?SearchOptions $options): array
322313
'disableScoring' => $options->disableScoring,
323314
'fields' => $options->fields,
324315
'sortSpecs' => $sort,
325-
'consistentWith' => $options->consistentWith,
316+
'consistentWith' => $options->consistentWith == null ? null : $options->consistentWith->export(),
326317
'facets' => $options->facets,
327318
'highlightStyle' => $highlightStyle,
328319
'highlightFields' => $highlightFields,

src/wrapper/conversion_utilities.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -732,16 +732,16 @@ zval_to_common_search_request(const zend_string* index_name, const zend_string*
732732
std::uint64_t sequence_number;
733733
std::uint16_t partition_id;
734734
std::string bucket_name;
735-
if (auto e = cb_assign_integer(partition_id, options, "partitionId"); e.ec) {
735+
if (auto e = cb_assign_integer(partition_id, item, "partitionId"); e.ec) {
736736
return { {}, e };
737737
}
738-
if (auto e = cb_assign_integer(partition_uuid, options, "partitionUuid"); e.ec) {
738+
if (auto e = cb_assign_integer(partition_uuid, item, "partitionUuid"); e.ec) {
739739
return { {}, e };
740740
}
741-
if (auto e = cb_assign_integer(sequence_number, options, "sequenceNumber"); e.ec) {
741+
if (auto e = cb_assign_integer(sequence_number, item, "sequenceNumber"); e.ec) {
742742
return { {}, e };
743743
}
744-
if (auto e = cb_assign_string(bucket_name, options, "bucketName"); e.ec) {
744+
if (auto e = cb_assign_string(bucket_name, item, "bucketName"); e.ec) {
745745
return { {}, e };
746746
}
747747
vectors.emplace_back(mutation_token{ partition_uuid, sequence_number, partition_id, bucket_name });

src/wrapper/conversion_utilities.hxx

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <chrono>
3333

3434
#include <fmt/format.h>
35+
#include <type_traits>
3536

3637
namespace couchbase::transactions
3738
{
@@ -67,6 +68,44 @@ query_response_to_zval(zval* return_value, const core::operations::query_respons
6768
void
6869
search_query_response_to_zval(zval* return_value, const core::operations::search_response& resp);
6970

71+
/**
 * Parses an integer from its textual representation.
 *
 * Signed targets are parsed with std::stoll and unsigned targets with
 * std::stoull, so the accepted syntax (leading whitespace, optional sign,
 * optional base prefix) is whatever those functions accept for the given base.
 *
 * @tparam Integer integral type of the result
 * @param str  text to parse
 * @param pos  if not null, receives the number of characters consumed
 * @param base numeric base forwarded to std::stoll/std::stoull
 * @return the parsed value
 * @throws std::invalid_argument if no conversion could be performed
 * @throws std::out_of_range if the value does not fit into long long /
 *         unsigned long long, or does not fit into Integer
 */
template<typename Integer>
static Integer
parse_integer(const std::string& str, std::size_t* pos = nullptr, int base = 10)
{
    if constexpr (std::is_signed_v<Integer>) {
        const long long wide = std::stoll(str, pos, base);
        // Check the range before narrowing: an implicit conversion would silently wrap the value.
        if (wide < static_cast<long long>(std::numeric_limits<Integer>::min()) ||
            wide > static_cast<long long>(std::numeric_limits<Integer>::max())) {
            throw std::out_of_range("parse_integer: value does not fit into the requested integer type");
        }
        return static_cast<Integer>(wide);
    } else {
        const unsigned long long wide = std::stoull(str, pos, base);
        // Check the range before narrowing: an implicit conversion would silently wrap the value.
        if (wide > static_cast<unsigned long long>(std::numeric_limits<Integer>::max())) {
            throw std::out_of_range("parse_integer: value does not fit into the requested integer type");
        }
        return static_cast<Integer>(wide);
    }
}
81+
82+
template<typename Integer>
83+
static std::pair<core_error_info, std::optional<Integer>>
84+
cb_get_integer_from_hex(const zend_string* value, std::string_view name)
85+
{
86+
auto hex_string = cb_string_new(value);
87+
88+
if(hex_string.empty()) {
89+
return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("unexpected empty string for {}", name) }, {} };
90+
}
91+
92+
try {
93+
std::size_t pos;
94+
auto result = parse_integer<Integer>(hex_string, &pos, 16);
95+
if (result < std::numeric_limits<Integer>::min() || result > std::numeric_limits<Integer>::max()) {
96+
return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("number out of range for {}", name) }, {} };
97+
}
98+
if (pos != hex_string.length()) {
99+
return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("trailing garbage in {}", name) }, {} };
100+
}
101+
return {{}, result};
102+
} catch (const std::invalid_argument& e) {
103+
return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("invalid hex number for {}", name) }, {} };
104+
} catch (const std::out_of_range& e) {
105+
return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("number out of range for {}", name) }, {} };
106+
}
107+
}
108+
70109
template<typename Integer>
71110
static std::pair<core_error_info, std::optional<Integer>>
72111
cb_get_integer(const zval* options, std::string_view name)
@@ -87,6 +126,8 @@ cb_get_integer(const zval* options, std::string_view name)
87126
return {};
88127
case IS_LONG:
89128
break;
129+
case IS_STRING:
130+
return cb_get_integer_from_hex<Integer>(Z_STR_P(value), name);
90131
default:
91132
return {
92133
{ errc::common::invalid_argument, ERROR_LOCATION, fmt::format("expected {} to be a integer value in the options", name) },

tests/KeyValueScanTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public function setUp(): void
4343
{
4444
parent::setUp();
4545
$this->skipIfProtostellar();
46+
$this->skipIfUnsupported($this->version()->supportsCollections());
4647

4748
$this->collection = $this->defaultCollection();
4849
for ($i = 0; $i < 100; $i++) {

tests/SearchTest.php

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020

2121
use Couchbase\BooleanSearchQuery;
2222
use Couchbase\ClusterInterface;
23+
use Couchbase\CollectionInterface;
2324
use Couchbase\ConjunctionSearchQuery;
2425
use Couchbase\DateRangeSearchFacet;
2526
use Couchbase\DateRangeSearchQuery;
2627
use Couchbase\DisjunctionSearchQuery;
2728
use Couchbase\DocIdSearchQuery;
29+
use Couchbase\DurabilityLevel;
2830
use Couchbase\Exception\FeatureNotAvailableException;
2931
use Couchbase\Exception\IndexNotFoundException;
3032
use Couchbase\GeoBoundingBoxSearchQuery;
@@ -52,6 +54,7 @@
5254
use Couchbase\TermRangeSearchQuery;
5355
use Couchbase\TermSearchFacet;
5456
use Couchbase\TermSearchQuery;
57+
use Couchbase\UpsertOptions;
5558
use Couchbase\VectorQuery;
5659
use Couchbase\VectorQueryCombination;
5760
use Couchbase\VectorSearch;
@@ -63,34 +66,59 @@
6366
class SearchTest extends Helpers\CouchbaseTestCase
6467
{
6568
private ClusterInterface $cluster;
69+
private CollectionInterface $collection;
6670
private SearchIndexManager $indexManager;
6771

72+
/**
 * Upserts every document from beer-data.json (located next to this test)
 * into the collection under test, using MAJORITY_AND_PERSIST_TO_ACTIVE
 * durability for each write.
 *
 * @return int number of documents in the dataset
 * @throws \RuntimeException if the dataset file cannot be read or does not decode to an array
 */
public function loadDataset(): int
{
    $path = __DIR__ . "/beer-data.json";

    $contents = file_get_contents($path);
    if ($contents === false) {
        throw new \RuntimeException(sprintf("Unable to read dataset file '%s'", $path));
    }

    $dataset = json_decode($contents, true);
    if (!is_array($dataset)) {
        // Fail here with a clear message instead of later in foreach/count.
        throw new \RuntimeException(sprintf("Dataset file '%s' does not contain a JSON object or array", $path));
    }

    $options = UpsertOptions::build()->durabilityLevel(DurabilityLevel::MAJORITY_AND_PERSIST_TO_ACTIVE);
    foreach ($dataset as $id => $document) {
        $this->collection->upsert($id, $document, $options);
    }

    return count($dataset);
}
86+
87+
/**
 * Creates (or updates) the 'beer-search' index from beer-search.json and
 * blocks until the index reports at least $datasetSize indexed documents.
 *
 * Progress is written to STDERR on every poll. There is no upper bound on
 * the waiting time.
 *
 * @param int $datasetSize number of documents the index must contain before returning
 */
public function createSearchIndex(int $datasetSize): void
{
    fprintf(STDERR, "Create 'beer-search' to index %d docs\n", $datasetSize);
    $indexDump = json_decode(file_get_contents(__DIR__ . "/beer-search.json"), true);
    $index = SearchIndex::build("beer-search", self::env()->bucketName());
    $index->setParams($indexDump["params"]);
    $this->indexManager->upsertIndex($index);

    $start = time();
    while (true) {
        try {
            $indexedDocuments = $this->indexManager->getIndexedDocumentsCount("beer-search");
            fprintf(STDERR, "%ds, Indexing 'beer-search': %d docs\n", time() - $start, $indexedDocuments);
            if ($indexedDocuments >= $datasetSize) {
                break;
            }
        } catch (\Couchbase\Exception\IndexNotReadyException $ex) {
            // The index is not ready to report its document count yet; poll again after the pause below.
        }
        // Pause on both paths so that a not-ready index is not polled in a tight loop.
        sleep(5);
    }
}
108+
68109
public function setUp(): void
69110
{
70111
parent::setUp();
71112

72113
$this->cluster = $this->connectCluster();
114+
$this->collection = $this->openBucket(self::env()->bucketName())->defaultCollection();
73115

74116
if (self::env()->useCouchbase()) {
75117
$this->indexManager = $this->cluster->searchIndexes();
76118
try {
77119
$this->indexManager->getIndex("beer-search");
78120
} catch (IndexNotFoundException $ex) {
79-
$indexDump = json_decode(file_get_contents(__DIR__ . "/beer-search.json"), true);
80-
$index = SearchIndex::build("beer-search", "beer-sample");
81-
$index->setParams($indexDump["params"]);
82-
$this->indexManager->upsertIndex($index);
83-
}
84-
while (true) {
85-
try {
86-
$indexedDocuments = $this->indexManager->getIndexedDocumentsCount("beer-search");
87-
fprintf(STDERR, "Indexing 'beer-search': %d docs\n", $indexedDocuments);
88-
if ($indexedDocuments > 7000) {
89-
break;
90-
}
91-
sleep(3);
92-
} catch (\Couchbase\Exception\IndexNotReadyException $ex) {
93-
}
121+
$this->createSearchIndex($this->loadDataset());
94122
}
95123
}
96124
}
@@ -159,6 +187,7 @@ public function testSearchWithNoHits()
159187
$this->assertEquals(0, $result->metaData()->totalHits());
160188
}
161189

190+
162191
public function testSearchWithConsistency()
163192
{
164193
$this->skipIfCaves();
@@ -173,8 +202,7 @@ public function testSearchWithConsistency()
173202
$this->assertEmpty($result->rows());
174203
$this->assertEquals(0, $result->metaData()->totalHits());
175204

176-
$collection = $this->cluster->bucket('beer-sample')->defaultCollection();
177-
$result = $collection->upsert($id, ["type" => "beer", "name" => $id]);
205+
$result = $this->collection->upsert($id, ["type" => "beer", "name" => $id]);
178206
$mutationState = new MutationState();
179207
$mutationState->add($result);
180208

@@ -358,7 +386,7 @@ public function testCompoundSearchQueries()
358386
$disjunctionQuery = new DisjunctionSearchQuery([$nameQuery, $descriptionQuery]);
359387
$options = SearchOptions::build()->fields(["type", "name", "description"]);
360388
$result = $this->cluster->searchQuery("beer-search", $disjunctionQuery, $options);
361-
$this->assertGreaterThan(1000, $result->metaData()->totalHits());
389+
$this->assertGreaterThan(20, $result->metaData()->totalHits());
362390
$this->assertNotEmpty($result->rows());
363391
$this->assertMatchesRegularExpression('/green/i', $result->rows()[0]['fields']['name']);
364392
$this->assertDoesNotMatchRegularExpression('/hop/i', $result->rows()[0]['fields']['name']);
@@ -434,18 +462,18 @@ public function testSearchWithFacets()
434462
$this->assertNotNull($result->facets()['foo']);
435463
$this->assertEquals('name', $result->facets()['foo']->field());
436464
$this->assertEquals('ale', $result->facets()['foo']->terms()[0]->term());
437-
$this->assertGreaterThan(1000, $result->facets()['foo']->terms()[0]->count());
465+
$this->assertGreaterThan(10, $result->facets()['foo']->terms()[0]->count());
438466

439467
$this->assertNotNull($result->facets()['bar']);
440468
$this->assertEquals('updated', $result->facets()['bar']->field());
441469
$this->assertEquals('old', $result->facets()['bar']->dateRanges()[0]->name());
442-
$this->assertGreaterThan(5000, $result->facets()['bar']->dateRanges()[0]->count());
470+
$this->assertGreaterThan(30, $result->facets()['bar']->dateRanges()[0]->count());
443471

444472
$this->assertNotNull($result->facets()['baz']);
445473
$this->assertEquals('abv', $result->facets()['baz']->field());
446474
$this->assertEquals('light', $result->facets()['baz']->numericRanges()[0]->name());
447475
$this->assertGreaterThan(0, $result->facets()['baz']->numericRanges()[0]->max());
448-
$this->assertGreaterThan(100, $result->facets()['baz']->numericRanges()[0]->count());
476+
$this->assertGreaterThan(15, $result->facets()['baz']->numericRanges()[0]->count());
449477
}
450478

451479
public function testNullInNumericRangeFacet()

0 commit comments

Comments
 (0)