@@ -40,7 +40,7 @@ class TNGrammBuilder {
4040 class THashesCountSelector {
4141 public:
4242 template <class TActor >
43- static void BuildHashes (const ui8* data, const TActor& actor) {
43+ static void BuildHashes (const ui8* data, TActor& actor) {
4444 ui64 hash = (ui64)2166136261 * (ui64)HashIdx;
4545 actor (THashesBuilder<CharsCount>::Build (data, hash));
4646 THashesCountSelector<HashIdx - 1 , CharsCount>::BuildHashes (data, actor);
@@ -51,7 +51,7 @@ class TNGrammBuilder {
5151 class THashesCountSelector <0 , CharsCount> {
5252 public:
5353 template <class TActor >
54- static void BuildHashes (const ui8* /* data*/ , const TActor& /* actor*/ ) {
54+ static void BuildHashes (const ui8* /* data*/ , TActor& /* actor*/ ) {
5555 }
5656 };
5757
@@ -60,7 +60,7 @@ class TNGrammBuilder {
6060 private:
6161 template <class TActor >
6262 static void BuildHashesImpl (
63- const ui8* data, const ui32 dataSize, const std::optional<NRequest::TLikePart::EOperation> op, const TActor& actor) {
63+ const ui8* data, const ui32 dataSize, const std::optional<NRequest::TLikePart::EOperation> op, TActor& actor) {
6464 TBuffer fakeString;
6565 if (!op || op == NRequest::TLikePart::EOperation::StartsWith) {
6666 for (ui32 c = 1 ; c <= CharsCount; ++c) {
@@ -90,7 +90,7 @@ class TNGrammBuilder {
9090 public:
9191 template <class TActor >
9292 static void BuildHashes (const ui8* data, const ui32 dataSize, const ui32 hashesCount, const ui32 nGrammSize,
93- const std::optional<NRequest::TLikePart::EOperation> op, const TActor& actor) {
93+ const std::optional<NRequest::TLikePart::EOperation> op, TActor& actor) {
9494 if (HashesCount == hashesCount && CharsCount == nGrammSize) {
9595 BuildHashesImpl (data, dataSize, op, actor);
9696 } else if (HashesCount > hashesCount && CharsCount > nGrammSize) {
@@ -105,13 +105,12 @@ class TNGrammBuilder {
105105 }
106106 };
107107
108-
109108 template <ui32 CharsCount>
110109 class THashesSelector <0 , CharsCount> {
111110 public:
112111 template <class TActor >
113112 static void BuildHashes (const ui8* /* data*/ , const ui32 /* dataSize*/ , const ui32 /* hashesCount*/ , const ui32 /* nGrammSize*/ ,
114- const std::optional<NRequest::TLikePart::EOperation> /* op*/ , const TActor& /* actor*/ ) {
113+ const std::optional<NRequest::TLikePart::EOperation> /* op*/ , TActor& /* actor*/ ) {
115114 AFL_VERIFY (false );
116115 }
117116 };
@@ -121,7 +120,7 @@ class TNGrammBuilder {
121120 public:
122121 template <class TActor >
123122 static void BuildHashes (const ui8* /* data*/ , const ui32 /* dataSize*/ , const ui32 /* hashesCount*/ , const ui32 /* nGrammSize*/ ,
124- const std::optional<NRequest::TLikePart::EOperation> /* op*/ , const TActor& /* actor*/ ) {
123+ const std::optional<NRequest::TLikePart::EOperation> /* op*/ , TActor& /* actor*/ ) {
125124 AFL_VERIFY (false );
126125 }
127126 };
@@ -131,14 +130,14 @@ class TNGrammBuilder {
131130 public:
132131 template <class TActor >
133132 static void BuildHashes (const ui8* /* data*/ , const ui32 /* dataSize*/ , const ui32 /* hashesCount*/ , const ui32 /* nGrammSize*/ ,
134- const std::optional<NRequest::TLikePart::EOperation> /* op*/ , const TActor& /* actor*/ ) {
133+ const std::optional<NRequest::TLikePart::EOperation> /* op*/ , TActor& /* actor*/ ) {
135134 AFL_VERIFY (false );
136135 }
137136 };
138137
139138 template <class TAction >
140- void BuildNGramms (const char * data, const ui32 dataSize, const std::optional<NRequest::TLikePart::EOperation> op, const ui32 nGrammSize,
141- const TAction& pred) {
139+ void BuildNGramms (
140+ const char * data, const ui32 dataSize, const std::optional<NRequest::TLikePart::EOperation> op, const ui32 nGrammSize, TAction& pred) {
142141 THashesSelector<TConstants::MaxHashesCount, TConstants::MaxNGrammSize>::BuildHashes (
143142 (const ui8*)data, dataSize, HashesCount, nGrammSize, op, pred);
144143 }
@@ -149,7 +148,7 @@ class TNGrammBuilder {
149148 }
150149
151150 template <class TFiller >
152- void FillNGrammHashes (const ui32 nGrammSize, const std::shared_ptr<arrow::Array>& array, const TFiller& fillData) {
151+ void FillNGrammHashes (const ui32 nGrammSize, const std::shared_ptr<arrow::Array>& array, TFiller& fillData) {
153152 AFL_VERIFY (array->type_id () == arrow::utf8 ()->id ())(" id" , array->type ()->ToString ());
154153 NArrow::SwitchType (array->type_id (), [&](const auto & type) {
155154 using TWrap = std::decay_t <decltype (type)>;
@@ -173,22 +172,35 @@ class TNGrammBuilder {
173172 }
174173
175174 template <class TFiller >
176- void FillNGrammHashes (const ui32 nGrammSize, const NRequest::TLikePart::EOperation op, const TString& userReq, const TFiller& fillData) {
175+ void FillNGrammHashes (const ui32 nGrammSize, const NRequest::TLikePart::EOperation op, const TString& userReq, TFiller& fillData) {
177176 BuildNGramms (userReq.data (), userReq.size (), op, nGrammSize, fillData);
178177 }
179178};
180179
// Functor that marks bits in a caller-owned bit vector: for every hash it
// receives, the bit at position `hash % size` is set to true.
//
// The vector is held by reference rather than through a `bool*`:
// std::vector<bool> is a packed specialization whose operator[] returns a
// proxy object, so there is no underlying `bool` array to point into.
//
// The referenced vector must outlive the inserter and must not be resized
// while the inserter is in use (the size is captured at construction).
class TVectorInserter {
private:
    std::vector<bool>& Values;
    const ui32 Size;

public:
    // @param values  bit vector to fill; its size at this moment defines the modulus.
    TVectorInserter(std::vector<bool>& values)
        : Values(values)
        , Size(static_cast<ui32>(values.size())) {
    }

    // Sets the bit selected by `hash`. An empty vector has no bits to set,
    // so the call is a no-op instead of dividing by zero.
    void operator()(const ui64 hash) {
        if (Size == 0) {
            return;
        }
        Values[hash % Size] = true;
    }
};
195+
181196TString TIndexMeta::DoBuildIndexImpl (TChunkedBatchReader& reader, const ui32 /* recordsCount*/ ) const {
182197 AFL_VERIFY (reader.GetColumnsCount () == 1 )(" count" , reader.GetColumnsCount ());
183198 TNGrammBuilder builder (HashesCount);
184199
185200 std::vector<bool > bitsVector (FilterSizeBytes * 8 , false );
186- bool * memAccessor = &bitsVector[0 ];
187- const auto predSet = [&](const ui64 hashSecondary) {
188- memAccessor[hashSecondary % (FilterSizeBytes * 8 )] = true ;
189- };
201+ TVectorInserter inserter (bitsVector);
190202 for (reader.Start (); reader.IsCorrect ();) {
191- builder.FillNGrammHashes (NGrammSize, reader.begin ()->GetCurrentChunk (), predSet );
203+ builder.FillNGrammHashes (NGrammSize, reader.begin ()->GetCurrentChunk (), inserter );
192204 reader.ReadNext (reader.begin ()->GetCurrentChunk ()->length ());
193205 }
194206 return TFixStringBitsStorage (bitsVector).GetData ();
0 commit comments