diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index a73585e579126..2ee91b71766bd 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -59,13 +59,15 @@ def self.update_index(table: , id: , raw_data:) tsvector.scan(/'(([a-zA-Z0-9]+\.)+[a-zA-Z0-9]+)'\:([\w+,]+)/).reduce(additional_lexemes) do |array, (lexeme, _, positions)| count = 0 - loop do - count += 1 - break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots - term, _, remaining = lexeme.partition(".") - break if remaining.blank? - array << "'#{term}':#{positions} '#{remaining}':#{positions}" - lexeme = remaining + if lexeme !~ /^(\d+\.)?(\d+\.)?(\*|\d+)$/ + loop do + count += 1 + break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots + term, _, remaining = lexeme.partition(".") + break if remaining.blank? + array << "'#{term}':#{positions} '#{remaining}':#{positions}" + lexeme = remaining + end end array diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb index b814949ce0f7c..fc8624eccdc10 100644 --- a/spec/services/search_indexer_spec.rb +++ b/spec/services/search_indexer_spec.rb @@ -141,6 +141,15 @@ def scrub(html, strip_diacritics: false) ) end + it 'should not tokenize versions' do + post.topic.update!(title: "this is a title that I am testing") + post.update!(raw: '1.2.2') + + expect(post.post_search_data.search_data).to eq( + "'1.2.2':10 'test':8A 'titl':4A 'uncategor':9B" + ) + end + it 'should tokenize host of a URL and removes query string' do category = Fabricate(:category, name: 'awesome category') topic = Fabricate(:topic, category: category, title: 'this is a test topic')