From acc17d3542ef9d6c252e8f4c740a799663d0a595 Mon Sep 17 00:00:00 2001 From: Sascha Karnatz Date: Wed, 9 Oct 2024 12:39:53 +0200 Subject: [PATCH] Store Alchemy::Page as a single PgSearch::Document Previously the page and the ingredients (and in previous versions also essences) were stored as separate PgSearch::Document entries. On search these document were combined to a single document to make it usable in the search itself. These mechanic made it pretty difficult to extend the search with other models. In newer versions of Alchemy the page is only available after a new page version is released and these mechanic is used to create a single PgSearch::Document with the content of the page and all supported ingredients. This change makes the whole search and the index creation less complex. The Alchemy module now has searchable_ingredients attribute which makes it easier to change the indexing from the outside. Furthermore a few extensions are now in a Search namespace, because they are not related to the PgSearch gem. --- .../alchemy/pg_search/ingredient_extension.rb | 30 --- .../pg_search/pg_search_document_extension.rb | 14 -- .../element_extension.rb | 4 +- .../alchemy/search/ingredient_extension.rb | 14 ++ .../alchemy/search/page_extension.rb | 10 + app/views/alchemy/search/_result.html.erb | 10 +- lib/alchemy-pg_search.rb | 56 ++--- spec/dummy/config/alchemy/elements.yml | 4 +- spec/dummy/config/alchemy/page_layouts.yml | 6 + spec/features/fulltext_search_feature_spec.rb | 13 +- spec/lib/alchemy-pg_search_spec.rb | 194 +++++------------- spec/models/ingredient_spec.rb | 66 ++---- spec/models/pg_search_document_spec.rb | 22 -- 13 files changed, 147 insertions(+), 296 deletions(-) delete mode 100644 app/extensions/alchemy/pg_search/ingredient_extension.rb rename app/extensions/alchemy/{pg_search => search}/element_extension.rb (70%) create mode 100644 app/extensions/alchemy/search/ingredient_extension.rb create mode 100644 app/extensions/alchemy/search/page_extension.rb diff --git a/app/extensions/alchemy/pg_search/ingredient_extension.rb b/app/extensions/alchemy/pg_search/ingredient_extension.rb deleted file mode 100644 index 2c377f0..0000000 --- a/app/extensions/alchemy/pg_search/ingredient_extension.rb +++ /dev/null @@ -1,30 +0,0 @@ -module Alchemy::PgSearch::IngredientExtension - - def self.multisearch_config - { - against: [ - :value, - ], - additional_attributes: ->(ingredient) { { page_id: ingredient.element.page.id } }, - if: :searchable? - } - end - - def self.prepended(base) - base.include PgSearch::Model - base.multisearchable(multisearch_config) - end - - def searchable? - Alchemy::PgSearch.is_searchable?(type) && - (definition.key?(:searchable) ? definition[:searchable] : true) && - value.present? && !!element&.searchable? - end -end - -# add the PgSearch model to all ingredients -Alchemy::Ingredient.prepend(Alchemy::PgSearch::IngredientExtension) - -# add custom content fields for Richtext, and Picture -Alchemy::Ingredients::Picture.multisearchable(Alchemy::PgSearch::IngredientExtension.multisearch_config.merge({against: [:caption]})) -Alchemy::Ingredients::Richtext.multisearchable(Alchemy::PgSearch::IngredientExtension.multisearch_config.merge({against: [:stripped_body]})) diff --git a/app/extensions/alchemy/pg_search/pg_search_document_extension.rb b/app/extensions/alchemy/pg_search/pg_search_document_extension.rb index e7cf73c..0b0e96f 100644 --- a/app/extensions/alchemy/pg_search/pg_search_document_extension.rb +++ b/app/extensions/alchemy/pg_search/pg_search_document_extension.rb @@ -2,20 +2,6 @@ module Alchemy::PgSearch::PgSearchDocumentExtension def self.prepended(base) base.belongs_to :page, class_name: "::Alchemy::Page", foreign_key: "page_id", optional: true end - - ## - # get a list of excerpts of the searched phrase - # The JSON_AGG - method will transform the grouped content entries into json which have to be "unpacked". - # @return [array] - def excerpts - return [] if content.blank? - begin - parsed_content = JSON.parse content - parsed_content.kind_of?(Array) ? parsed_content : [] - rescue JSON::ParserError - [] - end - end end PgSearch::Document.prepend(Alchemy::PgSearch::PgSearchDocumentExtension) diff --git a/app/extensions/alchemy/pg_search/element_extension.rb b/app/extensions/alchemy/search/element_extension.rb similarity index 70% rename from app/extensions/alchemy/pg_search/element_extension.rb rename to app/extensions/alchemy/search/element_extension.rb index 0ef3823..85ff1fc 100644 --- a/app/extensions/alchemy/pg_search/element_extension.rb +++ b/app/extensions/alchemy/search/element_extension.rb @@ -1,4 +1,4 @@ -module Alchemy::PgSearch::ElementExtension +module Alchemy::Search::ElementExtension def self.prepended(base) base.attr_writer :searchable end @@ -12,4 +12,4 @@ def searchable? end end -Alchemy::Element.prepend(Alchemy::PgSearch::ElementExtension) +Alchemy::Element.prepend(Alchemy::Search::ElementExtension) diff --git a/app/extensions/alchemy/search/ingredient_extension.rb b/app/extensions/alchemy/search/ingredient_extension.rb new file mode 100644 index 0000000..b74c593 --- /dev/null +++ b/app/extensions/alchemy/search/ingredient_extension.rb @@ -0,0 +1,14 @@ +module Alchemy::Search::IngredientExtension + def searchable_content + send(Alchemy.searchable_ingredients[type.to_sym])&.squish + end + + def searchable? + Alchemy.searchable_ingredients.has_key?(type.to_sym) && + (definition.key?(:searchable) ? definition[:searchable] : true) && + !!element&.searchable? + end +end + +# add the PgSearch model to all ingredients +Alchemy::Ingredient.prepend(Alchemy::Search::IngredientExtension) diff --git a/app/extensions/alchemy/search/page_extension.rb b/app/extensions/alchemy/search/page_extension.rb new file mode 100644 index 0000000..b59dc5b --- /dev/null +++ b/app/extensions/alchemy/search/page_extension.rb @@ -0,0 +1,10 @@ +# Enable Postgresql full text indexing. +# +module Alchemy::Search::PageExtension + def searchable? + (definition.key?(:searchable) ? definition[:searchable] : true) && + searchable && public? && !layoutpage? + end +end + +Alchemy::Page.prepend(Alchemy::Search::PageExtension) diff --git a/app/views/alchemy/search/_result.html.erb b/app/views/alchemy/search/_result.html.erb index f7bea6f..923783c 100644 --- a/app/views/alchemy/search/_result.html.erb +++ b/app/views/alchemy/search/_result.html.erb @@ -1,12 +1,6 @@
  • - <% page = result.page %> + <% page = result.searchable %>

    <%= link_to page.name, show_alchemy_page_path(page) %>

    - <% if result.excerpts.any? %> - <% result.excerpts.each do |excerpt| %> -

    <%= highlighted_excerpt(excerpt, params[:query]) %>

    - <% end %> - <% else %> -

    <%= page.meta_description %>

    - <% end %> +

    <%= highlighted_excerpt(result.content, params[:query]) %>

    <%= link_to page.urlname, show_alchemy_page_path(page) %>

  • diff --git a/lib/alchemy-pg_search.rb b/lib/alchemy-pg_search.rb index 2e79354..bfee160 100644 --- a/lib/alchemy-pg_search.rb +++ b/lib/alchemy-pg_search.rb @@ -5,24 +5,22 @@ module Alchemy mattr_accessor :search_class @@search_class = PgSearch - module PgSearch - SEARCHABLE_INGREDIENTS = %w[Text Richtext Picture] + mattr_accessor :searchable_ingredients + @@searchable_ingredients = { + "Alchemy::Ingredients::Text": :value, + "Alchemy::Ingredients::Richtext": :stripped_body, + "Alchemy::Ingredients::Picture": :caption, + } + module PgSearch extend Config ## - # is ingredient searchable? - # @param ingredient_type [string] - # @return [boolean] - def self.is_searchable?(ingredient_type) - SEARCHABLE_INGREDIENTS.include?(ingredient_type.gsub(/Alchemy::Ingredients::/, "")) - end - - ## - # index all supported Alchemy models + # index all supported Alchemy pages def self.rebuild - [Alchemy::Page, Alchemy::Ingredient].each do |model| - ::PgSearch::Multisearch.rebuild(model) + ActiveRecord::Base.transaction do + ::PgSearch::Document.delete_all + Alchemy::Page.all.each{ |page| index_page(page) } end end @@ -39,14 +37,16 @@ def self.remove_page(page) # # @param page [Alchemy::Page] def self.index_page(page) - remove_page page - page.update_pg_search_document - page.all_elements.includes(:ingredients).find_each do |element| - element.ingredients.select { |i| Alchemy::PgSearch.is_searchable?(i.type) }.each do |ingredient| - ingredient.update_pg_search_document - end - end + + document = page.pg_search_document + return if document.nil? + + ingredient_content = page.all_elements.includes(ingredients: {element: :page}).map do |element| + element.ingredients.select { |i| i.searchable? }.map(&:searchable_content).join(" ") + end.join(" ") + + document.update_column(:content, "#{document.content} #{ingredient_content}".squish) end ## @@ -56,13 +56,17 @@ def self.index_page(page) # @param ability [nil|CanCan::Ability] # @return [ActiveRecord::Relation] def self.search(query, ability: nil) - query = ::PgSearch.multisearch(query) - .select("JSON_AGG(content) as content", :page_id) - .reorder("") - .group(:page_id) - .joins(:page) + query = ::PgSearch.multisearch(query).includes(:searchable) - query = query.merge(Alchemy::Page.accessible_by(ability, :read)) if ability + if ability + # left_joins method is not usable here, because the order of the joins are incorrect + # and would result in a SQL error. We can receive the correct query order with these + # odd left join string + # Ref: https://guides.rubyonrails.org/active_record_querying.html#using-a-string-sql-fragment + query = query + .joins("LEFT JOIN alchemy_pages ON alchemy_pages.id = pg_search_documents.page_id") + .merge(Alchemy::Page.accessible_by(ability, :read)) + end query end diff --git a/spec/dummy/config/alchemy/elements.yml b/spec/dummy/config/alchemy/elements.yml index 90581b1..5d7dd84 100644 --- a/spec/dummy/config/alchemy/elements.yml +++ b/spec/dummy/config/alchemy/elements.yml @@ -34,11 +34,11 @@ searchable: false - role: public type: Richtext - default: "This is some public text." + default: "This is some public richtext." - role: confidential type: Richtext searchable: false - default: "This is some confidential text." + default: "This is some confidential richtext." - role: image type: Picture - role: secret_image diff --git a/spec/dummy/config/alchemy/page_layouts.yml b/spec/dummy/config/alchemy/page_layouts.yml index f30c0f6..8fc755f 100644 --- a/spec/dummy/config/alchemy/page_layouts.yml +++ b/spec/dummy/config/alchemy/page_layouts.yml @@ -6,6 +6,12 @@ - article - secrets +- name: mixed + elements: + - mixed + autogenerate: + - mixed + - name: search searchresults: true unique: true diff --git a/spec/features/fulltext_search_feature_spec.rb b/spec/features/fulltext_search_feature_spec.rb index 43e48c4..c47ccf4 100644 --- a/spec/features/fulltext_search_feature_spec.rb +++ b/spec/features/fulltext_search_feature_spec.rb @@ -21,15 +21,19 @@ image.save end + before do + Alchemy::PgSearch.rebuild + end + it "displays search results from text ingredients" do - visit("/suche?query=search") + visit("/suche?query=headline") within(".search_results") do expect(page).to have_content("This is a headline everybody should be able to search for.") end end - it "displays search results from richtext essences" do - visit("/suche?query=search") + it "displays search results from richtext ingredient" do + visit("/suche?query=text%20block") within(".search_results") do expect(page).to have_content("This is a text block everybody should be able to search for.") end @@ -59,7 +63,6 @@ it "does not display results placed on global pages" do # A layout page is configured and the page is indexed after publish public_page.update!(layoutpage: true) - Alchemy::PgSearch.index_page public_page visit("/suche?query=search") expect(page).to have_css("h2.no_search_results") @@ -131,6 +134,7 @@ before do nested_element.ingredient_by_role("headline").update!({ value: "Content from nested element" }) + Alchemy::PgSearch.rebuild end it "displays search results from nested elements" do @@ -184,6 +188,7 @@ page_version: create(:alchemy_page, :public).public_version, ) end + Alchemy::PgSearch.rebuild end context "when default config is used" do diff --git a/spec/lib/alchemy-pg_search_spec.rb b/spec/lib/alchemy-pg_search_spec.rb index 1ae1c1d..a8339ad 100644 --- a/spec/lib/alchemy-pg_search_spec.rb +++ b/spec/lib/alchemy-pg_search_spec.rb @@ -1,187 +1,105 @@ require 'spec_helper' describe Alchemy::PgSearch do - let(:page_version) { create(:alchemy_page_version, :published) } - let(:ingredient_element) { create(:alchemy_element, :with_ingredients, name: "ingredient_test", public: true, page_version: page_version) } - - let(:prepared_ingredients) do - { :ingredient_text => :value, :ingredient_richtext => :value, :ingredient_picture => :value }.each do |ingredient_name, field| - ingredient = ingredient_element.ingredient_by_role(ingredient_name) - ingredient[field] = "foo" - ingredient.save + let!(:first_page) { create(:alchemy_page, :public) } + let!(:second_page) { create(:alchemy_page, :public) } + + describe '#rebuild' do + subject { described_class.rebuild } + + it 'should have both created pages indexed documents' do + expect(PgSearch::Document.count).to be(2) end - end - let(:first_page) { Alchemy::Page.first } - let(:second_page) { Alchemy::Page.last } - - context 'rebuild' do - it 'should have zero indexed documents' do - expect(PgSearch::Document.count).to be(0) - end - - context 'after rebuild' do - before do - prepared_ingredients - Alchemy::PgSearch.rebuild - end - - it 'should have entries (2 Pages + 3 Ingredients)' do - expect(PgSearch::Document.count).to eq(5) - end - - it "should have three ingredients" do - expect(PgSearch::Document.where(searchable_type: "Alchemy::Ingredient").count).to eq(3) - end + it "has 3 Pages (Root Page + 2 created pages)" do + subject + expect(PgSearch::Document.count).to be(3) end end - context 'remove_page' do - before do - prepared_ingredients - Alchemy::PgSearch.rebuild - end - - context 'remove first page' do - before { Alchemy::PgSearch.remove_page first_page } - - it 'should have only one page and relative ingredients (1 Page + 3 Ingredients)' do - expect(PgSearch::Document.count).to eq(4) - end - - it 'should have one page entry' do - expect(PgSearch::Document.where(searchable_type: "Alchemy::Page").count).to eq(1) - end - end - - context 'remove second page' do - before { Alchemy::PgSearch.remove_page second_page } + describe "#remove_page" do + subject { described_class.remove_page(first_page) } - it 'should have only one page (1 Page)' do - expect(PgSearch::Document.count).to eq(1) - end - - it 'should have one page entry' do - expect(PgSearch::Document.where(searchable_type: "Alchemy::Page").count).to eq(1) - end + it "should remove the page from search index" do + expect { subject }.to change { PgSearch::Document.count }.by(-1) + expect(first_page.reload.pg_search_document).to be_nil end end - context 'index_page' do + describe "#index_page" do + let!(:first_page) { create(:alchemy_page, :public, name: "Mixed", page_layout: "mixed", autogenerate_elements: true) } + let(:content) { first_page.pg_search_document.content } before do - prepared_ingredients PgSearch::Document.destroy_all # clean the whole index end - it 'should have zero indexed documents' do - expect(PgSearch::Document.count).to be(0) - end - - context 'first_page' do - before do - Alchemy::PgSearch.index_page first_page - end + subject { described_class.index_page(first_page.reload) } - it 'should have only one entry' do - expect(PgSearch::Document.count).to be(1) - end - - it 'should be the first page' do - expect(PgSearch::Document.first.page_id).to be(first_page.id) - end + it "creates a new pg_search document" do + expect { subject }.to change { PgSearch::Document.count }.by(1) end - context 'second_page' do - before do - Alchemy::PgSearch.index_page second_page - end - - it 'should have four entries (1 Page + 3 Ingredients)' do - expect(PgSearch::Document.count).to be(4) - end - - it 'should be all relate to the same page ' do - PgSearch::Document.all.each do |document| - expect(document.page_id).to be(second_page.id) - end - end + it "has the page title as content" do + subject + expect(content).to include("Mixed ") end - context 'nested elements' do - let!(:nested_element) { create(:alchemy_element, :with_ingredients, name: "article", public: true, page_version: page_version, parent_element: ingredient_element) } - - before do - Alchemy::PgSearch.index_page second_page - end - - it 'should have 6 documents' do - # 1 Page + 3 previous ingredients + 2 new article ingredients - expect(PgSearch::Document.count).to be(6) - end - - it 'should be all relate to the same page ' do - PgSearch::Document.all.each do |document| - expect(document.page_id).to be(second_page.id) - end - end + it "has ingredient as content" do + subject + expect(content).to include("public title") end - context 'page searchable' do - let(:searchable) { true } - let!(:page) { create(:alchemy_page, :public, name: "Searchable Page", searchable: searchable) } - let(:result) { Alchemy::PgSearch.search "searchable" } + it "hasn't not searchable ingredient in content" do + subject + expect(content).to_not include("secret password") + end - before do - Alchemy::PgSearch.rebuild - end + it "stores stripped content from ingredient" do + subject + expect(content).to include("public richtext") + end - it 'should find one page' do - expect(result.length).to eq(1) - end + it "removes whitespace from content" do + subject + expect(content).to start_with("Mixed") + end - context 'searchable disabled' do - let(:searchable) { false } + context "hidden page" do + let!(:first_page) { create(:alchemy_page) } - it 'should not find any page' do - expect(result.length).to eq(0) - end + it "creates nothing" do + expect { subject }.to change { PgSearch::Document.count }.by(0) end end end context 'search' do - let(:result) { Alchemy::PgSearch.search "foo" } - - before do - create(:alchemy_page, :restricted, :public, name: "foo") - prepared_ingredients - Alchemy::PgSearch.rebuild - end + let!(:third_page) { create(:alchemy_page, :restricted, :public, name: "Third Page") } + let(:ability) { nil } + + subject(:result) { Alchemy::PgSearch.search "page", ability: } - it 'should find two pages' do - expect(result.length).to eq(2) + it 'should find three pages' do + expect(result.length).to eq(3) end context 'ability' do let(:user) { User.create(alchemy_roles: ["member"]) } - let(:result) { Alchemy::PgSearch.search "foo", ability: Alchemy::Permissions.new(user) } + let(:ability) { Alchemy::Permissions.new(user) } context 'with a logged in user' do - it 'should find two pages' do - expect(result.length).to eq(2) + it 'should find the restricted page' do + expect(result.length).to eq(3) + expect(result.last.page).to eq(third_page) end end context 'with an unknown user' do let(:user) { User.create(alchemy_roles: []) } - it 'should find one page' do - expect(result.length).to eq(1) - end - - it 'should find only the second page' do - expect(result.take.page).to eq(second_page) + it 'should find two pages' do + expect(result.length).to eq(2) end end end diff --git a/spec/models/ingredient_spec.rb b/spec/models/ingredient_spec.rb index 41fa0c0..a51f13b 100644 --- a/spec/models/ingredient_spec.rb +++ b/spec/models/ingredient_spec.rb @@ -1,6 +1,6 @@ require "spec_helper" -RSpec.shared_examples_for "it is searchable" do +RSpec.shared_examples_for "it is searchable" do |attribute| describe "searchable?" do subject { ingredient.searchable? } @@ -20,13 +20,6 @@ end end - context "ingredient has no content" do - it "should be not searchable" do - ingredient.value = nil - expect(ingredient.searchable?).to be(false) - end - end - context "element is not public" do it "should be not searchable" do element.public = false @@ -44,43 +37,16 @@ end end -RSpec.shared_examples_for "it is in search index" do - describe "search index" do - let(:document) { PgSearch::Document.first } - - subject do - ingredient - ::PgSearch::Multisearch.rebuild described_class - end - - it "should have one entry" do - subject - expect(PgSearch::Document.where(searchable_type: "Alchemy::Ingredient").count).to eq(1) - end - - it "should have the content" do - subject - expect(PgSearch::Document.first.content).to eq(content) - end - - context "configured as not searchable" do - before do - allow_any_instance_of(ingredient.class).to receive(:definition).at_least(:once) do - { - searchable: false, - } - end - end +RSpec.shared_examples_for "a searchable content" do + it "has searchable content" do + expect(ingredient.searchable_content).to eq("foo bar") + end - it "should have no index entry" do - subject - expect(PgSearch::Document.where(searchable_type: "Alchemy::Ingredient").count).to eq(0) - end - end + context "whitespaces" do + let(:content) { " foo\n bar "} - it "should be the current ingredient" do - subject - expect(document.searchable).to eq(ingredient) + it "has removed unnecessary whitespaces" do + expect(ingredient.searchable_content).to eq("foo bar") end end end @@ -91,26 +57,26 @@ create(:alchemy_element, :with_ingredients, name: "ingredient_test", public: true, page_version: page_version) end - let(:content) { "foo bar"} + let(:content) { "foo bar" } describe Alchemy::Ingredients::Text do let(:ingredient) { create(:alchemy_ingredient_text, value: content, element: element) } - it_behaves_like "it is searchable" - it_behaves_like "it is in search index" + it_behaves_like "it is searchable", field: :value + it_behaves_like "a searchable content" end describe Alchemy::Ingredients::Richtext do let(:ingredient) { create(:alchemy_ingredient_richtext, value: "foo bar", element: element) } - it_behaves_like "it is searchable" - it_behaves_like "it is in search index" + it_behaves_like "it is searchable", field: :stripped_body + it_behaves_like "a searchable content" end describe Alchemy::Ingredients::Picture do let(:ingredient) { create(:alchemy_ingredient_picture, value: create(:alchemy_picture), caption: content, element: element) } - it_behaves_like "it is searchable" - it_behaves_like "it is in search index" + it_behaves_like "it is searchable", field: :caption + it_behaves_like "a searchable content" end end diff --git a/spec/models/pg_search_document_spec.rb b/spec/models/pg_search_document_spec.rb index 5c997bb..e0db0c9 100644 --- a/spec/models/pg_search_document_spec.rb +++ b/spec/models/pg_search_document_spec.rb @@ -17,26 +17,4 @@ end end end - - describe "excerpts" do - it "should be empty if the content is nil" do - expect(document.excerpts).to eq([]) - end - - context "with no json" do - let(:document) { PgSearch::Document.new(content: "123") } - - it "should be empty if the content is a not valid json" do - expect(document.excerpts).to eq([]) - end - end - - context "with valid json" do - let(:document) { PgSearch::Document.new(content: '["123", "456"]') } - - it "should have an array of the json content" do - expect(document.excerpts).to eq(%w[123 456]) - end - end - end end