From 7108409c619cd2ba4ae8c836b9f3b429a7e9837b Mon Sep 17 00:00:00 2001 From: Daniel Pierce Date: Wed, 6 Dec 2023 16:43:41 -0500 Subject: [PATCH] Use member_ids_ssim instead of file_set_ids_ssim file_set_ids_ssim has always been a verbatim copy of member_ids_ssim, and in works with many members it can become an awkwardly large piece of duplicated data in solr. PcdmObjectIndexer does not duplicate this field, but various systems expect it to be available. This change modifies those systems to use member_ids_ssim. Filtering by has_model_ssim is added where needed to filter collections from results. --- .../analytics/work_reports_controller.rb | 2 +- app/indexers/hyrax/work_indexer.rb | 1 + .../hyrax/catalog_search_builder.rb | 4 +-- .../hyrax/member_with_files_search_builder.rb | 2 +- app/services/hyrax/admin_set_service.rb | 4 +-- spec/controllers/catalog_controller_spec.rb | 12 +++---- spec/features/admin_spec.rb | 18 +++++----- .../hyrax/catalog_search_builder_spec.rb | 2 +- spec/services/hyrax/admin_set_service_spec.rb | 36 +++++++++---------- 9 files changed, 41 insertions(+), 40 deletions(-) diff --git a/app/controllers/hyrax/admin/analytics/work_reports_controller.rb b/app/controllers/hyrax/admin/analytics/work_reports_controller.rb index 4e5306e36b..b141edbf90 100644 --- a/app/controllers/hyrax/admin/analytics/work_reports_controller.rb +++ b/app/controllers/hyrax/admin/analytics/work_reports_controller.rb @@ -29,7 +29,7 @@ def show @pageviews = Hyrax::Analytics.daily_events_for_id(@document.id, 'work-view') @uniques = Hyrax::Analytics.unique_visitors_for_id(@document.id) @downloads = Hyrax::Analytics.daily_events_for_id(@document.id, 'file_set_in_work_download') - @files = paginate(@document._source["file_set_ids_ssim"], rows: 5) + @files = paginate(@document._source["member_ids_ssim"], rows: 5) respond_to do |format| format.html format.csv { export_data } diff --git a/app/indexers/hyrax/work_indexer.rb b/app/indexers/hyrax/work_indexer.rb index 4c072b33c6..38437fde3f 100644 --- a/app/indexers/hyrax/work_indexer.rb +++ b/app/indexers/hyrax/work_indexer.rb @@ -15,6 +15,7 @@ def generate_solr_document # This enables us to return a Work when we have a FileSet that matches # the search query. While at the same time allowing us not to return Collections # when a work in the collection matches the query. + # NOTE: file_set_ids_ssim is not indexed for valkyrie resources solr_doc['file_set_ids_ssim'] = solr_doc['member_ids_ssim'] solr_doc['visibility_ssi'] = object.visibility diff --git a/app/search_builders/hyrax/catalog_search_builder.rb b/app/search_builders/hyrax/catalog_search_builder.rb index 6966015ac7..20db6c0f27 100644 --- a/app/search_builders/hyrax/catalog_search_builder.rb +++ b/app/search_builders/hyrax/catalog_search_builder.rb @@ -78,8 +78,8 @@ def dismax_query "{!dismax v=$user_query}" end - # join from file id to work relationship solrized file_set_ids_ssim + # join from file id to work relationship solrized member_ids_ssim def join_for_works_from_files - "{!join from=#{Hyrax.config.id_field} to=file_set_ids_ssim}#{dismax_query}" + "{!join from=#{Hyrax.config.id_field} to=member_ids_ssim}{!terms f=has_model_ssim}FileSet,Hyrax::FileSet#{dismax_query}" end end diff --git a/app/search_builders/hyrax/member_with_files_search_builder.rb b/app/search_builders/hyrax/member_with_files_search_builder.rb index 74d81cbef8..ec4eed03b9 100644 --- a/app/search_builders/hyrax/member_with_files_search_builder.rb +++ b/app/search_builders/hyrax/member_with_files_search_builder.rb @@ -9,7 +9,7 @@ class MemberWithFilesSearchBuilder < ::SearchBuilder # This is like include_collection_ids, but it also joins the files. def include_contained_files(solr_parameters) solr_parameters[:fq] ||= [] - solr_parameters[:fq] << "{!join from=file_set_ids_ssim to=id}{!join from=child_object_ids_ssim to=id}id:#{collection_id}" + solr_parameters[:fq] << "{!join from=member_ids_ssim to=id}{!join from=child_object_ids_ssim to=id}id:#{collection_id}" end # include filters into the query to only include the collection memebers diff --git a/app/services/hyrax/admin_set_service.rb b/app/services/hyrax/admin_set_service.rb index 9baa6d5de7..a5c177a4a1 100644 --- a/app/services/hyrax/admin_set_service.rb +++ b/app/services/hyrax/admin_set_service.rb @@ -52,8 +52,8 @@ def builder(access) def count_files(admin_sets) file_counts = Hash.new(0) admin_sets.each do |admin_set| - query = "{!join from=file_set_ids_ssim to=id}isPartOf_ssim:#{admin_set.id}" - file_results = Hyrax::SolrService.get(fq: [query, "has_model_ssim:FileSet"], rows: 0) + query = "{!join from=member_ids_ssim to=id}isPartOf_ssim:#{admin_set.id}" + file_results = Hyrax::SolrService.get(fq: [query, "{!terms f=has_model_ssim}FileSet,Hyrax::FileSet"], rows: 0) file_counts[admin_set.id] = file_results['response']['numFound'] end file_counts diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index e90e5191bf..e59a4e86c9 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -88,20 +88,20 @@ end let(:work1) do { has_model_ssim: ["GenericWork"], id: "ff365c76z", title_tesim: ["me too"], - file_set_ids_ssim: ["ff365c78h", "ff365c79s"], read_access_group_ssim: ["public"], + member_ids_ssim: ["ff365c78h", "ff365c79s"], read_access_group_ssim: ["public"], edit_access_person_ssim: ["user1@example.com"] } end let(:work2) do { has_model_ssim: ["GenericWork"], id: "ff365c777", title_tesim: ["find me"], - file_set_ids_ssim: [], read_access_group_ssim: ["public"], edit_access_person_ssim: ["user2@example.com"] } + member_ids_ssim: [], read_access_group_ssim: ["public"], edit_access_person_ssim: ["user2@example.com"] } end let(:file1) do { has_model_ssim: ["FileSet"], id: "ff365c78h", title_tesim: ["find me"], - file_set_ids_ssim: [], edit_access_person_ssim: [user.user_key] } + member_ids_ssim: [], edit_access_person_ssim: [user.user_key] } end let(:file2) do { has_model_ssim: ["FileSet"], id: "ff365c79s", title_tesim: ["other file"], - file_set_ids_ssim: [], edit_access_person_ssim: [user.user_key] } + member_ids_ssim: [], edit_access_person_ssim: [user.user_key] } end before do @@ -144,13 +144,13 @@ # NOTE: The old expected behavior was "finds a work and a work that contains a file set with a matching title". # This is no longer the case in a Valkyrie environment. A work's child file set's metadata is no longer passed in # to the work's SolrDocument. The only references to the containing file sets are their ids. - it "finds a work and a work that contains a file set with a matching title", pending: 'FIXME: Valkyrie indexer should do this' do + it "finds a work and a work that contains a file set with a matching title" do get :index, params: { q: 'find me', search_field: 'all_fields' } expect(assigns(:response).documents.map(&:id)).to contain_exactly(work1[:id], work2[:id]) end # NOTE: The same logic in the above comment applies here. - it "finds a work that contains a file set with a matching title", pending: 'FIXME: Valkyrie indexer should do this' do + it "finds a work that contains a file set with a matching title" do get :index, params: { q: 'other file', search_field: 'all_fields' } expect(assigns(:response).documents.map(&:id)).to contain_exactly(work1[:id]) end diff --git a/spec/features/admin_spec.rb b/spec/features/admin_spec.rb index 72c55cbe1e..78a7a40ae1 100644 --- a/spec/features/admin_spec.rb +++ b/spec/features/admin_spec.rb @@ -2,20 +2,20 @@ RSpec.describe "The admin dashboard", :clean_repo do let(:user) { create :admin } let(:admin_set_1) do - create(:admin_set, title: ["First Admin Set"], - description: ["A description"], - edit_users: [user.user_key]) + valkyrie_create(:hyrax_admin_set, title: ["First Admin Set"], + description: ["A description"], + edit_users: [user]) end let(:admin_set_2) do - create(:admin_set, title: ["Second Admin Set"], - description: ["A description"], - edit_users: [user.user_key]) + valkyrie_create(:hyrax_admin_set, title: ["Second Admin Set"], + description: ["A description"], + edit_users: [user]) end before do - create(:work_with_files, title: ["Work A"], admin_set_id: admin_set_1.id, edit_users: [user]) - create(:work_with_one_file, title: ["Work B"], admin_set_id: admin_set_2.id, edit_users: [user]) - create(:work_with_file_and_work, title: ["Work C"], admin_set_id: admin_set_2.id, edit_users: [user]) + valkyrie_create(:hyrax_work, :with_member_file_sets, title: ["Work A"], admin_set_id: admin_set_1.id, edit_users: [user]) + valkyrie_create(:hyrax_work, :with_one_file_set, title: ["Work B"], admin_set_id: admin_set_2.id, edit_users: [user]) + valkyrie_create(:hyrax_work, :with_file_and_work, title: ["Work C"], admin_set_id: admin_set_2.id, edit_users: [user]) end it 'renders the counts of Works and Files in all AdminSets' do diff --git a/spec/search_builders/hyrax/catalog_search_builder_spec.rb b/spec/search_builders/hyrax/catalog_search_builder_spec.rb index e8d2ed3892..d52b4cd982 100644 --- a/spec/search_builders/hyrax/catalog_search_builder_spec.rb +++ b/spec/search_builders/hyrax/catalog_search_builder_spec.rb @@ -13,7 +13,7 @@ it "creates a valid solr join for works and files" do subject expect(solr_params[:user_query]).to eq user_query - expect(solr_params[:q]).to eq "{!lucene}_query_:\"{!dismax v=$user_query}\" _query_:\"{!join from=id to=file_set_ids_ssim}{!dismax v=$user_query}\"" + expect(solr_params[:q]).to eq '{!lucene}_query_:"{!dismax v=$user_query}" _query_:"{!join from=id to=member_ids_ssim}{!terms f=has_model_ssim}FileSet,Hyrax::FileSet{!dismax v=$user_query}"' end end diff --git a/spec/services/hyrax/admin_set_service_spec.rb b/spec/services/hyrax/admin_set_service_spec.rb index a93fe7a441..335d755c53 100644 --- a/spec/services/hyrax/admin_set_service_spec.rb +++ b/spec/services/hyrax/admin_set_service_spec.rb @@ -132,10 +132,10 @@ end context "when there are works and files in the admin set" do - let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_1', 'file_3', 'file_4'] } } - let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: ['file_2'] } } - let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: ['file_6', 'file_7'] } } - let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_3', file_set_ids_ssim: ['file_8'] } } + let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_1', 'file_3', 'file_4'] } } + let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', member_ids_ssim: ['file_2'] } } + let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', member_ids_ssim: ['file_6', 'file_7'] } } + let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_3', member_ids_ssim: ['file_8'] } } it "returns rows with document in the first column, count of works in second column and count of files in the third column" do expect(subject).to eq [struct.new(admin_sets[0], 1, 3), struct.new(admin_sets[1], 2, 3), struct.new(admin_sets[2], 1, 1)] @@ -143,9 +143,9 @@ end context "when there are no files in the admin set" do - let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: [] } } - let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: [] } } - let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: [] } } + let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', member_ids_ssim: [] } } + let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', member_ids_ssim: [] } } + let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', member_ids_ssim: [] } } it "returns rows with document in the first column, count of works in second column and count of no files in the third column" do expect(subject).to eq [struct.new(admin_sets[0], 1, 0), struct.new(admin_sets[1], 2, 0), struct.new(admin_sets[2], 0, 0)] @@ -153,17 +153,17 @@ end context "when there are more than 10 works in the admin set" do - let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_1'] } } - let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_2'] } } - let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_3'] } } - let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_4'] } } - let(:work5_attrs) { { id: 'work_5', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_5'] } } - let(:work6_attrs) { { id: 'work_6', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_6'] } } - let(:work7_attrs) { { id: 'work_7', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_7'] } } - let(:work8_attrs) { { id: 'work_8', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_8'] } } - let(:work9_attrs) { { id: 'work_9', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_9'] } } - let(:work10_attrs) { { id: 'work_10', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_10'] } } - let(:work11_attrs) { { id: 'work_11', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_11'] } } + let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_1'] } } + let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_2'] } } + let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_3'] } } + let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_4'] } } + let(:work5_attrs) { { id: 'work_5', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_5'] } } + let(:work6_attrs) { { id: 'work_6', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_6'] } } + let(:work7_attrs) { { id: 'work_7', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_7'] } } + let(:work8_attrs) { { id: 'work_8', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_8'] } } + let(:work9_attrs) { { id: 'work_9', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_9'] } } + let(:work10_attrs) { { id: 'work_10', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_10'] } } + let(:work11_attrs) { { id: 'work_11', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_11'] } } it "returns rows with document in the first column, count of works in second column and count of files in the third column" do expect(subject).to eq [struct.new(admin_sets[0], 11, 11), struct.new(admin_sets[1], 0, 0), struct.new(admin_sets[2], 0, 0)]