Skip to content

Commit

Permalink
Use member_ids_ssim instead of file_set_ids_ssim
Browse files Browse the repository at this point in the history
file_set_ids_ssim has always been a verbatim copy of member_ids_ssim, and in works with many
members it can become an awkwardly large piece of duplicated data in solr.
PcdmObjectIndexer does not duplicate this field, but various systems expect it to be available.
This change modifies those systems to use member_ids_ssim. Filtering by has_model_ssim is added
where needed to filter collections from results.
  • Loading branch information
dlpierce committed Dec 7, 2023
1 parent 64f1a6f commit 7108409
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def show
@pageviews = Hyrax::Analytics.daily_events_for_id(@document.id, 'work-view')
@uniques = Hyrax::Analytics.unique_visitors_for_id(@document.id)
@downloads = Hyrax::Analytics.daily_events_for_id(@document.id, 'file_set_in_work_download')
@files = paginate(@document._source["file_set_ids_ssim"], rows: 5)
@files = paginate(@document._source["member_ids_ssim"], rows: 5)
respond_to do |format|
format.html
format.csv { export_data }
Expand Down
1 change: 1 addition & 0 deletions app/indexers/hyrax/work_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def generate_solr_document
# This enables us to return a Work when we have a FileSet that matches
# the search query. While at the same time allowing us not to return Collections
# when a work in the collection matches the query.
# NOTE: file_set_ids_ssim is not indexed for valkyrie resources
solr_doc['file_set_ids_ssim'] = solr_doc['member_ids_ssim']
solr_doc['visibility_ssi'] = object.visibility

Expand Down
4 changes: 2 additions & 2 deletions app/search_builders/hyrax/catalog_search_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def dismax_query
"{!dismax v=$user_query}"
end

# join from file id to work relationship solrized file_set_ids_ssim
# join from file id to work relationship solrized member_ids_ssim
def join_for_works_from_files
"{!join from=#{Hyrax.config.id_field} to=file_set_ids_ssim}#{dismax_query}"
"{!join from=#{Hyrax.config.id_field} to=member_ids_ssim}{!terms f=has_model_ssim}FileSet,Hyrax::FileSet#{dismax_query}"
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class MemberWithFilesSearchBuilder < ::SearchBuilder
# This is like include_collection_ids, but it also joins the files.
def include_contained_files(solr_parameters)
solr_parameters[:fq] ||= []
solr_parameters[:fq] << "{!join from=file_set_ids_ssim to=id}{!join from=child_object_ids_ssim to=id}id:#{collection_id}"
solr_parameters[:fq] << "{!join from=member_ids_ssim to=id}{!join from=child_object_ids_ssim to=id}id:#{collection_id}"
end

# include filters into the query to only include the collection memebers
Expand Down
4 changes: 2 additions & 2 deletions app/services/hyrax/admin_set_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ def builder(access)
def count_files(admin_sets)
file_counts = Hash.new(0)
admin_sets.each do |admin_set|
query = "{!join from=file_set_ids_ssim to=id}isPartOf_ssim:#{admin_set.id}"
file_results = Hyrax::SolrService.get(fq: [query, "has_model_ssim:FileSet"], rows: 0)
query = "{!join from=member_ids_ssim to=id}isPartOf_ssim:#{admin_set.id}"
file_results = Hyrax::SolrService.get(fq: [query, "{!terms f=has_model_ssim}FileSet,Hyrax::FileSet"], rows: 0)
file_counts[admin_set.id] = file_results['response']['numFound']
end
file_counts
Expand Down
12 changes: 6 additions & 6 deletions spec/controllers/catalog_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,20 @@
end
let(:work1) do
{ has_model_ssim: ["GenericWork"], id: "ff365c76z", title_tesim: ["me too"],
file_set_ids_ssim: ["ff365c78h", "ff365c79s"], read_access_group_ssim: ["public"],
member_ids_ssim: ["ff365c78h", "ff365c79s"], read_access_group_ssim: ["public"],
edit_access_person_ssim: ["user1@example.com"] }
end
let(:work2) do
{ has_model_ssim: ["GenericWork"], id: "ff365c777", title_tesim: ["find me"],
file_set_ids_ssim: [], read_access_group_ssim: ["public"], edit_access_person_ssim: ["user2@example.com"] }
member_ids_ssim: [], read_access_group_ssim: ["public"], edit_access_person_ssim: ["user2@example.com"] }
end
let(:file1) do
{ has_model_ssim: ["FileSet"], id: "ff365c78h", title_tesim: ["find me"],
file_set_ids_ssim: [], edit_access_person_ssim: [user.user_key] }
member_ids_ssim: [], edit_access_person_ssim: [user.user_key] }
end
let(:file2) do
{ has_model_ssim: ["FileSet"], id: "ff365c79s", title_tesim: ["other file"],
file_set_ids_ssim: [], edit_access_person_ssim: [user.user_key] }
member_ids_ssim: [], edit_access_person_ssim: [user.user_key] }
end

before do
Expand Down Expand Up @@ -144,13 +144,13 @@
# NOTE: The old expected behavior was "finds a work and a work that contains a file set with a matching title".
# This is no longer the case in a Valkyrie environment. A work's child file set's metadata is no longer passed in
# to the work's SolrDocument. The only references to the containing file sets are their ids.
it "finds a work and a work that contains a file set with a matching title", pending: 'FIXME: Valkyrie indexer should do this' do
it "finds a work and a work that contains a file set with a matching title" do
get :index, params: { q: 'find me', search_field: 'all_fields' }
expect(assigns(:response).documents.map(&:id)).to contain_exactly(work1[:id], work2[:id])
end

# NOTE: The same logic in the above comment applies here.
it "finds a work that contains a file set with a matching title", pending: 'FIXME: Valkyrie indexer should do this' do
it "finds a work that contains a file set with a matching title" do
get :index, params: { q: 'other file', search_field: 'all_fields' }
expect(assigns(:response).documents.map(&:id)).to contain_exactly(work1[:id])
end
Expand Down
18 changes: 9 additions & 9 deletions spec/features/admin_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
RSpec.describe "The admin dashboard", :clean_repo do
let(:user) { create :admin }
let(:admin_set_1) do
create(:admin_set, title: ["First Admin Set"],
description: ["A description"],
edit_users: [user.user_key])
valkyrie_create(:hyrax_admin_set, title: ["First Admin Set"],
description: ["A description"],
edit_users: [user])
end
let(:admin_set_2) do
create(:admin_set, title: ["Second Admin Set"],
description: ["A description"],
edit_users: [user.user_key])
valkyrie_create(:hyrax_admin_set, title: ["Second Admin Set"],
description: ["A description"],
edit_users: [user])
end

before do
create(:work_with_files, title: ["Work A"], admin_set_id: admin_set_1.id, edit_users: [user])
create(:work_with_one_file, title: ["Work B"], admin_set_id: admin_set_2.id, edit_users: [user])
create(:work_with_file_and_work, title: ["Work C"], admin_set_id: admin_set_2.id, edit_users: [user])
valkyrie_create(:hyrax_work, :with_member_file_sets, title: ["Work A"], admin_set_id: admin_set_1.id, edit_users: [user])
valkyrie_create(:hyrax_work, :with_one_file_set, title: ["Work B"], admin_set_id: admin_set_2.id, edit_users: [user])
valkyrie_create(:hyrax_work, :with_file_and_work, title: ["Work C"], admin_set_id: admin_set_2.id, edit_users: [user])
end

it 'renders the counts of Works and Files in all AdminSets' do
Expand Down
2 changes: 1 addition & 1 deletion spec/search_builders/hyrax/catalog_search_builder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
it "creates a valid solr join for works and files" do
subject
expect(solr_params[:user_query]).to eq user_query
expect(solr_params[:q]).to eq "{!lucene}_query_:\"{!dismax v=$user_query}\" _query_:\"{!join from=id to=file_set_ids_ssim}{!dismax v=$user_query}\""
expect(solr_params[:q]).to eq '{!lucene}_query_:"{!dismax v=$user_query}" _query_:"{!join from=id to=member_ids_ssim}{!terms f=has_model_ssim}FileSet,Hyrax::FileSet{!dismax v=$user_query}"'
end
end

Expand Down
36 changes: 18 additions & 18 deletions spec/services/hyrax/admin_set_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,38 +132,38 @@
end

context "when there are works and files in the admin set" do
let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_1', 'file_3', 'file_4'] } }
let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: ['file_2'] } }
let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: ['file_6', 'file_7'] } }
let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_3', file_set_ids_ssim: ['file_8'] } }
let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_1', 'file_3', 'file_4'] } }
let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', member_ids_ssim: ['file_2'] } }
let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', member_ids_ssim: ['file_6', 'file_7'] } }
let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_3', member_ids_ssim: ['file_8'] } }

it "returns rows with document in the first column, count of works in second column and count of files in the third column" do
expect(subject).to eq [struct.new(admin_sets[0], 1, 3), struct.new(admin_sets[1], 2, 3), struct.new(admin_sets[2], 1, 1)]
end
end

context "when there are no files in the admin set" do
let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: [] } }
let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: [] } }
let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', file_set_ids_ssim: [] } }
let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', member_ids_ssim: [] } }
let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_2', member_ids_ssim: [] } }
let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_2', member_ids_ssim: [] } }

it "returns rows with document in the first column, count of works in second column and count of no files in the third column" do
expect(subject).to eq [struct.new(admin_sets[0], 1, 0), struct.new(admin_sets[1], 2, 0), struct.new(admin_sets[2], 0, 0)]
end
end

context "when there are more than 10 works in the admin set" do
let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_1'] } }
let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_2'] } }
let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_3'] } }
let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_4'] } }
let(:work5_attrs) { { id: 'work_5', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_5'] } }
let(:work6_attrs) { { id: 'work_6', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_6'] } }
let(:work7_attrs) { { id: 'work_7', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_7'] } }
let(:work8_attrs) { { id: 'work_8', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_8'] } }
let(:work9_attrs) { { id: 'work_9', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_9'] } }
let(:work10_attrs) { { id: 'work_10', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_10'] } }
let(:work11_attrs) { { id: 'work_11', isPartOf_ssim: 'admin_set_1', file_set_ids_ssim: ['file_11'] } }
let(:work1_attrs) { { id: 'work_1', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_1'] } }
let(:work2_attrs) { { id: 'work_2', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_2'] } }
let(:work3_attrs) { { id: 'work_3', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_3'] } }
let(:work4_attrs) { { id: 'work_4', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_4'] } }
let(:work5_attrs) { { id: 'work_5', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_5'] } }
let(:work6_attrs) { { id: 'work_6', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_6'] } }
let(:work7_attrs) { { id: 'work_7', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_7'] } }
let(:work8_attrs) { { id: 'work_8', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_8'] } }
let(:work9_attrs) { { id: 'work_9', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_9'] } }
let(:work10_attrs) { { id: 'work_10', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_10'] } }
let(:work11_attrs) { { id: 'work_11', isPartOf_ssim: 'admin_set_1', member_ids_ssim: ['file_11'] } }

it "returns rows with document in the first column, count of works in second column and count of files in the third column" do
expect(subject).to eq [struct.new(admin_sets[0], 11, 11), struct.new(admin_sets[1], 0, 0), struct.new(admin_sets[2], 0, 0)]
Expand Down

0 comments on commit 7108409

Please sign in to comment.