Skip to content

Commit

Permalink
🐛 Fix date indexing to return more than YYYY
Browse files Browse the repository at this point in the history
Prior to this commit, when the date_created property was indexed, only
the year was kept.  If the input was something like "2024-01-01" then
`date_ssi` would only be "2024".  This commit indexes the full value when
it is given as YYYY-MM-DD, YYYY-MM, or YYYY; single-digit month or day
segments (e.g. "2024-1-1") are zero-padded first.  Anything other than
those three formats will be indexed as is and won't work correctly with
sorting or range queries.

Ref:
  - #973
  • Loading branch information
kirkkwang committed Mar 7, 2024
1 parent 09bf754 commit 23a8ec6
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 6 deletions.
23 changes: 17 additions & 6 deletions app/indexers/app_indexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,23 @@ def full_text(file_set_id)
end

# Indexes the first `date_created_tesim` value into `date_tesi` and `date_ssi`.
#
# Values shaped like YYYY, YYYY-MM or YYYY-MM-DD are indexed in that form.
# Hyphenated values are first passed through `pad_date_with_zero`, so that
# "2024-1-1" is indexed as "2024-01-01". Any value that still does not match
# one of the three formats is indexed exactly as it was given; such values
# will not work correctly with sorting or range queries.
#
# @param solr_doc [Hash] the Solr document being built (read and written via `[]`)
# @return [String, nil] the indexed value, or nil when there is no date to index
def add_date(solr_doc)
  original = solr_doc['date_created_tesim']&.first
  return unless original

  candidate = original.include?('-') ? pad_date_with_zero(original) : original

  # The allowed date formats are either YYYY, YYYY-MM, or YYYY-MM-DD.
  # If the (padded) value is not in one of them, index the original date
  # string untouched rather than the padded variant.
  date = candidate.match?(/\A\d{4}(?:-\d{2}(?:-\d{2})?)?\z/) ? candidate : original

  solr_doc['date_tesi'] = date
  solr_doc['date_ssi'] = date
end

# Zero-pads single-digit month and day segments of a hyphenated date string,
# e.g. "2024-1-1" becomes "2024-01-01" and "2024-1" becomes "2024-01".
#
# Only strings shaped like YYYY-M[M] or YYYY-M[M]-D[D] are rewritten. Anything
# else is returned unchanged so that callers can index the value as given
# (previously "Jan-1-2024" came back as "Jan-01-2024", and "2024-" came back
# as "2024" because `split` discards trailing empty fields).
#
# @param date_string [String] the raw date value
# @return [String] the padded date, or the input itself when it is not date-like
def pad_date_with_zero(date_string)
  return date_string unless date_string.match?(/\A\d{4}-\d{1,2}(?:-\d{1,2})?\z/)

  date_string.split('-').map { |segment| segment.rjust(2, '0') }.join('-')
end
end
58 changes: 58 additions & 0 deletions spec/indexers/app_indexer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,62 @@
expect(solr_document.fetch("account_cname_tesim")).to eq(account.cname)
end
end

# Covers the date indexing performed while generating the Solr document:
# well-formed dates are indexed as given, single-digit month/day segments
# are zero-padded, and anything else is indexed unchanged.
describe "#generate_solr_document" do
  context "when given a date with a YYYY-MM-DD format" do
    it "indexes date_ssi in YYYY-MM-DD format" do
      work.date_created = ["2024-01-01"]
      expect(solr_document.fetch("date_ssi")).to eq("2024-01-01")
    end
  end

  context "when given a date with a YYYY-MM format" do
    it "indexes date_ssi in YYYY-MM format" do
      work.date_created = ["2024-01"]
      expect(solr_document.fetch("date_ssi")).to eq("2024-01")
    end
  end

  context "when given a date with a YYYY format" do
    it "indexes date_ssi in YYYY format" do
      work.date_created = ["2024"]
      expect(solr_document.fetch("date_ssi")).to eq("2024")
    end
  end

  context "when given a date with a YYYY-M-D format" do
    it "converts the date to YYYY-MM-DD format and indexes date_ssi" do
      work.date_created = ["2024-1-1"]
      expect(solr_document.fetch("date_ssi")).to eq("2024-01-01")
    end
  end

  context "when given a date with a YYYY-M format" do
    it "converts the date to YYYY-MM format and indexes date_ssi" do
      work.date_created = ["2024-1"]
      expect(solr_document.fetch("date_ssi")).to eq("2024-01")
    end
  end

  context "when given a date with a YYYY-MM-D format" do
    it "converts the date to YYYY-MM-DD format and indexes date_ssi" do
      work.date_created = ["2024-01-1"]
      expect(solr_document.fetch("date_ssi")).to eq("2024-01-01")
    end
  end

  context "when given a date with a YYYY-M-DD format" do
    # The description previously claimed the target format was YYYY-M-DD,
    # which contradicted the expectation below.
    it "converts the date to YYYY-MM-DD format and indexes date_ssi" do
      work.date_created = ["2024-1-01"]
      expect(solr_document.fetch("date_ssi")).to eq("2024-01-01")
    end
  end

  context "when given a date with an invalid format" do
    it "indexes the given date" do
      work.date_created = ["Jan 1, 2024"]
      expect(solr_document.fetch("date_ssi")).to eq("Jan 1, 2024")
    end
  end
end
end

0 comments on commit 23a8ec6

Please sign in to comment.