Skip to content

Commit

Permalink
Limit the date range for date dimension ETL
Browse files Browse the repository at this point in the history
If a user entered a date for an event that was far in the past or future,
it would cause the ETL process to create a large number of records
unnecessarily. Therefore, the date range is being limited in order to
prevent this problem from occurring.
  • Loading branch information
anthonycrumley committed Oct 30, 2018
1 parent 3f5dc7a commit 296a3fd
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
26 changes: 21 additions & 5 deletions app/analytics/etl/dimensions/date.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
class Etl::Dimensions::Date
def self.run
load_date_range
def self.run(zero_day: default_zero_day, future_window: default_future_window)
load_date_range zero_day, future_window
load_null_date
end

Expand All @@ -22,13 +22,21 @@ def self.day_number_in_school_year(date)
(date - first_day_of_school(date)).to_i + 1
end

# Default value for the `future_window` upper bound: the calendar date one
# year after the moment this method is called.
def self.default_future_window
  one_year_out = 1.year.from_now
  one_year_out.to_date
end

# Default value for the `zero_day` lower bound: January 1, 2015.
def self.default_zero_day
  Date.civil(2015, 1, 1)
end

# Returns July 1 of the year reported by `school_year_number(date)`, built by
# parsing a "<year>-07-01" string into a date.
def self.first_day_of_school(date)
  "#{school_year_number(date)}-07-01".to_date
end

def self.load_date_range
date = NetworkEvent.minimum(:scheduled_at).to_date
max_date = NetworkEvent.maximum(:scheduled_at).to_date
def self.load_date_range(zero_day, future_window)
date = minimum_date(zero_day)
max_date = maximum_date(future_window)
while date <= max_date do
attributes = {
date: date.to_date,
Expand Down Expand Up @@ -93,6 +101,14 @@ def self.load_null_date
persist_date attributes

end

# Upper bound of the date range to load: the earlier of `future_window` and
# the date of the latest `scheduled_at` among network events.
#
# NOTE(review): if the query yields nil (presumably when there are no events),
# `.to_date` will raise here -- confirm callers guarantee data exists.
def self.maximum_date(future_window)
  latest_event_date = NetworkEvent.maximum(:scheduled_at).to_date
  [future_window, latest_event_date].min
end

# Lower bound of the date range to load: the later of `zero_day` and the date
# of the earliest `scheduled_at` among network events.
#
# NOTE(review): if the query yields nil (presumably when there are no events),
# `.to_date` will raise here -- confirm callers guarantee data exists.
def self.minimum_date(zero_day)
  earliest_event_date = NetworkEvent.minimum(:scheduled_at).to_date
  [zero_day, earliest_event_date].max
end

def self.persist_date(attributes)
if DateDimension.where(date: attributes[:date]).exists?
Expand Down
22 changes: 22 additions & 0 deletions test/etl/dimensions/date_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,28 @@ def setup
assert_equal days, DateDimension.where.not(date: nil).count
end

# Deletes the earliest dated row, re-runs the ETL with `zero_day` set to the
# following day, and checks that the earliest dated row is now that zero day.
test 'No date is created before zero day' do
  dated_rows = DateDimension.where.not(date: nil)
  earliest = dated_rows.order(:date).first
  earliest.delete

  cutoff = earliest.date + 1.day
  Etl::Dimensions::Date.run zero_day: cutoff

  earliest_after_run = dated_rows.order(:date).first
  assert_equal cutoff, earliest_after_run.date
end

# Deletes the latest dated row, re-runs the ETL with `future_window` set to
# the preceding day, and checks that the latest dated row is now that day.
test 'No date is created beyond the future window' do
  dated_rows = DateDimension.where.not(date: nil)
  latest = dated_rows.order(date: :desc).first
  latest.delete

  cutoff = latest.date - 1.day
  Etl::Dimensions::Date.run future_window: cutoff

  latest_after_run = dated_rows.order(date: :desc).first
  assert_equal cutoff, latest_after_run.date
end

# Expects exactly one DateDimension row whose date is nil.
test 'A date is created for a missing date' do
  null_date_rows = DateDimension.where(date: nil)
  assert_equal 1, null_date_rows.count
end
Expand Down

0 comments on commit 296a3fd

Please sign in to comment.