From 296a3fdf4f4fad5ca89de54fa82ae28bffc4fbe2 Mon Sep 17 00:00:00 2001 From: Anthony Crumley Date: Tue, 30 Oct 2018 00:10:49 +0000 Subject: [PATCH] Limit the date range for date dimension ETL If a user entered a date for an event that was way in the past or future, it would cause the ETL process to create a large number of records unnecessarily. Therefore, the date range is being limited in order to prevent this problem from occurring. --- app/analytics/etl/dimensions/date.rb | 26 +++++++++++++++++++++----- test/etl/dimensions/date_test.rb | 22 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/app/analytics/etl/dimensions/date.rb b/app/analytics/etl/dimensions/date.rb index bee90a3..d766adc 100644 --- a/app/analytics/etl/dimensions/date.rb +++ b/app/analytics/etl/dimensions/date.rb @@ -1,6 +1,6 @@ class Etl::Dimensions::Date - def self.run - load_date_range + def self.run(zero_day: default_zero_day, future_window: default_future_window) + load_date_range zero_day, future_window load_null_date end @@ -22,13 +22,21 @@ def self.day_number_in_school_year(date) (date - first_day_of_school(date)).to_i + 1 end + def self.default_future_window + 1.year.from_now.to_date + end + + def self.default_zero_day + Date.new(2015, 1, 1) + end + def self.first_day_of_school(date) (school_year_number(date).to_s + '-07-01').to_date end - def self.load_date_range - date = NetworkEvent.minimum(:scheduled_at).to_date - max_date = NetworkEvent.maximum(:scheduled_at).to_date + def self.load_date_range(zero_day, future_window) + date = minimum_date(zero_day) + max_date = maximum_date(future_window) while date <= max_date do attributes = { date: date.to_date, @@ -93,6 +101,14 @@ def self.load_null_date persist_date attributes end + + def self.maximum_date(future_window) + [future_window, NetworkEvent.maximum(:scheduled_at).to_date].min + end + + def self.minimum_date(zero_day) + [zero_day, NetworkEvent.minimum(:scheduled_at).to_date].max + end def
self.persist_date(attributes) if DateDimension.where(date: attributes[:date]).exists? diff --git a/test/etl/dimensions/date_test.rb b/test/etl/dimensions/date_test.rb index c21c43c..b14b389 100644 --- a/test/etl/dimensions/date_test.rb +++ b/test/etl/dimensions/date_test.rb @@ -23,6 +23,28 @@ def setup assert_equal days, DateDimension.where.not(date: nil).count end + test 'No date is created before zero day' do + minimum_date = DateDimension.where.not(date: nil).order(:date).first + minimum_date.delete + + zero_day = minimum_date.date + 1.day + Etl::Dimensions::Date.run zero_day: zero_day + + new_minimum_date = DateDimension.where.not(date: nil).order(:date).first + assert_equal zero_day, new_minimum_date.date + end + + test 'No date is created beyond the future window' do + maximum_date = DateDimension.where.not(date: nil).order(date: :desc).first + maximum_date.delete + + future_window = maximum_date.date - 1.day + Etl::Dimensions::Date.run future_window: future_window + + new_maximum_date = DateDimension.where.not(date: nil).order(date: :desc).first + assert_equal future_window, new_maximum_date.date + end + test 'A date is created for a missing date' do assert_equal 1, DateDimension.where(date: nil).count end