Skip to content

[SPARK-26002][SQL] Fix day of year calculation for Julian calendar days #23000

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,30 @@ object DateTimeUtils {

final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L

// number of days in 400 years
// number of days in 400 years by Gregorian calendar
final val daysIn400Years: Int = 146097

// In the Julian calendar every year that is exactly divisible by 4 is a leap year without any
// exception. But in the Gregorian calendar every year that is exactly divisible by four
// is a leap year, except for years that are exactly divisible by 100, but these centurial years
// are leap years if they are exactly divisible by 400.
// So there are 3 extra days in the Julian calendar within a 400 years cycle compared to the
// Gregorian calendar.
final val extraLeapDaysIn400YearsJulian = 3

// number of days in 400 years by Julian calendar
final val daysIn400YearsInJulian: Int = daysIn400Years + extraLeapDaysIn400YearsJulian

// number of days between 1.1.1970 and 1.1.2001
final val to2001 = -11323

// the artificial year zero is year -17999 (2001 - 20000); 7304850 = 50 * daysIn400Years
final val YearZero = -17999
final val toYearZero = to2001 + 7304850

// days to year -17999 in Julian calendar
final val toYearZeroInJulian = toYearZero + 49 * extraLeapDaysIn400YearsJulian

final val TimeZoneGMT = TimeZone.getTimeZone("GMT")
final val TimeZoneUTC = TimeZone.getTimeZone("UTC")
final val MonthOf31Days = Set(1, 3, 5, 7, 8, 10, 12)
Expand Down Expand Up @@ -585,20 +601,30 @@ object DateTimeUtils {
* Return the number of days from the start of the 400 year period to the start of the
* given year.
* The second year of a 400 year period (year 1) starts on day 365.
*/
private[this] def yearBoundary(year: Int): Int = {
year * 365 + ((year / 4 ) - (year / 100) + (year / 400))
private[this] def yearBoundary(year: Int, isGregorian: Boolean): Int = {
if (isGregorian) {
year * 365 + ((year / 4) - (year / 100) + (year / 400))
} else {
year * 365 + (year / 4)
}
}

/**
* Calculates the number of years for the given number of days. This depends
* on a 400 year period.
* @param days days since the beginning of the 400 year period
* @param isGregorian indicates whether leap years should be calculated according to Gregorian
* (or Julian) calendar
* @return (number of years, day in year)
*/
private[this] def numYears(days: Int): (Int, Int) = {
private[this] def numYears(days: Int, isGregorian: Boolean): (Int, Int) = {
val year = days / 365
val boundary = yearBoundary(year)
if (days > boundary) (year, days - boundary) else (year - 1, days - yearBoundary(year - 1))
val boundary = yearBoundary(year, isGregorian)
if (days > boundary) {
(year, days - boundary)
} else {
(year - 1, days - yearBoundary(year - 1, isGregorian))
}
}

/**
Expand All @@ -609,18 +635,26 @@ object DateTimeUtils {
* equals to the period 1.1.1601 until 31.12.2000.
*/
private[this] def getYearAndDayInYear(daysSince1970: SQLDate): (Int, Int) = {
// add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999)
var daysSince1970Tmp = daysSince1970
// When the Julian calendar was replaced by the Gregorian calendar, the 10 days after
// Oct. 4, 1582 were skipped, so dates up to and including 1582-10-04
// (-141428 days since 1970-01-01) are shifted back by 10 days.
if (daysSince1970 <= -141428) {
daysSince1970Tmp -= 10
getYearAndDayInYear(daysSince1970 - 10, toYearZeroInJulian, daysIn400YearsInJulian, false)
} else {
getYearAndDayInYear(daysSince1970, toYearZero, daysIn400Years, true)
}
val daysNormalized = daysSince1970Tmp + toYearZero
}

private def getYearAndDayInYear(
daysSince1970: SQLDate,
toYearZero: SQLDate,
daysIn400Years: SQLDate,
isGregorian: Boolean): (Int, Int) = {
// add the difference (in days) between 1.1.1970 and the artificial year 0 (-17999)
val daysNormalized = daysSince1970 + toYearZero
val numOfQuarterCenturies = daysNormalized / daysIn400Years
val daysInThis400 = daysNormalized % daysIn400Years + 1
val (years, dayInYear) = numYears(daysInThis400)
val (years, dayInYear) = numYears(daysInThis400, isGregorian)
val year: Int = (2001 - 20000) + 400 * numOfQuarterCenturies + years
(year, dayInYear)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,36 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 78)
}

// Checks getYear, getMonth and getDayInYear for the century years 1000, 1100, ..., 1600.
// All of these are divisible by 4, and the test expects each of them to be a leap year.
test("SPARK-26002: correct day of year calculations for Julian calendar years") {
// NOTE(review): Calendar.getInstance() uses the default time zone and locale; presumably it
// yields a GregorianCalendar with the default Julian/Gregorian cutover -- confirm.
val c = Calendar.getInstance()
// Clear the millisecond field so every timestamp below falls on a whole second.
c.set(Calendar.MILLISECOND, 0)
(1000 to 1600 by 100).foreach { year =>
// January 1 is the 1st day of the year (Calendar months are zero-based, so 0 is January).
c.set(year, 0, 1, 0, 0, 0)
assert(getYear(getInUTCDays(c.getTimeInMillis)) === year)
assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 1)
assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 1)

// March 1 is the 61st day of a leap year. This holds even for multiples of 100 here:
// before 1582-10-04 the Julian leap year rule applies, under which every multiple of 4
// is a leap year. 1600 lies after that date but is divisible by 400, so it is a leap
// year under the Gregorian rule as well.
c.set(year, 2, 1, 0, 0, 0)
assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 61)
assert(getMonth(getInUTCDays(c.getTimeInMillis)) === 3)

// The leap day itself (February 29) is the 60th day of a leap year.
c.set(year, 1, 29, 0, 0, 0)
assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60)

// The following year is not divisible by 4, so it is not a leap year and March 1 is its
// 60th day.
c.set(year + 1, 2, 1, 0, 0, 0)
assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60)
}

// 1582 is the year of the calendar switch; it is not divisible by 4, so March 1 (which
// precedes the October switch) is the 60th day.
c.set(1582, 2, 1, 0, 0, 0)
assert(getDayInYear(getInUTCDays(c.getTimeInMillis)) === 60)
}

test("get year") {
val c = Calendar.getInstance()
c.set(2015, 2, 18, 0, 0, 0)
Expand Down
2 changes: 2 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/datetime.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ select current_date = current_date(), current_timestamp = current_timestamp(), a
select a, b from ttf2 order by a, current_date;

select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15');

select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01');
10 changes: 9 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/datetime.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 10
-- Number of queries: 11


-- !query 0
Expand Down Expand Up @@ -89,3 +89,11 @@ select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), week
struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS DATE)):int,weekday(CAST(2017-05-27 AS DATE)):int,weekday(CAST(NULL AS DATE)):int,weekday(CAST(1582-10-15 13:10:15 AS DATE)):int>
-- !query 9 output
5 3 5 NULL 4


-- !query 10
select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
-- !query 10 schema
struct<year(CAST(1500-01-01 AS DATE)):int,month(CAST(1500-01-01 AS DATE)):int,dayofyear(CAST(1500-01-01 AS DATE)):int>
-- !query 10 output
1500 1 1