Skip to content

Commit

Permalink
feat(bigquery): add interval support (#5907)
Browse files Browse the repository at this point in the history
* feat(bigquery): add a new IntervalValue representation
  • Loading branch information
shollyman authored May 3, 2022
1 parent 1c145f4 commit 9e979c9
Show file tree
Hide file tree
Showing 7 changed files with 540 additions and 6 deletions.
321 changes: 321 additions & 0 deletions bigquery/intervalvalue.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bigquery

import (
"bytes"
"fmt"
"strconv"
"time"
)

// IntervalValue is a go type for representing BigQuery INTERVAL values.
// Intervals are represented using three distinct parts:
// * Years and Months
// * Days
// * Time (Hours/Mins/Seconds/Fractional Seconds).
//
// More information about BigQuery INTERVAL types can be found at:
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
//
// IntervalValue is EXPERIMENTAL and subject to change or removal without notice.
type IntervalValue struct {
// In canonical form, Years and Months share a consistent sign and reduced
// to avoid large month values.
Years int32
Months int32

// In canonical form, Days are independent of the other parts and can have it's
// own sign. There is no attempt to reduce larger Day values into the Y-M part.
Days int32

// In canonical form, the time parts all share a consistent sign and are reduced.
Hours int32
Minutes int32
Seconds int32
// This represents the fractional seconds as nanoseconds.
SubSecondNanos int32
}

// String returns string representation of the interval value using the canonical format.
// The canonical format is as follows:
//
// [sign]Y-M [sign]D [sign]H:M:S[.F]
func (iv *IntervalValue) String() string {
// Don't canonicalize the current value. Instead, if it's not canonical,
// compute the canonical form and use that.
src := iv
if !iv.IsCanonical() {
src = iv.Canonicalize()
}
out := fmt.Sprintf("%d-%d %d %d:%d:%d", src.Years, int32abs(src.Months), src.Days, src.Hours, int32abs(src.Minutes), int32abs(src.Seconds))
if src.SubSecondNanos != 0 {
mantStr := fmt.Sprintf("%09d", src.SubSecondNanos)
for len(mantStr) > 0 && mantStr[len(mantStr)-1:] == "0" {
mantStr = mantStr[0 : len(mantStr)-1]
}
out = fmt.Sprintf("%s.%s", out, mantStr)
}
return out
}

// intervalPart is used for parsing string representations.
type intervalPart int

const (
yearsPart = iota
monthsPart
daysPart
hoursPart
minutesPart
secondsPart
subsecsPart
)

func (i intervalPart) String() string {
knownParts := []string{"YEARS", "MONTHS", "DAYS", "HOURS", "MINUTES", "SECONDS", "SUBSECONDS"}
if i < 0 || int(i) > len(knownParts) {
return fmt.Sprintf("UNKNOWN(%d)", i)
}
return knownParts[i]
}

// canonicalParts indicates the parse order for canonical format.
var canonicalParts = []intervalPart{yearsPart, monthsPart, daysPart, hoursPart, minutesPart, secondsPart, subsecsPart}

// ParseInterval parses an interval in canonical string format and returns the IntervalValue it represents.
func ParseInterval(value string) (*IntervalValue, error) {
iVal := &IntervalValue{}
for _, part := range canonicalParts {
remaining, v, err := getPartValue(part, value)
if err != nil {
return nil, err
}
switch part {
case yearsPart:
iVal.Years = v
case monthsPart:
iVal.Months = v
if iVal.Years < 0 {
iVal.Months = -v
}
case daysPart:
iVal.Days = v
case hoursPart:
iVal.Hours = v
case minutesPart:
iVal.Minutes = v
if iVal.Hours < 0 {
iVal.Minutes = -v
}
case secondsPart:
iVal.Seconds = v
if iVal.Hours < 0 {
iVal.Seconds = -v
}
case subsecsPart:
iVal.SubSecondNanos = v
if iVal.Hours < 0 {
iVal.SubSecondNanos = -v
}
default:
return nil, fmt.Errorf("encountered invalid part %s during parse", part)
}
value = remaining
}
return iVal, nil
}

func getPartValue(part intervalPart, s string) (string, int32, error) {
s = trimPrefix(part, s)
return getNumVal(part, s)
}

// trimPrefix removes formatting prefix relevant to the given type.
func trimPrefix(part intervalPart, s string) string {
var trimByte byte
switch part {
case yearsPart, daysPart, hoursPart:
trimByte = byte(' ')
case monthsPart:
trimByte = byte('-')
case minutesPart, secondsPart:
trimByte = byte(':')
case subsecsPart:
trimByte = byte('.')
}
for len(s) > 0 && s[0] == trimByte {
s = s[1:]
}
return s
}

func getNumVal(part intervalPart, s string) (string, int32, error) {

allowedVals := []byte("0123456789")
var allowedSign bool
captured := ""
switch part {
case yearsPart, daysPart, hoursPart:
allowedSign = true
}
// capture sign prefix +/-
if len(s) > 0 && allowedSign {
switch s[0] {
case '-':
captured = "-"
s = s[1:]
case '+':
s = s[1:]
}
}
for len(s) > 0 && bytes.IndexByte(allowedVals, s[0]) >= 0 {
captured = captured + string(s[0])
s = s[1:]
}

if len(captured) == 0 {
if part == subsecsPart {
return s, 0, nil
}
return "", 0, fmt.Errorf("no value parsed for part %s", part.String())
}
// special case: subsecs is a mantissa, convert it to nanos
if part == subsecsPart {
parsed, err := strconv.ParseFloat(fmt.Sprintf("0.%s", captured), 64)
if err != nil {
return "", 0, fmt.Errorf("couldn't parse %s as %s", captured, part.String())
}
return s, int32(parsed * 1e9), nil
}
parsed, err := strconv.ParseInt(captured, 10, 32)
if err != nil {
return "", 0, fmt.Errorf("error parsing value %s for %s: %v", captured, part.String(), err)
}
return s, int32(parsed), nil
}

// IntervalValueFromDuration converts a time.Duration to an IntervalType representation.
//
// The converted duration only leverages the hours/minutes/seconds part of the interval,
// the other parts representing days, months, and years are not used.
func IntervalValueFromDuration(in time.Duration) *IntervalValue {
nanos := in.Nanoseconds()
out := &IntervalValue{}
out.Hours = int32(nanos / 3600 / 1e9)
nanos = nanos - (int64(out.Hours) * 3600 * 1e9)
out.Minutes = int32(nanos / 60 / 1e9)
nanos = nanos - (int64(out.Minutes) * 60 * 1e9)
out.Seconds = int32(nanos / 1e9)
nanos = nanos - (int64(out.Seconds) * 1e9)
out.SubSecondNanos = int32(nanos)
return out
}

// ToDuration converts an interval to a time.Duration value.
//
// For the purposes of conversion:
// Years are normalized to 12 months.
// Months are normalized to 30 days.
// Days are normalized to 24 hours.
func (iv *IntervalValue) ToDuration() time.Duration {
var accum int64
accum = 12*int64(iv.Years) + int64(iv.Months)
// widen to days
accum = accum*30 + int64(iv.Days)
// hours
accum = accum*24 + int64(iv.Hours)
// minutes
accum = accum*60 + int64(iv.Minutes)
// seconds
accum = accum*60 + int64(iv.Seconds)
// subsecs
accum = accum*1e9 + int64(iv.SubSecondNanos*1e9)
return time.Duration(accum)
}

// Canonicalize returns an IntervalValue where signs for elements in the
// Y-M and H:M:S.F are consistent and values are normalized/reduced.
//
// Canonical form enables more consistent comparison of the encoded
// interval. For example, encoding an interval with 12 months is equivalent
// to an interval of 1 year.
func (iv *IntervalValue) Canonicalize() *IntervalValue {
newIV := &IntervalValue{iv.Years, iv.Months, iv.Days, iv.Hours, iv.Minutes, iv.Seconds, iv.SubSecondNanos}
// canonicalize Y-M part
totalMonths := iv.Years*12 + iv.Months
newIV.Years = totalMonths / 12
totalMonths = totalMonths - (newIV.Years * 12)
newIV.Months = totalMonths % 12

// No canonicalization for the Days part.

// canonicalize time part by switching to Nanos.
totalNanos := int64(iv.Hours)*3600*1e9 +
int64(iv.Minutes)*60*1e9 +
int64(iv.Seconds)*1e9 +
int64(iv.SubSecondNanos)

// Reduce to parts.
newIV.Hours = int32(totalNanos / 60 / 60 / 1e9)
totalNanos = totalNanos - (int64(newIV.Hours) * 3600 * 1e9)
newIV.Minutes = int32(totalNanos / 60 / 1e9)
totalNanos = totalNanos - (int64(newIV.Minutes) * 60 * 1e9)
newIV.Seconds = int32(totalNanos / 1e9)
totalNanos = totalNanos - (int64(newIV.Seconds) * 1e9)
newIV.SubSecondNanos = int32(totalNanos)
return newIV
}

// IsCanonical evaluates whether the current representation is in canonical
// form.
func (iv *IntervalValue) IsCanonical() bool {
if !sameSign(iv.Years, iv.Months) ||
!sameSign(iv.Hours, iv.Minutes) {
return false
}
// We allow large days and hours values, because they are within different parts.
if int32abs(iv.Months) > 12 ||
int32abs(iv.Minutes) > 60 ||
int32abs(iv.Seconds) > 60 ||
int32abs(iv.SubSecondNanos) > 1e9 {
return false
}
// TODO: We don't currently validate that each part represents value smaller than 10k years.
return true
}

func int32abs(x int32) int32 {
if x < 0 {
return -x
}
return x
}

func sameSign(nums ...int32) bool {
var pos, neg int
for _, n := range nums {
if n > 0 {
pos = pos + 1
}
if n < 0 {
neg = neg + 1
}
}
if pos > 0 && neg > 0 {
return false
}
return true
}
Loading

0 comments on commit 9e979c9

Please sign in to comment.