Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 57 additions & 10 deletions go/adbc/driver/bigquery/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,19 @@ func sanitizeDataset(value string) (string, error) {
}
}

// encodeJson encodes v as a JSON string. It returns "" if v is empty or if
// encoding fails.
func encodeJson[S ~[]E | ~map[string]E, E any](v S) string {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zeroshade are you happy with this?

Copy link
Contributor Author

@serramatutu serramatutu Sep 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not happy with this, but I'm not good enough at Go to know a better way to ensure v is JSON-encodable and can be used with len(v). Happy to change if anyone has better ideas.

This at least compiles tho :P

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just:

encoded, _ := json.Marshal(v)
return string(encoded)

which should return either the encoded value or an empty string if it can't be encoded?

Copy link
Contributor Author

@serramatutu serramatutu Sep 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't preserve the current behavior:

if len(v) == 0 {
    encoded := ""
} else {
    encoded := json.Marshal(v)
}

If we just json.Marshal a value with length 0, we'll get [] (empty slice), {} (empty map), or null (nil slice or map) instead of an empty string.

We could change to simplify, but that would be a breaking change :D

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@serramatutu can you skip setting the value on metadata altogether when it's empty?

Going from

	metadata["Labels"] = encodeJson[map[string]string, string](md.Labels)

to something like this:

encodeJson[map[string]string, string](metadata, "Labels", md.Labels)

This helps reduce the bloat caused by keys in the metadata map and reduces the number of possible states to "either it exists with something or is not set at all".

ret := ""
if len(v) > 0 {
encoded, err := json.Marshal(v)
if err == nil {
ret = string(encoded)
}
}
return ret
}

func (c *connectionImpl) getTableSchemaWithFilter(ctx context.Context, catalog *string, dbSchema *string, tableName string, columnName *string) (*arrow.Schema, error) {
if catalog == nil {
catalog = &c.catalog
Expand All @@ -703,6 +716,7 @@ func (c *connectionImpl) getTableSchemaWithFilter(ctx context.Context, catalog *
metadata["Name"] = md.Name
metadata["Location"] = md.Location
metadata["Description"] = md.Description
// md.Schema: the table Schema is defined at the bottom using md.Schema
if md.MaterializedView != nil {
metadata["MaterializedView.EnableRefresh"] = strconv.FormatBool(md.MaterializedView.EnableRefresh)
metadata["MaterializedView.LastRefreshTime"] = md.MaterializedView.LastRefreshTime.Format(time.RFC3339Nano)
Expand All @@ -713,6 +727,9 @@ func (c *connectionImpl) getTableSchemaWithFilter(ctx context.Context, catalog *
metadata["MaterializedView.MaxStaleness"] = md.MaxStaleness.String()
}
}
metadata["ViewQuery"] = md.ViewQuery
metadata["UseLegacySQL"] = strconv.FormatBool(md.UseLegacySQL)
metadata["UseStandardSQL"] = strconv.FormatBool(md.UseStandardSQL)
if md.TimePartitioning != nil {
// "DAY", "HOUR", "MONTH", "YEAR"
metadata["TimePartitioning.Type"] = string(md.TimePartitioning.Type)
Expand All @@ -733,17 +750,35 @@ func (c *connectionImpl) getTableSchemaWithFilter(ctx context.Context, catalog *
metadata["RangePartitioning.Range.Interval"] = strconv.FormatInt(md.RangePartitioning.Range.Interval, 10)
}
}
if md.RequirePartitionFilter {
metadata["RequirePartitionFilter"] = strconv.FormatBool(md.RequirePartitionFilter)
metadata["RequirePartitionFilter"] = strconv.FormatBool(md.RequirePartitionFilter)
if md.Clustering != nil {
metadata["Clustering.Fields"] = encodeJson[[]string, string](md.Clustering.Fields)
}
metadata["ExpirationTime"] = md.ExpirationTime.Format(time.RFC3339Nano)
metadata["Labels"] = encodeJson[map[string]string, string](md.Labels)
// TODO: ExternalDataConfig
if md.ExternalDataConfig != nil {
metadata["ExternalDataConfig.SourceFormat"] = string(md.ExternalDataConfig.SourceFormat)
metadata["ExternalDataConfig.SourceURIs"] = encodeJson[[]string, string](md.ExternalDataConfig.SourceURIs)
// TODO: Schema
metadata["ExternalDataConfig.AutoDetect"] = strconv.FormatBool(md.ExternalDataConfig.AutoDetect)
metadata["ExternalDataConfig.Compression"] = string(md.ExternalDataConfig.Compression)
metadata["ExternalDataConfig.IgnoreUnknownValues"] = strconv.FormatBool(md.ExternalDataConfig.IgnoreUnknownValues)
metadata["ExternalDataConfig.MaxBadRecords"] = strconv.FormatInt(md.ExternalDataConfig.MaxBadRecords, 10)
// TODO: Options, do we need this? It looks like it contains the same thing as ExternalDataConfig?
if md.ExternalDataConfig.HivePartitioningOptions != nil {
metadata["ExternalDataConfig.HivePartitioningOptions.Mode"] = string(md.ExternalDataConfig.HivePartitioningOptions.Mode)
metadata["ExternalDataConfig.HivePartitioningOptions.SourceURIPrefix"] = md.ExternalDataConfig.HivePartitioningOptions.SourceURIPrefix
metadata["ExternalDataConfig.HivePartitioningOptions.RequirePartitionFilter"] = strconv.FormatBool(md.ExternalDataConfig.HivePartitioningOptions.RequirePartitionFilter)
}
metadata["ExternalDataConfig.DecimalTargetTypes"] = encodeJson[[]bigquery.DecimalTargetType, bigquery.DecimalTargetType](md.ExternalDataConfig.DecimalTargetTypes)
metadata["ExternalDataConfig.ConnectionID"] = md.ExternalDataConfig.ConnectionID
metadata["ExternalDataConfig.ReferenceFileSchemaURI"] = md.ExternalDataConfig.ReferenceFileSchemaURI
metadata["ExternalDataConfig.MetadataCacheMode"] = string(md.ExternalDataConfig.MetadataCacheMode)
}
if md.EncryptionConfig != nil {
metadata["EncryptionConfig.KMSKeyName"] = md.EncryptionConfig.KMSKeyName
}
labels := ""
if len(md.Labels) > 0 {
encodedLabel, err := json.Marshal(md.Labels)
if err == nil {
labels = string(encodedLabel)
}
}
metadata["Labels"] = labels
metadata["FullID"] = md.FullID
metadata["Type"] = string(md.Type)
metadata["CreationTime"] = md.CreationTime.Format(time.RFC3339Nano)
Expand All @@ -759,8 +794,20 @@ func (c *connectionImpl) getTableSchemaWithFilter(ctx context.Context, catalog *
metadata["CloneDefinition.BaseTableReference"] = md.CloneDefinition.BaseTableReference.FullyQualifiedName()
metadata["CloneDefinition.CloneTime"] = md.CloneDefinition.CloneTime.Format(time.RFC3339Nano)
}
if md.StreamingBuffer != nil {
metadata["StreamingBuffer.EstimatedBytes"] = strconv.FormatUint(md.StreamingBuffer.EstimatedBytes, 10)
metadata["StreamingBuffer.EstimatedRows"] = strconv.FormatUint(md.StreamingBuffer.EstimatedRows, 10)
metadata["StreamingBuffer.OldestEntryTime"] = md.StreamingBuffer.OldestEntryTime.Format(time.RFC3339Nano)
}
metadata["ETag"] = md.ETag
metadata["DefaultCollation"] = md.DefaultCollation
if md.TableConstraints != nil {
if md.TableConstraints.PrimaryKey != nil {
metadata["TableConstraints.PrimaryKey.Columns"] = encodeJson[[]string, string](md.TableConstraints.PrimaryKey.Columns)
}
// TODO: TableConstraints.ForeignKeys, how do we represent list of structs?
}
metadata["ResourceTags"] = encodeJson[map[string]string, string](md.ResourceTags)
tableMetadata := arrow.MetadataFrom(metadata)

fields := make([]arrow.Field, len(md.Schema))
Expand Down
Loading