Skip to content

Commit

Permalink
fix: Better JSON schema for embedding (#418)
Browse files Browse the repository at this point in the history
  • Loading branch information
candiduslynx authored Jan 31, 2024
1 parent 6ac85a6 commit f5dfada
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 203 deletions.
110 changes: 110 additions & 0 deletions schema.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package filetypes

import (
"reflect"

cqjsonschema "github.com/cloudquery/codegen/jsonschema"
"github.com/cloudquery/filetypes/v4/csv"
jsonfile "github.com/cloudquery/filetypes/v4/json"
"github.com/cloudquery/filetypes/v4/parquet"
"github.com/invopop/jsonschema"
orderedmap "github.com/wk8/go-ordered-map/v2"
)

// JSONSchemaOptions returns the reflector options that should be supplied when
// generating a JSON schema involving FileSpec, so that the nested format spec
// types (CSV, JSON, Parquet) are reflected alongside it.
//
// The single returned option installs an AdditionalFields hook on the
// reflector. For FileSpec the hook reports three synthetic fields (CSVSpec,
// JSONSpec, ParquetSpec); for every other type it defers to whatever hook was
// installed before, if any.
func (FileSpec) JSONSchemaOptions() []cqjsonschema.Option {
	target := reflect.TypeOf(FileSpec{})

	// nestedSpecs yields the synthetic fields for FileSpec and nil for any other type.
	nestedSpecs := func(t reflect.Type) []reflect.StructField {
		if t == target {
			return reflect.VisibleFields(reflect.TypeOf(struct {
				CSVSpec     csv.Spec
				JSONSpec    jsonfile.Spec
				ParquetSpec parquet.Spec
			}{}))
		}
		return nil
	}

	register := func(r *jsonschema.Reflector) {
		prev := r.AdditionalFields
		if prev == nil {
			// Nothing to chain to: our hook is the only one.
			r.AdditionalFields = nestedSpecs
			return
		}
		// Preserve the previously installed hook for all types we do not handle.
		r.AdditionalFields = func(t reflect.Type) []reflect.StructField {
			extra := nestedSpecs(t)
			if len(extra) == 0 {
				return prev(t)
			}
			return extra
		}
	}

	return []cqjsonschema.Option{register}
}

// JSONSchemaExtend post-processes the reflected FileSpec schema.
//
// It performs two steps:
//
//  1. The synthetic CSVSpec, JSONSpec and ParquetSpec properties are removed
//     and their references are reused to describe a single "format_spec"
//     property that accepts any of the three specs or null.
//  2. A oneOf constraint is added that ties each "format" constant to the
//     matching "format_spec" reference (or null).
//
// If the schema already carries a oneOf (which may happen when FileSpec is
// embedded), the existing oneOf and the new one are both moved under allOf.
// Any allOf entries already present on the schema are kept.
//
// NOTE(review): the three property lookups below dereference the result
// directly, so the synthetic fields declared in JSONSchemaOptions are expected
// to be present on sc; presumably this panics otherwise — confirm against the
// ordered-map Value semantics.
func (FileSpec) JSONSchemaExtend(sc *jsonschema.Schema) {
	// Capture the references before dropping the synthetic properties.
	refCSVSpec := sc.Properties.Value("CSVSpec").Ref
	refJSONSpec := sc.Properties.Value("JSONSpec").Ref
	refParquetSpec := sc.Properties.Value("ParquetSpec").Ref
	sc.Properties.Delete("CSVSpec")
	sc.Properties.Delete("JSONSpec")
	sc.Properties.Delete("ParquetSpec")

	// nullable wraps alt into oneOf{alt, null}.
	nullable := func(alt *jsonschema.Schema) *jsonschema.Schema {
		return &jsonschema.Schema{
			OneOf: []*jsonschema.Schema{alt, {Type: "null"}},
		}
	}

	sc.Properties.Set("format_spec", nullable(&jsonschema.Schema{
		AnyOf: []*jsonschema.Schema{
			{Ref: refCSVSpec},
			{Ref: refJSONSpec},
			{Ref: refParquetSpec},
		},
	}))

	// formatProps builds the property set that pins a format constant to the
	// spec reference allowed for it.
	formatProps := func(format any, ref string) *orderedmap.OrderedMap[string, *jsonschema.Schema] {
		properties := jsonschema.NewProperties()
		properties.Set("format", &jsonschema.Schema{Type: "string", Const: format})
		properties.Set("format_spec", nullable(&jsonschema.Schema{Ref: ref}))
		return properties
	}

	// Enforce format -> specific spec type.
	formatSpecOneOf := []*jsonschema.Schema{
		{Properties: formatProps(FormatTypeCSV, refCSVSpec)},
		{Properties: formatProps(FormatTypeJSON, refJSONSpec)},
		{Properties: formatProps(FormatTypeParquet, refParquetSpec)},
	}

	if sc.OneOf == nil {
		sc.OneOf = formatSpecOneOf
		return
	}

	// A oneOf is already present (may happen when embedding), so both
	// constraints have to hold: all_of{{one_of},{one_of}}. Append instead of
	// assigning so that allOf entries already on the schema are not discarded.
	sc.AllOf = append(sc.AllOf,
		&jsonschema.Schema{OneOf: sc.OneOf},
		&jsonschema.Schema{OneOf: formatSpecOneOf},
	)
	sc.OneOf = nil
}
71 changes: 34 additions & 37 deletions schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,6 @@
"$ref": "#/$defs/FileSpec",
"$defs": {
"FileSpec": {
"$id": "/schemas/FileSpec",
"$defs": {
"CSVSpec": {
"properties": {
"skip_header": {
"type": "boolean",
"description": "Specifies if the first line of a file should be the header.",
"default": false
},
"delimiter": {
"type": "string",
"pattern": "^.$",
"description": "Character that will be used as the delimiter.",
"default": ","
}
},
"additionalProperties": false,
"type": "object",
"description": "CloudQuery CSV file output spec."
},
"JSONSpec": {
"additionalProperties": false,
"type": "object",
"description": "CloudQuery JSON file output spec."
},
"ParquetSpec": {
"additionalProperties": false,
"type": "object",
"description": "CloudQuery Parquet file output spec."
}
},
"oneOf": [
{
"properties": {
Expand All @@ -45,7 +14,7 @@
"format_spec": {
"oneOf": [
{
"$ref": "#/$defs/CSVSpec"
"$ref": "#/$defs/Spec"
},
{
"type": "null"
Expand All @@ -63,7 +32,7 @@
"format_spec": {
"oneOf": [
{
"$ref": "#/$defs/JSONSpec"
"$ref": "#/$defs/Spec-1"
},
{
"type": "null"
Expand All @@ -81,7 +50,7 @@
"format_spec": {
"oneOf": [
{
"$ref": "#/$defs/ParquetSpec"
"$ref": "#/$defs/Spec-2"
},
{
"type": "null"
Expand All @@ -106,13 +75,13 @@
{
"anyOf": [
{
"$ref": "#/$defs/CSVSpec"
"$ref": "#/$defs/Spec"
},
{
"$ref": "#/$defs/JSONSpec"
"$ref": "#/$defs/Spec-1"
},
{
"$ref": "#/$defs/ParquetSpec"
"$ref": "#/$defs/Spec-2"
}
]
},
Expand All @@ -135,6 +104,34 @@
"required": [
"format"
]
},
"Spec": {
"properties": {
"skip_header": {
"type": "boolean",
"description": "Specifies if the first line of a file should be the header.",
"default": false
},
"delimiter": {
"type": "string",
"pattern": "^.$",
"description": "Character that will be used as the delimiter.",
"default": ","
}
},
"additionalProperties": false,
"type": "object",
"description": "CloudQuery CSV file output spec."
},
"Spec-1": {
"additionalProperties": false,
"type": "object",
"description": "CloudQuery JSON file output spec."
},
"Spec-2": {
"additionalProperties": false,
"type": "object",
"description": "CloudQuery Parquet file output spec."
}
}
}
91 changes: 91 additions & 0 deletions schema_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package filetypes

import (
"testing"

"github.com/cloudquery/codegen/jsonschema"
"github.com/stretchr/testify/require"
)

// TestFileSpec_JSONSchemaExtend generates the FileSpec JSON schema using the
// options from JSONSchemaOptions and validates a table of spec documents
// against it. Cases with Err set are expected to be rejected by the schema.
func TestFileSpec_JSONSchemaExtend(t *testing.T) {
	generated, err := jsonschema.Generate(FileSpec{}, FileSpec{}.JSONSchemaOptions()...)
	require.NoError(t, err)

	cases := []jsonschema.TestCase{
		// Invalid or missing "format" values.
		{Name: "empty", Err: true, Spec: `{}`}, // missing format
		{Name: "empty format", Err: true, Spec: `{"format":""}`},
		{Name: "null format", Err: true, Spec: `{"format":null}`},
		{Name: "bad format", Err: true, Spec: `{"format":123}`},
		{Name: "bad format value", Err: true, Spec: `{"format":"abc"}`},

		// CSV.
		{Name: "csv format", Spec: `{"format":"csv"}`},
		{Name: "csv format + empty format_spec", Spec: `{"format":"csv","format_spec":{}}`},
		{Name: "csv format + null format_spec", Spec: `{"format":"csv","format_spec":null}`},
		{Name: "csv format + csv format_spec", Spec: `{"format":"csv","format_spec":{"skip_header": true, "delimiter":","}}`},

		// JSON.
		{Name: "json format", Spec: `{"format":"json"}`},
		{Name: "json format + empty format_spec", Spec: `{"format":"json","format_spec":{}}`},
		{Name: "json format + null format_spec", Spec: `{"format":"json","format_spec":null}`},
		{Name: "json format + csv format_spec", Err: true, Spec: `{"format":"json","format_spec":{"skip_header": true, "delimiter":","}}`},

		// Parquet.
		{Name: "parquet format", Spec: `{"format":"parquet"}`},
		{Name: "parquet format + empty format_spec", Spec: `{"format":"parquet","format_spec":{}}`},
		{Name: "parquet format + null format_spec", Spec: `{"format":"parquet","format_spec":null}`},
		{Name: "parquet format + csv format_spec", Err: true, Spec: `{"format":"parquet","format_spec":{"skip_header": true, "delimiter":","}}`},
	}

	jsonschema.TestJSONSchema(t, string(generated), cases)
}
4 changes: 3 additions & 1 deletion schemagen/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ import (
func main() {
fmt.Println("Generating JSON schema for plugin spec")
jsonschema.GenerateIntoFile(new(filetypes.FileSpec), path.Join(currDir(), "..", "schema.json"),
jsonschema.WithAddGoComments("github.com/cloudquery/filetypes/v4", path.Join(currDir(), "..")),
append(filetypes.FileSpec{}.JSONSchemaOptions(),
jsonschema.WithAddGoComments("github.com/cloudquery/filetypes/v4", path.Join(currDir(), "..")),
)...,
)
}

Expand Down
Loading

0 comments on commit f5dfada

Please sign in to comment.