Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add import reader for json #29252

Merged
merged 11 commits into from
Jan 5, 2024
19 changes: 13 additions & 6 deletions internal/storage/insert_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,12 @@
if i.Data == nil || len(i.Data) == 0 {
return 0
}

data, ok := i.Data[common.RowIDField]
if !ok {
return 0
var rowNum int
for _, data := range i.Data {
rowNum = data.RowNum()
break
}

return data.RowNum()
return rowNum
}

func (i *InsertData) GetMemorySize() int {
Expand Down Expand Up @@ -115,6 +114,14 @@
return nil
}

func (i *InsertData) GetRow(idx int) map[FieldID]interface{} {
res := make(map[FieldID]interface{})
for field, data := range i.Data {
res[field] = data.GetRow(idx)
}
return res

Check warning on line 122 in internal/storage/insert_data.go

View check run for this annotation

Codecov / codecov/patch

internal/storage/insert_data.go#L117-L122

Added lines #L117 - L122 were not covered by tests
}

// FieldData defines field data interface
type FieldData interface {
GetMemorySize() int
Expand Down
143 changes: 143 additions & 0 deletions internal/util/importutilv2/json/reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package json

import (
"encoding/json"
"fmt"
"io"
"strings"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/pkg/util/merr"
)

const (
// RowRootNode is the root key that Read requires (compared after
// lower-casing) when the JSON document is an object rather than a bare
// array; its value must be the list of rows.
RowRootNode = "rows"
)

// Row is an alias for a map keyed by storage.FieldID.
// NOTE(review): presumably one parsed row with a value per field; it is not
// referenced by the reader code visible here — confirm against RowParser.
type Row = map[storage.FieldID]any

// reader streams rows out of a JSON document in batches and converts them
// into storage.InsertData using the collection schema.
type reader struct {
// dec is the streaming JSON decoder wrapping the input io.Reader.
dec *json.Decoder
// schema is used to build both the row parser and each InsertData batch.
schema *schemapb.CollectionSchema

// bufferSize is the threshold compared against InsertData.GetMemorySize();
// Read stops filling a batch once the threshold is reached.
bufferSize int
// isOldFormat is set by Init to true when the document starts with '{'
// (rows wrapped in an object) and false when it starts with '['.
isOldFormat bool

// parser converts one decoded JSON value into a row that can be appended
// to InsertData.
parser RowParser
}

func NewReader(r io.Reader, schema *schemapb.CollectionSchema, bufferSize int) (*reader, error) {
reader := &reader{
dec: json.NewDecoder(r),
schema: schema,
bufferSize: bufferSize,
}
var err error
reader.parser, err = NewRowParser(schema)
if err != nil {
return nil, err
}

Check warning on line 56 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L55-L56

Added lines #L55 - L56 were not covered by tests
err = reader.Init()
if err != nil {
return nil, err
}

Check warning on line 60 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L59-L60

Added lines #L59 - L60 were not covered by tests
return reader, nil
}

func (j *reader) Init() error {
// Treat number value as a string instead of a float64.
// By default, json lib treat all number values as float64,
// but if an int64 value has more than 15 digits,
// the value would be incorrect after converting from float64.
j.dec.UseNumber()
t, err := j.dec.Token()
if err != nil {
return merr.WrapErrImportFailed(fmt.Sprintf("failed to decode JSON, error: %v", err))
}

Check warning on line 73 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L72-L73

Added lines #L72 - L73 were not covered by tests
if t != json.Delim('{') && t != json.Delim('[') {
return merr.WrapErrImportFailed("invalid JSON format, the content should be started with '{' or '['")
}

Check warning on line 76 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L75-L76

Added lines #L75 - L76 were not covered by tests
j.isOldFormat = t == json.Delim('{')
return nil
}

func (j *reader) Read() (*storage.InsertData, error) {
insertData, err := storage.NewInsertData(j.schema)
if err != nil {
return nil, err
}

Check warning on line 85 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L84-L85

Added lines #L84 - L85 were not covered by tests
if !j.dec.More() {
return nil, nil
}

Check warning on line 88 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L87-L88

Added lines #L87 - L88 were not covered by tests
if j.isOldFormat {
// read the key
t, err := j.dec.Token()
if err != nil {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to decode the JSON file, error: %v", err))
}
key := t.(string)
keyLower := strings.ToLower(key)
// the root key should be RowRootNode
if keyLower != RowRootNode {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("invalid JSON format, the root key should be '%s', but get '%s'", RowRootNode, key))
}

Check warning on line 100 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L90-L100

Added lines #L90 - L100 were not covered by tests

// started by '['
t, err = j.dec.Token()
if err != nil {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to decode the JSON file, error: %v", err))
}

Check warning on line 106 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L103-L106

Added lines #L103 - L106 were not covered by tests

if t != json.Delim('[') {
return nil, merr.WrapErrImportFailed("invalid JSON format, rows list should begin with '['")
}

Check warning on line 110 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L108-L110

Added lines #L108 - L110 were not covered by tests
}
for j.dec.More() {
var value any
if err = j.dec.Decode(&value); err != nil {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to parse row, error: %v", err))
}

Check warning on line 116 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L115-L116

Added lines #L115 - L116 were not covered by tests
row, err := j.parser.Parse(value)
if err != nil {
return nil, err
}

Check warning on line 120 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L119-L120

Added lines #L119 - L120 were not covered by tests
err = insertData.Append(row)
if err != nil {
return nil, err
}

Check warning on line 124 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L123-L124

Added lines #L123 - L124 were not covered by tests
if insertData.GetMemorySize() >= j.bufferSize {
break

Check warning on line 126 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L126

Added line #L126 was not covered by tests
}
}

if !j.dec.More() {
t, err := j.dec.Token()
if err != nil {
return nil, merr.WrapErrImportFailed(fmt.Sprintf("failed to decode JSON, error: %v", err))
}

Check warning on line 134 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L133-L134

Added lines #L133 - L134 were not covered by tests
if t != json.Delim(']') {
return nil, merr.WrapErrImportFailed("invalid JSON format, rows list should end with ']'")
}

Check warning on line 137 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L136-L137

Added lines #L136 - L137 were not covered by tests
}

return insertData, nil
}

// Close is a no-op: the method body is empty, so it releases nothing and
// does not close the io.Reader that was passed to NewReader.
func (j *reader) Close() {}

Check warning on line 143 in internal/util/importutilv2/json/reader.go

View check run for this annotation

Codecov / codecov/patch

internal/util/importutilv2/json/reader.go#L143

Added line #L143 was not covered by tests
Loading
Loading