Skip to content

Commit 301dea6

Browse files
committed
perf(data): implement automatic field naming convention detection with LRU caching
1 parent dfb6b24 commit 301dea6

File tree

4 files changed

+657
-25
lines changed

4 files changed

+657
-25
lines changed

pkg/component/base/execution.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ type Job struct {
3232
// InputReader is an interface for reading input data from a job.
3333
type InputReader interface {
3434
// ReadData reads the input data from the job into the provided struct.
35+
// The unmarshaler automatically detects the correct naming convention for each field
36+
// based on available input data, providing seamless integration with any external package.
3537
ReadData(ctx context.Context, input any) (err error)
3638

3739
// Deprecated: Read() is deprecated and will be removed in a future version.

pkg/data/struct.go

Lines changed: 253 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
package data
22

33
import (
4+
"container/list"
45
"context"
56
"errors"
67
"fmt"
78
"reflect"
89
"strconv"
910
"strings"
11+
"sync"
12+
13+
"github.com/iancoleman/strcase"
1014

1115
"github.com/instill-ai/pipeline-backend/pkg/data/format"
1216
"github.com/instill-ai/pipeline-backend/pkg/external"
@@ -43,23 +47,100 @@ import (
4347
// - For Document: "application/pdf", "text/plain", etc
4448
// - For pointers: "default=value" to specify default value when nil
4549

50+
// fieldMappingCache is a bounded, least-recently-used cache that maps a struct
// type to its resolved field names (Go field name -> resolved name).
//
// All access goes through get and set, which serialize on mu. A nil
// *fieldMappingCache is valid: get always misses and set is a no-op.
type fieldMappingCache struct {
	cache   map[reflect.Type]map[string]string // Type -> FieldName -> ResolvedName
	lru     *list.List                         // Recency order; the front is the most recently used type
	items   map[reflect.Type]*list.Element     // Type -> its element in lru
	maxSize int                                // Maximum number of cached types; <= 0 disables caching
	mu      sync.RWMutex                       // Guards cache, lru and items
}

// newFieldMappingCache creates an empty LRU cache that holds at most maxSize
// struct types. A non-positive maxSize yields a cache that never stores
// anything.
func newFieldMappingCache(maxSize int) *fieldMappingCache {
	return &fieldMappingCache{
		cache:   make(map[reflect.Type]map[string]string),
		lru:     list.New(),
		items:   make(map[reflect.Type]*list.Element),
		maxSize: maxSize,
	}
}

// get returns the field mappings cached for structType and marks the type as
// most recently used. The second result reports whether an entry was found.
//
// The returned map is the cached instance itself and may be handed to several
// goroutines; callers must treat it as read-only.
func (c *fieldMappingCache) get(structType reflect.Type) (map[string]string, bool) {
	if c == nil {
		return nil, false
	}

	// A hit reorders the recency list, which is a write to shared state. The
	// exclusive lock is therefore required: under a read lock, concurrent hits
	// would mutate the list at the same time.
	c.mu.Lock()
	defer c.mu.Unlock()

	elem, ok := c.items[structType]
	if !ok {
		return nil, false
	}
	c.lru.MoveToFront(elem)
	return c.cache[structType], true
}

// set stores mappings for structType and marks the type as most recently used.
// When the cache is full, the least recently used type is evicted first.
// The cache keeps a reference to mappings; it must not be modified afterwards.
func (c *fieldMappingCache) set(structType reflect.Type, mappings map[string]string) {
	// Without a positive capacity nothing could ever be evicted, so storing
	// entries would let the cache grow without bound.
	if c == nil || c.maxSize <= 0 {
		return
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	// Already cached: replace the value and refresh its recency.
	if elem, exists := c.items[structType]; exists {
		c.cache[structType] = mappings
		c.lru.MoveToFront(elem)
		return
	}

	// Make room by dropping the least recently used type.
	if c.lru.Len() >= c.maxSize {
		if oldest := c.lru.Back(); oldest != nil {
			oldestType := oldest.Value.(reflect.Type)
			delete(c.cache, oldestType)
			delete(c.items, oldestType)
			c.lru.Remove(oldest)
		}
	}

	c.cache[structType] = mappings
	c.items[structType] = c.lru.PushFront(structType)
}
116+
46117
// Marshaler is used to marshal a struct into a Map.
47118
type Marshaler struct {
119+
fieldCache *fieldMappingCache
48120
}
49121

50122
// Unmarshaler is used to unmarshal data into a struct.
51123
type Unmarshaler struct {
52124
binaryFetcher external.BinaryFetcher
125+
fieldCache *fieldMappingCache
53126
}
54127

55-
// NewMarshaler creates a new Marshaler.
128+
// NewMarshaler creates a new Marshaler with field name caching enabled.
56129
func NewMarshaler() *Marshaler {
57-
return &Marshaler{}
130+
return &Marshaler{
131+
fieldCache: newFieldMappingCache(200), // Cache up to 200 struct types
132+
}
58133
}
59134

60-
// NewUnmarshaler creates a new Unmarshaler with a binary fetcher.
135+
// NewUnmarshaler creates a new Unmarshaler with automatic naming convention detection.
136+
// The unmarshaler automatically detects the correct naming convention for each field
137+
// based on available input data, providing seamless integration with any external package.
138+
// Field name mappings are cached for improved performance on repeated operations.
61139
func NewUnmarshaler(binaryFetcher external.BinaryFetcher) *Unmarshaler {
62-
return &Unmarshaler{binaryFetcher}
140+
return &Unmarshaler{
141+
binaryFetcher: binaryFetcher,
142+
fieldCache: newFieldMappingCache(200), // Cache up to 200 struct types
143+
}
63144
}
64145

65146
// Unmarshal converts a Map value into the provided struct s using `instill` tags.
@@ -112,7 +193,7 @@ func (u *Unmarshaler) unmarshalStruct(ctx context.Context, m Map, v reflect.Valu
112193
}
113194

114195
// Get the field name from the embedded struct's field
115-
embFieldName := u.getFieldName(embField)
196+
embFieldName := u.getFieldNameFromMap(embField, m)
116197
if val, ok := m[embFieldName]; ok {
117198
if err := u.unmarshalValue(ctx, val, embValue, embField); err != nil {
118199
return fmt.Errorf("error unmarshaling embedded field %s: %w", embFieldName, err)
@@ -126,7 +207,7 @@ func (u *Unmarshaler) unmarshalStruct(ctx context.Context, m Map, v reflect.Valu
126207
continue
127208
}
128209

129-
fieldName := u.getFieldName(field)
210+
fieldName := u.getFieldNameFromMap(field, m)
130211
val, ok := m[fieldName]
131212
if !ok {
132213
// Check for default value if field is nil pointer or zero value
@@ -411,13 +492,20 @@ func (u *Unmarshaler) unmarshalToReflectMap(ctx context.Context, v Map, field re
411492

412493
// unmarshalToStruct handles unmarshaling of Map values into struct.
413494
func (u *Unmarshaler) unmarshalToStruct(ctx context.Context, v Map, field reflect.Value) error {
495+
structType := field.Type()
496+
497+
// Get cached field mappings for this struct type
498+
fieldMappings := u.getFieldMappingsForType(structType, v)
499+
414500
for i := 0; i < field.NumField(); i++ {
415-
structField := field.Type().Field(i)
501+
structField := structType.Field(i)
416502
fieldValue := field.Field(i)
417503
if !fieldValue.CanSet() {
418504
continue
419505
}
420-
fieldName := u.getFieldName(structField)
506+
507+
// Use cached mapping instead of computing each time
508+
fieldName := fieldMappings[structField.Name]
421509
val, ok := v[fieldName]
422510
if !ok {
423511
continue
@@ -552,14 +640,76 @@ func (u *Unmarshaler) unmarshalInterface(v format.Value, field reflect.Value, st
552640
return fmt.Errorf("cannot unmarshal %T into %v", v, field.Type())
553641
}
554642

555-
// getFieldName returns the field name from the struct tag or the field name itself.
556-
func (u *Unmarshaler) getFieldName(field reflect.StructField) string {
557-
tag := field.Tag.Get("instill")
558-
if tag == "" {
559-
return field.Name
643+
// getFieldMappingsForType returns cached field name mappings for a struct type.
644+
// If not cached, it computes the mappings using automatic naming convention detection.
645+
func (u *Unmarshaler) getFieldMappingsForType(structType reflect.Type, inputMap Map) map[string]string {
646+
// Initialize cache if nil (for backward compatibility with tests)
647+
if u.fieldCache == nil {
648+
u.fieldCache = newFieldMappingCache(200)
649+
}
650+
651+
// Try to get from cache first
652+
if mappings, exists := u.fieldCache.get(structType); exists {
653+
return mappings
654+
}
655+
656+
// Cache miss - compute mappings for all fields in this struct type
657+
mappings := make(map[string]string)
658+
for i := 0; i < structType.NumField(); i++ {
659+
field := structType.Field(i)
660+
mappings[field.Name] = u.computeFieldName(field, inputMap)
661+
}
662+
663+
// Cache the computed mappings
664+
u.fieldCache.set(structType, mappings)
665+
return mappings
666+
}
667+
668+
// computeFieldName computes the field name using automatic naming convention detection.
669+
// This is the core logic that tries different naming conventions and returns the one that exists in the input map.
670+
func (u *Unmarshaler) computeFieldName(field reflect.StructField, inputMap Map) string {
671+
// First priority: instill tag (always takes precedence)
672+
if tag := field.Tag.Get("instill"); tag != "" {
673+
parts := strings.Split(tag, ",")
674+
return parts[0]
560675
}
561-
parts := strings.Split(tag, ",")
562-
return parts[0]
676+
677+
// Second priority: try json tag with automatic convention detection
678+
jsonTag := field.Tag.Get("json")
679+
if jsonTag != "" && jsonTag != "-" {
680+
parts := strings.Split(jsonTag, ",")
681+
jsonFieldName := parts[0]
682+
if jsonFieldName != "" {
683+
// Try different naming conventions and return the one that exists in input
684+
conversions := []struct {
685+
name string
686+
converted string
687+
}{
688+
{"kebab-case", jsonFieldName}, // No conversion
689+
{"camelCase", strcase.ToKebab(jsonFieldName)}, // camelCase -> kebab-case
690+
{"snake_case", strcase.ToKebab(jsonFieldName)}, // snake_case -> kebab-case
691+
{"PascalCase", strcase.ToKebab(jsonFieldName)}, // PascalCase -> kebab-case
692+
}
693+
694+
for _, conv := range conversions {
695+
if _, exists := inputMap[conv.converted]; exists {
696+
return conv.converted
697+
}
698+
}
699+
}
700+
}
701+
702+
// Fallback: use field name as-is
703+
return field.Name
704+
}
705+
706+
// getFieldNameFromMap returns the field name using cached mappings when possible.
707+
// This method maintains backward compatibility while leveraging caching for performance.
708+
func (u *Unmarshaler) getFieldNameFromMap(field reflect.StructField, inputMap Map) string {
709+
// For single field lookups, we still use the direct computation to avoid
710+
// computing mappings for the entire struct when only one field is needed.
711+
// The cache is most beneficial for full struct unmarshaling.
712+
return u.computeFieldName(field, inputMap)
563713
}
564714

565715
// Marshal converts a struct into a Map that represents the struct fields as values.
@@ -621,34 +771,112 @@ func (m *Marshaler) marshalValue(v reflect.Value) (format.Value, error) {
621771
}
622772
}
623773

624-
// marshalStruct handles marshaling of struct values.
625-
func (m *Marshaler) marshalStruct(v reflect.Value) (Map, error) {
626-
t := v.Type()
627-
mp := Map{}
774+
// getMarshalFieldMappings returns cached field name mappings for marshaling a struct type.
775+
// If not cached, it computes the mappings for consistent kebab-case output.
776+
func (m *Marshaler) getMarshalFieldMappings(structType reflect.Type) map[string]marshalFieldInfo {
777+
// Initialize cache if nil (for backward compatibility with tests)
778+
if m.fieldCache == nil {
779+
m.fieldCache = newFieldMappingCache(200)
780+
}
628781

629-
for i := 0; i < t.NumField(); i++ {
630-
field := t.Field(i)
631-
fieldValue := v.Field(i)
782+
// Try to get from cache first
783+
if mappings, exists := m.fieldCache.get(structType); exists {
784+
// Convert cached string mappings to marshalFieldInfo
785+
result := make(map[string]marshalFieldInfo)
786+
for fieldName, resolvedName := range mappings {
787+
// We need to recompute format tags since they're not cached in string form
788+
field, _ := structType.FieldByName(fieldName)
789+
var formatTag string
790+
if instillTag := field.Tag.Get("instill"); instillTag != "" {
791+
parts := strings.Split(instillTag, ",")
792+
if len(parts) > 1 {
793+
formatTag = parts[1]
794+
}
795+
}
796+
result[fieldName] = marshalFieldInfo{
797+
resolvedName: resolvedName,
798+
formatTag: formatTag,
799+
}
800+
}
801+
return result
802+
}
803+
804+
// Cache miss - compute mappings for all fields in this struct type
805+
mappings := make(map[string]string)
806+
fieldInfos := make(map[string]marshalFieldInfo)
807+
808+
for i := 0; i < structType.NumField(); i++ {
809+
field := structType.Field(i)
632810

633811
// Skip unexported fields
634812
if field.PkgPath != "" {
635813
continue
636814
}
637815

638-
tag := field.Tag.Get("instill")
639816
var fieldName string
640817
var formatTag string
641818

642-
if tag != "" {
643-
parts := strings.Split(tag, ",")
819+
// First priority: instill tag
820+
if instillTag := field.Tag.Get("instill"); instillTag != "" {
821+
parts := strings.Split(instillTag, ",")
644822
fieldName = parts[0]
645823
if len(parts) > 1 {
646824
formatTag = parts[1]
647825
}
826+
} else if jsonTag := field.Tag.Get("json"); jsonTag != "" && jsonTag != "-" {
827+
// Second priority: json tag, convert to kebab-case
828+
parts := strings.Split(jsonTag, ",")
829+
jsonFieldName := parts[0]
830+
if jsonFieldName != "" {
831+
fieldName = strcase.ToKebab(jsonFieldName)
832+
} else {
833+
fieldName = field.Name
834+
}
648835
} else {
836+
// Fallback: use field name as-is
649837
fieldName = field.Name
650838
}
651839

840+
mappings[field.Name] = fieldName
841+
fieldInfos[field.Name] = marshalFieldInfo{
842+
resolvedName: fieldName,
843+
formatTag: formatTag,
844+
}
845+
}
846+
847+
// Cache the computed mappings (string form for compatibility with cache)
848+
m.fieldCache.set(structType, mappings)
849+
return fieldInfos
850+
}
851+
852+
// marshalFieldInfo is the per-field result of getMarshalFieldMappings.
type marshalFieldInfo struct {
	resolvedName string // name resolved for the field from its tags or Go name
	formatTag    string // second segment of the `instill` tag; empty when absent
}
857+
858+
// marshalStruct handles marshaling of struct values.
859+
func (m *Marshaler) marshalStruct(v reflect.Value) (Map, error) {
860+
t := v.Type()
861+
mp := Map{}
862+
863+
// Get cached field mappings for this struct type
864+
fieldMappings := m.getMarshalFieldMappings(t)
865+
866+
for i := 0; i < t.NumField(); i++ {
867+
field := t.Field(i)
868+
fieldValue := v.Field(i)
869+
870+
// Skip unexported fields
871+
if field.PkgPath != "" {
872+
continue
873+
}
874+
875+
// Use cached mapping
876+
fieldInfo := fieldMappings[field.Name]
877+
fieldName := fieldInfo.resolvedName
878+
formatTag := fieldInfo.formatTag
879+
652880
// Handle format conversion before marshaling
653881
if formatTag != "" && fieldValue.CanInterface() {
654882
if val, ok := fieldValue.Interface().(format.Value); ok {

0 commit comments

Comments
 (0)