11package data
22
33import (
4+ "container/list"
45 "context"
56 "errors"
67 "fmt"
78 "reflect"
89 "strconv"
910 "strings"
11+ "sync"
12+
13+ "github.com/iancoleman/strcase"
1014
1115 "github.com/instill-ai/pipeline-backend/pkg/data/format"
1216 "github.com/instill-ai/pipeline-backend/pkg/external"
@@ -43,23 +47,100 @@ import (
4347// - For Document: "application/pdf", "text/plain", etc
4448// - For pointers: "default=value" to specify default value when nil
4549
// fieldMappingCache is a bounded LRU cache that maps a struct type to its
// resolved field names (Go field name -> resolved name). All methods are safe
// to call on a nil receiver, in which case the cache behaves as always empty.
type fieldMappingCache struct {
	cache   map[reflect.Type]map[string]string // Type -> FieldName -> ResolvedName
	lru     *list.List                         // Recency order; front is most recently used
	items   map[reflect.Type]*list.Element     // Type -> its element in lru
	maxSize int                                // Maximum number of cached types
	mu      sync.RWMutex                       // Guards cache, lru and items
}

// newFieldMappingCache creates an empty LRU cache that holds at most maxSize
// struct types. Because set evicts whenever the current length is >= maxSize,
// a maxSize below 1 results in a cache that retains only the latest entry.
func newFieldMappingCache(maxSize int) *fieldMappingCache {
	return &fieldMappingCache{
		cache:   make(map[reflect.Type]map[string]string),
		lru:     list.New(),
		items:   make(map[reflect.Type]*list.Element),
		maxSize: maxSize,
	}
}

// get returns the mappings stored for structType and reports whether an entry
// exists. A hit marks the entry as most recently used. The returned map is the
// cached instance itself, so callers must treat it as read-only.
func (c *fieldMappingCache) get(structType reflect.Type) (map[string]string, bool) {
	if c == nil {
		return nil, false
	}

	// A hit reorders the LRU list, which is a write: the exclusive lock is
	// required here. Holding only the read lock would let concurrent readers
	// mutate the list at the same time.
	c.mu.Lock()
	defer c.mu.Unlock()

	mappings, exists := c.cache[structType]
	if !exists {
		return nil, false
	}
	if elem, ok := c.items[structType]; ok {
		c.lru.MoveToFront(elem)
	}
	return mappings, true
}

// set stores mappings for structType, replacing any existing entry and marking
// it as most recently used. When the cache is full, the least recently used
// type is evicted first.
func (c *fieldMappingCache) set(structType reflect.Type, mappings map[string]string) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()

	// Existing entry: overwrite in place and refresh its recency.
	if elem, exists := c.items[structType]; exists {
		c.cache[structType] = mappings
		c.lru.MoveToFront(elem)
		return
	}

	// Make room by dropping the least recently used type.
	if c.lru.Len() >= c.maxSize {
		if oldest := c.lru.Back(); oldest != nil {
			oldestType := oldest.Value.(reflect.Type)
			delete(c.cache, oldestType)
			delete(c.items, oldestType)
			c.lru.Remove(oldest)
		}
	}

	// Insert the new entry at the front (most recently used).
	c.cache[structType] = mappings
	c.items[structType] = c.lru.PushFront(structType)
}
116+
46117// Marshaler is used to marshal a struct into a Map.
47118type Marshaler struct {
119+ fieldCache * fieldMappingCache
48120}
49121
50122// Unmarshaler is used to unmarshal data into a struct.
51123type Unmarshaler struct {
52124 binaryFetcher external.BinaryFetcher
125+ fieldCache * fieldMappingCache
53126}
54127
55- // NewMarshaler creates a new Marshaler.
128+ // NewMarshaler creates a new Marshaler with field name caching enabled .
56129func NewMarshaler () * Marshaler {
57- return & Marshaler {}
130+ return & Marshaler {
131+ fieldCache : newFieldMappingCache (200 ), // Cache up to 200 struct types
132+ }
58133}
59134
60- // NewUnmarshaler creates a new Unmarshaler with a binary fetcher.
135+ // NewUnmarshaler creates a new Unmarshaler with automatic naming convention detection.
136+ // The unmarshaler automatically detects the correct naming convention for each field
137+ // based on available input data, providing seamless integration with any external package.
138+ // Field name mappings are cached for improved performance on repeated operations.
61139func NewUnmarshaler (binaryFetcher external.BinaryFetcher ) * Unmarshaler {
62- return & Unmarshaler {binaryFetcher }
140+ return & Unmarshaler {
141+ binaryFetcher : binaryFetcher ,
142+ fieldCache : newFieldMappingCache (200 ), // Cache up to 200 struct types
143+ }
63144}
64145
65146// Unmarshal converts a Map value into the provided struct s using `instill` tags.
@@ -112,7 +193,7 @@ func (u *Unmarshaler) unmarshalStruct(ctx context.Context, m Map, v reflect.Valu
112193 }
113194
114195 // Get the field name from the embedded struct's field
115- embFieldName := u .getFieldName (embField )
196+ embFieldName := u .getFieldNameFromMap (embField , m )
116197 if val , ok := m [embFieldName ]; ok {
117198 if err := u .unmarshalValue (ctx , val , embValue , embField ); err != nil {
118199 return fmt .Errorf ("error unmarshaling embedded field %s: %w" , embFieldName , err )
@@ -126,7 +207,7 @@ func (u *Unmarshaler) unmarshalStruct(ctx context.Context, m Map, v reflect.Valu
126207 continue
127208 }
128209
129- fieldName := u .getFieldName (field )
210+ fieldName := u .getFieldNameFromMap (field , m )
130211 val , ok := m [fieldName ]
131212 if ! ok {
132213 // Check for default value if field is nil pointer or zero value
@@ -411,13 +492,20 @@ func (u *Unmarshaler) unmarshalToReflectMap(ctx context.Context, v Map, field re
411492
412493// unmarshalToStruct handles unmarshaling of Map values into struct.
413494func (u * Unmarshaler ) unmarshalToStruct (ctx context.Context , v Map , field reflect.Value ) error {
495+ structType := field .Type ()
496+
497+ // Get cached field mappings for this struct type
498+ fieldMappings := u .getFieldMappingsForType (structType , v )
499+
414500 for i := 0 ; i < field .NumField (); i ++ {
415- structField := field . Type () .Field (i )
501+ structField := structType .Field (i )
416502 fieldValue := field .Field (i )
417503 if ! fieldValue .CanSet () {
418504 continue
419505 }
420- fieldName := u .getFieldName (structField )
506+
507+ // Use cached mapping instead of computing each time
508+ fieldName := fieldMappings [structField .Name ]
421509 val , ok := v [fieldName ]
422510 if ! ok {
423511 continue
@@ -552,14 +640,76 @@ func (u *Unmarshaler) unmarshalInterface(v format.Value, field reflect.Value, st
552640 return fmt .Errorf ("cannot unmarshal %T into %v" , v , field .Type ())
553641}
554642
555- // getFieldName returns the field name from the struct tag or the field name itself.
556- func (u * Unmarshaler ) getFieldName (field reflect.StructField ) string {
557- tag := field .Tag .Get ("instill" )
558- if tag == "" {
559- return field .Name
643+ // getFieldMappingsForType returns cached field name mappings for a struct type.
644+ // If not cached, it computes the mappings using automatic naming convention detection.
645+ func (u * Unmarshaler ) getFieldMappingsForType (structType reflect.Type , inputMap Map ) map [string ]string {
646+ // Initialize cache if nil (for backward compatibility with tests)
647+ if u .fieldCache == nil {
648+ u .fieldCache = newFieldMappingCache (200 )
649+ }
650+
651+ // Try to get from cache first
652+ if mappings , exists := u .fieldCache .get (structType ); exists {
653+ return mappings
654+ }
655+
656+ // Cache miss - compute mappings for all fields in this struct type
657+ mappings := make (map [string ]string )
658+ for i := 0 ; i < structType .NumField (); i ++ {
659+ field := structType .Field (i )
660+ mappings [field .Name ] = u .computeFieldName (field , inputMap )
661+ }
662+
663+ // Cache the computed mappings
664+ u .fieldCache .set (structType , mappings )
665+ return mappings
666+ }
667+
668+ // computeFieldName computes the field name using automatic naming convention detection.
669+ // This is the core logic that tries different naming conventions and returns the one that exists in the input map.
670+ func (u * Unmarshaler ) computeFieldName (field reflect.StructField , inputMap Map ) string {
671+ // First priority: instill tag (always takes precedence)
672+ if tag := field .Tag .Get ("instill" ); tag != "" {
673+ parts := strings .Split (tag , "," )
674+ return parts [0 ]
560675 }
561- parts := strings .Split (tag , "," )
562- return parts [0 ]
676+
677+ // Second priority: try json tag with automatic convention detection
678+ jsonTag := field .Tag .Get ("json" )
679+ if jsonTag != "" && jsonTag != "-" {
680+ parts := strings .Split (jsonTag , "," )
681+ jsonFieldName := parts [0 ]
682+ if jsonFieldName != "" {
683+ // Try different naming conventions and return the one that exists in input
684+ conversions := []struct {
685+ name string
686+ converted string
687+ }{
688+ {"kebab-case" , jsonFieldName }, // No conversion
689+ {"camelCase" , strcase .ToKebab (jsonFieldName )}, // camelCase -> kebab-case
690+ {"snake_case" , strcase .ToKebab (jsonFieldName )}, // snake_case -> kebab-case
691+ {"PascalCase" , strcase .ToKebab (jsonFieldName )}, // PascalCase -> kebab-case
692+ }
693+
694+ for _ , conv := range conversions {
695+ if _ , exists := inputMap [conv .converted ]; exists {
696+ return conv .converted
697+ }
698+ }
699+ }
700+ }
701+
702+ // Fallback: use field name as-is
703+ return field .Name
704+ }
705+
706+ // getFieldNameFromMap returns the field name using cached mappings when possible.
707+ // This method maintains backward compatibility while leveraging caching for performance.
708+ func (u * Unmarshaler ) getFieldNameFromMap (field reflect.StructField , inputMap Map ) string {
709+ // For single field lookups, we still use the direct computation to avoid
710+ // computing mappings for the entire struct when only one field is needed.
711+ // The cache is most beneficial for full struct unmarshaling.
712+ return u .computeFieldName (field , inputMap )
563713}
564714
565715// Marshal converts a struct into a Map that represents the struct fields as values.
@@ -621,34 +771,112 @@ func (m *Marshaler) marshalValue(v reflect.Value) (format.Value, error) {
621771 }
622772}
623773
624- // marshalStruct handles marshaling of struct values.
625- func (m * Marshaler ) marshalStruct (v reflect.Value ) (Map , error ) {
626- t := v .Type ()
627- mp := Map {}
774+ // getMarshalFieldMappings returns cached field name mappings for marshaling a struct type.
775+ // If not cached, it computes the mappings for consistent kebab-case output.
776+ func (m * Marshaler ) getMarshalFieldMappings (structType reflect.Type ) map [string ]marshalFieldInfo {
777+ // Initialize cache if nil (for backward compatibility with tests)
778+ if m .fieldCache == nil {
779+ m .fieldCache = newFieldMappingCache (200 )
780+ }
628781
629- for i := 0 ; i < t .NumField (); i ++ {
630- field := t .Field (i )
631- fieldValue := v .Field (i )
782+ // Try to get from cache first
783+ if mappings , exists := m .fieldCache .get (structType ); exists {
784+ // Convert cached string mappings to marshalFieldInfo
785+ result := make (map [string ]marshalFieldInfo )
786+ for fieldName , resolvedName := range mappings {
787+ // We need to recompute format tags since they're not cached in string form
788+ field , _ := structType .FieldByName (fieldName )
789+ var formatTag string
790+ if instillTag := field .Tag .Get ("instill" ); instillTag != "" {
791+ parts := strings .Split (instillTag , "," )
792+ if len (parts ) > 1 {
793+ formatTag = parts [1 ]
794+ }
795+ }
796+ result [fieldName ] = marshalFieldInfo {
797+ resolvedName : resolvedName ,
798+ formatTag : formatTag ,
799+ }
800+ }
801+ return result
802+ }
803+
804+ // Cache miss - compute mappings for all fields in this struct type
805+ mappings := make (map [string ]string )
806+ fieldInfos := make (map [string ]marshalFieldInfo )
807+
808+ for i := 0 ; i < structType .NumField (); i ++ {
809+ field := structType .Field (i )
632810
633811 // Skip unexported fields
634812 if field .PkgPath != "" {
635813 continue
636814 }
637815
638- tag := field .Tag .Get ("instill" )
639816 var fieldName string
640817 var formatTag string
641818
642- if tag != "" {
643- parts := strings .Split (tag , "," )
819+ // First priority: instill tag
820+ if instillTag := field .Tag .Get ("instill" ); instillTag != "" {
821+ parts := strings .Split (instillTag , "," )
644822 fieldName = parts [0 ]
645823 if len (parts ) > 1 {
646824 formatTag = parts [1 ]
647825 }
826+ } else if jsonTag := field .Tag .Get ("json" ); jsonTag != "" && jsonTag != "-" {
827+ // Second priority: json tag, convert to kebab-case
828+ parts := strings .Split (jsonTag , "," )
829+ jsonFieldName := parts [0 ]
830+ if jsonFieldName != "" {
831+ fieldName = strcase .ToKebab (jsonFieldName )
832+ } else {
833+ fieldName = field .Name
834+ }
648835 } else {
836+ // Fallback: use field name as-is
649837 fieldName = field .Name
650838 }
651839
840+ mappings [field .Name ] = fieldName
841+ fieldInfos [field .Name ] = marshalFieldInfo {
842+ resolvedName : fieldName ,
843+ formatTag : formatTag ,
844+ }
845+ }
846+
847+ // Cache the computed mappings (string form for compatibility with cache)
848+ m .fieldCache .set (structType , mappings )
849+ return fieldInfos
850+ }
851+
// marshalFieldInfo describes how a single struct field is marshaled.
type marshalFieldInfo struct {
	// resolvedName is the key under which the field is written.
	resolvedName string
	// formatTag is the format segment of the field's instill tag, or "".
	formatTag string
}
857+
858+ // marshalStruct handles marshaling of struct values.
859+ func (m * Marshaler ) marshalStruct (v reflect.Value ) (Map , error ) {
860+ t := v .Type ()
861+ mp := Map {}
862+
863+ // Get cached field mappings for this struct type
864+ fieldMappings := m .getMarshalFieldMappings (t )
865+
866+ for i := 0 ; i < t .NumField (); i ++ {
867+ field := t .Field (i )
868+ fieldValue := v .Field (i )
869+
870+ // Skip unexported fields
871+ if field .PkgPath != "" {
872+ continue
873+ }
874+
875+ // Use cached mapping
876+ fieldInfo := fieldMappings [field .Name ]
877+ fieldName := fieldInfo .resolvedName
878+ formatTag := fieldInfo .formatTag
879+
652880 // Handle format conversion before marshaling
653881 if formatTag != "" && fieldValue .CanInterface () {
654882 if val , ok := fieldValue .Interface ().(format.Value ); ok {
0 commit comments