@@ -10,22 +10,22 @@ import (
10
10
"github.com/PeerDB-io/peer-flow/connectors/utils"
11
11
"github.com/PeerDB-io/peer-flow/generated/protos"
12
12
"github.com/PeerDB-io/peer-flow/model"
13
- "github.com/PeerDB-io/peer-flow/model/qvalue"
14
13
util "github.com/PeerDB-io/peer-flow/utils"
15
- "github.com/linkedin/goavro/v2"
16
14
log "github.com/sirupsen/logrus"
17
15
_ "github.com/snowflakedb/gosnowflake"
18
16
)
19
17
20
18
type SnowflakeAvroSyncMethod struct {
19
+ config * protos.QRepConfig
21
20
connector * SnowflakeConnector
22
- localDir string
23
21
}
24
22
25
- func NewSnowflakeAvroSyncMethod (connector * SnowflakeConnector , localDir string ) * SnowflakeAvroSyncMethod {
23
+ func NewSnowflakeAvroSyncMethod (
24
+ config * protos.QRepConfig ,
25
+ connector * SnowflakeConnector ) * SnowflakeAvroSyncMethod {
26
26
return & SnowflakeAvroSyncMethod {
27
+ config : config ,
27
28
connector : connector ,
28
- localDir : localDir ,
29
29
}
30
30
}
31
31
@@ -80,16 +80,55 @@ func (s *SnowflakeAvroSyncMethod) writeToAvroFile(
80
80
avroSchema * model.QRecordAvroSchemaDefinition ,
81
81
partitionID string ,
82
82
) (string , error ) {
83
- localFilePath := fmt .Sprintf ("%s/%s.avro" , s .localDir , partitionID )
84
- err := WriteRecordsToAvroFile (records , avroSchema , localFilePath )
85
- if err != nil {
86
- return "" , fmt .Errorf ("failed to write records to Avro file: %w" , err )
83
+ if s .config .StagingPath == "" {
84
+ tmpDir , err := os .MkdirTemp ("" , "peerdb-avro" )
85
+ if err != nil {
86
+ return "" , fmt .Errorf ("failed to create temp dir: %w" , err )
87
+ }
88
+
89
+ localFilePath := fmt .Sprintf ("%s/%s.avro" , tmpDir , partitionID )
90
+ err = WriteRecordsToAvroFile (records , avroSchema , localFilePath )
91
+ if err != nil {
92
+ return "" , fmt .Errorf ("failed to write records to Avro file: %w" , err )
93
+ }
94
+
95
+ return localFilePath , nil
96
+ } else if strings .HasPrefix (s .config .StagingPath , "s3://" ) {
97
+ // users will have set AWS_REGION, AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
98
+ // in their environment.
99
+
100
+ // Remove s3:// prefix
101
+ stagingPath := strings .TrimPrefix (s .config .StagingPath , "s3://" )
102
+
103
+ // Split into bucket and prefix
104
+ splitPath := strings .SplitN (stagingPath , "/" , 2 )
105
+
106
+ bucket := splitPath [0 ]
107
+ prefix := ""
108
+ if len (splitPath ) > 1 {
109
+ // Remove leading and trailing slashes from prefix
110
+ prefix = strings .Trim (splitPath [1 ], "/" )
111
+ }
112
+
113
+ s3Key := fmt .Sprintf ("%s/%s/%s.avro" , prefix , s .config .FlowJobName , partitionID )
114
+
115
+ err := WriteRecordsToS3 (records , avroSchema , bucket , s3Key )
116
+ if err != nil {
117
+ return "" , fmt .Errorf ("failed to write records to S3: %w" , err )
118
+ }
119
+
120
+ return "" , nil
87
121
}
88
122
89
- return localFilePath , nil
123
+ return "" , fmt . Errorf ( "unsupported staging path: %s" , s . config . StagingPath )
90
124
}
91
125
92
126
func (s * SnowflakeAvroSyncMethod ) putFileToStage (localFilePath string , stage string ) error {
127
+ if localFilePath == "" {
128
+ log .Infof ("no file to put to stage" )
129
+ return nil
130
+ }
131
+
93
132
putCmd := fmt .Sprintf ("PUT file://%s @%s" , localFilePath , stage )
94
133
if _ , err := s .connector .database .Exec (putCmd ); err != nil {
95
134
return fmt .Errorf ("failed to put file to stage: %w" , err )
@@ -157,52 +196,6 @@ func (s *SnowflakeAvroSyncMethod) insertMetadata(
157
196
return nil
158
197
}
159
198
160
- func WriteRecordsToAvroFile (
161
- records * model.QRecordBatch ,
162
- avroSchema * model.QRecordAvroSchemaDefinition ,
163
- filePath string ,
164
- ) error {
165
- file , err := os .Create (filePath )
166
- if err != nil {
167
- return fmt .Errorf ("failed to create file: %w" , err )
168
- }
169
- defer file .Close ()
170
-
171
- // Create OCF Writer
172
- ocfWriter , err := goavro .NewOCFWriter (goavro.OCFConfig {
173
- W : file ,
174
- Schema : avroSchema .Schema ,
175
- })
176
- if err != nil {
177
- return fmt .Errorf ("failed to create OCF writer: %w" , err )
178
- }
179
-
180
- colNames := records .Schema .GetColumnNames ()
181
-
182
- // Write each QRecord to the OCF file
183
- for _ , qRecord := range records .Records {
184
- avroConverter := model .NewQRecordAvroConverter (
185
- qRecord ,
186
- qvalue .QDWHTypeSnowflake ,
187
- & avroSchema .NullableFields ,
188
- colNames ,
189
- )
190
- avroMap , err := avroConverter .Convert ()
191
- if err != nil {
192
- log .Errorf ("failed to convert QRecord to Avro compatible map: %v" , err )
193
- return fmt .Errorf ("failed to convert QRecord to Avro compatible map: %w" , err )
194
- }
195
-
196
- err = ocfWriter .Append ([]interface {}{avroMap })
197
- if err != nil {
198
- log .Errorf ("failed to write record to OCF file: %v" , err )
199
- return fmt .Errorf ("failed to write record to OCF file: %w" , err )
200
- }
201
- }
202
-
203
- return nil
204
- }
205
-
206
199
type SnowflakeAvroWriteHandler struct {
207
200
db * sql.DB
208
201
dstTableName string
@@ -228,6 +221,7 @@ func NewSnowflakeAvroWriteHandler(
228
221
func (s * SnowflakeAvroWriteHandler ) HandleAppendMode () error {
229
222
//nolint:gosec
230
223
copyCmd := fmt .Sprintf ("COPY INTO %s FROM @%s %s" , s .dstTableName , s .stage , strings .Join (s .copyOpts , "," ))
224
+ log .Infof ("running copy command: %s" , copyCmd )
231
225
if _ , err := s .db .Exec (copyCmd ); err != nil {
232
226
return fmt .Errorf ("failed to run COPY INTO command: %w" , err )
233
227
}
0 commit comments