@@ -1211,6 +1211,8 @@ private abstract static class BigQuerySourceBase extends BoundedSource<TableRow>
     protected final BigQueryServices bqServices;
     protected final ValueProvider<String> executingProject;
 
+    private List<BoundedSource<TableRow>> cachedSplitResult;
+
     private BigQuerySourceBase(
         String jobIdToken,
         String extractDestinationDir,
@@ -1225,19 +1227,30 @@ private BigQuerySourceBase(
     @Override
     public List<BoundedSource<TableRow>> splitIntoBundles(
         long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-      BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
-      TableReference tableToExtract = getTableToExtract(bqOptions);
-      JobService jobService = bqServices.getJobService(bqOptions);
-      String extractJobId = getExtractJobId(jobIdToken);
-      List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
-
-      TableSchema tableSchema = bqServices.getDatasetService(bqOptions).getTable(
-          tableToExtract.getProjectId(),
-          tableToExtract.getDatasetId(),
-          tableToExtract.getTableId()).getSchema();
-
-      cleanupTempResource(bqOptions);
-      return createSources(tempFiles, tableSchema);
+      // splitIntoBundles() can be called multiple times, e.g. the Dataflow runner may call it
+      // again with a different desiredBundleSizeBytes if the previous call produced too many
+      // sources. We ignore desiredBundleSizeBytes anyway; in any case, a repeated
+      // splitIntoBundles() call should not initiate another BigQuery extract job.
+      if (cachedSplitResult == null) {
+        BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
+        TableReference tableToExtract = getTableToExtract(bqOptions);
+        JobService jobService = bqServices.getJobService(bqOptions);
+        String extractJobId = getExtractJobId(jobIdToken);
+        List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
+
+        TableSchema tableSchema =
+            bqServices
+                .getDatasetService(bqOptions)
+                .getTable(
+                    tableToExtract.getProjectId(),
+                    tableToExtract.getDatasetId(),
+                    tableToExtract.getTableId())
+                .getSchema();
+
+        cleanupTempResource(bqOptions);
+        cachedSplitResult = createSources(tempFiles, tableSchema);
+      }
+      return cachedSplitResult;
     }
 
     protected abstract TableReference getTableToExtract(BigQueryOptions bqOptions) throws Exception;
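
The key change is that splitIntoBundles() now lazily memoizes its result in cachedSplitResult, so a second call (for example with a different desiredBundleSizeBytes hint) does not start another BigQuery extract job. Below is a minimal sketch of that pattern in isolation, using hypothetical names (CachedSplitSketch, the gs:// paths) rather than the SDK types:

// Illustrative sketch only (hypothetical class, not part of the SDK): the same
// lazy-memoization pattern shown above, in isolation. The expensive extract step
// runs once; later calls with a different size hint return the cached result.
import java.util.Arrays;
import java.util.List;

public class CachedSplitSketch {
  private List<String> cachedSplitResult;

  public List<String> splitIntoBundles(long desiredBundleSizeBytes) {
    if (cachedSplitResult == null) {
      // Stand-in for the BigQuery extract job and source creation.
      System.out.println("running extract job once");
      cachedSplitResult = Arrays.asList("gs://tmp/extract-0.avro", "gs://tmp/extract-1.avro");
    }
    return cachedSplitResult;
  }

  public static void main(String[] args) {
    CachedSplitSketch source = new CachedSplitSketch();
    List<String> first = source.splitIntoBundles(64 << 20);
    List<String> second = source.splitIntoBundles(1 << 20); // different hint, no second extract
    System.out.println(first == second); // prints true: the cached list is reused
  }
}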