Skip to content

Commit 37f7ee8

Browse files
committed
Add PK Chunking to Bulk Api Query
1 parent 209a876 commit 37f7ee8

File tree

4 files changed

+240
-33
lines changed

4 files changed

+240
-33
lines changed

readme.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,30 @@ if ($result->id) {
356356
}
357357
```
358358

359+
### Query with PK Chunking
360+
361+
```php
362+
$operationType = 'query';
363+
$objectType = 'Account';
364+
$objectData = 'SELECT Id, Name FROM Account';
365+
366+
$result = Salesforce::bulk()->runBatch($operationType, $objectType, $objectData, [
367+
'contentType' => 'CSV',
368+
'Sforce-Enable-PKChunking' => [
369+
'chunkSize' => 2500,
370+
],
371+
]);
372+
373+
if ($result->id) {
374+
$id = $result->id;
375+
foreach ($result->batches as $batch) {
376+
foreach ($batch->records as $record) {
377+
$account_id = $record['Id'];
378+
}
379+
}
380+
}
381+
```
382+
359383
### Custom REST Endpoint (GET)
360384

361385
```php

src/Bulk.php

Lines changed: 101 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,14 @@ public function runBatch($operation, $objectType, $data, $options = [])
3737
$batches = [];
3838

3939
$defaults = [
40-
'externalIdFieldName' => null,
41-
'batchSize' => 2000,
42-
'batchTimeout' => 600,
43-
'contentType' => 'JSON',
44-
'pollIntervalSeconds' => 5,
45-
'isBatchedResult' => false,
46-
'concurrencyMode' => 'Parallel',
40+
'externalIdFieldName' => null,
41+
'batchSize' => 2000,
42+
'batchTimeout' => 600,
43+
'contentType' => 'JSON',
44+
'pollIntervalSeconds' => 5,
45+
'isBatchedResult' => false,
46+
'concurrencyMode' => 'Parallel',
47+
'Sforce-Enable-PKChunking' => false,
4748
];
4849

4950
$options = array_replace($defaults, $options);
@@ -52,7 +53,7 @@ public function runBatch($operation, $objectType, $data, $options = [])
5253
$options['isBatchedResult'] = true;
5354
}
5455

55-
$job = $this->createJob($operation, $objectType, $options['externalIdFieldName'], $options['contentType'], $options['concurrencyMode']);
56+
$job = $this->createJob($operation, $objectType, $options['externalIdFieldName'], $options['contentType'], $options['concurrencyMode'], $options);
5657

5758
if ($job->id) {
5859
//if data is array, we can split it into batches
@@ -62,7 +63,7 @@ public function runBatch($operation, $objectType, $data, $options = [])
6263
for ($i = 1; $i <= $totalNumberOfBatches; $i++) {
6364
$batches[] = $this->addBatch($job->id, array_splice($data, ($i - 1) * $options['batchSize'], $options['batchSize']));
6465
}
65-
} else { //probably a string query so run in onee batch
66+
} else { //probably a string query so run in one batch
6667
$batches[] = $this->addBatch($job->id, $data);
6768
}
6869
} else {
@@ -72,6 +73,10 @@ public function runBatch($operation, $objectType, $data, $options = [])
7273
$time = time();
7374
$timeout = $time + $options['batchTimeout'];
7475

76+
if($options['Sforce-Enable-PKChunking']){
77+
$batches = $this->allBatchDetails($job->id, $options['contentType']);
78+
}
79+
7580
$batches_finished = [];
7681

7782
while (count($batches_finished) < count($batches) && $time < $timeout) {
@@ -82,10 +87,13 @@ public function runBatch($operation, $objectType, $data, $options = [])
8287
continue;
8388
}
8489

85-
$batch = $this->batchDetails($job->id, $batch->id);
86-
if (in_array($batch->state, ['Completed', 'Failed', 'Not Processed'])) {
87-
$batchResult = $this->batchResult($job->id, $batch->id, $options['isBatchedResult']);
88-
$batch->records = $batchResult->records;
90+
$batch = $this->batchDetails($job->id, $batch->id, $options['contentType']);
91+
if (in_array($batch->state, ['Completed', 'Failed', 'Not Processed', 'NotProcessed'])) {
92+
93+
if(in_array($batch->state, ['Completed'])) {
94+
$batchResult = $this->batchResult($job->id, $batch->id, $options['isBatchedResult'], null, $options['contentType']);
95+
$batch->records = $batchResult->records;
96+
}
8997
$batches_finished[] = $batch->id;
9098
}
9199
}
@@ -118,7 +126,7 @@ public function runBatch($operation, $objectType, $data, $options = [])
118126
*
119127
* @return BulkJobResponse
120128
*/
121-
public function createJob($operation, $objectType, $externalIdFieldName = null, $contentType = 'JSON', $concurrencyMode = 'Parallel')
129+
public function createJob($operation, $objectType, $externalIdFieldName = null, $contentType = 'JSON', $concurrencyMode = 'Parallel', $options=[])
122130
{
123131
$url = '/services/async/'.SalesforceConfig::get('salesforce.api.version').'/job';
124132

@@ -128,6 +136,12 @@ public function createJob($operation, $objectType, $externalIdFieldName = null,
128136
'concurrencyMode' => $concurrencyMode,
129137
];
130138

139+
$headers = [];
140+
141+
if(isset($options['Sforce-Enable-PKChunking']) && $options['Sforce-Enable-PKChunking']){
142+
$headers['Sforce-Enable-PKChunking'] = $this->parsePkChunkingHeader($options['Sforce-Enable-PKChunking']);
143+
}
144+
131145
//order of variables matters so this externalIdFieldName has to come before contentType
132146
if ($operation == 'upsert') {
133147
$json_array['externalIdFieldName'] = $externalIdFieldName;
@@ -136,7 +150,8 @@ public function createJob($operation, $objectType, $externalIdFieldName = null,
136150
$json_array['contentType'] = $contentType;
137151

138152
$result = $this->call_api('post', $url, [
139-
'json' => $json_array,
153+
'json' => $json_array,
154+
'headers' => $headers,
140155
]);
141156

142157
if ($result && is_array($result)) {
@@ -146,11 +161,14 @@ public function createJob($operation, $objectType, $externalIdFieldName = null,
146161
return new BulkJobResponse();
147162
}
148163

149-
public function jobDetails($jobId)
164+
public function jobDetails($jobId, $format='json')
150165
{
151166
$url = '/services/async/'.SalesforceConfig::get('salesforce.api.version').'/job/'.$jobId;
152167

153-
$result = $this->call_api('get', $url);
168+
$result = $this->call_api('get', $url,
169+
[
170+
'format' => $this->batchResponseFormatFromContentType($format),
171+
]);
154172

155173
if ($result && is_array($result)) {
156174
return new BulkJobResponse($result);
@@ -191,7 +209,7 @@ public function closeJob($jobId)
191209
*
192210
* @return BulkBatchResponse
193211
*/
194-
public function addBatch($jobId, $data)
212+
public function addBatch($jobId, $data, $format='json')
195213
{
196214
if (!$jobId) {
197215
//throw exception
@@ -214,6 +232,7 @@ public function addBatch($jobId, $data)
214232
$result = $this->call_api('post', $url, [
215233
'body' => $body,
216234
'headers' => $headers,
235+
'format' => $this->batchResponseFormatFromContentType($format),
217236
]);
218237

219238
if ($result && is_array($result)) {
@@ -226,6 +245,7 @@ public function addBatch($jobId, $data)
226245
/**
227246
* @param $jobId
228247
* @param $batchId
248+
* @param $format
229249
*
230250
* @return BulkBatchResponse
231251
*/
@@ -234,7 +254,7 @@ public function batchDetails($jobId, $batchId, $format = 'json')
234254
$url = '/services/async/'.SalesforceConfig::get('salesforce.api.version').'/job/'.$jobId.'/batch/'.$batchId;
235255

236256
$result = $this->call_api('get', $url, [
237-
'format' => $format,
257+
'format' => $this->batchResponseFormatFromContentType($format),
238258
]);
239259

240260
if ($result && is_array($result)) {
@@ -246,6 +266,37 @@ public function batchDetails($jobId, $batchId, $format = 'json')
246266
return new BulkBatchResponse();
247267
}
248268

269+
/**
 * Fetch the details of every batch that belongs to a bulk job.
 *
 * Used after a PK-chunked query job is created: the batch originally
 * submitted is split by Salesforce into several batches, so the full batch
 * list has to be re-read from the job's "batch" endpoint.
 *
 * @param string $jobId       Id of the bulk job whose batches are listed.
 * @param string $format      Job content type (e.g. 'JSON', 'CSV', 'XML'); it is
 *                            mapped to a response format through
 *                            batchResponseFormatFromContentType().
 * @param int    $waitSeconds Seconds to wait before listing the batches, giving
 *                            Salesforce time to split the initial batch by PK.
 *                            Defaults to the previously hard-coded 10 seconds;
 *                            pass 0 to skip the wait.
 *
 * @return BulkBatchResponse[] One response per batch; empty when the API call
 *                             fails or the response carries no batch info.
 */
public function allBatchDetails($jobId, $format = 'json', $waitSeconds = 10)
{
    //TODO: Fix hack to give initial Salesforce batch time to split into many batches by PK
    if ($waitSeconds > 0) {
        sleep((int) $waitSeconds);
    }

    $url = '/services/async/'.SalesforceConfig::get('salesforce.api.version').'/job/'.$jobId.'/batch';

    $result = $this->call_api('get', $url, [
        'format' => $this->batchResponseFormatFromContentType($format),
    ]);

    if (!$result || !is_array($result) || !isset($result['batchInfo']) || !is_array($result['batchInfo'])) {
        //throw exception
        return [];
    }

    $batchInfo = $result['batchInfo'];

    // A response holding a single batch exposes that batch's fields directly
    // under 'batchInfo' (it has its own 'id' key) instead of a list of batches.
    // Wrap it so the single batch is returned rather than silently dropped.
    if (isset($batchInfo['id'])) {
        $batchInfo = [$batchInfo];
    }

    $batches = [];
    foreach ($batchInfo as $batch) {
        $batches[] = new BulkBatchResponse($batch);
    }

    return $batches;
}
299+
249300
/**
250301
* @param $jobId
251302
* @param $batchId
@@ -261,14 +312,17 @@ public function batchResult($jobId, $batchId, $isBatchedResult = false, $resultI
261312

262313
$url = '/services/async/'.SalesforceConfig::get('salesforce.api.version').'/job/'.$jobId.'/batch/'.$batchId.'/result';
263314

315+
$resultPostArray = [];
316+
264317
//if this is a query result, the main result page will have an array of result ids to follow for the query results
265318
if ($resultId) {
266319
$url = $url.'/'.$resultId;
320+
$resultPostArray['format'] = $format;
321+
}else{
322+
$resultPostArray['format'] = $this->batchResponseFormatFromContentType($format);
267323
}
268324

269-
$result = $this->call_api('get', $url, [
270-
'format' => $format,
271-
]);
325+
$result = $this->call_api('get', $url, $resultPostArray);
272326

273327
if ($result && is_array($result)) {
274328

@@ -277,6 +331,13 @@ public function batchResult($jobId, $batchId, $isBatchedResult = false, $resultI
277331
$result['records'] = [];
278332
}
279333

334+
if(isset($result['result'])){
335+
if(!is_array($result['result'])){
336+
$result['result'] = [$result['result']];
337+
}
338+
$result = array_merge($result, $result['result']);
339+
}
340+
280341
//maximum amount of batch records allowed is 10,000
281342
for ($i = 0; $i < 10000; $i++) {
282343
//skip processing for the rest of the records if they don't exist
@@ -286,7 +347,7 @@ public function batchResult($jobId, $batchId, $isBatchedResult = false, $resultI
286347

287348
//batched results return a list of result ids that need to be processed to get the actual data
288349
if ($isBatchedResult) {
289-
$batchResult = $this->batchResult($jobId, $batchId, false, $result[$i]);
350+
$batchResult = $this->batchResult($jobId, $batchId, false, $result[$i], $format);
290351
$result['records'] = array_merge($result['records'], $batchResult->records);
291352
} else {
292353
//fix boolean values from appearing as
@@ -437,20 +498,15 @@ public function addBinaryBatch($jobId, BinaryBatch $binaryBatch, $contentType =
437498
return new BulkBatchResponse();
438499
}
439500

440-
/* public function binaryBatchResult($jobId, $batchId, $isBatchedResult = false, $resultId = null, $format='json')
441-
{
442-
$result = $this->batchResult($jobId, $batchId, $isBatchedResult, $resultId, $format);
443-
444-
if($result->state)
445-
}
446-
*/
447501
protected function batchResponseFormatFromContentType($contentType)
448502
{
449503
switch (strtoupper($contentType)) {
450504
case 'ZIP_CSV':
451505
case 'ZIP/CSV':
452506
case 'ZIP_XML':
453507
case 'ZIP/XML':
508+
case 'CSV':
509+
case 'XML':
454510
$return = 'xml';
455511
break;
456512
default:
@@ -460,4 +516,19 @@ protected function batchResponseFormatFromContentType($contentType)
460516

461517
return $return;
462518
}
519+
520+
/**
 * Build the value of the Sforce-Enable-PKChunking request header.
 *
 * @param array|bool|string|int|null $pk_chunk_header Either an associative array of
 *        chunking options (e.g. ['chunkSize' => 2500]), which is rendered as
 *        "key=value" pairs joined by "; ", or a boolean-like flag.
 *
 * @return string The "key=value; key=value" option string for arrays, 'TRUE'
 *                for boolean-like true values (true, 1, 'true', 'TRUE', 'on',
 *                'yes'), and 'FALSE' for everything else.
 */
protected function parsePkChunkingHeader($pk_chunk_header)
{
    if (is_array($pk_chunk_header)) {
        $header_parts = [];
        foreach ($pk_chunk_header as $key => $value) {
            $header_parts[] = $key.'='.$value;
        }

        return implode('; ', $header_parts);
    }

    // Parse the flag explicitly. A loose in_array() against a list containing
    // boolean true matches every truthy value, so strings such as 'false' or
    // 'no' would wrongly enable chunking.
    if (is_scalar($pk_chunk_header) && filter_var($pk_chunk_header, FILTER_VALIDATE_BOOLEAN)) {
        return 'TRUE';
    }

    return 'FALSE';
}
463534
}

src/Salesforce.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ public function call_api($method, $url, $options = [], $debug_info = [])
468468

469469
$format = 'json';
470470
if (isset($options['format'])) {
471-
$format = $options['format'];
471+
$format = strtolower($options['format']);
472472
unset($options['format']);
473473
}
474474

0 commit comments

Comments
 (0)