Skip to content

Commit

Permalink
Merged PR 55159: Sync contacts via JSON instead of CSV
Browse files Browse the repository at this point in the history
## What's being changed

CSV file-based data transfer for the Customer, Guest and Subscriber syncs is being replaced with JSON data. The 'export' phase of each sync will now return an array of hydrated SDK contact objects, ready for V3 API import.

Various optimisations for speed and efficiency have been included:
- `import_data` is now LONGTEXT to allow storing of larger JSON blobs
- mega batches that would become too large for a single API request will be completed and processed before they exceed the size limit
- any usage of abstract classes or other inheritance in these syncs is being refactored out, in line with general Magento design principles
- the aim is to consolidate all merging and processing of mega batches in single classes (MergeManager and MegaBatchProcessor)

## Why it's being changed

V3 of our API accepts bulk JSON imports for contacts including data fields.

## How to review / test this change

- Test batching of customer, guest and subscriber syncs
- Test batch processing including data transfer to Dotdigital and the marking of rows as imported
- Test data field collection when pre-sending contacts in automation sync
- Old-style imports of CSV queued at point of upgrade must still be processed and be able to be retried
- Old-style CSV imports that are in progress must still be processed as previously
- Test resetting imports and retrying
- Cleaner cron must still remove any remaining archive folder

_Test PayloadTooLargeException_
- adjust down your contact mega batch size
- set the MEGA_BATCH_SIZE_BYTE_LIMIT_API const to e.g. 4000
- run the sync to test
- mega batches should be built only so long as their prospective size remains under the limit

## Notes

- During these syncs, rows must be marked as imported even if the JSON import was rejected, otherwise the offset won't work correctly in the export

Related work items: #236430, #260229
  • Loading branch information
sta1r committed Jul 22, 2024
1 parent 1eb7260 commit aba9d03
Show file tree
Hide file tree
Showing 60 changed files with 2,241 additions and 664 deletions.
23 changes: 17 additions & 6 deletions Helper/File.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
/**
* Creates the csv files in export folder and move to archive when it's complete.
* Log info and debug to a custom log file connector.log
*
* @deprecated CSV data transfer is replaced with JSON.
* @see \Dotdigitalgroup\Email\Model\Sync\Batch\MegaBatchProcessor
*/
class File
{
Expand Down Expand Up @@ -87,8 +90,18 @@ private function getOutputFolder()
*/
public function getArchiveFolder()
{
$this->createDirectoryIfNotExists($this->outputArchiveFolder);
return $this->outputArchiveFolder;
}

/**
* Get archive folder, running the directory-creation helper on it first.
*
* Passes the archive folder path to createDirectoryIfNotExists() and then
* returns that same path.
*
* @return string The archive folder path held in $this->outputArchiveFolder.
* @throws FileSystemException
*/
public function getArchiveFolderCreatingIfNotExists()
{
$this->createDirectoryIfNotExists($this->outputArchiveFolder);
return $this->outputArchiveFolder;
}

Expand Down Expand Up @@ -116,7 +129,7 @@ public function archiveCSV($filename)
{
$this->moveFile(
$this->getOutputFolder(),
$this->getArchiveFolder(),
$this->getArchiveFolderCreatingIfNotExists(),
$filename
);
}
Expand Down Expand Up @@ -147,8 +160,6 @@ private function moveFile($sourceFolder, $destFolder, $filename)
* Open for writing only; place the file pointer at the end of the file.
* If the file does not exist, attempt to create it.
*
* @deprecated use Magento\Framework\Filesystem\DriverInterface::filePutCsv instead
* @see Dotdigitalgroup\Email\Model\Sync\Batch\AbstractBatchProcessor::sendDataToFile
* @param string $filepath
* @param array $csv
*
Expand Down Expand Up @@ -259,7 +270,7 @@ public function getLogFileContent($filename = 'connector')
public function getFilePathWithFallback($filename)
{
$emailPath = $this->getOutputFolder() . DIRECTORY_SEPARATOR . $filename;
$archivePath = $this->getArchiveFolder() . DIRECTORY_SEPARATOR . $filename;
$archivePath = $this->getArchiveFolderCreatingIfNotExists() . DIRECTORY_SEPARATOR . $filename;
return $this->driver->isFile($emailPath) ? $emailPath : $archivePath;
}

Expand All @@ -285,7 +296,7 @@ public function isFile($filepath)
public function isFilePathExistWithFallback($filename)
{
$emailPath = $this->getOutputFolder() . DIRECTORY_SEPARATOR . $filename;
$archivePath = $this->getArchiveFolder() . DIRECTORY_SEPARATOR . $filename;
$archivePath = $this->getArchiveFolderCreatingIfNotExists() . DIRECTORY_SEPARATOR . $filename;
return $this->driver->isFile($emailPath) ? true : ($this->driver->isFile($archivePath) ? true : false);
}

Expand Down
3 changes: 3 additions & 0 deletions Model/Apiconnector/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ public function getContactById($id)
*
* @return mixed
* @throws \Magento\Framework\Exception\LocalizedException
*
* @deprecated We no longer post csv files to Dotdigital.
* @see \Dotdigitalgroup\Email\Model\Sync\Importer\Type\Contact\BulkJson
*/
public function postAddressBookContactsImport($filename, $addressBookId)
{
Expand Down
11 changes: 11 additions & 0 deletions Model/Connector/ContactData.php
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,20 @@ public function init(AbstractModel $model, array $columns)
{
$this->model = $model;
$this->columns = $columns;
$this->contactData = [];
return $this;
}

/**
* Get contact data.
*
* Returns the current value of the contactData property without modifying it.
*
* @return array
*/
public function getContactData()
{
return $this->contactData;
}

/**
* Set column data on the customer model.
*
Expand Down
18 changes: 16 additions & 2 deletions Model/Cron/Cleaner.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ public function run(): void
$this->cleanTable($table, $dateColumn);
}

$archivedFolder = $this->fileHelper->getArchiveFolder();
$this->fileHelper->deleteDir($archivedFolder);
$this->cleanUpCsvArchiveFolder();
}

/**
Expand Down Expand Up @@ -164,4 +163,19 @@ public function getTableCleanerInterval(): string
{
return (string) $this->scopeConfig->getValue(Config::XML_PATH_CRON_SCHEDULE_TABLE_CLEANER_INTERVAL);
}

/**
 * Remove the CSV archive folder.
 *
 * Asks the file helper for the archive folder path and hands that path
 * straight to the helper's deleteDir().
 *
 * @return void
 * @throws FileSystemException
 *
 * @deprecated CSV files are no longer used.
 * @see \Dotdigitalgroup\Email\Model\Sync\Importer\Type\Contact\BulkJson
 */
private function cleanUpCsvArchiveFolder()
{
    $this->fileHelper->deleteDir(
        $this->fileHelper->getArchiveFolder()
    );
}
}
97 changes: 85 additions & 12 deletions Model/Importer.php
Original file line number Diff line number Diff line change
@@ -1,10 +1,23 @@
<?php

declare(strict_types=1);

namespace Dotdigitalgroup\Email\Model;

use Dotdigitalgroup\Email\Helper\Data;
use Dotdigitalgroup\Email\Model\ResourceModel\Importer\CollectionFactory;
use InvalidArgumentException;
use Magento\Framework\Data\Collection\AbstractDb;
use Magento\Framework\Exception\AlreadyExistsException;
use Magento\Framework\Exception\CouldNotSaveException;
use Magento\Framework\Model\AbstractModel;
use Magento\Framework\Model\Context;
use Magento\Framework\Model\ResourceModel\AbstractResource;
use Magento\Framework\Registry;
use Magento\Framework\Serialize\SerializerInterface;
use Magento\Framework\Stdlib\DateTime;

class Importer extends \Magento\Framework\Model\AbstractModel
class Importer extends AbstractModel
{
public const NOT_IMPORTED = 0;
public const IMPORTING = 1;
Expand Down Expand Up @@ -66,23 +79,23 @@ class Importer extends \Magento\Framework\Model\AbstractModel
private $serializer;

/**
* @var \Dotdigitalgroup\Email\Helper\Data
* @var Data
*/
private $helper;

/**
* Importer constructor.
*
* @param \Magento\Framework\Model\Context $context
* @param \Magento\Framework\Registry $registry
* @param Context $context
* @param Registry $registry
* @param ResourceModel\Importer $importerResource
* @param ResourceModel\Importer\CollectionFactory $importerCollection
* @param \Magento\Framework\Stdlib\DateTime $dateTime
* @param CollectionFactory $importerCollection
* @param DateTime $dateTime
* @param SerializerInterface $serializer
* @param \Dotdigitalgroup\Email\Helper\Data $helper
* @param Data $helper
* @param array $data
* @param \Magento\Framework\Model\ResourceModel\AbstractResource|null $resource
* @param \Magento\Framework\Data\Collection\AbstractDb|null $resourceCollection
* @param AbstractResource|null $resource
* @param AbstractDb|null $resourceCollection
*/
public function __construct(
\Magento\Framework\Model\Context $context,
Expand All @@ -91,7 +104,7 @@ public function __construct(
ResourceModel\Importer\CollectionFactory $importerCollection,
\Magento\Framework\Stdlib\DateTime $dateTime,
SerializerInterface $serializer,
\Dotdigitalgroup\Email\Helper\Data $helper,
Data $helper,
array $data = [],
\Magento\Framework\Model\ResourceModel\AbstractResource $resource = null,
\Magento\Framework\Data\Collection\AbstractDb $resourceCollection = null
Expand Down Expand Up @@ -144,6 +157,9 @@ public function beforeSave()
* @param string $message
*
* @return bool
*
* @deprecated See newer method
* @see addToImporterQueue
*/
public function registerQueue(
$importType,
Expand Down Expand Up @@ -197,6 +213,56 @@ public function registerQueue(
return false;
}

/**
 * Queue an import row built from the supplied data.
 *
 * Nothing is saved when $retryCount is exactly 3 or when $importData is
 * empty. Otherwise $importData is serialized, all values are set on this
 * model, and the model is saved through the importer resource.
 *
 * @param string $importType
 * @param array $importData Data to serialize into the import_data value.
 * @param string $importMode
 * @param int $websiteId
 * @param int $retryCount
 * @param int $importStatus
 * @param string $importId
 * @param string $message
 * @param string $importStarted
 *
 * @return void
 * @throws AlreadyExistsException
 */
public function addToImporterQueue(
    string $importType,
    array $importData,
    string $importMode,
    int $websiteId,
    int $retryCount = 0,
    int $importStatus = 0,
    string $importId = '',
    string $message = '',
    string $importStarted = ''
): void {
    // Both bail-out conditions are checked up front, before any state is touched.
    if ($retryCount === 3 || $importData === []) {
        return;
    }

    // Serialize before setting anything so a serializer failure leaves the model unchanged.
    $payload = $this->serializer->serialize($importData);

    $this->setImportType($importType)
        ->setImportData($payload)
        ->setImportMode($importMode)
        ->setWebsiteId($websiteId)
        ->setRetryCount($retryCount)
        ->setImportStatus($importStatus)
        ->setImportId($importId)
        ->setMessage($message)
        ->setImportStarted($importStarted);

    $this->importerResource->save($this);
}

/**
* Saves item.
*
Expand Down Expand Up @@ -236,12 +302,19 @@ public function _getImportingItems($websiteIds)
* @param string $importMode
* @param int $limit
* @param array $websiteIds
* @param bool $useFile
*
* @return \Dotdigitalgroup\Email\Model\ResourceModel\Importer\Collection
*/
public function _getQueue($importType, $importMode, $limit, $websiteIds)
public function _getQueue($importType, $importMode, $limit, $websiteIds, $useFile = false)
{
return $this->importerCollection->create()
->getQueueByTypeAndMode($importType, $importMode, $limit, $websiteIds);
->getQueueByTypeAndMode(
$importType,
$importMode,
$limit,
$websiteIds,
$useFile
);
}
}
14 changes: 12 additions & 2 deletions Model/ResourceModel/Importer/Collection.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ public function reset()
*
* @param array $websiteIds
* @param array $types
* @param bool $useFile
*
* @return $this|boolean
*/
public function getItemsWithImportingStatus($websiteIds, array $types)
public function getItemsWithImportingStatus($websiteIds, array $types, bool $useFile = false)
{
$collection = $this->addFieldToFilter(
'import_status',
Expand All @@ -62,6 +63,10 @@ public function getItemsWithImportingStatus($websiteIds, array $types)

$this->addFieldToFilter('import_type', $importTypeFilter);

if ($useFile) {
$this->addFieldToFilter('import_file', ['neq' => '']);
}

if ($collection->getSize()) {
return $collection;
}
Expand All @@ -76,10 +81,11 @@ public function getItemsWithImportingStatus($websiteIds, array $types)
* @param string|array $importMode
* @param int $limit
* @param array $websiteIds
* @param bool $useFile
*
* @return $this
*/
public function getQueueByTypeAndMode($importType, $importMode, $limit, $websiteIds)
public function getQueueByTypeAndMode($importType, $importMode, $limit, $websiteIds, $useFile = false)
{
if (is_array($importType)) {
$condition = [];
Expand All @@ -106,6 +112,10 @@ public function getQueueByTypeAndMode($importType, $importMode, $limit, $website

$this->addFieldToFilter('website_id', ['in' => $websiteIds]);

if ($useFile) {
$this->addFieldToFilter('import_file', ['neq' => '']);
}

$this->setPageSize($limit)
->setCurPage(1);

Expand Down
4 changes: 4 additions & 0 deletions Model/Sync/AbstractContactSyncer.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

use Magento\Framework\DataObject;

/**
* @deprecated We now have a dedicated MergeManager class for merging batches.
* @see \Dotdigitalgroup\Email\Model\Sync\Batch\MergeManager
*/
abstract class AbstractContactSyncer extends DataObject
{
/**
Expand Down
Loading

0 comments on commit aba9d03

Please sign in to comment.