use DataReader\Config\BaseConfig;
class ValidatedUserConfig extends BaseConfig
{
/**
 * Registers the row validators and the column-to-field mapping.
 *
 * Two validators are added: one requires a syntactically valid e-mail
 * address, the other a numeric age of at least 18.
 */
public function __construct()
{
    // Multiple validators
    $this->addValidator(function ($item): bool {
        return isset($item['email'])
            && filter_var($item['email'], FILTER_VALIDATE_EMAIL) !== false;
    });
    $this->addValidator(function ($item): bool {
        // Require a numeric value first: on PHP 8 a non-numeric string such
        // as "abc" is compared with 18 as a string and would pass ">= 18".
        return isset($item['age'])
            && is_numeric($item['age'])
            && $item['age'] >= 18;
    });

    // Field mapping: positional column index => field name
    $this->setFieldMapping([
        0 => 'name',
        1 => 'email',
        2 => 'age',
        3 => 'country',
    ]);
}
/**
 * Maps a raw row to named fields, validates it and normalises the values.
 *
 * @param mixed $item Raw row as delivered by the resource.
 * @return array|null Normalised record, or null when validateItem() rejects the row.
 */
public function configureItem($item): ?array
{
    $mapped = $this->mapFields($item);

    // Skip invalid items
    if (!$this->validateItem($mapped)) {
        return null;
    }

    // The validators registered in the constructor only check email and age,
    // so name and country may be absent; default them instead of passing
    // null to the string functions.
    $name = (string) ($mapped['name'] ?? '');
    $country = (string) ($mapped['country'] ?? '');
    $age = (int) $mapped['age'];

    return [
        'name' => ucwords(strtolower($name)),
        'email' => strtolower($mapped['email']),
        'age' => $age,
        'country' => strtoupper($country),
        // Derived from the integer age so the flag always agrees with 'age'.
        'is_adult' => $age >= 18,
    ];
}
/**
 * Handles the first row of the input.
 *
 * Always returns false; per the inline note this is how the header row is skipped.
 *
 * @param mixed $item First row (not inspected).
 * @return false
 */
public function configureFirstItem($item)
{
return false; // Skip headers
}
}class MultiStepConfig implements ConfigInterface
{
private array $processors = [];
/**
 * Appends a processing step to the end of the pipeline.
 *
 * @param callable $processor Step that configureItem() invokes with the current item value.
 * @return self This instance, so calls can be chained.
 */
public function addProcessor(callable $processor): self
{
$this->processors[] = $processor;
return $this;
}
/**
 * Runs the item through every registered processor, in registration order.
 *
 * Each processor receives the previous processor's result. A processor that
 * returns null stops the chain and null is returned for the item.
 *
 * @param mixed $item Raw item.
 * @return array|null Result of the last processor, or null when a processor rejected the item.
 */
public function configureItem($item): ?array
{
    $result = $item;
    foreach ($this->processors as $processor) {
        $result = $processor($result);
        if ($result === null) {
            // Skip this item. The return type has to be nullable for this:
            // with a plain "array" return type PHP raises a TypeError here.
            return null;
        }
    }

    return $result;
}
/**
 * Handles the first row of the input; this implementation always returns false.
 *
 * @param mixed $item First row (not inspected).
 * @return false
 */
public function configureFirstItem($item)
{
return false;
}
}
// Usage: build a three-step pipeline. Each closure receives the value returned
// by the previous one; configureItem() stops at the first step that returns null.
$config = new MultiStepConfig();
$config->addProcessor(function($item) {
// Step 1: Clean data - trim() is applied to every element of the row
return array_map('trim', $item);
})
->addProcessor(function($item) {
// Step 2: Validate - pass the row on only when column 1 is a valid e-mail,
// otherwise return null so the row is skipped
return filter_var($item[1], FILTER_VALIDATE_EMAIL) ? $item : null;
})
->addProcessor(function($item) {
// Step 3: Transform - turn the positional row into a named record
return [
'name' => $item[0],
'email' => strtolower($item[1]),
// Timestamp taken when the row is processed
'created' => date('Y-m-d H:i:s')
];
});class StreamingConfig extends BaseConfig
{
private int $processed = 0;
private int $memoryLimit;
/**
 * @param int $memoryLimitMB Memory threshold in megabytes (default 128).
 */
public function __construct(int $memoryLimitMB = 128)
{
// Stored in bytes so it can be compared directly with memory_get_usage().
$this->memoryLimit = $memoryLimitMB * 1024 * 1024;
}
/**
 * Counts the item, checks memory usage on every 1000th item and then
 * delegates to processItem().
 *
 * When the usage measured at a checkpoint exceeds the configured limit, a
 * garbage-collection cycle is triggered and the measured usage is logged.
 *
 * @param mixed $item Raw item.
 * @return array Result of processItem().
 */
public function configureItem($item): array
{
    $count = ++$this->processed;

    // Memory management: only every 1000th item is a checkpoint.
    if ($count % 1000 !== 0) {
        return $this->processItem($item);
    }

    $bytes = memory_get_usage(true);
    if ($bytes > $this->memoryLimit) {
        gc_collect_cycles();
        // The logged figure is the usage measured before the collection ran.
        $megabytes = round($bytes / 1024 / 1024, 2);
        error_log("Memory usage: " . $megabytes . "MB after processing {$this->processed} items");
    }

    return $this->processItem($item);
}
/**
 * Builds the output record from the first two columns of the row.
 *
 * @param mixed $item Row; elements 0 and 1 are read.
 * @return array Record with 'id', 'data' and the current Unix time as 'processed_at'.
 */
private function processItem($item): array
{
    // Your processing logic here
    $record = [];
    $record['id'] = $item[0];
    $record['data'] = $item[1];
    $record['processed_at'] = time();

    return $record;
}
}use DataReader\Resource\Resource;
use DataReader\ResourceInterface;
use DataReader\ConfigInterface;
class PaginatedApiResource extends Resource implements ResourceInterface
{
private string $baseUrl;
private int $perPage;
private array $headers;
/**
 * @param string $baseUrl URL that apply() extends with the page and per_page parameters.
 * @param int    $perPage Value sent as per_page; apply() also uses it to detect the last page.
 * @param array  $headers Header lines; makeRequest() joins them with CRLF for the HTTP context.
 */
public function __construct(string $baseUrl, int $perPage = 100, array $headers = [])
{
$this->baseUrl = $baseUrl;
$this->perPage = $perPage;
$this->headers = $headers;
}
/**
 * Fetches every page from the API, runs each item through the config and
 * stores the accepted results.
 *
 * Paging stops when a page has no usable "items" list or contains fewer
 * than perPage items. Items for which the config returns null are skipped.
 *
 * @param ConfigInterface $config Transformation applied to every item.
 * @return array Processed items in the order they were received.
 */
public function apply(ConfigInterface $config): array
{
    $allItems = [];
    $page = 1;

    // Append with "&" when the base URL already carries a query string;
    // otherwise the request would contain two "?" and lose its parameters.
    $separator = strpos($this->baseUrl, '?') === false ? '?' : '&';

    do {
        $url = $this->baseUrl . $separator . "page={$page}&per_page={$this->perPage}";
        $response = $this->makeRequest($url);
        $data = json_decode($response, true);

        // Treat undecodable JSON or a missing/non-list "items" entry like an
        // empty page, so foreach and count() below only ever see an array.
        $items = is_array($data) ? ($data['items'] ?? null) : null;
        if (!is_array($items) || $items === []) {
            break;
        }

        foreach ($items as $item) {
            $processed = $config->configureItem($item);
            if ($processed !== null) {
                $allItems[] = $processed;
            }
        }

        $page++;
        // A full page means there may be more; a short page is the last one.
    } while (count($items) === $this->perPage);

    $this->setData($allItems);

    return $this->getData();
}
/**
 * Performs a GET request and returns the raw response body.
 *
 * @param string $url Fully built request URL.
 * @return string Response body.
 * @throws \DataReader\Exception\ResourceException When the request fails.
 */
private function makeRequest(string $url): string
{
    $context = stream_context_create([
        'http' => [
            'method' => 'GET',
            // HTTP header lines are separated by CRLF.
            'header' => implode("\r\n", $this->headers),
        ],
    ]);

    $result = file_get_contents($url, false, $context);
    if ($result === false) {
        // Fully qualified because this example does not import
        // ResourceException; the bare name would not resolve.
        throw new \DataReader\Exception\ResourceException("Failed to fetch data from: {$url}");
    }

    return $result;
}
/**
 * Stores the processed items on the resource.
 *
 * NOTE(review): $data is not declared in this class; presumably it is a property of Resource - confirm.
 *
 * @param mixed $data Items to store.
 */
public function setData($data): void
{
$this->data = $data;
}
/**
 * Returns the stored items, or an empty array when nothing has been stored yet.
 *
 * @return array
 */
public function getData(): array
{
return $this->data ?? [];
}
}use PHPUnit\Framework\TestCase;
use DataReader\Reader;
use DataReader\Resource\ArrayData;
use DataReader\Output\Json;
class ReaderTest extends TestCase
{
/**
 * Two positional rows go in; the test expects two named records in the JSON output.
 */
public function testBasicDataProcessing(): void
{
    $data = [
        ['John', 'john@example.com', '30'],
        ['Jane', 'jane@example.com', '25'],
    ];

    // Fully qualified because this example only imports TestCase, Reader,
    // ArrayData and Json; the bare interface name would not resolve.
    $config = new class implements \DataReader\ConfigInterface {
        public function configureItem($item): array
        {
            return [
                'name' => $item[0],
                'email' => $item[1],
                'age' => (int) $item[2],
            ];
        }

        public function configureFirstItem($item)
        {
            // There is no header row here, so the first row is regular data.
            return $this->configureItem($item);
        }
    };

    $reader = new Reader(
        new ArrayData($data),
        new Json(),
        $config
    );

    $result = $reader->run();
    $decoded = json_decode($result, true);

    $this->assertCount(2, $decoded);
    // assertSame also pins the type: 'age' must come back as an integer.
    $this->assertSame('John', $decoded[0]['name']);
    $this->assertSame(30, $decoded[0]['age']);
}
}

1. Memory Exhaustion with Large Files
// Solution: Use chunked processing
ini_set('memory_limit', '512M');
class ChunkedFileProcessor
{
/**
 * Reads a CSV file line by line and processes it in chunks of rows.
 *
 * @param string $filename  Path of the CSV file.
 * @param int    $chunkSize Number of rows collected before a chunk is processed.
 * @throws \RuntimeException When the file cannot be opened.
 */
public function processFile(string $filename, int $chunkSize = 1000): void
{
    $handle = fopen($filename, 'r');
    if ($handle === false) {
        // Without this check fgetcsv() would receive false and raise a TypeError.
        throw new \RuntimeException("Unable to open file: {$filename}");
    }

    try {
        $chunk = [];
        while (($line = fgetcsv($handle)) !== false) {
            $chunk[] = $line;
            if (count($chunk) >= $chunkSize) {
                $this->processChunk($chunk);
                $chunk = [];
                gc_collect_cycles();
            }
        }

        // Process the final, partially filled chunk.
        if ($chunk !== []) {
            $this->processChunk($chunk);
        }
    } finally {
        // Close the handle even when processing a chunk throws.
        fclose($handle);
    }
}
/**
 * Runs one chunk of rows through a fresh Reader and echoes the formatted result.
 *
 * @param array $chunk Rows collected by processFile().
 */
private function processChunk(array $chunk): void
{
    $resource = new ArrayData($chunk);
    $output = new Json();
    $config = new MyConfig();

    $reader = new Reader($resource, $output, $config);

    echo $reader->run();
}
}

2. Character Encoding Issues
class EncodingAwareConfig extends BaseConfig
{
private string $encoding;
/**
 * @param string $encoding Encoding of the incoming data; configureItem() converts from it to UTF-8.
 */
public function __construct(string $encoding = 'UTF-8')
{
$this->encoding = $encoding;
}
/**
 * Converts every string value of the item to UTF-8 and passes the item on
 * to processItem().
 *
 * NOTE(review): processItem() is not defined in this class; presumably it
 * has to be supplied by the parent class or by the implementer - confirm.
 *
 * @param mixed $item Row whose string values use the configured source encoding.
 * @return array Result of processItem().
 */
public function configureItem($item): array
{
    // Convert encoding; non-string values are left untouched.
    foreach ($item as $key => $value) {
        if (!is_string($value)) {
            continue;
        }

        $converted = mb_convert_encoding($value, 'UTF-8', $this->encoding);
        $item[$key] = $converted;
    }

    return $this->processItem($item);
}
}

3. Invalid Data Handling
class RobustConfig extends BaseConfig
{
/**
 * Builds a cleaned record from a positional row, skipping rows that cannot
 * be processed.
 *
 * @param mixed $item Row; columns 0 (name) and 1 (email) are required, column 2 (age) is optional.
 * @return array|null Cleaned record, or null when a required column is empty
 *                    or processing the row fails.
 */
public function configureItem($item): ?array
{
    try {
        // Validate required fields
        if (empty($item[0]) || empty($item[1])) {
            return null; // Skip invalid records
        }

        return [
            'name' => $this->sanitizeString($item[0]),
            'email' => $this->validateEmail($item[1]),
            'age' => $this->parseAge($item[2] ?? null),
        ];
    } catch (\Exception | \TypeError $e) {
        // The helpers take ?string arguments, so a non-string cell (for
        // example a nested array) fails with a TypeError, which is not an
        // \Exception. Catch it too so one bad row is skipped, not fatal.
        error_log("Error processing item: " . json_encode($item) . " - " . $e->getMessage());

        return null;
    }
}
/**
 * Strips HTML/PHP tags and surrounding whitespace; null is treated as an empty string.
 *
 * @param string|null $value Raw value.
 * @return string Cleaned value.
 */
private function sanitizeString(?string $value): string
{
    $raw = $value ?? '';
    $withoutTags = strip_tags($raw);

    return trim($withoutTags);
}
/**
 * Returns the address when it passes FILTER_VALIDATE_EMAIL, otherwise null.
 *
 * @param string|null $email Candidate address.
 * @return string|null
 */
private function validateEmail(?string $email): ?string
{
    $validated = filter_var($email, FILTER_VALIDATE_EMAIL);
    if (!$validated) {
        return null;
    }

    return $validated;
}
/**
 * Converts a raw value to an age.
 *
 * @param mixed $value Raw cell value.
 * @return int|null Age between 1 and 149, or null for blank or out-of-range input.
 */
private function parseAge($value): ?int
{
    $isBlank = ($value === null) || ($value === '');
    if ($isBlank) {
        return null;
    }

    $years = (int) $value;
    if ($years <= 0 || $years >= 150) {
        return null;
    }

    return $years;
}
}

- Complete ArrayData implementation
- Add comprehensive error handling with custom exceptions
- Implement XML and CSV output formats
- Create BaseConfig class with field mapping and validation
- Add factory methods for common use cases
- Add unit tests with PHPUnit
- Implement streaming support for large files (>100MB)
- Add Excel file format support (.xlsx, .xls)
- Implement caching mechanisms for processed data
- Add batch processing capabilities
- Create CLI tool for command-line usage
- Add data transformation pipelines
- Implement async processing support
- Add database resource connectors (MySQL, PostgreSQL, SQLite)
- Implement API resource connectors (REST, GraphQL)
- Add data validation rule system
- Create visual data mapping interface
- Add support for nested data structures
- Implement data diff and merge capabilities
- Plugin system for third-party extensions
- Web-based data transformation UI
- Integration with popular frameworks (Laravel, Symfony)
- Performance optimization for big data processing
- Machine learning integration for data analysis

# Data Reader
A flexible and robust PHP library for reading, processing, and outputting data from various sources with configurable transformation pipelines, validation, and multiple output formats.
- Multiple Data Sources: Support for files (CSV, JSON, XML), arrays, and extensible resource types
- Configurable Processing: Transform and validate data during reading with custom configuration classes
- Multiple Output Formats: JSON, XML, CSV output with customizable options
- Robust Error Handling: Custom exceptions and comprehensive validation
- Clean Architecture: Interface-driven design following SOLID principles
- Type Safety: Full PHP 7.4+ type hints and strict typing
- Easy Integration: Simple fluent API for chaining operations
- Factory Methods: Quick setup for common use cases
- Field Mapping & Validation: Built-in support for data transformation and validation
Install via Composer:
composer require resoul/data-reader

<?php
require_once 'vendor/autoload.php';
use DataReader\Factory\ReaderFactory;
// Quick setup with factory methods
$reader = ReaderFactory::createCsvReader('data.csv');
// Or manual setup with custom configuration
use DataReader\Reader;
use DataReader\Resource\File;
use DataReader\Resource\File\CSV;
use DataReader\Output\Json;
use DataReader\Config\BaseConfig;
/**
 * Example configuration that turns positional CSV rows into user records.
 */
class UserDataConfig extends BaseConfig
{
    /**
     * Converts one row into a named user record.
     *
     * @param mixed $item Row with id, name, email, age and creation date in columns 0-4.
     * @return array Record with typed values; 'created_at' is a DateTime instance.
     */
    public function configureItem($item): array
    {
        $record = [];
        $record['id'] = (int) $item[0];
        $record['name'] = trim($item[1]);
        $record['email'] = strtolower($item[2]);
        $record['age'] = (int) $item[3];
        $record['created_at'] = new DateTime($item[4]);

        return $record;
    }

    /**
     * Handles the first row; returning false skips it as the header row.
     *
     * @param mixed $item First row (not inspected).
     * @return false
     */
    public function configureFirstItem($item)
    {
        // Skip header row
        return false;
    }
}
$reader = new Reader();
$reader->resource(new File('users.csv', new CSV()));
$reader->config(new UserDataConfig());
$reader->output(new Json(JSON_PRETTY_PRINT));
try {
$processedData = $reader->run();
echo $processedData; // JSON output
} catch (\DataReader\Exception\DataReaderException $e) {
echo "Error: " . $e->getMessage();
}

The main orchestrator class that coordinates resource reading, data configuration, and output formatting.
$reader = new Reader($resource, $output, $config);
// or use fluent interface
$reader->resource($resource)
->config($config)
->output($output)
->run();

Data sources that implement ResourceInterface:
- File: Read from files with multiple format handlers (CSV, JSON, XML)
- ArrayData: Process in-memory arrays with full configuration support
- Custom: Extend the Resource class for databases, APIs, or other sources
Built-in support for multiple file formats:
- CSV: Comma-separated values with configurable delimiters
- JSON: JavaScript Object Notation with error handling
- XML: Extensible Markup Language with customizable element mapping
Transform and validate data using ConfigInterface or extend BaseConfig:
use DataReader\Config\BaseConfig;
class ProductConfig extends BaseConfig
{
/**
 * Registers the column-to-field mapping and the price validator.
 */
public function __construct()
{
    // Set up field mapping: positional column index => field name
    $this->setFieldMapping([
        0 => 'name',
        1 => 'price',
        2 => 'category',
    ]);

    // Add validators
    $this->addValidator(function ($item): bool {
        // Require a numeric value first: on PHP 8 a non-numeric string such
        // as "abc" is compared with 0 as a string and would pass "> 0".
        return isset($item['price'])
            && is_numeric($item['price'])
            && $item['price'] > 0;
    });
}
/**
 * Maps and validates a row, then returns the normalised product record.
 *
 * @param mixed $item Raw row.
 * @return array Record with 'name', 'price', 'category' and 'in_stock'.
 * @throws InvalidArgumentException When validateItem() rejects the mapped row.
 */
public function configureItem($item): array
{
    $mapped = $this->mapFields($item);

    $isValid = $this->validateItem($mapped);
    if (!$isValid) {
        throw new InvalidArgumentException('Invalid item data');
    }

    $name = trim($mapped['name']);
    $price = (float) $mapped['price'];
    $category = strtoupper($mapped['category']);
    $inStock = $mapped['price'] > 0;

    return [
        'name' => $name,
        'price' => $price,
        'category' => $category,
        'in_stock' => $inStock,
    ];
}
/**
 * Handles the first row of the input; this implementation always returns false.
 *
 * @param mixed $item First row (not inspected).
 * @return false
 */
public function configureFirstItem($item)
{
// Skip header or process first row
return false;
}
}

Multiple output formats with customizable options:
- Json: JSON with formatting options
- XML: XML with custom root and item elements
- CSV: CSV with configurable delimiters and enclosures
use DataReader\Factory\ReaderFactory;
// CSV with default JSON output
$users = ReaderFactory::createCsvReader('users.csv')
->config(new UserConfig())
->run();
// JSON file processing
$products = ReaderFactory::createJsonReader('products.json')
->config(new ProductConfig())
->run();
// Array data processing
$data = [['name' => 'John', 'age' => 30], ['name' => 'Jane', 'age' => 25]];
$processed = ReaderFactory::createArrayReader($data)
->config(new PersonConfig())
->run();use DataReader\Reader;
use DataReader\Resource\File;
use DataReader\Resource\File\CSV;
use DataReader\Output\Json;
/**
 * Example configuration for a users CSV file with a header row.
 */
class UserConfig implements \DataReader\ConfigInterface
{
    /**
     * Converts one row into a named user record.
     *
     * @param mixed $item Row with id, name, email and creation date in columns 0-3.
     * @return array Record; 'email' is false when the address fails validation,
     *               'created_at' is a DateTime instance.
     */
    public function configureItem($item): array
    {
        $id = (int) $item[0];
        $name = trim($item[1]);
        $email = filter_var($item[2], FILTER_VALIDATE_EMAIL);
        $createdAt = new DateTime($item[3]);

        return [
            'id' => $id,
            'name' => $name,
            'email' => $email,
            'created_at' => $createdAt,
        ];
    }

    /**
     * Handles the first row; returning false skips it as the header row.
     *
     * @param mixed $item First row (not inspected).
     * @return false
     */
    public function configureFirstItem($item)
    {
        // Skip header row
        return false;
    }
}
$reader = new Reader(
new File('users.csv', new CSV()),
new Json(JSON_PRETTY_PRINT),
new UserConfig()
);
try {
$users = $reader->run();
echo $users; // Pretty-printed JSON
} catch (\DataReader\Exception\ResourceException $e) {
echo "File error: " . $e->getMessage();
}use DataReader\Resource\File\JSON;
$reader = new Reader(
new File('data.json', new JSON()),
new Json(),
new DataConfig()
);
$data = $reader->run();use DataReader\Resource\File\XML;
// XML with custom item tag
$reader = new Reader(
new File('products.xml', new XML('product')), // item tag = 'product'
new Json(),
new ProductConfig()
);
$products = $reader->run();use DataReader\Resource\ArrayData;
use DataReader\Config\BaseConfig;
/**
 * Example configuration for associative product rows.
 *
 * Renames the source keys, rejects rows without a positive numeric price
 * and derives an 'in_stock' flag from the stock quantity.
 */
class ProductConfig extends BaseConfig
{
    public function __construct()
    {
        // Set up field mapping: source key => field name
        $this->setFieldMapping([
            'product_name' => 'name',
            'price' => 'price',
            'quantity' => 'stock',
        ]);

        // Add validation
        $this->addValidator(function ($item): bool {
            // Require a numeric value first: on PHP 8 a non-numeric string
            // is compared with 0 as a string and would pass "> 0".
            return isset($item['price'])
                && is_numeric($item['price'])
                && $item['price'] > 0;
        });
    }

    /**
     * Maps and validates a row.
     *
     * @param mixed $item Raw associative row.
     * @return array|null Product record, or null when validateItem() rejects the row.
     */
    public function configureItem($item): ?array
    {
        $mapped = $this->mapFields($item);

        if (!$this->validateItem($mapped)) {
            // Skip invalid items. The return type has to be nullable for
            // this: with a plain "array" return type PHP raises a TypeError.
            return null;
        }

        return [
            'name' => $mapped['name'],
            'price' => (float) $mapped['price'],
            'in_stock' => (int) $mapped['stock'] > 0,
        ];
    }

    /**
     * The data has no header row, so the first row is processed like any other.
     *
     * @param mixed $item First row.
     * @return array|null Same result as configureItem().
     */
    public function configureFirstItem($item)
    {
        return $this->configureItem($item);
    }
}
$rawData = [
['product_name' => 'Laptop', 'price' => '999.99', 'quantity' => '5'],
['product_name' => 'Mouse', 'price' => '29.99', 'quantity' => '0'],
['product_name' => 'Invalid', 'price' => '-10', 'quantity' => '1'] // Will be skipped
];
$reader = new Reader(
new ArrayData($rawData),
new Json(),
new ProductConfig()
);
$products = $reader->run();use DataReader\Output\XML;
$reader = new Reader(
new File('data.csv', new CSV()),
new XML('products', 'product'), // root: products, items: product
new ProductConfig()
);
$xmlOutput = $reader->run();
echo $xmlOutput;
// <products>
// <product>
// <name>Laptop</name>
// <price>999.99</price>
// </product>
// </products>use DataReader\Output\CSV as CsvOutput;
$reader = new Reader(
new File('data.json', new JSON()),
new CsvOutput('|', '"'), // Custom delimiter and enclosure
new DataConfig()
);
$csvOutput = $reader->run();use DataReader\Resource\Resource;
use DataReader\ResourceInterface;
use DataReader\ConfigInterface;
use DataReader\Exception\ResourceException;
/**
 * Resource that reads its rows from a database query through PDO.
 */
class DatabaseResource extends Resource implements ResourceInterface
{
    /** Connection used to run the query. */
    private \PDO $connection;

    /** SQL statement executed by apply(). */
    private string $query;

    /**
     * @param \PDO   $connection Open database connection.
     * @param string $query      SQL statement to prepare and execute; it is run without bound parameters.
     */
    public function __construct(\PDO $connection, string $query)
    {
        $this->connection = $connection;
        $this->query = $query;
    }

    /**
     * Executes the query and runs every fetched row through the config.
     *
     * The first row goes to configureFirstItem(), all later rows to
     * configureItem(). Rows the config asks to skip are left out of the result.
     *
     * @param ConfigInterface $config Transformation applied to the rows.
     * @return array Processed rows.
     * @throws ResourceException When PDO raises a PDOException.
     */
    public function apply(ConfigInterface $config): array
    {
        try {
            $stmt = $this->connection->prepare($this->query);
            $stmt->execute();

            $items = [];
            $isFirst = true;
            while (($row = $stmt->fetch(\PDO::FETCH_ASSOC)) !== false) {
                if ($isFirst) {
                    $isFirst = false;
                    $item = $config->configureFirstItem($row);
                    // false from configureFirstItem() means "skip this row".
                    if ($item === false) {
                        continue;
                    }
                } else {
                    $item = $config->configureItem($row);
                }

                // The example configs return null for rows to skip; do not
                // store those nulls as if they were records.
                if ($item !== null) {
                    $items[] = $item;
                }
            }

            $this->setData($items);

            return $this->getData();
        } catch (\PDOException $e) {
            // Chain the original exception so its trace and SQLSTATE stay available.
            // NOTE(review): assumes ResourceException keeps the standard
            // Exception constructor (message, code, previous) - confirm.
            throw new ResourceException('Database error: ' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Stores the processed rows on the resource.
     *
     * @param mixed $data Rows to store.
     */
    public function setData($data): void
    {
        $this->data = $data;
    }

    /**
     * Returns the stored rows, or an empty array when nothing has been stored yet.
     *
     * @return array
     */
    public function getData(): array
    {
        return $this->data ?? [];
    }
}
// Usage
$pdo = new PDO($dsn, $user, $pass);
$reader = new Reader(
new DatabaseResource($pdo, 'SELECT * FROM users'),
new Json(),
new UserConfig()
);use DataReader\Resource\FileInterface;
use DataReader\ConfigInterface;
use DataReader\Exception\ResourceException;
class ExcelFormat implements FileInterface
{
/**
 * Reads a spreadsheet from an open stream and runs its rows through the config.
 *
 * The stream content is copied to a temporary file because the spreadsheet
 * is loaded by path; the file is removed again in every case.
 *
 * @param resource        $handle Open, readable stream containing the spreadsheet.
 * @param ConfigInterface $config Transformation applied to the rows.
 * @return array Processed rows of the active worksheet.
 * @throws ResourceException When the stream cannot be read or the temporary file cannot be written.
 */
public function read($handle, ConfigInterface $config): array
{
    // Example with PhpSpreadsheet (requires composer package)
    $content = stream_get_contents($handle);
    if ($content === false) {
        throw new ResourceException('Unable to read spreadsheet data from the stream');
    }

    $tempFile = tempnam(sys_get_temp_dir(), 'excel');
    if ($tempFile === false) {
        throw new ResourceException('Unable to create a temporary file for the spreadsheet');
    }

    try {
        // Written inside the try block so the temporary file is removed
        // even when writing it fails.
        if (file_put_contents($tempFile, $content) === false) {
            throw new ResourceException('Unable to write the spreadsheet to a temporary file');
        }

        $spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($tempFile);
        $rows = $spreadsheet->getActiveSheet()->toArray();

        $items = [];
        foreach ($rows as $index => $row) {
            if ($index === 0) {
                $item = $config->configureFirstItem($row);
                // false from configureFirstItem() means "skip this row".
                if ($item === false) {
                    continue;
                }
            } else {
                $item = $config->configureItem($row);
            }

            // The example configs return null for rows to skip; do not
            // store those nulls as if they were records.
            if ($item !== null) {
                $items[] = $item;
            }
        }

        return $items;
    } finally {
        unlink($tempFile);
    }
}
}use DataReader\Output\Output;
use DataReader\OutputInterface;
class HTMLOutput extends Output implements OutputInterface
{
private string $tableClass;
/**
 * @param string $tableClass Value written into the class attribute of the generated table element.
 */
public function __construct(string $tableClass = 'table')
{
$this->tableClass = $tableClass;
}
/**
 * Renders the items as an HTML table.
 *
 * The keys of the first item become the column headers; every item becomes
 * one table row. All text is HTML-escaped.
 *
 * @param mixed $items List of records (arrays of scalar values).
 * @return string Table markup, or a placeholder paragraph when there are no items.
 */
public function items($items): string
{
    if (empty($items)) {
        return '<p>No data available</p>';
    }

    $flags = ENT_QUOTES | ENT_SUBSTITUTE;

    // The class name ends up inside a quoted attribute, so it must be
    // escaped just like the cell contents.
    $class = htmlspecialchars($this->tableClass, $flags, 'UTF-8');
    $html = "<table class=\"{$class}\">\n";

    // Header: take the first element whatever its key is, because a list
    // that was filtered earlier does not necessarily start at key 0.
    $firstItem = is_array($items) ? $items[array_key_first($items)] : $items[0];
    $html .= "<thead><tr>\n";
    foreach (array_keys($firstItem) as $header) {
        // Keys can be integers; cast before escaping.
        $html .= "<th>" . htmlspecialchars((string) $header, $flags, 'UTF-8') . "</th>\n";
    }
    $html .= "</tr></thead>\n";

    // Body
    $html .= "<tbody>\n";
    foreach ($items as $item) {
        $html .= "<tr>\n";
        foreach ($item as $value) {
            $html .= "<td>" . htmlspecialchars((string) $value, $flags, 'UTF-8') . "</td>\n";
        }
        $html .= "</tr>\n";
    }
    $html .= "</tbody>\n</table>";

    return $html;
}
}

Constructor: __construct(?ResourceInterface $resource = null, ?OutputInterface $output = null, ?ConfigInterface $config = null)
Methods:
- resource(ResourceInterface $resource): self - Set data source
- config(ConfigInterface $config): self - Set data configuration
- output(OutputInterface $output): self - Set output format
- run(): mixed - Execute the data processing pipeline
- getItems(): array - Get processed items without output formatting
- getTotalItems(): int - Get count of processed items
ReaderFactory Methods:
- createCsvReader(string $filename): Reader - Quick CSV reader setup
- createJsonReader(string $filename): Reader - Quick JSON reader setup
- createArrayReader(array $data): Reader - Quick array reader setup
- getItems(): array
- getTotalItems(): int
apply(ConfigInterface $config): array
- configureItem($item): mixed - Transform individual data items
- configureFirstItem($item): mixed - Handle first item (headers, etc.)
items($items): mixed- Format processed data for output
read($handle, ConfigInterface $config): array- Read from file handle
- File(string $filename, FileInterface $format) - File-based data source
- ArrayData(array $data = []) - Array-based data source

- CSV() - CSV file reader
- JSON() - JSON file reader
- XML(string $itemTag = 'item') - XML file reader

- Json(int $options = JSON_PRETTY_PRINT) - JSON output
- XML(string $root = 'data', string $item = 'item') - XML output
- CSV(string $delimiter = ',', string $enclosure = '"') - CSV output

- BaseConfig - Abstract base with field mapping and validation
  - setFieldMapping(array $mapping): self
  - addValidator(callable $validator): self
  - mapFields($item): array (protected)
  - validateItem($item): bool (protected)

- DataReaderException - Base exception class
- ResourceException - Resource-related errors
- ConfigurationException - Configuration errors
- OutputException - Output formatting errors
- PHP 8.0 or higher (uses strict typing and return type declarations)
- No external dependencies for core functionality
- Optional dependencies for extended functionality:
  - phpoffice/phpspreadsheet - for Excel file support
  - ext-simplexml - for XML processing (usually included)
  - ext-json - for JSON processing (usually included)
The library provides comprehensive error handling with custom exception types:
use DataReader\Exception\{DataReaderException, ResourceException, ConfigurationException, OutputException};
try {
$reader = ReaderFactory::createCsvReader('nonexistent.csv');
$data = $reader->run();
} catch (ResourceException $e) {
// Handle file/resource errors
echo "Resource error: " . $e->getMessage();
} catch (ConfigurationException $e) {
// Handle configuration errors
echo "Configuration error: " . $e->getMessage();
} catch (OutputException $e) {
// Handle output formatting errors
echo "Output error: " . $e->getMessage();
} catch (DataReaderException $e) {
// Handle any other data reader errors
echo "General error: " . $e->getMessage();
}

- Large Files: Consider implementing streaming for files > 100MB
- Array Processing: ArrayData loads all data into memory
- Output Buffering: JSON and XML outputs build complete strings in memory
// For large datasets, process in chunks
class ChunkedConfig extends BaseConfig
{
private int $processed = 0;
private int $chunkSize;
/**
 * @param int $chunkSize Number of items between the garbage-collection runs triggered by configureItem().
 */
public function __construct(int $chunkSize = 1000)
{
$this->chunkSize = $chunkSize;
}
/**
 * Counts the item, triggers garbage collection after every full chunk and
 * delegates to processItem().
 *
 * NOTE(review): processItem() is not defined in this class; presumably it
 * has to be supplied by the parent class or by the implementer - confirm.
 *
 * @param mixed $item Raw item.
 * @return array|null Result of processItem().
 */
public function configureItem($item): ?array
{
    $this->processed++;

    // Count first, then test: the collection runs after each completed chunk
    // rather than on the very first item. A chunk size of 0 disables the
    // collection instead of causing a modulo-by-zero error.
    if ($this->chunkSize !== 0 && $this->processed % $this->chunkSize === 0) {
        // Trigger garbage collection every chunk
        gc_collect_cycles();
    }

    return $this->processItem($item);
}
}

This project is licensed under the MIT License - see the LICENSE file for details.
- Complete ArrayData implementation
- Add XML output format
- Implement data validation features
- Add streaming support for large files
- Create additional file format handlers (JSON, XML, Excel)
- Add caching mechanisms
- Implement batch processing capabilities
For support, please open an issue on the GitHub repository.