# Token Tracking
The Token & Cost Tracking feature provides comprehensive monitoring and analysis of API token usage and associated costs across all supported AI providers. It helps you:
- Monitor Resource Usage: track the tokens consumed by each API request
- Optimize Costs: analyze and reduce API expenses with detailed cost breakdowns
- Manage Budgets: set limits and monitor spending across multiple models
- Analyze Performance: understand token efficiency and response characteristics
- Compare Providers: compare costs across different AI providers and models
- ✅ Automatic Token Tracking: Extracts token usage from all provider responses
- ✅ Real-time Cost Calculation: Calculates costs based on current provider pricing
- ✅ Multi-Provider Support: Works with OpenAI, Anthropic, Google Gemini, xAI, Meta, DeepSeek
- ✅ Backward Compatible: Existing code continues to work without changes
- ✅ Rich Metadata: Access response IDs, finish reasons, timestamps, and more
- ✅ Cost Optimization Tools: Identify the cheapest models and estimate costs before requests
Token tracking is built into the core library (v2.0+). No additional installation is required:

```bash
composer require rumenx/php-chatbot
```

```php
use Rumenx\PhpChatbot\PhpChatbot;
use Rumenx\PhpChatbot\Models\OpenAiModel;
$model = new OpenAiModel('your-api-key');
$chatbot = new PhpChatbot($model);
// Make a request
$response = $chatbot->ask('Explain quantum computing in simple terms');
echo $response; // Backward compatible - auto-converts to string
// Get token usage
$usage = $chatbot->getLastTokenUsage();
if ($usage) {
    echo "Prompt tokens: {$usage->promptTokens}\n";
    echo "Completion tokens: {$usage->completionTokens}\n";
    echo "Total tokens: {$usage->totalTokens}\n";
}
// Get cost
$cost = $chatbot->getLastCost();
if ($cost !== null) {
    echo "Cost: $" . number_format($cost, 4) . "\n";
}
```

```php
// Get the complete ChatResponse object
$chatResponse = $chatbot->getLastResponse();
if ($chatResponse) {
    $metadata = $chatResponse->getMetadata();
    echo "Model: {$metadata->model}\n";
    echo "Finish Reason: {$metadata->finishReason}\n";
    echo "Response ID: {$metadata->responseId}\n";
    echo "Timestamp: {$metadata->timestamp->format('Y-m-d H:i:s')}\n";
    if ($metadata->hasTokenUsage()) {
        $usage = $metadata->tokenUsage;
        echo "Tokens used: {$usage->totalTokens}\n";
    }
}
```

```php
use Rumenx\PhpChatbot\Support\CostCalculator;
$calculator = new CostCalculator();
// After making a request
$usage = $chatbot->getLastTokenUsage();
$cost = $calculator->calculateCost('gpt-4o', $usage);
echo "Request cost: $" . number_format($cost, 4) . "\n";$totalCost = 0.0;
$requests = ['Tell me a joke', 'Explain AI', 'What is PHP?'];
foreach ($requests as $prompt) {
    $response = $chatbot->ask($prompt);
    $cost = $chatbot->getLastCost();
    if ($cost !== null) {
        $totalCost += $cost;
        echo "Prompt: $prompt\n";
        echo "Cost: $" . number_format($cost, 6) . "\n\n";
    }
}
echo "Total cost: $" . number_format($totalCost, 4) . "\n";$budget = 1.00; // $1.00 budget
$spent = 0.0;
$prompts = ['Question 1', 'Question 2', 'Question 3'];
foreach ($prompts as $prompt) {
    // Estimate cost before making the request
    $estimatedCost = $chatbot->estimateCost($prompt);
    if ($spent + $estimatedCost > $budget) {
        echo "Budget exceeded! Stopping.\n";
        break;
    }
    $response = $chatbot->ask($prompt);
    $actualCost = $chatbot->getLastCost() ?? 0.0;
    $spent += $actualCost;
    echo "Response: $response\n";
    echo "Cost: $" . number_format($actualCost, 6) . "\n";
    echo "Remaining budget: $" . number_format($budget - $spent, 4) . "\n\n";
}
```

```php
use Rumenx\PhpChatbot\Support\CostCalculator;
$calculator = new CostCalculator();
// Find the cheapest model by provider
$cheapestOpenAI = $calculator->getCheapestModel('openai');
$cheapestAnthropic = $calculator->getCheapestModel('anthropic');
$cheapestGoogle = $calculator->getCheapestModel('google');
echo "Cheapest OpenAI model: $cheapestOpenAI\n";
echo "Cheapest Anthropic model: $cheapestAnthropic\n";
echo "Cheapest Google model: $cheapestGoogle\n";
// Compare specific models
$models = ['gpt-4o', 'gpt-4o-mini', 'claude-3-5-sonnet', 'gemini-1.5-flash'];
$promptTokens = 1000;
$completionTokens = 500;
echo "\nCost comparison for 1K prompt + 500 completion tokens:\n";
foreach ($models as $model) {
    $cost = $calculator->estimateCost($model, $promptTokens, $completionTokens);
    echo "$model: $" . number_format($cost, 4) . "\n";
}
```

```php
$calculator = new CostCalculator();
// Check if model is free (e.g., Ollama)
if ($calculator->isLocalModel('llama3')) {
    echo "This model is free to use!\n";
}
// Check if model has pricing information
if ($calculator->hasModelPricing('gpt-4o')) {
    $pricing = $calculator->getModelPricing('gpt-4o');
    echo "Input: $" . $pricing['input'] . " per 1M tokens\n";
    echo "Output: $" . $pricing['output'] . " per 1M tokens\n";
}
```

**Use Cheaper Models for Simple Tasks**

```php
// Use gpt-4o-mini instead of gpt-4o for simple queries
$model = new OpenAiModel('key', 'gpt-4o-mini');
```
**Limit Response Length**

```php
// Set max_tokens to control output length
$response = $chatbot->ask('Explain AI', [
    'max_tokens' => 100 // Shorter = cheaper
]);
```
**Monitor Token Usage Patterns**

```php
$usage = $chatbot->getLastTokenUsage();

// Check if responses are too long
if ($usage->completionTokens > 1000) {
    echo "Consider reducing max_tokens or using a cheaper model\n";
}

// Check token efficiency
$ratio = $usage->completionTokens / $usage->promptTokens;
echo "Output/Input ratio: " . number_format($ratio, 2) . "\n";
```
**Batch Similar Requests**

```php
// Instead of multiple small requests, combine them
$combined = "Answer these questions:\n1. What is AI?\n2. What is ML?\n3. What is DL?";
$response = $chatbot->ask($combined);
```
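To check what batching actually saves, you can compare the measured cost of separate requests against one combined request. A minimal sketch using the `ask()` and `getLastCost()` methods from the quick start (the prompts are illustrative):

```php
use Rumenx\PhpChatbot\PhpChatbot;
use Rumenx\PhpChatbot\Models\OpenAiModel;

$chatbot = new PhpChatbot(new OpenAiModel('your-api-key', 'gpt-4o-mini'));

// Cost of three separate requests
$separateCost = 0.0;
foreach (['What is AI?', 'What is ML?', 'What is DL?'] as $prompt) {
    $chatbot->ask($prompt);
    $separateCost += $chatbot->getLastCost() ?? 0.0;
}

// Cost of one combined request
$chatbot->ask("Answer these questions:\n1. What is AI?\n2. What is ML?\n3. What is DL?");
$combinedCost = $chatbot->getLastCost() ?? 0.0;

echo "Separate: $" . number_format($separateCost, 6) . "\n";
echo "Combined: $" . number_format($combinedCost, 6) . "\n";
```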
Provider-specific examples:

**OpenAI**

```php
use Rumenx\PhpChatbot\Models\OpenAiModel;
$model = new OpenAiModel('your-api-key', 'gpt-4o');
$chatbot = new PhpChatbot($model);
$response = $chatbot->ask('What is artificial intelligence?');
// Get detailed token breakdown
$usage = $chatbot->getLastTokenUsage();
echo "Prompt tokens: {$usage->promptTokens}\n";
echo "Completion tokens: {$usage->completionTokens}\n";
echo "Total tokens: {$usage->totalTokens}\n";
// Calculate cost
$calculator = new CostCalculator();
$cost = $calculator->calculateCost('gpt-4o', $usage);
echo "Cost: $" . CostCalculator::formatCost($cost) . "\n";use Rumenx\PhpChatbot\Models\AnthropicModel;
$model = new AnthropicModel('your-api-key', 'claude-3-5-sonnet-20241022');
$chatbot = new PhpChatbot($model);
$response = $chatbot->ask('Explain machine learning');
$chatResponse = $chatbot->getLastResponse();
$metadata = $chatResponse->getMetadata();
echo "Model: {$metadata->model}\n";
echo "Input tokens: {$metadata->tokenUsage->promptTokens}\n";
echo "Output tokens: {$metadata->tokenUsage->completionTokens}\n";
echo "Cost: $" . number_format($chatbot->getLastCost(), 4) . "\n";use Rumenx\PhpChatbot\Models\GeminiModel;
$model = new GeminiModel('your-api-key', 'gemini-1.5-flash');
$chatbot = new PhpChatbot($model);
$response = $chatbot->ask('What is deep learning?');
// Gemini provides token usage
$usage = $chatbot->getLastTokenUsage();
if ($usage) {
    echo "Tokens used: {$usage->totalTokens}\n";
    echo "Cost: $" . number_format($chatbot->getLastCost(), 6) . "\n";
}
```

**DeepSeek**

```php
use Rumenx\PhpChatbot\Models\DeepSeekAiModel;
// DeepSeek is one of the most cost-effective models
$model = new DeepSeekAiModel('your-api-key');
$chatbot = new PhpChatbot($model);
$response = $chatbot->ask('Write a poem about PHP');
echo "Response: $response\n";
echo "Cost: $" . CostCalculator::formatCost($chatbot->getLastCost()) . "\n";use Rumenx\PhpChatbot\Models\OllamaModel;
// Ollama models are free - they run locally
$model = new OllamaModel('llama3');
$chatbot = new PhpChatbot($model);
$response = $chatbot->ask('Explain neural networks');
echo "Response: $response\n";
echo "Cost: $0.00 (Local model)\n";
// Token tracking may not be available for local models
$usage = $chatbot->getLastTokenUsage();
if ($usage) {
    echo "Tokens used: {$usage->totalTokens}\n";
}
```

The ChatResponse class wraps AI model responses with metadata and token information.

```php
namespace Rumenx\PhpChatbot\Support;
class ChatResponse implements \Stringable
{
    // Properties
    public readonly string $content;
    public readonly ResponseMetadata $metadata;

    // Methods
    public function __construct(string $content, ResponseMetadata $metadata);
    public function __toString(): string;
    public function getContent(): string;
    public function getMetadata(): ResponseMetadata;
    public function getTokenUsage(): ?TokenUsage;
    public function getModel(): string;
    public function getFinishReason(): ?string;
    public function wasTruncated(): bool;
    public function wasFiltered(): bool;
    public function completedNormally(): bool;
    public function toArray(): array;
    public function getSummary(): string;

    // Factory methods
    public static function fromString(string $content, string $model): self;
    public static function fromOpenAI(array $response, string $model): self;
    public static function fromAnthropic(array $response, string $model): self;
    public static function fromGemini(array $response, string $model): self;
}
```

Example:

```php
$chatResponse = $chatbot->getLastResponse();
// Automatic string conversion
echo $chatResponse; // Same as echo $chatResponse->getContent();
// Access metadata
$metadata = $chatResponse->getMetadata();
$usage = $chatResponse->getTokenUsage();
// Check response status
if ($chatResponse->wasTruncated()) {
    echo "Response was truncated due to token limit\n";
}
if ($chatResponse->completedNormally()) {
    echo "Response completed successfully\n";
}
// Get summary
echo $chatResponse->getSummary();
```
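The factory methods are also handy in tests: `ChatResponse::fromString()` builds a response without calling a provider. A minimal sketch, assuming only the factory signature listed above:

```php
use Rumenx\PhpChatbot\Support\ChatResponse;

// Build a stub response for a unit test (no API call involved)
$stub = ChatResponse::fromString('Hello from a test', 'gpt-4o-mini');

echo $stub . "\n";             // "Hello from a test" via __toString()
echo $stub->getModel() . "\n"; // "gpt-4o-mini"

// No usage data was supplied, so token usage is expected to be null
var_dump($stub->getTokenUsage());
```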
The TokenUsage class tracks token consumption for API requests.

```php
namespace Rumenx\PhpChatbot\Support;
class TokenUsage
{
    // Properties
    public readonly int $promptTokens;
    public readonly int $completionTokens;
    public readonly int $totalTokens;

    // Methods
    public function __construct(int $promptTokens, int $completionTokens, ?int $totalTokens = null);
    public function toArray(): array;
    public function getSummary(): string;
    public function exceedsThreshold(int $threshold): bool;
    public function getUsagePercentage(int $maxTokens): float;
    public function getRemainingTokens(int $maxTokens): int;

    // Factory methods
    public static function fromArray(array $data): self;
    public static function fromOpenAI(array $usage): self;
    public static function fromAnthropic(array $usage): self;
    public static function fromGemini(array $usage): self;
}
```

Example:

```php
$usage = $chatbot->getLastTokenUsage();
// Get token counts
echo "Prompt: {$usage->promptTokens}\n";
echo "Completion: {$usage->completionTokens}\n";
echo "Total: {$usage->totalTokens}\n";
// Check thresholds
if ($usage->exceedsThreshold(4000)) {
    echo "Warning: High token usage!\n";
}
// Calculate percentages (for models with token limits)
$percentage = $usage->getUsagePercentage(8192);
echo "Used " . number_format($percentage, 1) . "% of model capacity\n";
// Get remaining tokens
$remaining = $usage->getRemainingTokens(8192);
echo "Remaining tokens: $remaining\n";
// Human-readable summary
echo $usage->getSummary();
```
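Because `toArray()` and `fromArray()` are both available, usage records can be persisted and restored later. A minimal sketch, assuming `fromArray()` accepts the array shape that `toArray()` produces:

```php
use Rumenx\PhpChatbot\Support\TokenUsage;

// Persist the last request's usage for later analysis
$usage = $chatbot->getLastTokenUsage();
file_put_contents('usage.json', json_encode($usage->toArray()));

// Later: restore the record and inspect it
$data = json_decode(file_get_contents('usage.json'), true);
$restored = TokenUsage::fromArray($data);
echo $restored->getSummary() . "\n";
```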
The ResponseMetadata class contains metadata about the AI response.

```php
namespace Rumenx\PhpChatbot\Support;
class ResponseMetadata
{
    // Properties
    public readonly ?TokenUsage $tokenUsage;
    public readonly string $model;
    public readonly ?string $finishReason;
    public readonly ?string $responseId;
    public readonly \DateTimeImmutable $timestamp;
    public readonly array $extra;

    // Methods
    public function __construct(
        string $model,
        ?TokenUsage $tokenUsage = null,
        ?string $finishReason = null,
        ?string $responseId = null,
        ?\DateTimeImmutable $timestamp = null,
        array $extra = []
    );
    public function hasTokenUsage(): bool;
    public function get(string $key, mixed $default = null): mixed;
    public function wasTruncated(): bool;
    public function wasFiltered(): bool;
    public function completedNormally(): bool;
    public function toArray(): array;
    public function getSummary(): string;
}
```

Example:

```php
$response = $chatbot->getLastResponse();
$metadata = $response->getMetadata();
// Basic info
echo "Model: {$metadata->model}\n";
echo "Response ID: {$metadata->responseId}\n";
echo "Timestamp: {$metadata->timestamp->format('Y-m-d H:i:s')}\n";
// Finish reason
echo "Finish reason: {$metadata->finishReason}\n";
// Check status
if ($metadata->wasTruncated()) {
    echo "Response was truncated (hit max_tokens limit)\n";
}
if ($metadata->wasFiltered()) {
    echo "Response was filtered by content policy\n";
}
// Access extra provider-specific data
$systemFingerprint = $metadata->get('system_fingerprint');
if ($systemFingerprint) {
    echo "System fingerprint: $systemFingerprint\n";
}
// Token usage
if ($metadata->hasTokenUsage()) {
    $usage = $metadata->tokenUsage;
    echo "Tokens: {$usage->totalTokens}\n";
}
```

The CostCalculator class calculates costs based on token usage and model pricing.

```php
namespace Rumenx\PhpChatbot\Support;
class CostCalculator
{
    // Methods
    public function calculateCost(string $model, TokenUsage $usage): float;
    public function estimateCost(string $model, int $promptTokens, int $completionTokens): float;
    public function getModelPricing(string $model): ?array;
    public function hasModelPricing(string $model): bool;
    public function getSupportedModels(): array;
    public function calculateBatchCost(string $model, array $usages): float;
    public function getCheapestModel(string $provider): ?string;
    public function isLocalModel(string $model): bool;

    // Static methods
    public static function formatCost(float $cost): string;
}
```

Example:

```php
$calculator = new CostCalculator();
// Calculate cost from token usage
$usage = $chatbot->getLastTokenUsage();
$cost = $calculator->calculateCost('gpt-4o', $usage);
echo "Cost: " . CostCalculator::formatCost($cost) . "\n";
// Estimate cost before making a request
$estimatedCost = $calculator->estimateCost('gpt-4o', 500, 200);
echo "Estimated cost: " . CostCalculator::formatCost($estimatedCost) . "\n";
// Get pricing information
$pricing = $calculator->getModelPricing('gpt-4o');
echo "Input: ${$pricing['input']} per 1M tokens\n";
echo "Output: ${$pricing['output']} per 1M tokens\n";
// Find cheapest models
$cheapest = $calculator->getCheapestModel('openai');
echo "Cheapest OpenAI model: $cheapest\n";
// Check if model is local (free)
if ($calculator->isLocalModel('llama3')) {
    echo "This model runs locally and is free!\n";
}
// Calculate batch costs
$usages = [
    $usage1,
    $usage2,
    $usage3,
];
$totalCost = $calculator->calculateBatchCost('gpt-4o', $usages);
echo "Total batch cost: " . CostCalculator::formatCost($totalCost) . "\n";
// Get all supported models
$models = $calculator->getSupportedModels();
echo "Supported models: " . implode(', ', $models) . "\n";New methods added to the PhpChatbot class:
// Get the last ChatResponse object
public function getLastResponse(): ?ChatResponse;
// Get token usage from last request
public function getLastTokenUsage(): ?TokenUsage;
// Calculate cost of last request
public function getLastCost(): ?float;
// Get the cost calculator instance
public function getCostCalculator(): CostCalculator;
// Estimate cost before making a request
public function estimateCost(string $prompt, ?string $model = null): float;
```

Example:

```php
$chatbot = new PhpChatbot($model);
// Make a request
$response = $chatbot->ask('Hello');
// Get full response object
$chatResponse = $chatbot->getLastResponse();
// Get just token usage
$usage = $chatbot->getLastTokenUsage();
// Get cost
$cost = $chatbot->getLastCost();
// Estimate cost before request
$estimated = $chatbot->estimateCost('This is a long prompt...');
echo "Estimated cost: $" . number_format($estimated, 6) . "\n";
// Access cost calculator directly
$calculator = $chatbot->getCostCalculator();
$cheapest = $calculator->getCheapestModel('openai');
```

The CostCalculator includes up-to-date pricing for all major providers:
| Provider | Model | Input (per 1M tokens) | Output (per 1M tokens) |
|----------|-------|-----------------------|------------------------|
| OpenAI | gpt-4o | $2.50 | $10.00 |
| OpenAI | gpt-4o-mini | $0.150 | $0.600 |
| OpenAI | gpt-4-turbo | $10.00 | $30.00 |
| OpenAI | gpt-3.5-turbo | $0.50 | $1.50 |
| Anthropic | claude-3-5-sonnet-20241022 | $3.00 | $15.00 |
| Anthropic | claude-3-5-haiku-20241022 | $1.00 | $5.00 |
| Anthropic | claude-3-opus | $15.00 | $75.00 |
| Google | gemini-1.5-pro | $1.25 | $5.00 |
| Google | gemini-1.5-flash | $0.075 | $0.30 |
| Google | gemini-1.0-pro | $0.50 | $1.50 |
| xAI | grok-beta | $5.00 | $15.00 |
| Meta | llama-3.1-405b | $5.00 | $15.00 |
| Meta | llama-3.1-70b | $0.99 | $0.99 |
| Meta | llama-3.1-8b | $0.20 | $0.20 |
| DeepSeek | deepseek-chat | $0.14 | $0.28 |
| DeepSeek | deepseek-reasoner | $0.55 | $2.19 |
| Ollama | all models | $0.00 (runs locally) | $0.00 (runs locally) |
Note: Pricing is subject to change by providers. Check provider websites for the most current rates.
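Since rates drift over time, it can be worth dumping the calculator's bundled pricing and comparing it against the providers' published rates. A small audit sketch using only the `CostCalculator` methods from the API reference above:

```php
use Rumenx\PhpChatbot\Support\CostCalculator;

$calculator = new CostCalculator();

// Print the bundled pricing for every supported model
foreach ($calculator->getSupportedModels() as $model) {
    if ($calculator->isLocalModel($model)) {
        echo "$model: free (runs locally)\n";
        continue;
    }
    $pricing = $calculator->getModelPricing($model);
    if ($pricing !== null) {
        echo "$model: $" . $pricing['input'] . " in / $" . $pricing['output'] . " out per 1M tokens\n";
    }
}
```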
Problem: `getLastTokenUsage()` returns null

Solutions:

**Check Provider Support**: Not all providers return token usage.

```php
$usage = $chatbot->getLastTokenUsage();
if ($usage === null) {
    echo "Token usage not available for this provider\n";
}
```
**Verify API Response**: Check whether the API returned token information.

```php
$response = $chatbot->getLastResponse();
$metadata = $response->getMetadata();
if (!$metadata->hasTokenUsage()) {
    echo "API did not return token usage\n";
}
```
**Local Models**: Ollama and other local models may not provide token counts.

```php
if ($calculator->isLocalModel($model->getModel())) {
    echo "Local models don't track tokens\n";
}
```
Problem: `getLastCost()` returns 0.0 or null

Solutions:

**Check Model Pricing**: Verify that the model has pricing data.

```php
$calculator = new CostCalculator();
if (!$calculator->hasModelPricing($model->getModel())) {
    echo "No pricing data for this model\n";
}
```
**Local Models**: Local models are free.

```php
if ($calculator->isLocalModel($model->getModel())) {
    echo "Cost is $0.00 for local models\n";
}
```
**Missing Token Usage**: Cost calculation requires token usage data.

```php
if (!$chatbot->getLastTokenUsage()) {
    echo "Cannot calculate cost without token usage\n";
}
```
Problem: Type errors when treating the response as a string

Solution: The ChatResponse class implements Stringable for automatic conversion:

```php
// These all work:
$response = $chatbot->ask('Hello');
echo $response; // Automatic __toString()
echo $response->getContent(); // Explicit
echo (string) $response; // Cast
// For type hints, use string|ChatResponse
function processResponse(string|ChatResponse $response): void {
    $text = (string) $response;
    // ...
}
```

Problem: Existing code expects string responses
Solution: No changes needed! ChatResponse automatically converts to string:

```php
// Old code still works
$response = $chatbot->ask('Hello');
if (str_contains($response, 'hello')) { // Works!
    // ...
}
// New code can access metadata
$chatResponse = $chatbot->getLastResponse();
$usage = $chatResponse->getTokenUsage();
```

Problem: Need to check cost before making expensive API calls
Solution: Use the `estimateCost()` method:

```php
// Estimate based on prompt length
$prompt = "Long prompt text...";
$estimated = $chatbot->estimateCost($prompt);
if ($estimated > 0.10) {
    echo "This request will cost more than $0.10\n";
    echo "Estimated: $" . number_format($estimated, 4) . "\n";
    // Decide whether to proceed
}
// Or calculate manually
$calculator = new CostCalculator();
$promptTokens = strlen($prompt) / 4; // Rough estimate: 1 token ≈ 4 chars
$completionTokens = 500; // Expected output
$cost = $calculator->estimateCost('gpt-4o', (int)$promptTokens, $completionTokens);
```
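The 1 token ≈ 4 characters rule of thumb above is rough (actual tokenization varies by model and language), but it is convenient to wrap in a helper. A sketch; the `estimateTokens()` function is hypothetical, not part of the library:

```php
// Hypothetical helper: rough token count from character length.
// Real tokenizers vary by model; treat the result as a ballpark figure.
function estimateTokens(string $text): int
{
    return (int) ceil(strlen($text) / 4);
}

$calculator = new CostCalculator();
$cost = $calculator->estimateCost('gpt-4o', estimateTokens($prompt), 500);
echo "Estimated cost: " . CostCalculator::formatCost($cost) . "\n";
```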
Token usage may not be available for all providers:

```php
$usage = $chatbot->getLastTokenUsage();
if ($usage !== null) {
    echo "Tokens: {$usage->totalTokens}\n";
} else {
    echo "Token tracking not available\n";
}
```

Track usage over time for insights:

```php
$history = [];
foreach ($prompts as $prompt) {
    $response = $chatbot->ask($prompt);
    $usage = $chatbot->getLastTokenUsage();
    $cost = $chatbot->getLastCost();
    $history[] = [
        'prompt' => $prompt,
        'tokens' => $usage?->totalTokens ?? 0,
        'cost' => $cost ?? 0.0,
        'timestamp' => new DateTime(),
    ];
}
// Analyze
$totalTokens = array_sum(array_column($history, 'tokens'));
$totalCost = array_sum(array_column($history, 'cost'));
echo "Total tokens: $totalTokens\n";
echo "Total cost: $" . number_format($totalCost, 4) . "\n";Control costs by limiting response length:
$response = $chatbot->ask('Explain AI', [
    'max_tokens' => 150, // Limit response length
    'temperature' => 0.7,
]);
// Check if response was truncated
$chatResponse = $chatbot->getLastResponse();
if ($chatResponse->wasTruncated()) {
    echo "Warning: Response was cut off at token limit\n";
}
```

Use cheaper models for simple tasks:

```php
$calculator = new CostCalculator();
// For simple tasks
$cheapModel = $calculator->getCheapestModel('openai'); // gpt-4o-mini
// For complex tasks requiring high quality
$premiumModel = 'gpt-4o';
// Decision logic
if ($taskComplexity === 'simple') {
    $model->setModel($cheapModel);
} else {
    $model->setModel($premiumModel);
}
```

Set up monitoring for high usage:

```php
$usage = $chatbot->getLastTokenUsage();
$cost = $chatbot->getLastCost();
// Alert on high token usage
if ($usage && $usage->exceedsThreshold(3000)) {
    error_log("High token usage detected: {$usage->totalTokens}");
}
// Alert on expensive requests
if ($cost && $cost > 0.05) {
    error_log("Expensive request: $" . number_format($cost, 4));
}
```
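To make this monitoring routine, the checks can live in a small wrapper that logs every request. A minimal sketch built only on the `ask()`, `getLastTokenUsage()`, and `getLastCost()` methods documented above; the `CostLogger` class name is illustrative:

```php
use Rumenx\PhpChatbot\PhpChatbot;

// Illustrative wrapper: records tokens and cost for every request
class CostLogger
{
    public function __construct(private PhpChatbot $chatbot) {}

    public function ask(string $prompt, array $options = []): string
    {
        $response = (string) $this->chatbot->ask($prompt, $options);

        $tokens = $this->chatbot->getLastTokenUsage()?->totalTokens ?? 0;
        $cost = $this->chatbot->getLastCost() ?? 0.0;
        error_log(sprintf('[chatbot] tokens=%d cost=$%s', $tokens, number_format($cost, 6)));

        return $response;
    }
}
```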
- Quick Start Guide - Get started with PHP Chatbot
- Examples - More usage examples
- API Reference - Complete API documentation
- Configuration - Configuration options
- Best Practices - Development best practices
- GitHub Issues: Report bugs or request features
- Documentation: Full wiki documentation
- License: MIT License
Last updated: October 2025