Merged

0.5.5 #123

23 changes: 23 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,29 @@ All notable changes to LogTide will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.5.5] - 2026-02-06

### Fixed

- **Detection Category Filter Validation Error**: Fixed the `querystring/category must match exactly one schema in oneOf` error on `/api/v1/siem/detections`
  - Replaced the `oneOf` schema (string | array) with a plain `type: array`; Fastify auto-coerces single query values to arrays (see the sketch below)
  - Aligned the Zod validation schema to match
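A minimal sketch of the schema shape after this fix, assuming Fastify's default Ajv settings (`coerceTypes: 'array'`); the property list is illustrative, not the actual `/api/v1/siem/detections` schema:

```ts
// Illustrative querystring schema; only `category` reflects the actual change.
const detectionsQuerySchema = {
  type: 'object',
  properties: {
    // Before: { oneOf: [{ type: 'string' }, { type: 'array', items: { type: 'string' } }] },
    // which could reject requests once Ajv's type coercion interacted with the branches.
    // After: a plain array; Ajv coerces a single ?category=auth into ['auth'].
    category: { type: 'array', items: { type: 'string' } },
  },
} as const;
```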

### Performance

- **Admin Dashboard 31s → ~1s**: Fixed all admin stats endpoints that caused dashboard timeouts on 50M+ logs
  - `/api/v1/admin/stats/logs`: Switched to the `logs_daily_stats` continuous aggregate for top orgs, top projects, and per-day counts (37s → 31ms), and to `approximate_row_count()` for the total (677ms → 56ms); a plausible aggregate definition is sketched after this list
  - `/api/v1/admin/stats/database`: Replaced two `COUNT(*)` full scans with `approximate_row_count()` + `pg_class.reltuples`, run as a single parallel batch (1.4s → 180ms)
  - `/api/v1/admin/stats/performance`: Changed the `created_at` filter to `time` for chunk pruning (793ms → 160ms) and parallelized all queries
  - All queries in each endpoint now run via `Promise.all()` instead of sequentially (six in the case of `/stats/logs`)
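For context, here is a plausible definition of the `logs_daily_stats` continuous aggregate these endpoints now read from; the column names (`bucket`, `project_id`, `log_count`) are inferred from the queries in this PR, and the real migration may differ:

```ts
import { Kysely, sql } from 'kysely';

// Hypothetical migration helper, not the repo's actual migration.
export async function createLogsDailyStats(db: Kysely<any>): Promise<void> {
  // Note: continuous aggregates cannot be created inside a transaction.
  await sql`
    CREATE MATERIALIZED VIEW IF NOT EXISTS logs_daily_stats
    WITH (timescaledb.continuous) AS
    SELECT
      time_bucket('1 day', time) AS bucket,
      project_id,
      COUNT(*) AS log_count
    FROM logs
    GROUP BY bucket, project_id
  `.execute(db);
}
```

With a refresh policy in place, TimescaleDB keeps the view up to date incrementally, so the admin queries aggregate at most one row per project per day instead of scanning raw log rows.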

- **Error Group Logs Timeout**: Fixed a statement timeout on `/api/v1/error-groups/:id/logs` with large datasets
  - Added `logs.time` bounds (`firstSeen`/`lastSeen`) to enable TimescaleDB chunk pruning on the hypertable JOIN; see the sketch after this list
  - Removed the expensive `COUNT(*)` query; uses `error_groups.occurrence_count` (maintained by a trigger) instead
  - Eliminated a redundant group fetch (reuses the data already loaded for the authorization check)
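A sketch of the time-bounds idea, assuming a Kysely `db` instance and that logs are matched to a group by the fingerprint and project id the route now passes in; the actual service query may join differently:

```ts
import { Kysely } from 'kysely';

// Hypothetical helper illustrating the chunk-pruning fix.
async function logsForErrorGroup(
  db: Kysely<any>,
  p: { projectId: string; fingerprint: string; firstSeen: Date; lastSeen: Date; limit: number; offset: number }
) {
  return db
    .selectFrom('logs')
    .selectAll()
    .where('project_id', '=', p.projectId)
    .where('fingerprint', '=', p.fingerprint)
    // Bounding the hypertable partition key lets TimescaleDB prune every
    // chunk outside [firstSeen, lastSeen] instead of scanning all of them.
    .where('time', '>=', p.firstSeen)
    .where('time', '<=', p.lastSeen)
    .orderBy('time', 'desc')
    .limit(p.limit)
    .offset(p.offset)
    .execute();
}
```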

---

## [0.5.4] - 2026-02-06

### Added
285 changes: 142 additions & 143 deletions packages/backend/src/modules/admin/service.ts
@@ -246,69 +246,55 @@ export class AdminService {
/**
 * Get database statistics
 *
 * PERFORMANCE: Uses pg_class reltuples for row estimates (avoids full table scans),
 * approximate_row_count for logs hypertable, and runs queries in parallel.
 */
async getDatabaseStats(): Promise<DatabaseStats> {
// Query table sizes and row counts
const tables = await db.executeQuery<{
name: string;
size: string;
rows: number;
indexes_size: string;
}>(
sql`
SELECT
schemaname || '.' || tablename AS name,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
(SELECT COUNT(*) FROM logs WHERE tablename = 'logs')::int AS rows,
pg_size_pretty(pg_indexes_size(schemaname||'.'||tablename)) AS indexes_size
FROM pg_tables
WHERE schemaname = 'public'
AND tablename IN ('users', 'organizations', 'projects', 'logs', 'alert_rules', 'alert_history', 'api_keys', 'sessions', 'notifications', 'sigma_rules')
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
`.compile(db)
);

// Get row counts for each table
const logsCount = await db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.executeTakeFirstOrThrow();

const usersCount = await db
.selectFrom('users')
.select(sql<number>`COUNT(*)::int`.as('count'))
.executeTakeFirstOrThrow();

const orgsCount = await db
.selectFrom('organizations')
.select(sql<number>`COUNT(*)::int`.as('count'))
.executeTakeFirstOrThrow();
const [tables, rowEstimates, totalSizeResult] = await Promise.all([
// Table sizes (no COUNT subquery - uses pg catalog only)
db.executeQuery<{
name: string;
size: string;
indexes_size: string;
}>(
sql`
SELECT
schemaname || '.' || tablename AS name,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size,
pg_size_pretty(pg_indexes_size(schemaname||'.'||tablename)) AS indexes_size
FROM pg_tables
WHERE schemaname = 'public'
AND tablename IN ('users', 'organizations', 'projects', 'logs', 'alert_rules', 'alert_history', 'api_keys', 'sessions', 'notifications', 'sigma_rules')
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
`.compile(db)
),

const projectsCount = await db
.selectFrom('projects')
.select(sql<number>`COUNT(*)::int`.as('count'))
.executeTakeFirstOrThrow();
// Row estimates: approximate_row_count for hypertables, reltuples for regular tables
db.executeQuery<{ name: string; rows: number }>(
sql`
SELECT 'logs' AS name, approximate_row_count('logs')::int AS rows
UNION ALL
SELECT relname AS name, GREATEST(reltuples, 0)::int AS rows
FROM pg_class
WHERE relname IN ('users', 'organizations', 'projects', 'alert_rules', 'alert_history')
`.compile(db)
),

const alertRulesCount = await db
.selectFrom('alert_rules')
.select(sql<number>`COUNT(*)::int`.as('count'))
.executeTakeFirstOrThrow();
// Total database size
db.executeQuery<{ size: string }>(
sql`SELECT pg_size_pretty(pg_database_size(current_database())) AS size`.compile(db)
),
]);

// Calculate total database size
const totalSizeResult = await db.executeQuery<{ size: string }>(
sql`SELECT pg_size_pretty(pg_database_size(current_database())) AS size`.compile(db)
);
// Build row count lookup
const rowsMap = new Map(rowEstimates.rows.map((r) => [r.name, r.rows]));

const tablesWithCounts = tables.rows.map((table) => {
let rows = 0;
if (table.name === 'public.logs') rows = logsCount.count;
else if (table.name === 'public.users') rows = usersCount.count;
else if (table.name === 'public.organizations') rows = orgsCount.count;
else if (table.name === 'public.projects') rows = projectsCount.count;
else if (table.name === 'public.alert_rules') rows = alertRulesCount.count;

const tableName = table.name.replace('public.', '');
return {
...table,
rows,
rows: rowsMap.get(tableName) || 0,
};
});

@@ -323,92 +309,100 @@ export class AdminService {

/**
* Get log statistics
*
* PERFORMANCE: Uses logs_daily_stats continuous aggregate for per-day, top orgs,
* and top projects (30ms vs 37s on raw logs). approximate_row_count for total.
* All queries run in parallel.
*/
async getLogsStats(): Promise<LogsStats> {
const now = new Date();
const thirtyDaysAgo = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
const oneDayAgo = new Date(now.getTime() - 24 * 60 * 60 * 1000);
const oneHourAgo = new Date(now.getTime() - 60 * 60 * 1000);

// Total logs
const totalLogs = await db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.executeTakeFirstOrThrow();
// Run all queries in parallel - uses continuous aggregates for heavy queries
const [totalLogs, logsPerDay, topOrgs, topProjects, logsLastHour, logsLastDay] = await Promise.all([
// Approximate total logs (avoids full table scan on hypertable)
db.executeQuery<{ count: number }>(
sql`SELECT approximate_row_count('logs')::int AS count`.compile(db)
).then(r => r.rows[0] ?? { count: 0 }),

// Logs per day (last 30 days)
const logsPerDay = await db.executeQuery<{
date: string;
count: number;
}>(
sql`
SELECT
DATE(time) AS date,
COUNT(*)::int AS count
FROM logs
WHERE time >= ${sql.lit(thirtyDaysAgo)}
GROUP BY DATE(time)
ORDER BY date DESC
LIMIT 30
`.compile(db)
);

// Top organizations by log count
const topOrgs = await db
.selectFrom('logs')
.innerJoin('projects', 'projects.id', 'logs.project_id')
.innerJoin('organizations', 'organizations.id', 'projects.organization_id')
.select([
'organizations.id as organizationId',
'organizations.name as organizationName',
sql<number>`COUNT(*)::int`.as('count'),
])
.groupBy(['organizations.id', 'organizations.name'])
.orderBy('count', 'desc')
.limit(10)
.execute();
// Logs per day from continuous aggregate (43ms vs 5.9s on raw)
db.executeQuery<{ date: string; count: number }>(
sql`
SELECT
bucket::date AS date,
SUM(log_count)::int AS count
FROM logs_daily_stats
WHERE bucket >= ${sql.lit(thirtyDaysAgo)}
GROUP BY bucket
ORDER BY bucket DESC
LIMIT 30
`.compile(db)
),

// Top projects by log count
const topProjects = await db
.selectFrom('logs')
.innerJoin('projects', 'projects.id', 'logs.project_id')
.innerJoin('organizations', 'organizations.id', 'projects.organization_id')
.select([
'projects.id as projectId',
'projects.name as projectName',
'organizations.name as organizationName',
sql<number>`COUNT(*)::int`.as('count'),
])
.groupBy(['projects.id', 'projects.name', 'organizations.name'])
.orderBy('count', 'desc')
.limit(10)
.execute();
// Top organizations from continuous aggregate (31ms vs 37s on raw)
db.executeQuery<{ organizationId: string; organizationName: string; count: number }>(
sql`
SELECT
o.id AS "organizationId",
o.name AS "organizationName",
SUM(lds.log_count)::int AS count
FROM logs_daily_stats lds
JOIN projects p ON p.id = lds.project_id
JOIN organizations o ON o.id = p.organization_id
WHERE lds.bucket >= ${sql.lit(thirtyDaysAgo)}
GROUP BY o.id, o.name
ORDER BY count DESC
LIMIT 10
`.compile(db)
),

// Logs in last hour and last day for growth calculation
const logsLastHour = await db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.where('time', '>=', oneHourAgo)
.executeTakeFirstOrThrow();
// Top projects from continuous aggregate (37ms vs 16s on raw)
db.executeQuery<{ projectId: string; projectName: string; organizationName: string; count: number }>(
sql`
SELECT
p.id AS "projectId",
p.name AS "projectName",
o.name AS "organizationName",
SUM(lds.log_count)::int AS count
FROM logs_daily_stats lds
JOIN projects p ON p.id = lds.project_id
JOIN organizations o ON o.id = p.organization_id
WHERE lds.bucket >= ${sql.lit(thirtyDaysAgo)}
GROUP BY p.id, p.name, o.name
ORDER BY count DESC
LIMIT 10
`.compile(db)
),

const logsLastDay = await db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.where('time', '>=', oneDayAgo)
.executeTakeFirstOrThrow();
// Logs in last hour (uses chunk pruning on time column, ~160ms)
db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.where('time', '>=', oneHourAgo)
.executeTakeFirstOrThrow(),

// Logs in last day (uses chunk pruning on time column)
db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.where('time', '>=', oneDayAgo)
.executeTakeFirstOrThrow(),
]);

return {
total: totalLogs.count,
perDay: logsPerDay.rows.map((row) => ({
date: row.date,
count: row.count,
})),
topOrganizations: topOrgs.map((org) => ({
topOrganizations: topOrgs.rows.map((org) => ({
organizationId: org.organizationId,
organizationName: org.organizationName,
count: org.count,
})),
topProjects: topProjects.map((proj) => ({
topProjects: topProjects.rows.map((proj) => ({
projectId: proj.projectId,
projectName: proj.projectName,
organizationName: proj.organizationName,
@@ -423,34 +417,39 @@ export class AdminService {

/**
* Get performance statistics
*
* PERFORMANCE: Uses `time` column (hypertable partition key) for chunk pruning
* instead of `created_at` which scans all chunks. Runs queries in parallel.
*/
async getPerformanceStats(): Promise<PerformanceStats> {
const oneHourAgo = new Date(Date.now() - 60 * 60 * 1000);
const [logsLastHour, logsSize, avgLatencyResult, compressionRatio] = await Promise.all([
// Logs in last hour - use `time` for chunk pruning (160ms vs 793ms with created_at)
db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.where('time', '>=', new Date(Date.now() - 60 * 60 * 1000))
.executeTakeFirstOrThrow(),

// Logs ingested in last hour
const logsLastHour = await db
.selectFrom('logs')
.select(sql<number>`COUNT(*)::int`.as('count'))
.where('created_at', '>=', oneHourAgo)
.executeTakeFirstOrThrow();
// Logs table size (pg catalog, instant)
db.executeQuery<{ size: string }>(
sql`SELECT pg_size_pretty(pg_total_relation_size('logs')) AS size`.compile(db)
),

const throughput = logsLastHour.count / 3600; // logs per second
// Average latency from recent ingestion
db.executeQuery<{ avg_latency: number }>(
sql`
SELECT AVG(EXTRACT(EPOCH FROM (NOW() - time)) * 1000)::int AS avg_latency
FROM logs
WHERE time > NOW() - INTERVAL '5 minutes'
AND time <= NOW()
`.compile(db)
),

// Get logs table size
const logsSize = await db.executeQuery<{ size: string }>(
sql`SELECT pg_size_pretty(pg_total_relation_size('logs')) AS size`.compile(db)
);

// Calculate average latency from recent ingestion (time between log generation and ingestion)
// This is a simplified metric - for production, use dedicated metrics service like Prometheus
const avgLatencyResult = await db.executeQuery<{ avg_latency: number }>(
sql`
SELECT AVG(EXTRACT(EPOCH FROM (NOW() - time)) * 1000)::int AS avg_latency
FROM logs
WHERE time > NOW() - INTERVAL '5 minutes'
AND time <= NOW()
`.compile(db)
);
// Compression ratio
this.getCompressionRatio(),
]);

const throughput = logsLastHour.count / 3600; // logs per second
const avgLatency = avgLatencyResult.rows[0]?.avg_latency || 0;

return {
Expand All @@ -460,7 +459,7 @@ export class AdminService {
},
storage: {
logsSize: logsSize.rows[0]?.size || '0 bytes',
compressionRatio: await this.getCompressionRatio(),
compressionRatio,
},
};
}
6 changes: 6 additions & 0 deletions packages/backend/src/modules/exceptions/routes.ts
@@ -701,6 +701,12 @@ export async function exceptionsRoutes(fastify: FastifyInstance) {

const result = await exceptionService.getLogsForErrorGroup({
groupId: params.id,
fingerprint: group.fingerprint,
organizationId: group.organizationId,
projectId: group.projectId,
firstSeen: new Date(group.firstSeen),
lastSeen: new Date(group.lastSeen),
occurrenceCount: group.occurrenceCount,
limit: query.limit,
offset: query.offset,
});
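The call site above suggests the extended service params now look roughly like this; the actual interface lives in the exceptions service and may name things differently:

```ts
// Inferred from the route's call site; field comments are hypothetical.
export interface GetLogsForErrorGroupParams {
  groupId: string;
  fingerprint: string;
  organizationId: string;
  projectId: string;
  firstSeen: Date;         // lower `logs.time` bound, enables chunk pruning
  lastSeen: Date;          // upper `logs.time` bound
  occurrenceCount: number; // replaces the removed COUNT(*) query
  limit: number;
  offset: number;
}
```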