From 5a21dd8f5bcd610f0c009cfa35edfb33ebd1309b Mon Sep 17 00:00:00 2001 From: Sean Knowles Date: Mon, 13 Oct 2025 12:08:54 +0100 Subject: [PATCH 1/4] feat(classify): update sync to match by indicator name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Changed from ID-based to NAME-based matching - One classification now updates ALL indicators with that name - Handles country-specific indicators (e.g., 180 Balance of Trade) - Synced 666 names → 15,384 indicators (99.96% coverage) - Added detailed statistics and top 10 indicators by country count Results: - Balance of Trade: 180 countries - GDP: 191 countries - Population: 186 countries - Temperature: 204 countries 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../scripts/production/sync_to_postgres.ts | 149 ++++++++++++------ 1 file changed, 100 insertions(+), 49 deletions(-) diff --git a/packages/classify/scripts/production/sync_to_postgres.ts b/packages/classify/scripts/production/sync_to_postgres.ts index 8c335c9..343dbf3 100644 --- a/packages/classify/scripts/production/sync_to_postgres.ts +++ b/packages/classify/scripts/production/sync_to_postgres.ts @@ -3,6 +3,10 @@ /** * Sync classification data from local SQLite to PostgreSQL staging database * Updates the 3 new columns: type, temporal_aggregation, heat_map_orientation + * + * IMPORTANT: Updates by INDICATOR NAME, not ID + * - One classification (e.g., "Balance of Trade") updates ALL indicators with that name + * - Handles country-specific indicator IDs (e.g., 180 "Balance of Trade" indicators) */ import { Database } from "@db/sqlite"; @@ -11,7 +15,7 @@ import { Client } from "https://deno.land/x/postgres@v0.17.0/mod.ts"; const SQLITE_DB_PATH = "./data/classify_production_v2.db"; const DATABASE_URL = Deno.env.get("DATABASE_URL"); -console.log("🔄 Starting classification sync to PostgreSQL...\n"); +console.log("🔄 Starting classification sync to PostgreSQL (BY NAME)...\n"); // Open SQLite database console.log(`📂 Opening SQLite database: ${SQLITE_DB_PATH}`); @@ -22,12 +26,12 @@ sqlite.exec("PRAGMA foreign_keys = ON;"); console.log("📊 Reading classifications from SQLite..."); const classifications = sqlite.prepare(` SELECT - indicator_id, + name, indicator_type as type, temporal_aggregation, heat_map_orientation FROM classifications - ORDER BY indicator_id + ORDER BY name `).all(); console.log(`✓ Found ${classifications.length} classifications in SQLite\n`); @@ -43,41 +47,39 @@ await pgClient.connect(); console.log("✓ Connected to PostgreSQL\n"); -// Get all indicators from PostgreSQL -console.log("📊 Reading indicators from PostgreSQL..."); -const pgIndicators = await pgClient.queryObject<{ id: string; name: string }>` - SELECT id, name +// Get indicator name statistics from PostgreSQL +console.log("📊 Analyzing indicators in PostgreSQL..."); +const pgStats = await pgClient.queryObject<{ + total: string; + unique_names: string; +}>` + SELECT + COUNT(*) as total, + COUNT(DISTINCT name) as unique_names FROM indicators - ORDER BY id `; -console.log(`✓ Found ${pgIndicators.rows.length} indicators in PostgreSQL\n`); - -// Create a map for quick lookup -const pgIndicatorMap = new Map( - pgIndicators.rows.map((row) => [row.id, row.name]), +console.log( + `✓ Found ${pgStats.rows[0].total} total indicators (${ + pgStats.rows[0].unique_names + } unique names)\n`, ); -// Sync classifications -console.log("🔄 Syncing classifications...\n"); +// Sync classifications by name +console.log("🔄 Syncing classifications by indicator name...\n"); -let updatedCount = 0; +let updatedIndicatorCount = 0; +let updatedNameCount = 0; let notFoundCount = 0; let errorCount = 0; -const notFoundIndicators: string[] = []; +const notFoundNames: string[] = []; +const updateStats: Array<{ name: string; count: number }> = []; for (const classification of classifications) { - const indicatorId = classification.indicator_id; + const indicatorName = classification.name; try { - // Check if indicator exists in PostgreSQL - if (!pgIndicatorMap.has(indicatorId)) { - notFoundIndicators.push(indicatorId); - notFoundCount++; - continue; - } - - // Update the indicator with classification data + // Update ALL indicators with this name const result = await pgClient.queryObject` UPDATE indicators SET @@ -85,18 +87,26 @@ for (const classification of classifications) { temporal_aggregation = ${classification.temporal_aggregation}::temporal_aggregation, heat_map_orientation = ${classification.heat_map_orientation}::heat_map_orientation, updated_at = NOW() - WHERE id = ${indicatorId} + WHERE name = ${indicatorName} `; if (result.rowCount && result.rowCount > 0) { - updatedCount++; - if (updatedCount % 100 === 0) { - console.log(` ✓ Updated ${updatedCount} indicators...`); + updatedIndicatorCount += result.rowCount; + updatedNameCount++; + updateStats.push({ name: indicatorName, count: result.rowCount }); + + if (updatedNameCount % 50 === 0) { + console.log( + ` ✓ Updated ${updatedNameCount} indicator names (${updatedIndicatorCount} total indicators)...`, + ); } + } else { + notFoundNames.push(indicatorName); + notFoundCount++; } } catch (error) { console.error( - ` ✗ Error updating ${indicatorId}: ${ + ` ✗ Error updating "${indicatorName}": ${ error instanceof Error ? error.message : String(error) }`, ); @@ -106,51 +116,92 @@ for (const classification of classifications) { console.log("\n========================================"); console.log("Sync Results:"); -console.log(` ✓ Updated: ${updatedCount}`); -console.log(` ⚠ Not found: ${notFoundCount}`); -console.log(` ✗ Errors: ${errorCount}`); +console.log(` ✓ Indicator names updated: ${updatedNameCount}`); +console.log(` ✓ Total indicators updated: ${updatedIndicatorCount}`); +console.log(` ⚠ Names not found: ${notFoundCount}`); +console.log(` ✗ Errors: ${errorCount}`); console.log("========================================\n"); +// Show top 10 indicators by update count +if (updateStats.length > 0) { + console.log("📊 Top 10 indicators by country count:\n"); + updateStats + .sort((a, b) => b.count - a.count) + .slice(0, 10) + .forEach((stat) => { + console.log(` ${stat.name.padEnd(30)} → ${stat.count} countries`); + }); + console.log(); +} + if (notFoundCount > 0 && notFoundCount <= 20) { - console.log("⚠ Indicators not found in PostgreSQL:"); - notFoundIndicators.forEach((id) => console.log(` - ${id}`)); + console.log("⚠ Indicator names not found in PostgreSQL:"); + notFoundNames.forEach((name) => console.log(` - ${name}`)); console.log(); } else if (notFoundCount > 20) { console.log( - `⚠ ${notFoundCount} indicators not found in PostgreSQL (first 20):`, + `⚠ ${notFoundCount} indicator names not found in PostgreSQL (first 20):`, ); - notFoundIndicators.slice(0, 20).forEach((id) => console.log(` - ${id}`)); + notFoundNames.slice(0, 20).forEach((name) => console.log(` - ${name}`)); console.log(` ... and ${notFoundCount - 20} more\n`); } // Verify some updates -console.log("🔍 Verifying updates (sample of 5 indicators)...\n"); -const sampleIndicators = classifications.slice(0, 5); - -for (const sample of sampleIndicators) { - if (!pgIndicatorMap.has(sample.indicator_id)) continue; - +console.log("🔍 Verifying updates (sample of 5 indicator names)...\n"); +const sampleNames = [ + "Balance of Trade", + "GDP", + "Population", + "Inflation Rate", + "Exports", +] + .filter((name) => updateStats.some((s) => s.name === name)); + +for (const name of sampleNames) { const result = await pgClient.queryObject<{ - id: string; - name: string; + count: string; type: string; temporal_aggregation: string; heat_map_orientation: string; }>` - SELECT id, name, type, temporal_aggregation, heat_map_orientation + SELECT + COUNT(*) as count, + type, + temporal_aggregation, + heat_map_orientation FROM indicators - WHERE id = ${sample.indicator_id} + WHERE name = ${name} + GROUP BY type, temporal_aggregation, heat_map_orientation + LIMIT 1 `; if (result.rows.length > 0) { const row = result.rows[0]; - console.log(`${row.id} (${row.name}):`); + console.log(`${name} (${row.count} indicators):`); console.log(` type: ${row.type}`); console.log(` temporal_aggregation: ${row.temporal_aggregation}`); console.log(` heat_map_orientation: ${row.heat_map_orientation}\n`); } } +// Final coverage check +const coverage = await pgClient.queryObject<{ + total: string; + classified: string; + coverage_percent: string; +}>` + SELECT + COUNT(*) as total, + COUNT(type) as classified, + ROUND(COUNT(type)::numeric / COUNT(*) * 100, 2) as coverage_percent + FROM indicators +`; + +console.log("📈 Overall Coverage:"); +console.log(` Total indicators: ${coverage.rows[0].total}`); +console.log(` Classified: ${coverage.rows[0].classified}`); +console.log(` Coverage: ${coverage.rows[0].coverage_percent}%\n`); + await pgClient.end(); console.log("✅ Sync complete!\n"); From 361d323d5afbfbe704a262a06e729a01ae693b40 Mon Sep 17 00:00:00 2001 From: Sean Knowles Date: Mon, 13 Oct 2025 15:17:18 +0100 Subject: [PATCH 2/4] fix(econify): block time normalization for period-total discrete types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Tourist Arrivals and other count indicators no longer time-normalized - period-total + discrete types (count/volume/stock/index) now blocked - Flow types (GDP, exports) still allow time conversion as intended - Fixes: 520k tourists Q2 → stays 520k, not divided by 3 to 173k/month - Version bump to 1.3.1 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/econify/CHANGELOG.md | 18 ++++++++++++++++++ packages/econify/deno.json | 14 +++----------- .../src/normalization/indicator_type_rules.ts | 11 ++++++++++- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/packages/econify/CHANGELOG.md b/packages/econify/CHANGELOG.md index d456724..9ed681a 100644 --- a/packages/econify/CHANGELOG.md +++ b/packages/econify/CHANGELOG.md @@ -2,6 +2,24 @@ All notable changes to the econify package will be documented in this file. +## [1.3.1] - 2025-10-13 + +### Fixed + +- **Period-Total Time Normalization for Discrete Types**: Fixed incorrect time normalization of count indicators with `period-total` temporal aggregation + - Tourist Arrivals and other count indicators no longer incorrectly time-normalized + - Example: 520,394 tourists in Q2 now correctly stays as 520,394, not divided by 3 to get 173,465 tourists/month + - Discrete types (count, volume, stock, index) with `period-total` now block time conversion + - Flow-like types (flow, balance, rate) with `period-total` still allow time conversion (GDP, exports, etc.) + - Added smart logic in `allowsTimeConversion()` to check indicator type when temporal_aggregation is `period-total` + +### Technical Details + +- Modified `packages/econify/src/normalization/indicator_type_rules.ts` +- Updated `allowsTimeConversion()` function's `period-total` case to check if indicator type is discrete +- Discrete types: ["count", "volume", "stock", "index"] → block time conversion for period-totals +- Flow-like indicators still allow conversion as their totals represent rates over time + ## [1.3.0] - 2025-01-10 ### BREAKING CHANGES diff --git a/packages/econify/deno.json b/packages/econify/deno.json index 7fd576a..15e0a32 100644 --- a/packages/econify/deno.json +++ b/packages/econify/deno.json @@ -1,6 +1,6 @@ { "name": "@tellimer/econify", - "version": "1.3.0", + "version": "1.3.1", "tasks": { "dev": "deno run --watch src/helpers/sample_usage.ts", "test": "deno test --parallel", @@ -34,19 +34,11 @@ }, "lint": { "rules": { - "tags": [ - "recommended" - ] + "tags": ["recommended"] } }, "publish": { - "include": [ - "src/", - "mod.ts", - "README.md", - "deno.json", - "assets/" - ] + "include": ["src/", "mod.ts", "README.md", "deno.json", "assets/"] }, "repository": { "type": "git", diff --git a/packages/econify/src/normalization/indicator_type_rules.ts b/packages/econify/src/normalization/indicator_type_rules.ts index 49d3f78..9251718 100644 --- a/packages/econify/src/normalization/indicator_type_rules.ts +++ b/packages/econify/src/normalization/indicator_type_rules.ts @@ -439,7 +439,16 @@ export function allowsTimeConversion( // Flow rates during period - can convert (e.g., GDP quarterly → annual) return true; case "period-total": - // Sum over period - can convert (e.g., total transactions monthly → annual) + // Sum over period - BUT depends on indicator type + // Discrete counts/stocks cannot be meaningfully divided across time periods + // Example: 100 tourists in Q1 ≠ 33.33 tourists per month + // But: $1000M GDP in Q1 = $333M per month (it's a flow rate) + const discreteTypes = ["count", "volume", "stock", "index"]; + if (discreteTypes.includes(indicatorType || "")) { + // Discrete totals - these are cumulative event counts, not rates + return false; + } + // Flow-like totals - safe to convert (GDP, exports, spending, etc.) return true; case "period-average": // Average over period - can convert (e.g., avg temperature monthly → annual) From 657dfe5d643a64a9dd0ee20ffdc89c1d614b525a Mon Sep 17 00:00:00 2001 From: Sean Knowles Date: Mon, 13 Oct 2025 15:26:35 +0100 Subject: [PATCH 3/4] fix(econify): wrap case block in brackets for lint compliance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixed no-case-declarations lint error in indicator_type_rules.ts - Added brackets around period-total case block - All 428 tests passing, no type errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/econify/src/normalization/indicator_type_rules.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/econify/src/normalization/indicator_type_rules.ts b/packages/econify/src/normalization/indicator_type_rules.ts index 9251718..6e3c6b0 100644 --- a/packages/econify/src/normalization/indicator_type_rules.ts +++ b/packages/econify/src/normalization/indicator_type_rules.ts @@ -438,7 +438,7 @@ export function allowsTimeConversion( case "period-rate": // Flow rates during period - can convert (e.g., GDP quarterly → annual) return true; - case "period-total": + case "period-total": { // Sum over period - BUT depends on indicator type // Discrete counts/stocks cannot be meaningfully divided across time periods // Example: 100 tourists in Q1 ≠ 33.33 tourists per month @@ -450,6 +450,7 @@ export function allowsTimeConversion( } // Flow-like totals - safe to convert (GDP, exports, spending, etc.) return true; + } case "period-average": // Average over period - can convert (e.g., avg temperature monthly → annual) return true; From c4cf0fc6f17e1d5fd8b615c3c0a6767a534adf5f Mon Sep 17 00:00:00 2001 From: Sean Knowles Date: Mon, 13 Oct 2025 15:32:35 +0100 Subject: [PATCH 4/4] docs(econify): update changelog for v1.3.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Document period-total time normalization fix for discrete types - Add technical details about the implementation - Version 1.3.1 release notes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/econify/CHANGELOG.md | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/packages/econify/CHANGELOG.md b/packages/econify/CHANGELOG.md index 9ed681a..04c20f8 100644 --- a/packages/econify/CHANGELOG.md +++ b/packages/econify/CHANGELOG.md @@ -6,19 +6,28 @@ All notable changes to the econify package will be documented in this file. ### Fixed -- **Period-Total Time Normalization for Discrete Types**: Fixed incorrect time normalization of count indicators with `period-total` temporal aggregation - - Tourist Arrivals and other count indicators no longer incorrectly time-normalized - - Example: 520,394 tourists in Q2 now correctly stays as 520,394, not divided by 3 to get 173,465 tourists/month - - Discrete types (count, volume, stock, index) with `period-total` now block time conversion - - Flow-like types (flow, balance, rate) with `period-total` still allow time conversion (GDP, exports, etc.) - - Added smart logic in `allowsTimeConversion()` to check indicator type when temporal_aggregation is `period-total` +- **Period-Total Time Normalization for Discrete Types**: Fixed incorrect time + normalization of count indicators with `period-total` temporal aggregation + - Tourist Arrivals and other count indicators no longer incorrectly + time-normalized + - Example: 520,394 tourists in Q2 now correctly stays as 520,394, not divided + by 3 to get 173,465 tourists/month + - Discrete types (count, volume, stock, index) with `period-total` now block + time conversion + - Flow-like types (flow, balance, rate) with `period-total` still allow time + conversion (GDP, exports, etc.) + - Added smart logic in `allowsTimeConversion()` to check indicator type when + temporal_aggregation is `period-total` ### Technical Details - Modified `packages/econify/src/normalization/indicator_type_rules.ts` -- Updated `allowsTimeConversion()` function's `period-total` case to check if indicator type is discrete -- Discrete types: ["count", "volume", "stock", "index"] → block time conversion for period-totals -- Flow-like indicators still allow conversion as their totals represent rates over time +- Updated `allowsTimeConversion()` function's `period-total` case to check if + indicator type is discrete +- Discrete types: ["count", "volume", "stock", "index"] → block time conversion + for period-totals +- Flow-like indicators still allow conversion as their totals represent rates + over time ## [1.3.0] - 2025-01-10