pinterest · artursarlo · Sep 16, 2025 · Sep 23, 2025 · Sep 23, 2025 · Oct 1, 2025
diff --git a/heartbeat_doc/README_HEARTBEAT.md b/heartbeat_doc/README_HEARTBEAT.md
@@ -1,15 +1,16 @@
 # gProfiler Performance Studio - Heartbeat-Based Profiling Control
 
-This document describes the heartbeat-based profiling control system that allows dynamic start/stop of profiling sessions through API commands.
+This document describes the heartbeat-based profiling control system that allows dynamic start/stop of profiling sessions through API commands with hierarchical targeting support.
 
 ## Overview
 
-The heartbeat system enables remote control of gProfiler agents through a simple yet robust mechanism:
+The heartbeat system enables remote control of gProfiler agents through a simple yet robust mechanism with support for hierarchical profiling:
 
 1. **Agents send periodic heartbeats** to the Performance Studio backend
 2. **Backend responds with profiling commands** (start/stop) when available
 3. **Agents execute commands with built-in idempotency** to prevent duplicate execution
 4. **Commands are tracked and logged** for audit and debugging
+5. **Hierarchical targeting** enables service-level profiling with plans for K8s namespace, pod, and container-level control
 
 ## Architecture
 
@@ -25,24 +26,49 @@ The heartbeat system enables remote control of gProfiler agents through a simple
 ┌─────────────────┐                 ┌──────────────────────┐
 │  Profile Data   │                 │   PostgreSQL DB      │
 │  (S3/Local)     │                 │   - Host Heartbeats  │
-└─────────────────┘                 │   - Profiling Cmds   │
+└─────────────────┘                 │   - Hierarchy Cmds   │
+                                    │   - Profiling Cmds   │
+                                    │   - Profiling Reqs   │
                                     └──────────────────────┘
 ```
 
+### Hierarchical Command Flow
+
+```
+API Request (Service) → ProfilingHierarchyCommand → ProfilingCommand(s) → Agent(s)
+       │                          │                         │                │
+       │                          │                         │                │
+       ▼                          ▼                         ▼                ▼
+  Service-level              Hierarchy table          Host-specific     Individual
+    request                 (service_name,             commands           agents
+                           container_name,           (hostname,
+                           pod_name,                 service_name)
+                           namespace)
+```
+
 ## Database Schema
 
 ### Core Tables
 
 1. **HostHeartbeats** - Track agent status and last seen information
-2. **ProfilingRequests** - Store profiling requests from API calls
-3. **ProfilingCommands** - Commands sent to agents (merged from multiple requests)
-4. **ProfilingExecutions** - Execution history for audit trail
+2. **ProfilingRequests** - Store individual profiling requests from API calls
+3. **ProfilingHierarchyCommands** - Store hierarchical profiling commands targeting services, K8s namespaces, pods, or containers
+4. **ProfilingCommands** - Commands sent to specific agents (merged from hierarchy commands)
+5. **ProfilingExecutions** - Execution history for audit trail
+
+### Hierarchical Profiling Architecture
+
+The hierarchical system uses a two-tier command structure:
+
+- **ProfilingHierarchyCommands**: Higher-level commands that target entire services, K8s namespaces, pods, or containers
+- **ProfilingCommands**: Host-specific commands generated from hierarchy commands for individual agents
 
 ### Key Features
 - **Simple DDL** with essential indexes only
 - **No stored procedures** - all logic in application code
 - **No triggers** - timestamps handled by application
 - **Consistent naming** with `idx_` prefix for all indexes
+- **Hierarchical targeting** at the service level today; the schema also carries namespace, pod, and container columns for the planned levels
 
 ## API Endpoints
 
@@ -62,6 +88,7 @@ Content-Type: application/json
     "host2": null
   },
   "stop_level": "process",
+  "continuous": false,
   "additional_args": {}
 }
 ```
@@ -71,12 +98,32 @@ Content-Type: application/json
 {
   "success": true,
   "message": "Start profiling request submitted successfully",
-  "request_id": "uuid",
-  "command_id": "uuid",
+  "request_id": "12345678-1234-1234-1234-123456789abc",
+  "command_ids": ["87654321-4321-4321-4321-cba987654321"],
+  "hierarchy_command_ids": ["11111111-2222-3333-4444-555555555555"],
   "estimated_completion_time": "2025-01-15T10:30:00Z"
 }
 ```
 
+**Request Parameters:**
+- `service_name` (required): Target service name
+- `request_type` (required): Either "start" or "stop"
+- `duration` (optional): Profiling duration in seconds (default: 60)
+- `frequency` (optional): Profiling frequency in Hz (default: 11)
+- `profiling_mode` (optional): "cpu", "allocation", or "none" (default: "cpu")
+- `target_hosts` (optional): Dictionary mapping hostnames to PIDs (null for all processes)
+- `stop_level` (optional): "process" or "host" (default: "process")
+- `continuous` (optional): Whether profiling should run continuously (default: false)
+- `additional_args` (optional): Additional profiler arguments
+
+**Response Fields:**
+- `success`: Whether the request was accepted
+- `message`: Human-readable status message
+- `request_id`: Unique identifier for this specific request
+- `command_ids`: List of host-specific command IDs generated
+- `hierarchy_command_ids`: List of service-level hierarchy command IDs
+- `estimated_completion_time`: When profiling is expected to complete
+
 ### 2. Agent Heartbeat
 ```http
 POST /api/metrics/heartbeat
@@ -154,21 +201,89 @@ python3 gprofiler/main.py \
 - **Host-level**: Stop entire profiling session
 - Automatic conversion when only one PID remains
 
+## Hierarchical Profiling System
+
+The hierarchical profiling system uses a two-tier architecture that enables profiling at different organizational levels:
+
+### Hierarchy Levels
+
+1. **Service Level** (Currently Implemented)
+   - Target entire services by name
+   - Commands automatically distributed to all hosts in the service
+   - Example: Profile all instances of "web-service"
+
+2. **Kubernetes Namespace Level** (Future Implementation)
+   - Target all pods within a specific namespace
+   - Commands distributed to all pods in the namespace
+   - Example: Profile all pods in "production" namespace
+
+3. **Pod Level** (Future Implementation)
+   - Target specific Kubernetes pods
+   - Commands sent to containers within the pod
+   - Example: Profile specific pods like "web-pod-12345"
+
+4. **Container Level** (Future Implementation)
+   - Target specific containers within pods
+   - Fine-grained control over containerized applications
+   - Example: Profile specific containers like "nginx-container"
+
+### Command Flow
+
+```
+API Request → ProfilingHierarchyCommand → ProfilingCommand(s) → Agent(s)
+     │                    │                        │               │
+     │                    │                        │               │
+     ▼                    ▼                        ▼               ▼
+Service-level      Hierarchy table         Host-specific    Individual
+  request           (service_name,           commands         agents
+                   container_name,           (hostname,
+                   pod_name,                 service_name)
+                   namespace)
+```
+
+### Hierarchy Command Structure
+
+The `ProfilingHierarchyCommands` table supports the following targeting options:
+
+- **service_name**: Target all hosts in a service
+- **namespace**: Target all pods in a K8s namespace (future)
+- **pod_name**: Target specific pods (future)
+- **container_name**: Target specific containers (future)
+
+### Current Implementation Status
+
+- ✅ **Service-level profiling**: Fully implemented
+- 🚧 **Namespace-level profiling**: Planned for future releases
+- 🚧 **Pod-level profiling**: Planned for future releases
+- 🚧 **Container-level profiling**: Planned for future releases
+
 ## Data Flow Example
 
-### 1. Create Profiling Request
+### 1. Create Service-Level Profiling Request
 ```bash
 curl -X POST http://localhost:8000/api/metrics/profile_request \
   -H "Content-Type: application/json" \
   -d '{
     "service_name": "web-service",
     "request_type": "start",
     "duration": 120,
-    "target_hostnames": ["web-01", "web-02"],
-    "profiling_mode": "cpu"
+    "profiling_mode": "cpu",
+    "continuous": false
   }'
 ```
 
+**Response:**
+```json
+{
+  "success": true,
+  "message": "Start profiling request submitted successfully",
+  "request_id": "12345678-1234-1234-1234-123456789abc",
+  "command_ids": ["87654321-4321-4321-4321-cba987654321"],
+  "hierarchy_command_ids": ["11111111-2222-3333-4444-555555555555"],
+  "estimated_completion_time": "2025-01-15T10:32:00Z"
+}
+```
+
 ### 2. Agent Heartbeat
 ```bash
 # Agent automatically sends:
@@ -210,6 +325,49 @@ curl -X POST http://localhost:8000/api/metrics/command_completion \
   }'
 ```
 
+### Future Hierarchical Profiling Examples
+
+The following examples show planned functionality for future releases:
+
+#### Kubernetes Namespace Profiling (Planned)
+```bash
+curl -X POST http://localhost:8000/api/metrics/profile_request \
+  -H "Content-Type: application/json" \
+  -d '{
+    "namespace": "production",
+    "request_type": "start",
+    "duration": 300,
+    "profiling_mode": "cpu"
+  }'
+```
+
+#### Pod-Level Profiling (Planned)
+```bash
+curl -X POST http://localhost:8000/api/metrics/profile_request \
+  -H "Content-Type: application/json" \
+  -d '{
+    "pod_name": "web-pod-12345",
+    "namespace": "production",
+    "request_type": "start",
+    "duration": 180,
+    "profiling_mode": "allocation"
+  }'
+```
+
+#### Container-Level Profiling (Planned)
+```bash
+curl -X POST http://localhost:8000/api/metrics/profile_request \
+  -H "Content-Type: application/json" \
+  -d '{
+    "container_name": "nginx-container",
+    "pod_name": "web-pod-12345",
+    "namespace": "production",
+    "request_type": "start",
+    "duration": 120,
+    "profiling_mode": "cpu"
+  }'
+```
+
 ## Testing
 
 ### 1. Test Heartbeat System
@@ -272,14 +430,32 @@ SELECT hostname, service_name, status, heartbeat_timestamp
 FROM HostHeartbeats 
 WHERE status = 'active' AND heartbeat_timestamp > NOW() - INTERVAL '10 minutes';
 
--- Check pending commands
+-- Check recent hierarchy commands (newest first; ProfilingHierarchyCommands has no status column)
+SELECT service_name, container_name, pod_name, namespace, command_type, created_at
+FROM ProfilingHierarchyCommands
+ORDER BY created_at DESC;
+
+-- Check pending host-specific commands
 SELECT hostname, service_name, command_type, status, created_at 
 FROM ProfilingCommands 
 WHERE status = 'pending';
 
 -- Check command execution history
 SELECT pe.hostname, pr.request_type, pe.status, pe.execution_time
 FROM ProfilingExecutions pe
-JOIN ProfilingRequests pr ON pe.profiling_request_id = pr.ID
-ORDER BY pe.created_at DESC;
+JOIN ProfilingRequests pr ON pe.profiling_request_id = pr.request_id
+ORDER BY pe.created_at DESC;
+
+-- Monitor service-level profiling activity
+-- NOTE(review): this join assumes combined_config->>'hierarchy_command_ids' is a comma-separated
+-- string of UUIDs; if it is stored as a JSON array the uuid[] cast fails — confirm against the writer.
+SELECT
+    phc.service_name,
+    phc.command_type,
+    COUNT(pc.hostname) AS target_hosts,
+    phc.created_at
+FROM ProfilingHierarchyCommands phc
+LEFT JOIN ProfilingCommands pc ON phc.command_id = ANY(string_to_array(pc.combined_config->>'hierarchy_command_ids', ',')::uuid[])
+GROUP BY phc.service_name, phc.command_type, phc.created_at
+ORDER BY phc.created_at DESC;
 ```

diff --git a/scripts/setup/postgres/gprofiler_recreate.sql b/scripts/setup/postgres/gprofiler_recreate.sql
@@ -279,7 +279,7 @@ CREATE TABLE ProfilingRequests (
     duration integer NULL DEFAULT 60,
     frequency integer NULL DEFAULT 11,
     profiling_mode ProfilingMode NOT NULL DEFAULT 'cpu',
-    target_hostnames text[] NOT NULL,
+    target_hostnames text[] NULL,
     pids integer[] NULL,
     stop_level text NULL DEFAULT 'process' CHECK (stop_level IN ('process', 'host')),
     additional_args jsonb NULL,
@@ -300,6 +300,29 @@ CREATE INDEX idx_profilingrequests_status ON ProfilingRequests (status);
 CREATE INDEX idx_profilingrequests_request_type ON ProfilingRequests (request_type);
 CREATE INDEX idx_profilingrequests_created_at ON ProfilingRequests (created_at);
 
+-- Profiling Hierarchy Commands Table: at most one row per (service, container, pod, namespace) target
+CREATE TABLE ProfilingHierarchyCommands (
+    ID bigserial PRIMARY KEY,
+    command_id uuid NOT NULL,                -- identifier of this hierarchy-level command
+    service_name text NULL,                  -- targeting columns: each is optional,
+    container_name text NULL,                -- and a NULL takes part in the uniqueness
+    pod_name text NULL,                      -- check below like any other value
+    namespace text NULL,
+    command_type text NOT NULL CHECK (command_type IN ('start', 'stop')),
+    request_ids uuid[] NOT NULL,             -- presumably ProfilingRequests.request_id values; verify against writer
+    combined_config jsonb NULL,
+    created_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    CONSTRAINT "unique_profiling_hierarchy_command" UNIQUE NULLS NOT DISTINCT (service_name, container_name, pod_name, namespace) -- NULLS NOT DISTINCT needs PostgreSQL 15+
+);
+
+CREATE INDEX idx_profilinghierarchycommands_command_id ON ProfilingHierarchyCommands (command_id);
+CREATE INDEX idx_profilinghierarchycommands_service_name ON ProfilingHierarchyCommands (service_name);
+CREATE INDEX idx_profilinghierarchycommands_container_name ON ProfilingHierarchyCommands (container_name);
+CREATE INDEX idx_profilinghierarchycommands_pod_name ON ProfilingHierarchyCommands (pod_name);
+CREATE INDEX idx_profilinghierarchycommands_namespace ON ProfilingHierarchyCommands (namespace);
+-- No separate (service_name, container_name, pod_name, namespace) index: unique_profiling_hierarchy_command already creates one on exactly these columns.
+
 -- Profiling Commands Table (simplified)
 CREATE TABLE ProfilingCommands (
     ID bigserial PRIMARY KEY,
@@ -317,7 +340,7 @@ CREATE TABLE ProfilingCommands (
     error_message text NULL,
     results_path text NULL,
     updated_at timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
-    CONSTRAINT "unique_profiling_command_per_host" UNIQUE (hostname, service_name)
+    CONSTRAINT "unique_profiling_command_per_host" UNIQUE NULLS NOT DISTINCT (hostname, service_name)
 );
 
 -- Essential indexes for profiling commands