Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions core/config/cre_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ type CRE interface {
UseLocalTimeProvider() bool
EnableDKGRecipient() bool
Linking() CRELinking
// DebugMode returns true if debug mode is enabled for workflow engines.
// When enabled, additional OTel tracing and logging are performed.
DebugMode() bool
LocalSecrets() map[string]string
}

Expand Down
4 changes: 4 additions & 0 deletions core/config/docs/core.toml
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ GoroutineThreshold = 5000 # Default
ServerAddress = 'http://localhost:4040' # Example
# Environment sets the target environment tag under which profiles will be added.
Environment = 'mainnet' # Default
# LinkTracesToProfiles enables linking traces to profiles in Grafana when Pyroscope and Tracing are enabled.
LinkTracesToProfiles = false # Default

[Sentry]
# **ADVANCED**
Expand Down Expand Up @@ -949,6 +951,8 @@ IgnoreJoblessBridges = false # Default
UseLocalTimeProvider = true # Default
# EnableDKGRecipient should be set to true if the DON runs a capability that uses a DKG result package.
EnableDKGRecipient = false # Default
# DebugMode enables additional tracing and logging for workflow engines.
DebugMode = false # Default

# Sharding holds settings for node sharding configuration.
[Sharding]
Expand Down
1 change: 1 addition & 0 deletions core/config/pyroscope_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ type Pyroscope interface {
AuthToken() string
ServerAddress() string
Environment() string
LinkTracesToProfiles() bool
}
17 changes: 15 additions & 2 deletions core/config/toml/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1640,8 +1640,9 @@ func (p *AutoPprof) setFrom(f *AutoPprof) {
}

type Pyroscope struct {
ServerAddress *string
Environment *string
ServerAddress *string
Environment *string
LinkTracesToProfiles *bool
}

func (p *Pyroscope) setFrom(f *Pyroscope) {
Expand All @@ -1651,6 +1652,9 @@ func (p *Pyroscope) setFrom(f *Pyroscope) {
if v := f.Environment; v != nil {
p.Environment = v
}
if v := f.LinkTracesToProfiles; v != nil {
p.LinkTracesToProfiles = v
}
}

type Sentry struct {
Expand Down Expand Up @@ -1886,6 +1890,11 @@ type CreConfig struct {
UseLocalTimeProvider *bool `toml:",omitempty"`
EnableDKGRecipient *bool `toml:",omitempty"`
Linking *LinkingConfig `toml:",omitempty"`
// DebugMode enables additional tracing and logging for workflow engines.
// When enabled, OTel traces are created for workflow execution and syncer events.
// Requires [Telemetry].Enabled = true for traces to be exported.
// WARNING: This is not suitable for production use due to performance overhead.
DebugMode *bool `toml:",omitempty"`
}

// WorkflowFetcherConfig holds the configuration for fetching workflow files
Expand Down Expand Up @@ -1942,6 +1951,10 @@ func (c *CreConfig) setFrom(f *CreConfig) {
c.Linking.TLSEnabled = v
}
}

if f.DebugMode != nil {
c.DebugMode = f.DebugMode
}
}

func (w *WorkflowFetcherConfig) ValidateConfig() error {
Expand Down
19 changes: 14 additions & 5 deletions core/logger/pyroscope.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package logger

import (
"os"
"runtime"

"github.com/grafana/pyroscope-go"
Expand All @@ -19,6 +20,10 @@ func StartPyroscope(pyroConfig config.Pyroscope, pprofConfig PprofConfig) (*pyro
runtime.SetBlockProfileRate(pprofConfig.BlockProfileRate())
runtime.SetMutexProfileFraction(pprofConfig.MutexProfileFraction())

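// Memory profiling currently uses the Go runtime default of one sample per 512KB (524288 bytes) allocated.
// For finer granularity, set runtime.MemProfileRate to a smaller value; the line below only restates the default.
// runtime.MemProfileRate = 512 * 1024 // 512KB per sample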

sha, ver := static.Short()

return pyroscope.Start(pyroscope.Config{
Expand All @@ -31,11 +36,15 @@ func StartPyroscope(pyroConfig config.Pyroscope, pprofConfig PprofConfig) (*pyro
// We disable logging the profiling info, it will be in the Pyroscope instance anyways...
Logger: nil,

Tags: map[string]string{
"SHA": sha,
"Version": ver,
"Environment": pyroConfig.Environment(),
},
Tags: func() map[string]string {
hostname, _ := os.Hostname()
return map[string]string{
"SHA": sha,
"Version": ver,
"Environment": pyroConfig.Environment(),
"hostname": hostname, // set hostname, so we can distinguish between nodes in the same environment
}
}(),

ProfileTypes: []pyroscope.ProfileType{
// these profile types are enabled by default:
Expand Down
54 changes: 54 additions & 0 deletions core/scripts/cre/environment/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1752,6 +1752,60 @@ go run . env start --with-beholder
go run . env beholder start
```

### OTel Tracing Configuration

To enable OpenTelemetry (OTel) tracing for workflow engines and see traces in Tempo/Grafana, **multiple configuration toggles must be set**:

| Toggle | Location | Required Value | Purpose |
|--------|----------|----------------|---------|
| `Telemetry.Enabled` | Node TOML | `true` | Enables the OTel exporter |
| `Telemetry.TraceSampleRatio` | Node TOML | `> 0` (e.g., `1.0`) | Controls sampling rate (0 = no traces, 1 = 100%) |
| `CRE.DebugMode` | Node TOML | `true` | Enables detailed tracing in workflow engines and syncer |
| `OTEL_SERVICE_NAME` | Environment variable | e.g., `chainlink-node` | Sets the service name for traces in Tempo |
| `Pyroscope.LinkTracesToProfiles` | Node TOML | `true` | Enables traces-to-profiles linking in Grafana (requires Pyroscope) |

**Example TOML configuration:**

```toml
[Telemetry]
Enabled = true
Endpoint = 'host.docker.internal:4317'
InsecureConnection = true
TraceSampleRatio = 1.0 # 100% sampling - adjust for production

[CRE]
DebugMode = true # WARNING: Not suitable for production due to overhead

[Pyroscope]
ServerAddress = 'http://host.docker.internal:4040'
LinkTracesToProfiles = true # Enables traces-to-profiles in Grafana
```

**Example environment variable (in nodeset config):**

```toml
[[nodesets]]
env_vars = { OTEL_SERVICE_NAME = "chainlink-node" }
```

**Common issues:**

| Symptom | Likely Cause |
|---------|--------------|
| No traces at all | `Telemetry.Enabled = false` or `TraceSampleRatio = 0` |
| No workflow engine traces | `CRE.DebugMode = false` |
| Traces show `unknown_service:chainlink` | Missing `OTEL_SERVICE_NAME` env var |
| Traces not exported | Telemetry endpoint unreachable (check `go run . obs up -f`) |
| No traces-to-profiles link in Grafana | `Pyroscope.LinkTracesToProfiles = false` or Pyroscope not running |

**Important notes:**

- `CRE.DebugMode` adds performance overhead and should only be enabled during development/debugging, not in production environments.
- **Tracing is only implemented for V2 components:**
  - **V2 Syncer**: Only used when workflow registry contracts are v2.x. If you're using v1.x contracts, the V1 syncer is used and has no tracing.
  - **V2 Engine (NoDAG)**: Only used by V2/NoDAG workflows. V1/DAG workflows use the V1 engine which has no tracing.
- To use tracing, ensure your environment is configured with **v2 workflow registry contracts** and you're deploying **V2 workflows**.

### Expected Error Messages

If these telemetry services are not running, you will see frequent "expected" error messages in the logs due to connection failures:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
override_mode = "all"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "write-evm-1337", "evm-2337"]

# Capability config overrides for this DON.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
supported_evm_chains = [1337]
supported_sol_chains = ["22222222222222222222222222222222222222222222"]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-trigger", "cron", "http-action", "http-trigger", "consensus", "don-time"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -76,7 +76,7 @@
supported_evm_chains = [1337]
supported_sol_chains = ["22222222222222222222222222222222222222222222"]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["web-api-target", "vault", "solana"]

[nodesets.db]
Expand All @@ -99,7 +99,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337]

[nodesets.db]
Expand Down
4 changes: 2 additions & 2 deletions core/scripts/cre/environment/configs/workflow-don-tron.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
override_mode = "all"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "write-evm-3360022319", "read-contract-1337", "read-contract-3360022319"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand All @@ -62,7 +62,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
# because bootstrap job for capability DON will be created on the bootstrap node from this DON
supported_evm_chains = [1337, 2337]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-trigger", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "read-contract-1337", "evm-1337"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -73,7 +73,7 @@
# to identify nodes in the gateway configuration (required by both web-api-target and vault capabilities)
supported_evm_chains = [1337, 2337]

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["web-api-target", "vault", "write-evm-2337", "read-contract-2337", "evm-2337"]

[nodesets.db]
Expand All @@ -96,7 +96,7 @@
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
override_mode = "all"
http_port_range_start = 10100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "write-evm-2337", "read-contract-1337", "read-contract-2337", "evm-1337", "evm-2337"]

# See ./examples/workflow-don-overrides.toml to learn how to override capability configs
Expand Down Expand Up @@ -76,7 +76,7 @@ Name = 'mock-private-registry'
override_mode = "each"
http_port_range_start = 10300

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
Expand Down
12 changes: 7 additions & 5 deletions core/scripts/cre/environment/configs/workflow-gateway-don.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@
name = "workflow"
don_types = ["workflow"]
override_mode = "all"
http_port_range_start = 10100
http_port_range_start = 10000
p2p_port_range_start = 12000

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
capabilities = ["ocr3", "custom-compute", "web-api-target", "web-api-trigger", "vault", "cron", "http-action", "http-trigger", "consensus", "don-time", "write-evm-1337", "write-evm-2337", "evm-1337", "evm-2337", "read-contract-1337", "read-contract-2337"]

[nodesets.db]
Expand All @@ -60,14 +61,15 @@
name = "bootstrap-gateway"
don_types = ["bootstrap", "gateway"]
override_mode = "each"
http_port_range_start = 10300
http_port_range_start = 10100
p2p_port_range_start = 12100

env_vars = { CL_EVM_CMD = "" }
env_vars = { CL_EVM_CMD = "", OTEL_SERVICE_NAME = "chainlink-node" }
supported_evm_chains = [1337, 2337]

[nodesets.db]
image = "postgres:12.0"
port = 13200
port = 13100

[[nodesets.node_specs]]
roles = ["bootstrap", "gateway"]
Expand Down
Loading
Loading