Skip to content

Commit

Permalink
add ALL config
Browse files Browse the repository at this point in the history
  • Loading branch information
jessieqliu committed Oct 7, 2024
1 parent c925837 commit 67f52a4
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 172 deletions.
11 changes: 0 additions & 11 deletions launcher/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
"github.com/google/go-tpm-tools/cel"
"github.com/google/go-tpm-tools/client"
"github.com/google/go-tpm-tools/launcher/agent"
"github.com/google/go-tpm-tools/launcher/internal/healthmonitoring/nodeproblemdetector"
"github.com/google/go-tpm-tools/launcher/internal/signaturediscovery"
"github.com/google/go-tpm-tools/launcher/launcherfile"
"github.com/google/go-tpm-tools/launcher/registryauth"
Expand Down Expand Up @@ -521,16 +520,6 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
go teeServer.Serve()
defer teeServer.Shutdown(ctx)

// start node-problem-detector.service to collect memory related metrics.
if r.launchSpec.MemoryMonitoringEnabled {
r.logger.Println("MemoryMonitoring is enabled by the VM operator")
if err := nodeproblemdetector.StartService(r.logger); err != nil {
return err
}
} else {
r.logger.Println("MemoryMonitoring is disabled by the VM operator")
}

var streamOpt cio.Opt
switch r.launchSpec.LogRedirect {
case spec.Nowhere:
Expand Down
2 changes: 1 addition & 1 deletion launcher/internal/healthmonitoring/config.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package monitoring
package healthmonitoring

import (
"fmt"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,20 @@ type statsConfig struct {
MetricsConfigs map[string]metricConfig `json:"metricsConfigs"`
}

// diskConfig holds the disk-specific section of the system stats monitor
// configuration: which block devices to include, the lsblk timeout, and the
// disk metrics to collect.
//
// Every struct tag value must be double-quoted; an unquoted tag such as
// `json:includeRootBlk` is ignored by encoding/json and the field would be
// emitted under its Go name instead.
//
// NOTE(review): MetricsConfigs points at a statsConfig, which itself carries a
// "metricsConfigs" key, so the marshaled output nests that key twice — confirm
// this matches the layout node-problem-detector expects.
type diskConfig struct {
	IncludeAllAttachedBlk bool         `json:"includeAllAttachedBlk"`
	IncludeRootBlk        bool         `json:"includeRootBlk"`
	LsblkTimeout          string       `json:"lsblkTimeout"`
	MetricsConfigs        *statsConfig `json:"metricsConfigs"`
}

// SystemStatsConfig contains configurations for `System Stats Monitor`,
// a problem daemon in node-problem-detector that collects pre-defined health-related metrics from different system components.
// The predefined allConfig in this package collects CPU, disk, host and memory related metrics.
// View the comprehensive configuration details on https://github.com/kubernetes/node-problem-detector/tree/master/pkg/systemstatsmonitor#detailed-configuration-options
type SystemStatsConfig struct {
CPU *statsConfig `json:"cpu,omitempty"`
Disk *statsConfig `json:"disk,omitempty"`
Disk *diskConfig `json:"disk,omitempty"`
Host *statsConfig `json:"host,omitempty"`
Memory *statsConfig `json:"memory,omitempty"`
InvokeInterval string `json:"invokeInterval,omitempty"`
Expand All @@ -43,25 +50,50 @@ func NewSystemStatsConfig() SystemStatsConfig {
}
}

var healthConfig = &SystemStatsConfig{
var allConfig = &SystemStatsConfig{
CPU: &statsConfig{map[string]metricConfig{
"cpu/load_5m": {"cpu/load_5m"},
}},
Disk: &statsConfig{map[string]metricConfig{
"disk/percent_used": {"disk/percent_used"},
"cpu/runnable_task_count": {"cpu/runnable_task_count"},
"cpu/usage_time": {"cpu/usage_time"},
"cpu/load_1m": {"cpu/load_1m"},
"cpu/load_5m": {"cpu/load_5m"},
"cpu/load_15m": {"cpu/load_15m"},
"system/cpu_stat": {"system/cpu_stat"},
"system/interrupts_total": {"system/interrupts_total"},
"system/processes_total": {"system/processes_total"},
"system/procs_blocked": {"system/procs_blocked"},
"system/procs_running": {"system/procs_running"},
}},
Disk: &diskConfig{
true, true, "5s",
&statsConfig{map[string]metricConfig{
"disk/avg_queue_len": {"disk/avg_queue_len"},
"disk/bytes_used": {"disk/bytes_used"},
"disk/percent_used": {"disk/percent_used"},
"disk/io_time": {"disk/io_time"},
"disk/merged_operation_count": {"disk/merged_operation_count"},
"disk/operation_bytes_count": {"disk/operation_bytes_count"},
"disk/operation_count": {"disk/operation_count"},
"disk/operation_time": {"disk/operation_time"},
"disk/weighted_io": {"disk/weighted_io"},
}},
},
Host: &statsConfig{map[string]metricConfig{
"host/uptime": {"host/uptime"},
}},
Memory: &statsConfig{map[string]metricConfig{
"memory/bytes_used": {"memory/bytes_used"},
"memory/anonymous_used": {"memory/anonymous_used"},
"memory/bytes_used": {"memory/bytes_used"},
"memory/dirty_used": {"memory/dirty_used"},
"memory/page_cache_used": {"memory/page_cache_used"},
"memory/unevictable_used": {"memory/unevictable_used"},
"memory/percent_used": {"memory/percent_used"},
}},
InvokeInterval: defaultInvokeIntervalString,
}

// EnableHealthMonitoringConfig overwrites system stats config with health monitoring config.
func EnableHealthMonitoringConfig() error {
return healthConfig.WriteFile(systemStatsFilePath)
// EnableAllConfig overwrites the system stats config file with allConfig, which lists CPU, disk, host and memory metrics.
func EnableAllConfig() error {
return allConfig.WriteFile(systemStatsFilePath)
}

// EnableMemoryBytesUsed enables "memory/bytes_used" for memory monitoring.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ func TestEnableHealthMonitoringConfig(t *testing.T) {
tmpDir := t.TempDir()
systemStatsFilePath = path.Join(tmpDir, "system-stats-monitor.json")

wantBytes, err := json.Marshal(healthConfig)
wantBytes, err := json.Marshal(allConfig)
if err != nil {
t.Fatalf("Error marshaling expected config: %v", err)
}

EnableHealthMonitoringConfig()
EnableAllConfig()

file, err := os.OpenFile(systemStatsFilePath, os.O_RDONLY, 0)
if err != nil {
Expand Down
12 changes: 7 additions & 5 deletions launcher/launcher/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,14 @@ func main() {
return
}

if launchSpec.HealthMonitoringEnabled {
logger.Printf("Health Monitoring is enabled by the VM operator")
if launchSpec.MonitoringEnabled != spec.None {
logger.Printf("Monitoring is enabled by the VM operator")

if err := nodeproblemdetector.EnableHealthMonitoringConfig(); err != nil {
logger.Printf("failed to enable Health Monitoring config: %v", err)
return
if launchSpec.MonitoringEnabled == spec.All {
if err := nodeproblemdetector.EnableAllConfig(); err != nil {
logger.Printf("failed to enable Health Monitoring config: %v", err)
return
}
}

if err := nodeproblemdetector.StartService(logger); err != nil {
Expand Down
70 changes: 48 additions & 22 deletions launcher/spec/launch_policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ import (
"path/filepath"
"strconv"
"strings"

"github.com/google/go-tpm-tools/launcher/internal/healthmonitoring/monitoring"
)

// LaunchPolicy contains policies on starting the container.
Expand All @@ -18,8 +16,8 @@ type LaunchPolicy struct {
AllowedCmdOverride bool
AllowedLogRedirect policy
AllowedMountDestinations []string
HardenedImageMonitoring monitoring.Config
DebugImageMonitoring monitoring.Config
HardenedImageMonitoring MonitoringType
DebugImageMonitoring MonitoringType
}

type policy int
Expand All @@ -30,14 +28,27 @@ const (
never
)

type monitoringType int
type MonitoringType int

const (
none monitoringType = iota
memoryOnly
health
None MonitoringType = iota
MemoryOnly
All
)

// toMonitoringType parses s, ignoring letter case, into a MonitoringType.
// The accepted spellings are "none", "memoryonly" and "all". Any other input
// yields None together with an error that echoes the original string.
func toMonitoringType(s string) (MonitoringType, error) {
	lowered := strings.ToLower(s)

	if lowered == "none" {
		return None, nil
	}
	if lowered == "memoryonly" {
		return MemoryOnly, nil
	}
	if lowered == "all" {
		return All, nil
	}

	return None, fmt.Errorf("invalid monitoring type %v", s)
}

// String returns LaunchPolicy details.
func (p policy) String() string {
switch p {
Expand Down Expand Up @@ -102,40 +113,39 @@ func configureMonitoringPolicy(imageLabels map[string]string, launchPolicy *Laun

logger.Printf("%s will be deprecated, use %s and %s instead", memoryMonitoring, hardenedMonitoring, debugMonitoring)

memoryOnlyConfig := monitoring.Config{CPU: false, Disk: false, Host: false, Memory: false}
switch policy {
case always:
logger.Printf("%s=always, will be treated as %s=memory and %s=memory", memoryMonitoring, hardenedMonitoring, debugMonitoring)
launchPolicy.HardenedImageMonitoring = memoryOnlyConfig
launchPolicy.DebugImageMonitoring = memoryOnlyConfig
logger.Printf("%s=always, will be treated as %s=memory_only and %s=memory_only", memoryMonitoring, hardenedMonitoring, debugMonitoring)
launchPolicy.HardenedImageMonitoring = MemoryOnly
launchPolicy.DebugImageMonitoring = MemoryOnly
case never:
logger.Printf("%s=never, will be treated as %s=none and %s=none", memoryMonitoring, hardenedMonitoring, debugMonitoring)
launchPolicy.HardenedImageMonitoring = memoryOnlyConfig
launchPolicy.DebugImageMonitoring = monitoring.NoneConfig()
launchPolicy.HardenedImageMonitoring = None
launchPolicy.DebugImageMonitoring = None
case debugOnly:
logger.Printf("%s=debug_only, will be treated as %s=none and %s=memory", memoryMonitoring, hardenedMonitoring, debugMonitoring)
launchPolicy.HardenedImageMonitoring = monitoring.NoneConfig()
launchPolicy.DebugImageMonitoring = memoryOnlyConfig
launchPolicy.HardenedImageMonitoring = None
launchPolicy.DebugImageMonitoring = MemoryOnly
}
return nil
}

if hardenedOk {
launchPolicy.HardenedImageMonitoring, err = monitoring.ToConfig(hardenedVal)
launchPolicy.HardenedImageMonitoring, err = toMonitoringType(hardenedVal)
if err != nil {
return fmt.Errorf("invalid monitoring type for hardened image: %v", err)
}
} else {
launchPolicy.HardenedImageMonitoring = monitoring.NoneConfig()
launchPolicy.HardenedImageMonitoring = None
}

if debugOk {
launchPolicy.DebugImageMonitoring, err = monitoring.ToConfig(debugVal)
launchPolicy.DebugImageMonitoring, err = toMonitoringType(debugVal)
if err != nil {
return fmt.Errorf("invalid monitoring type for debug image: %v", err)
}
} else {
launchPolicy.DebugImageMonitoring = monitoring.AllConfig()
launchPolicy.DebugImageMonitoring = MemoryOnly
}

return nil
Expand Down Expand Up @@ -189,6 +199,22 @@ func GetLaunchPolicy(imageLabels map[string]string, logger *log.Logger) (LaunchP
return launchPolicy, nil
}

// verifyMonitoringConfig checks the monitoring level requested by spec against
// the level permitted by policy.
//
// A None policy accepts only a None spec, and a MemoryOnly policy rejects an
// All spec. Every other combination is accepted and nil is returned.
func verifyMonitoringConfig(policy MonitoringType, spec MonitoringType) error {
	switch {
	case policy == None && spec != None:
		return fmt.Errorf("spec configured for %v but policy is none", spec)
	case policy == MemoryOnly && spec == All:
		return fmt.Errorf("spec configured all monitoring, policy only allows memory")
	default:
		return nil
	}
}

// Verify will use the LaunchPolicy to verify the given LaunchSpec. If the verification passed, will return nil.
// If there are multiple violations, the function will return the first error.
func (p LaunchPolicy) Verify(ls LaunchSpec) error {
Expand All @@ -214,8 +240,8 @@ func (p LaunchPolicy) Verify(ls LaunchSpec) error {
monitoringPolicy = p.HardenedImageMonitoring
}

if err := monitoring.CheckCompliance(monitoringPolicy, ls.MonitoringEnabled); err != nil {
return fmt.Errorf("error verifying monitoring configs: %v", err)
if err := verifyMonitoringConfig(monitoringPolicy, ls.MonitoringEnabled); err != nil {
return fmt.Errorf("error verifying monitoring config: %v", err)
}

var err error
Expand Down
Loading

0 comments on commit 67f52a4

Please sign in to comment.