From 36b92e81995074af134080b7ed5a60cd3c0d2bf5 Mon Sep 17 00:00:00 2001 From: Shinnosuke Hirakawa <8327162+0x126@users.noreply.github.com> Date: Fri, 27 Aug 2021 12:02:16 +0900 Subject: [PATCH] add system_monitor settings (#13) * add system_monitor settings * change temp_warn/error --- .../config/autoware_error_monitor.param.yaml | 81 +++++++++++++++++++ ...ror_monitor.planning_simulation.param.yaml | 81 +++++++++++++++++++ .../system_monitor/hdd_monitor.param.yaml | 10 ++- .../system_monitor/net_monitor.param.yaml | 2 +- system_launch/launch/system.launch.xml | 4 +- 5 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 system_launch/config/autoware_error_monitor.param.yaml create mode 100644 system_launch/config/autoware_error_monitor.planning_simulation.param.yaml diff --git a/system_launch/config/autoware_error_monitor.param.yaml b/system_launch/config/autoware_error_monitor.param.yaml new file mode 100644 index 0000000000000..68129f63543f6 --- /dev/null +++ b/system_launch/config/autoware_error_monitor.param.yaml @@ -0,0 +1,81 @@ +# Description: +# name: diag name +# sf_at: diag level where it becomes Safe Fault +# lf_at: diag level where it becomes Latent Fault +# spf_at: diag level where it becomes Single Point Fault +# +# Note: +# to leave sf_at, lf_at or spf_at empty, set it to "none" +# default values are: +# sf_at: "none" +# lf_at: "warn" +# spf_at: "error" +--- +/**: + ros__parameters: + required_modules: + autonomous_driving: + names: [ + "/autoware/control/autonomous_driving/alive_monitoring", + # "/autoware/control/autonomous_driving/lane_departure", # NOTE(review): disabled here but enabled in the planning_simulation variant; confirm this is intended + # '/autoware/control/autonomous_driving/trajectory_deviation', # Unstable + "/autoware/control/command_gate/alive_monitoring", + "/autoware/localization/alive_monitoring", + "/autoware/localization/matching_score", + "/autoware/localization/localization_accuracy", + "/autoware/map/alive_monitoring", + "/autoware/map/map_version", + "/autoware/perception/alive_monitoring", + 
"/autoware/planning/alive_monitoring", + "/autoware/sensing/alive_monitoring", + # "/autoware/sensing/camera", # NOTE(review): disabled here but enabled in the planning_simulation variant; confirm this is intended + "/autoware/sensing/gnss", + "/autoware/sensing/imu", + "/autoware/sensing/lidar", + "/autoware/system/alive_monitoring", + "/autoware/system/emergency_stop_operation", + "/autoware/system/logging", + "/autoware/system/resource_monitoring/cpu_temperature", + "/autoware/system/resource_monitoring/cpu_usage", + "/autoware/system/resource_monitoring/cpu_thermal_throttling", + "/autoware/system/resource_monitoring/hdd_temperature", + "/autoware/system/resource_monitoring/hdd_usage", + "/autoware/system/resource_monitoring/memory_usage", + "/autoware/system/resource_monitoring/network_usage", + "/autoware/system/resource_monitoring/ntp_offset", + "/autoware/system/resource_monitoring/gpu_temperature", + "/autoware/system/resource_monitoring/gpu_usage", + "/autoware/system/resource_monitoring/gpu_memory_usage", + "/autoware/system/resource_monitoring/gpu_thermal_throttling", + "/autoware/vehicle/alive_monitoring", + "/autoware/vehicle/vehicle_errors", + "/autoware/vehicle/obstacle_crush", + ] + diag_level: # per-diag sf_at/lf_at/spf_at levels; diags without an entry presumably fall back to the defaults in the header (TODO confirm) + /autoware/localization/matching_score: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/sensing/gnss: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/logging: { sf_at: "warn", lf_at: "none", spf_at: "none" } + /autoware/system/resource_monitoring/cpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/cpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/cpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/hdd_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/hdd_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } 
+ /autoware/system/resource_monitoring/network_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/ntp_offset: { sf_at: "warn", lf_at: "none", spf_at: "none" } + /autoware/system/resource_monitoring/gpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/gpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/gpu_memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/resource_monitoring/gpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" } + + remote_control: + names: + [ + "/autoware/control/command_gate/alive_monitoring", + "/autoware/control/remote_control/alive_monitoring", + "/autoware/system/alive_monitoring", + "/autoware/system/emergency_stop_operation", + "/autoware/vehicle/alive_monitoring", + "/autoware/vehicle/vehicle_errors", + "/autoware/vehicle/obstacle_crush", + ] diff --git a/system_launch/config/autoware_error_monitor.planning_simulation.param.yaml b/system_launch/config/autoware_error_monitor.planning_simulation.param.yaml new file mode 100644 index 0000000000000..629f6ee64c025 --- /dev/null +++ b/system_launch/config/autoware_error_monitor.planning_simulation.param.yaml @@ -0,0 +1,81 @@ +# Description: +# name: diag name +# sf_at: diag level where it becomes Safe Fault +# lf_at: diag level where it becomes Latent Fault +# spf_at: diag level where it becomes Single Point Fault +# +# Note: +# to leave sf_at, lf_at or spf_at empty, set it to "none" +# default values are: +# sf_at: "none" +# lf_at: "warn" +# spf_at: "error" +--- +/**: + ros__parameters: + required_modules: + autonomous_driving: + names: [ + "/autoware/control/autonomous_driving/alive_monitoring", + "/autoware/control/autonomous_driving/lane_departure", + # '/autoware/control/autonomous_driving/trajectory_deviation', # Unstable + "/autoware/control/command_gate/alive_monitoring", + 
"/autoware/localization/alive_monitoring", + "/autoware/localization/matching_score", + "/autoware/localization/localization_accuracy", + "/autoware/map/alive_monitoring", + "/autoware/map/map_version", + "/autoware/perception/alive_monitoring", + "/autoware/planning/alive_monitoring", + "/autoware/sensing/alive_monitoring", + "/autoware/sensing/camera", + # "/autoware/sensing/gnss", # NOTE(review): disabled here, but diag_level below still has an active /autoware/sensing/gnss entry; confirm it is still needed + "/autoware/sensing/imu", + "/autoware/sensing/lidar", + "/autoware/system/alive_monitoring", + "/autoware/system/emergency_stop_operation", + "/autoware/system/logging", + # '/autoware/system/resource_monitoring/cpu_temperature', # Not working in containers + # '/autoware/system/resource_monitoring/cpu_usage', # No need to watch in simulation + # '/autoware/system/resource_monitoring/cpu_thermal_throttling', # Requires setup + # '/autoware/system/resource_monitoring/hdd_temperature', # Requires setup + # '/autoware/system/resource_monitoring/hdd_usage', # Not working in containers + # '/autoware/system/resource_monitoring/memory_usage', # No need to watch in simulation + # '/autoware/system/resource_monitoring/network_usage', # No need to watch in simulation + # '/autoware/system/resource_monitoring/ntp_offset', # No need to watch in simulation + # '/autoware/system/resource_monitoring/gpu_temperature', # Requires GPU + # '/autoware/system/resource_monitoring/gpu_usage', # Requires GPU + # '/autoware/system/resource_monitoring/gpu_memory_usage', # Requires GPU + # '/autoware/system/resource_monitoring/gpu_thermal_throttling', # Requires GPU + "/autoware/vehicle/alive_monitoring", + "/autoware/vehicle/vehicle_errors", + "/autoware/vehicle/obstacle_crush", + ] + diag_level: + /autoware/localization/matching_score: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/sensing/gnss: { sf_at: "warn", lf_at: "error", spf_at: "none" } + /autoware/system/logging: { sf_at: "warn", lf_at: "none", spf_at: "none" } + # /autoware/system/resource_monitoring/cpu_temperature: { sf_at: "warn", 
lf_at: "error", spf_at: "none" } # Not working in VMs + # /autoware/system/resource_monitoring/cpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/cpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/hdd_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/hdd_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/network_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/ntp_offset: { sf_at: "warn", lf_at: "none", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/gpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/gpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/gpu_memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + # /autoware/system/resource_monitoring/gpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation + + remote_control: # identical to the remote_control names in autoware_error_monitor.param.yaml + names: + [ + "/autoware/control/command_gate/alive_monitoring", + "/autoware/control/remote_control/alive_monitoring", + "/autoware/system/alive_monitoring", + "/autoware/system/emergency_stop_operation", + "/autoware/vehicle/alive_monitoring", + "/autoware/vehicle/vehicle_errors", + "/autoware/vehicle/obstacle_crush", + ] diff --git 
a/system_launch/config/system_monitor/hdd_monitor.param.yaml b/system_launch/config/system_monitor/hdd_monitor.param.yaml index bf687ae848059..caaf64892573d 100644 --- a/system_launch/config/system_monitor/hdd_monitor.param.yaml +++ b/system_launch/config/system_monitor/hdd_monitor.param.yaml @@ -5,7 +5,13 @@ disks: # Until multi type lists are allowed, name N the disks as disk0...disk{N-1} disk0: name: /dev/sda - temp_warn: 55.0 - temp_error: 70.0 + temp_warn: 70.0 + temp_error: 80.0 usage_warn: 0.95 usage_error: 0.99 + disk1: # NOTE(review): second disk; confirm the disk count N configured outside this hunk covers disk1 + name: /dev/sdb + temp_warn: 70.0 + temp_error: 80.0 + usage_warn: 0.95 + usage_error: 0.99 diff --git a/system_launch/config/system_monitor/net_monitor.param.yaml b/system_launch/config/system_monitor/net_monitor.param.yaml index d0707ddba399f..9971561e2086f 100644 --- a/system_launch/config/system_monitor/net_monitor.param.yaml +++ b/system_launch/config/system_monitor/net_monitor.param.yaml @@ -1,4 +1,4 @@ /**: ros__parameters: - devices: ["*"] + devices: ["enp2s0f1"] # NOTE(review): host-specific interface name (was "*"); confirm it exists on every target machine usage_warn: 0.95 diff --git a/system_launch/launch/system.launch.xml b/system_launch/launch/system.launch.xml index d21ef0fd45b7a..30873cfda14db 100644 --- a/system_launch/launch/system.launch.xml +++ b/system_launch/launch/system.launch.xml @@ -34,13 +34,13 @@ - + - +