From 36b92e81995074af134080b7ed5a60cd3c0d2bf5 Mon Sep 17 00:00:00 2001
From: Shinnosuke Hirakawa <8327162+0x126@users.noreply.github.com>
Date: Fri, 27 Aug 2021 12:02:16 +0900
Subject: [PATCH] add system_monitor settings (#13)
* add system_monitor settings
* change temp_warn/error
---
.../config/autoware_error_monitor.param.yaml | 81 +++++++++++++++++++
...ror_monitor.planning_simulation.param.yaml | 81 +++++++++++++++++++
.../system_monitor/hdd_monitor.param.yaml | 10 ++-
.../system_monitor/net_monitor.param.yaml | 2 +-
system_launch/launch/system.launch.xml | 4 +-
5 files changed, 173 insertions(+), 5 deletions(-)
create mode 100644 system_launch/config/autoware_error_monitor.param.yaml
create mode 100644 system_launch/config/autoware_error_monitor.planning_simulation.param.yaml
diff --git a/system_launch/config/autoware_error_monitor.param.yaml b/system_launch/config/autoware_error_monitor.param.yaml
new file mode 100644
index 0000000000000..68129f63543f6
--- /dev/null
+++ b/system_launch/config/autoware_error_monitor.param.yaml
@@ -0,0 +1,81 @@
+# Description:
+# name: diag name
+# sf_at: diag level at which it becomes a Safe Fault
+# lf_at: diag level at which it becomes a Latent Fault
+# spf_at: diag level at which it becomes a Single Point Fault
+#
+# Note:
+# use "none" as the empty value for sf_at, lf_at and spf_at
+# default values are:
+# sf_at: "none"
+# lf_at: "warn"
+# spf_at: "error"
+---
+/**:
+ ros__parameters:
+ required_modules:
+ autonomous_driving:
+ names: [
+ "/autoware/control/autonomous_driving/alive_monitoring",
+ # "/autoware/control/autonomous_driving/lane_departure",
+ # '/autoware/control/autonomous_driving/trajectory_deviation', # Unstable
+ "/autoware/control/command_gate/alive_monitoring",
+ "/autoware/localization/alive_monitoring",
+ "/autoware/localization/matching_score",
+ "/autoware/localization/localization_accuracy",
+ "/autoware/map/alive_monitoring",
+ "/autoware/map/map_version",
+ "/autoware/perception/alive_monitoring",
+ "/autoware/planning/alive_monitoring",
+ "/autoware/sensing/alive_monitoring",
+ # "/autoware/sensing/camera",
+ "/autoware/sensing/gnss",
+ "/autoware/sensing/imu",
+ "/autoware/sensing/lidar",
+ "/autoware/system/alive_monitoring",
+ "/autoware/system/emergency_stop_operation",
+ "/autoware/system/logging",
+ "/autoware/system/resource_monitoring/cpu_temperature",
+ "/autoware/system/resource_monitoring/cpu_usage",
+ "/autoware/system/resource_monitoring/cpu_thermal_throttling",
+ "/autoware/system/resource_monitoring/hdd_temperature",
+ "/autoware/system/resource_monitoring/hdd_usage",
+ "/autoware/system/resource_monitoring/memory_usage",
+ "/autoware/system/resource_monitoring/network_usage",
+ "/autoware/system/resource_monitoring/ntp_offset",
+ "/autoware/system/resource_monitoring/gpu_temperature",
+ "/autoware/system/resource_monitoring/gpu_usage",
+ "/autoware/system/resource_monitoring/gpu_memory_usage",
+ "/autoware/system/resource_monitoring/gpu_thermal_throttling",
+ "/autoware/vehicle/alive_monitoring",
+ "/autoware/vehicle/vehicle_errors",
+ "/autoware/vehicle/obstacle_crush",
+ ]
+ diag_level:
+ /autoware/localization/matching_score: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/sensing/gnss: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/logging: { sf_at: "warn", lf_at: "none", spf_at: "none" }
+ /autoware/system/resource_monitoring/cpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/cpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/cpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/hdd_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/hdd_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/network_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/ntp_offset: { sf_at: "warn", lf_at: "none", spf_at: "none" }
+ /autoware/system/resource_monitoring/gpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/gpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/gpu_memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/resource_monitoring/gpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+
+ remote_control:
+ names:
+ [
+ "/autoware/control/command_gate/alive_monitoring",
+ "/autoware/control/remote_control/alive_monitoring",
+ "/autoware/system/alive_monitoring",
+ "/autoware/system/emergency_stop_operation",
+ "/autoware/vehicle/alive_monitoring",
+ "/autoware/vehicle/vehicle_errors",
+ "/autoware/vehicle/obstacle_crush",
+ ]
diff --git a/system_launch/config/autoware_error_monitor.planning_simulation.param.yaml b/system_launch/config/autoware_error_monitor.planning_simulation.param.yaml
new file mode 100644
index 0000000000000..629f6ee64c025
--- /dev/null
+++ b/system_launch/config/autoware_error_monitor.planning_simulation.param.yaml
@@ -0,0 +1,81 @@
+# Description:
+# name: diag name
+# sf_at: diag level at which it becomes a Safe Fault
+# lf_at: diag level at which it becomes a Latent Fault
+# spf_at: diag level at which it becomes a Single Point Fault
+#
+# Note:
+# use "none" as the empty value for sf_at, lf_at and spf_at
+# default values are:
+# sf_at: "none"
+# lf_at: "warn"
+# spf_at: "error"
+---
+/**:
+ ros__parameters:
+ required_modules:
+ autonomous_driving:
+ names: [
+ "/autoware/control/autonomous_driving/alive_monitoring",
+ "/autoware/control/autonomous_driving/lane_departure",
+ # '/autoware/control/autonomous_driving/trajectory_deviation', # Unstable
+ "/autoware/control/command_gate/alive_monitoring",
+ "/autoware/localization/alive_monitoring",
+ "/autoware/localization/matching_score",
+ "/autoware/localization/localization_accuracy",
+ "/autoware/map/alive_monitoring",
+ "/autoware/map/map_version",
+ "/autoware/perception/alive_monitoring",
+ "/autoware/planning/alive_monitoring",
+ "/autoware/sensing/alive_monitoring",
+ "/autoware/sensing/camera",
+ # "/autoware/sensing/gnss",
+ "/autoware/sensing/imu",
+ "/autoware/sensing/lidar",
+ "/autoware/system/alive_monitoring",
+ "/autoware/system/emergency_stop_operation",
+ "/autoware/system/logging",
+ # '/autoware/system/resource_monitoring/cpu_temperature', # Not working in containers
+ # '/autoware/system/resource_monitoring/cpu_usage', # No need to watch in simulation
+ # '/autoware/system/resource_monitoring/cpu_thermal_throttling', # Require setup
+ # '/autoware/system/resource_monitoring/hdd_temperature', # Require setup
+ # '/autoware/system/resource_monitoring/hdd_usage', # Not working in containers
+ # '/autoware/system/resource_monitoring/memory_usage', # No need to watch in simulation
+ # '/autoware/system/resource_monitoring/network_usage', # No need to watch in simulation
+ # '/autoware/system/resource_monitoring/ntp_offset', # No need to watch in simulation
+ # '/autoware/system/resource_monitoring/gpu_temperature', # Require GPU
+ # '/autoware/system/resource_monitoring/gpu_usage', # Require GPU
+ # '/autoware/system/resource_monitoring/gpu_memory_usage', # Require GPU
+ # '/autoware/system/resource_monitoring/gpu_thermal_throttling', # Require GPU
+ "/autoware/vehicle/alive_monitoring",
+ "/autoware/vehicle/vehicle_errors",
+ "/autoware/vehicle/obstacle_crush",
+ ]
+ diag_level:
+ /autoware/localization/matching_score: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/sensing/gnss: { sf_at: "warn", lf_at: "error", spf_at: "none" }
+ /autoware/system/logging: { sf_at: "warn", lf_at: "none", spf_at: "none" }
+ # /autoware/system/resource_monitoring/cpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Not working in containers
+ # /autoware/system/resource_monitoring/cpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation
+ # /autoware/system/resource_monitoring/cpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Require setup
+ # /autoware/system/resource_monitoring/hdd_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Require setup
+ # /autoware/system/resource_monitoring/hdd_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Not working in containers
+ # /autoware/system/resource_monitoring/memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation
+ # /autoware/system/resource_monitoring/network_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # No need to watch in simulation
+ # /autoware/system/resource_monitoring/ntp_offset: { sf_at: "warn", lf_at: "none", spf_at: "none" } # No need to watch in simulation
+ # /autoware/system/resource_monitoring/gpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Require GPU
+ # /autoware/system/resource_monitoring/gpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Require GPU
+ # /autoware/system/resource_monitoring/gpu_memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Require GPU
+ # /autoware/system/resource_monitoring/gpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" } # Require GPU
+
+ remote_control:
+ names:
+ [
+ "/autoware/control/command_gate/alive_monitoring",
+ "/autoware/control/remote_control/alive_monitoring",
+ "/autoware/system/alive_monitoring",
+ "/autoware/system/emergency_stop_operation",
+ "/autoware/vehicle/alive_monitoring",
+ "/autoware/vehicle/vehicle_errors",
+ "/autoware/vehicle/obstacle_crush",
+ ]
diff --git a/system_launch/config/system_monitor/hdd_monitor.param.yaml b/system_launch/config/system_monitor/hdd_monitor.param.yaml
index bf687ae848059..caaf64892573d 100644
--- a/system_launch/config/system_monitor/hdd_monitor.param.yaml
+++ b/system_launch/config/system_monitor/hdd_monitor.param.yaml
@@ -5,7 +5,13 @@
disks: # Until multi type lists are allowed, name N the disks as disk0...disk{N-1}
disk0:
name: /dev/sda
- temp_warn: 55.0
- temp_error: 70.0
+ temp_warn: 70.0
+ temp_error: 80.0
usage_warn: 0.95
usage_error: 0.99
+ disk1:  # second monitored disk; thresholds mirror disk0
+ name: /dev/sdb
+ temp_warn: 70.0
+ temp_error: 80.0
+ usage_warn: 0.95
+ usage_error: 0.99
diff --git a/system_launch/config/system_monitor/net_monitor.param.yaml b/system_launch/config/system_monitor/net_monitor.param.yaml
index d0707ddba399f..9971561e2086f 100644
--- a/system_launch/config/system_monitor/net_monitor.param.yaml
+++ b/system_launch/config/system_monitor/net_monitor.param.yaml
@@ -1,4 +1,4 @@
/**:
ros__parameters:
- devices: ["*"]
+ devices: ["enp2s0f1"]  # NOTE(review): replaces the "*" wildcard with one host-specific interface name; confirm it exists on the target machine
usage_warn: 0.95
diff --git a/system_launch/launch/system.launch.xml b/system_launch/launch/system.launch.xml
index d21ef0fd45b7a..30873cfda14db 100644
--- a/system_launch/launch/system.launch.xml
+++ b/system_launch/launch/system.launch.xml
@@ -34,13 +34,13 @@
-
+
-
+