Skip to content

Commit

Permalink
add system_monitor settings (autowarefoundation#13)
Browse files Browse the repository at this point in the history
* add system_monitor settings

* change temp_warn/error
  • Loading branch information
0x126 committed Oct 19, 2021
1 parent 9b7f22c commit 36b92e8
Showing 5 changed files with 173 additions and 5 deletions.
81 changes: 81 additions & 0 deletions system_launch/config/autoware_error_monitor.param.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Description:
#   name: diag name
#   sf_at: diag level where it becomes Safe Fault
#   lf_at: diag level where it becomes Latent Fault
#   spf_at: diag level where it becomes Single Point Fault
#
# Note:
#   empty-value for sf_at, lf_at and spf_at is "none"
#   default values are:
#     sf_at: "none"
#     lf_at: "warn"
#     spf_at: "error"
---
/**:
  ros__parameters:
    required_modules:
      autonomous_driving:
        # Diag names listed for this mode; commented-out entries are disabled.
        names:
          - "/autoware/control/autonomous_driving/alive_monitoring"
          # - "/autoware/control/autonomous_driving/lane_departure"
          # - "/autoware/control/autonomous_driving/trajectory_deviation"  # Unstable
          - "/autoware/control/command_gate/alive_monitoring"
          - "/autoware/localization/alive_monitoring"
          - "/autoware/localization/matching_score"
          - "/autoware/localization/localization_accuracy"
          - "/autoware/map/alive_monitoring"
          - "/autoware/map/map_version"
          - "/autoware/perception/alive_monitoring"
          - "/autoware/planning/alive_monitoring"
          - "/autoware/sensing/alive_monitoring"
          # - "/autoware/sensing/camera"
          - "/autoware/sensing/gnss"
          - "/autoware/sensing/imu"
          - "/autoware/sensing/lidar"
          - "/autoware/system/alive_monitoring"
          - "/autoware/system/emergency_stop_operation"
          - "/autoware/system/logging"
          - "/autoware/system/resource_monitoring/cpu_temperature"
          - "/autoware/system/resource_monitoring/cpu_usage"
          - "/autoware/system/resource_monitoring/cpu_thermal_throttling"
          - "/autoware/system/resource_monitoring/hdd_temperature"
          - "/autoware/system/resource_monitoring/hdd_usage"
          - "/autoware/system/resource_monitoring/memory_usage"
          - "/autoware/system/resource_monitoring/network_usage"
          - "/autoware/system/resource_monitoring/ntp_offset"
          - "/autoware/system/resource_monitoring/gpu_temperature"
          - "/autoware/system/resource_monitoring/gpu_usage"
          - "/autoware/system/resource_monitoring/gpu_memory_usage"
          - "/autoware/system/resource_monitoring/gpu_thermal_throttling"
          - "/autoware/vehicle/alive_monitoring"
          - "/autoware/vehicle/vehicle_errors"
          - "/autoware/vehicle/obstacle_crush"
        # Per-diag overrides of the default sf_at / lf_at / spf_at levels listed in the header.
        # NOTE(review): nesting of diag_level under autonomous_driving is reconstructed from
        # key order (the flattened source had no indentation) - confirm against the consumer.
        diag_level:
          /autoware/localization/matching_score: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/sensing/gnss: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/logging: { sf_at: "warn", lf_at: "none", spf_at: "none" }
          /autoware/system/resource_monitoring/cpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/cpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/cpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/hdd_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/hdd_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/network_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/ntp_offset: { sf_at: "warn", lf_at: "none", spf_at: "none" }
          /autoware/system/resource_monitoring/gpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/gpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/gpu_memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/resource_monitoring/gpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" }

      remote_control:
        # No diag_level overrides are given for this mode, so the header defaults apply.
        names:
          - "/autoware/control/command_gate/alive_monitoring"
          - "/autoware/control/remote_control/alive_monitoring"
          - "/autoware/system/alive_monitoring"
          - "/autoware/system/emergency_stop_operation"
          - "/autoware/vehicle/alive_monitoring"
          - "/autoware/vehicle/vehicle_errors"
          - "/autoware/vehicle/obstacle_crush"
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Description:
#   name: diag name
#   sf_at: diag level where it becomes Safe Fault
#   lf_at: diag level where it becomes Latent Fault
#   spf_at: diag level where it becomes Single Point Fault
#
# Note:
#   empty-value for sf_at, lf_at and spf_at is "none"
#   default values are:
#     sf_at: "none"
#     lf_at: "warn"
#     spf_at: "error"
---
/**:
  ros__parameters:
    required_modules:
      autonomous_driving:
        # Diag names listed for this mode; commented-out entries are disabled,
        # with the stated reason kept next to each one.
        names:
          - "/autoware/control/autonomous_driving/alive_monitoring"
          - "/autoware/control/autonomous_driving/lane_departure"
          # - "/autoware/control/autonomous_driving/trajectory_deviation"  # Unstable
          - "/autoware/control/command_gate/alive_monitoring"
          - "/autoware/localization/alive_monitoring"
          - "/autoware/localization/matching_score"
          - "/autoware/localization/localization_accuracy"
          - "/autoware/map/alive_monitoring"
          - "/autoware/map/map_version"
          - "/autoware/perception/alive_monitoring"
          - "/autoware/planning/alive_monitoring"
          - "/autoware/sensing/alive_monitoring"
          - "/autoware/sensing/camera"
          # - "/autoware/sensing/gnss"
          - "/autoware/sensing/imu"
          - "/autoware/sensing/lidar"
          - "/autoware/system/alive_monitoring"
          - "/autoware/system/emergency_stop_operation"
          - "/autoware/system/logging"
          # - "/autoware/system/resource_monitoring/cpu_temperature"  # Not working in containers
          # - "/autoware/system/resource_monitoring/cpu_usage"  # No need to watch in simulation
          # - "/autoware/system/resource_monitoring/cpu_thermal_throttling"  # Require setup
          # - "/autoware/system/resource_monitoring/hdd_temperature"  # Require setup
          # - "/autoware/system/resource_monitoring/hdd_usage"  # Not working in containers
          # - "/autoware/system/resource_monitoring/memory_usage"  # No need to watch in simulation
          # - "/autoware/system/resource_monitoring/network_usage"  # No need to watch in simulation
          # - "/autoware/system/resource_monitoring/ntp_offset"  # No need to watch in simulation
          # - "/autoware/system/resource_monitoring/gpu_temperature"  # Require GPU
          # - "/autoware/system/resource_monitoring/gpu_usage"  # Require GPU
          # - "/autoware/system/resource_monitoring/gpu_memory_usage"  # Require GPU
          # - "/autoware/system/resource_monitoring/gpu_thermal_throttling"  # Require GPU
          - "/autoware/vehicle/alive_monitoring"
          - "/autoware/vehicle/vehicle_errors"
          - "/autoware/vehicle/obstacle_crush"
        # Per-diag overrides of the default sf_at / lf_at / spf_at levels listed in the header.
        # NOTE(review): nesting of diag_level under autonomous_driving is reconstructed from
        # key order (the flattened source had no indentation) - confirm against the consumer.
        diag_level:
          /autoware/localization/matching_score: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          # NOTE(review): gnss is commented out of names above, so this override is
          # presumably unused in this configuration - confirm before removing.
          /autoware/sensing/gnss: { sf_at: "warn", lf_at: "error", spf_at: "none" }
          /autoware/system/logging: { sf_at: "warn", lf_at: "none", spf_at: "none" }
          # /autoware/system/resource_monitoring/cpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # Not working in VMs
          # /autoware/system/resource_monitoring/cpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/cpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/hdd_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/hdd_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/network_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/ntp_offset: { sf_at: "warn", lf_at: "none", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/gpu_temperature: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/gpu_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/gpu_memory_usage: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation
          # /autoware/system/resource_monitoring/gpu_thermal_throttling: { sf_at: "warn", lf_at: "error", spf_at: "none" }  # No need to watch in simulation

      remote_control:
        # No diag_level overrides are given for this mode, so the header defaults apply.
        names:
          - "/autoware/control/command_gate/alive_monitoring"
          - "/autoware/control/remote_control/alive_monitoring"
          - "/autoware/system/alive_monitoring"
          - "/autoware/system/emergency_stop_operation"
          - "/autoware/vehicle/alive_monitoring"
          - "/autoware/vehicle/vehicle_errors"
          - "/autoware/vehicle/obstacle_crush"
10 changes: 8 additions & 2 deletions system_launch/config/system_monitor/hdd_monitor.param.yaml
Original file line number Diff line number Diff line change
@@ -5,7 +5,13 @@
# NOTE(review): this is a flattened diff hunk ("8 additions & 2 deletions"): removed and added
# lines appear together and the original indentation is not visible - confirm against the file.
disks: # Until multi type lists are allowed, name N the disks as disk0...disk{N-1}
disk0:
name: /dev/sda
# NOTE(review): presumably the removed side of the diff (previous temperature thresholds).
temp_warn: 55.0
temp_error: 70.0
# NOTE(review): presumably the added side of the diff (new temperature thresholds).
temp_warn: 70.0
temp_error: 80.0
usage_warn: 0.95
usage_error: 0.99
# Second disk entry; its thresholds match the 70.0 / 80.0 / 0.95 / 0.99 values shown for disk0.
disk1:
name: /dev/sdb
temp_warn: 70.0
temp_error: 80.0
usage_warn: 0.95
usage_error: 0.99
2 changes: 1 addition & 1 deletion system_launch/config/system_monitor/net_monitor.param.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# NOTE(review): flattened diff hunk ("1 addition & 1 deletion"); original indentation is not visible.
/**:
ros__parameters:
# NOTE(review): the two `devices` lines are presumably the removed ("*") and the added
# ("enp2s0f1") side of the diff, so only one of them exists in the real file - confirm.
devices: ["*"]
devices: ["enp2s0f1"]
usage_warn: 0.95
4 changes: 2 additions & 2 deletions system_launch/launch/system.launch.xml
Original file line number Diff line number Diff line change
@@ -34,13 +34,13 @@
<!-- Error Monitor -->
<!-- NOTE(review): flattened diff hunk ("2 additions & 2 deletions"). Each include below shows two
     config_file args: one resolved from the autoware_error_monitor package share and one from the
     system_launch package share. Presumably these are the removed and added sides of the diff, so
     only one exists per include in the real file - confirm. -->
<!-- Included only when run_mode evaluates to 'online'. -->
<group if="$(eval &quot;'$(var run_mode)'=='online'&quot;)">
<include file="$(find-pkg-share autoware_error_monitor)/launch/autoware_error_monitor.launch.xml">
<arg name="config_file" value="$(find-pkg-share autoware_error_monitor)/config/autoware_error_monitor.param.yaml" />
<arg name="config_file" value="$(find-pkg-share system_launch)/config/autoware_error_monitor.param.yaml" />
<arg name="use_emergency_hold" value="false" />
</include>
</group>
<!-- Included only when run_mode evaluates to 'planning_simulation'; uses the planning_simulation parameter file. -->
<group if="$(eval &quot;'$(var run_mode)'=='planning_simulation'&quot;)">
<include file="$(find-pkg-share autoware_error_monitor)/launch/autoware_error_monitor.launch.xml">
<arg name="config_file" value="$(find-pkg-share autoware_error_monitor)/config/autoware_error_monitor.planning_simulation.param.yaml" />
<arg name="config_file" value="$(find-pkg-share system_launch)/config/autoware_error_monitor.planning_simulation.param.yaml" />
<arg name="use_emergency_hold" value="false" />
</include>
</group>

0 comments on commit 36b92e8

Please sign in to comment.