Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion skills/detection/yolo-detection-2026/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ else
BACKEND="cuda"
log "Detected NVIDIA GPU (driver: $cuda_ver)"
fi
elif command -v rocm-smi &>/dev/null || [ -d "/opt/rocm" ]; then
elif command -v amd-smi &>/dev/null || command -v rocm-smi &>/dev/null || [ -d "/opt/rocm" ]; then
BACKEND="rocm"
log "Detected AMD ROCm"
elif [ "$(uname)" = "Darwin" ] && [ "$(uname -m)" = "arm64" ]; then
Expand Down
4 changes: 4 additions & 0 deletions skills/detection/yolo-detection-2026/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ ultralytics>=8.3.0 # YOLOv11/v10/v8 inference
numpy>=1.24.0,<2.0.0
opencv-python-headless>=4.8.0
Pillow>=10.0.0
# GPU inference — install ONE of these (not both!):
# AMD ROCm: pip install onnxruntime-rocm (do NOT install onnxruntime alongside)
# NVIDIA: handled by ultralytics (tensorrt)
# CPU only: pip install onnxruntime
63 changes: 58 additions & 5 deletions skills/detection/yolo-detection-2026/scripts/env_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,17 +156,60 @@ def _try_cuda(self) -> bool:
return False

def _try_rocm(self) -> bool:
"""Detect AMD GPU via rocm-smi or /opt/rocm."""
"""Detect AMD GPU via amd-smi (preferred) or rocm-smi."""
has_amd_smi = shutil.which("amd-smi") is not None
has_rocm_smi = shutil.which("rocm-smi") is not None
has_rocm_dir = Path("/opt/rocm").is_dir()

if not (has_rocm_smi or has_rocm_dir):
if not (has_amd_smi or has_rocm_smi or has_rocm_dir):
return False

self.backend = "rocm"
self.device = "cuda" # ROCm exposes as CUDA in PyTorch

if has_rocm_smi:
# Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
if has_amd_smi:
try:
result = subprocess.run(
["amd-smi", "static", "--json"],
capture_output=True, text=True, timeout=10,
)
if result.returncode == 0:
import json as _json
data = _json.loads(result.stdout)
# amd-smi may return {"gpu_data": [...]} or a bare list
gpu_list = data.get("gpu_data", data) if isinstance(data, dict) else data
if isinstance(gpu_list, list) and len(gpu_list) > 0:
# Pick GPU with most VRAM (discrete > iGPU)
def _vram_mb(g):
vram = g.get("vram", {}).get("size", {})
if isinstance(vram, dict):
return int(vram.get("value", 0))
return 0

best_gpu = max(gpu_list, key=_vram_mb)
best_idx = gpu_list.index(best_gpu)
asic = best_gpu.get("asic", {})
vram = best_gpu.get("vram", {}).get("size", {})

self.gpu_name = asic.get("market_name", "AMD GPU")
self.gpu_memory_mb = int(vram.get("value", 0)) if isinstance(vram, dict) else 0
self.detection_details["amd_smi"] = {
"gpu_index": best_idx,
"gfx_version": asic.get("target_graphics_version", ""),
"total_gpus": len(gpu_list),
}

# Pin to discrete GPU if multiple GPUs present
if len(gpu_list) > 1:
os.environ["HIP_VISIBLE_DEVICES"] = str(best_idx)
os.environ["ROCR_VISIBLE_DEVICES"] = str(best_idx)
_log(f"Multi-GPU: pinned to GPU {best_idx} ({self.gpu_name})")
except (subprocess.TimeoutExpired, FileNotFoundError, ValueError, Exception) as e:
_log(f"amd-smi probe failed: {e}")

# Strategy 2: rocm-smi fallback (legacy ROCm <6.3)
if not self.gpu_name and has_rocm_smi:
try:
result = subprocess.run(
["rocm-smi", "--showproductname", "--csv"],
Expand All @@ -186,7 +229,6 @@ def _try_rocm(self) -> bool:
capture_output=True, text=True, timeout=10,
)
if result.returncode == 0:
# Parse total VRAM
for line in result.stdout.strip().split("\n")[1:]:
parts = line.split(",")
if len(parts) >= 2:
Expand Down Expand Up @@ -296,11 +338,22 @@ def _fallback_cpu(self):

_log("No GPU detected, using CPU backend")

def _check_rocm_runtime(self):
    """Verify onnxruntime was built with ROCm support, not just CPU.

    Returns:
        True if a ROCm-capable execution provider is registered.

    Raises:
        ImportError: if onnxruntime only exposes CPU providers (i.e. the
            plain ``onnxruntime`` wheel is installed instead of
            ``onnxruntime-rocm``).
    """
    import onnxruntime

    providers = onnxruntime.get_available_providers()
    # BUG FIX: ONNX Runtime registers its ROCm EP as "ROCMExecutionProvider"
    # (capital M). The previous check for "ROCmExecutionProvider" could never
    # match, so a valid onnxruntime-rocm install was misreported as CPU-only.
    # Keep the old spelling too (harmless) and MIGraphX, which also proves a
    # ROCm-capable build.
    rocm_providers = (
        "ROCMExecutionProvider",
        "ROCmExecutionProvider",
        "MIGraphXExecutionProvider",
    )
    if any(p in providers for p in rocm_providers):
        _log(f"onnxruntime ROCm providers: {providers}")
        return True
    _log(f"onnxruntime providers: {providers} — ROCmExecutionProvider not found")
    _log("Fix: pip uninstall onnxruntime && pip install onnxruntime-rocm")
    raise ImportError("ROCmExecutionProvider not available")

def _check_framework(self) -> bool:
"""Check if the optimized inference runtime is importable."""
checks = {
"cuda": lambda: __import__("tensorrt"),
"rocm": lambda: __import__("onnxruntime"),
"rocm": lambda: self._check_rocm_runtime(),
"mps": lambda: __import__("coremltools"),
"intel": lambda: __import__("openvino"),
"cpu": lambda: __import__("onnxruntime"),
Expand Down
63 changes: 58 additions & 5 deletions skills/lib/env_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,17 +156,60 @@ def _try_cuda(self) -> bool:
return False

def _try_rocm(self) -> bool:
"""Detect AMD GPU via rocm-smi or /opt/rocm."""
"""Detect AMD GPU via amd-smi (preferred) or rocm-smi."""
has_amd_smi = shutil.which("amd-smi") is not None
has_rocm_smi = shutil.which("rocm-smi") is not None
has_rocm_dir = Path("/opt/rocm").is_dir()

if not (has_rocm_smi or has_rocm_dir):
if not (has_amd_smi or has_rocm_smi or has_rocm_dir):
return False

self.backend = "rocm"
self.device = "cuda" # ROCm exposes as CUDA in PyTorch

if has_rocm_smi:
# Strategy 1: amd-smi static --json (ROCm 6.3+/7.x, richest output)
if has_amd_smi:
try:
result = subprocess.run(
["amd-smi", "static", "--json"],
capture_output=True, text=True, timeout=10,
)
if result.returncode == 0:
import json as _json
data = _json.loads(result.stdout)
# amd-smi may return {"gpu_data": [...]} or a bare list
gpu_list = data.get("gpu_data", data) if isinstance(data, dict) else data
if isinstance(gpu_list, list) and len(gpu_list) > 0:
# Pick GPU with most VRAM (discrete > iGPU)
def _vram_mb(g):
vram = g.get("vram", {}).get("size", {})
if isinstance(vram, dict):
return int(vram.get("value", 0))
return 0

best_gpu = max(gpu_list, key=_vram_mb)
best_idx = gpu_list.index(best_gpu)
asic = best_gpu.get("asic", {})
vram = best_gpu.get("vram", {}).get("size", {})

self.gpu_name = asic.get("market_name", "AMD GPU")
self.gpu_memory_mb = int(vram.get("value", 0)) if isinstance(vram, dict) else 0
self.detection_details["amd_smi"] = {
"gpu_index": best_idx,
"gfx_version": asic.get("target_graphics_version", ""),
"total_gpus": len(gpu_list),
}

# Pin to discrete GPU if multiple GPUs present
if len(gpu_list) > 1:
os.environ["HIP_VISIBLE_DEVICES"] = str(best_idx)
os.environ["ROCR_VISIBLE_DEVICES"] = str(best_idx)
_log(f"Multi-GPU: pinned to GPU {best_idx} ({self.gpu_name})")
except (subprocess.TimeoutExpired, FileNotFoundError, ValueError, Exception) as e:
_log(f"amd-smi probe failed: {e}")

# Strategy 2: rocm-smi fallback (legacy ROCm <6.3)
if not self.gpu_name and has_rocm_smi:
try:
result = subprocess.run(
["rocm-smi", "--showproductname", "--csv"],
Expand All @@ -186,7 +229,6 @@ def _try_rocm(self) -> bool:
capture_output=True, text=True, timeout=10,
)
if result.returncode == 0:
# Parse total VRAM
for line in result.stdout.strip().split("\n")[1:]:
parts = line.split(",")
if len(parts) >= 2:
Expand Down Expand Up @@ -296,11 +338,22 @@ def _fallback_cpu(self):

_log("No GPU detected, using CPU backend")

def _check_rocm_runtime(self):
    """Verify onnxruntime was built with ROCm support, not just CPU.

    Returns:
        True if a ROCm-capable execution provider is registered.

    Raises:
        ImportError: if onnxruntime only exposes CPU providers (i.e. the
            plain ``onnxruntime`` wheel is installed instead of
            ``onnxruntime-rocm``).
    """
    import onnxruntime

    providers = onnxruntime.get_available_providers()
    # BUG FIX: ONNX Runtime registers its ROCm EP as "ROCMExecutionProvider"
    # (capital M). The previous check for "ROCmExecutionProvider" could never
    # match, so a valid onnxruntime-rocm install was misreported as CPU-only.
    # Keep the old spelling too (harmless) and MIGraphX, which also proves a
    # ROCm-capable build.
    rocm_providers = (
        "ROCMExecutionProvider",
        "ROCmExecutionProvider",
        "MIGraphXExecutionProvider",
    )
    if any(p in providers for p in rocm_providers):
        _log(f"onnxruntime ROCm providers: {providers}")
        return True
    _log(f"onnxruntime providers: {providers} — ROCmExecutionProvider not found")
    _log("Fix: pip uninstall onnxruntime && pip install onnxruntime-rocm")
    raise ImportError("ROCmExecutionProvider not available")

def _check_framework(self) -> bool:
"""Check if the optimized inference runtime is importable."""
checks = {
"cuda": lambda: __import__("tensorrt"),
"rocm": lambda: __import__("onnxruntime"),
"rocm": lambda: self._check_rocm_runtime(),
"mps": lambda: __import__("coremltools"),
"intel": lambda: __import__("openvino"),
"cpu": lambda: __import__("onnxruntime"),
Expand Down
Loading