Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ jobs:
- run: pixi run lint
- run: pixi run types
- name: Run tests
run: pixi run test -m "not slow" -v --tb=short --timeout=60
run: pixi run test -m "not slow and not manual" -v --tb=short --timeout=60
12 changes: 10 additions & 2 deletions handmotion/data/feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ def __init__(self, model_path=None):
min_hand_presence_confidence=0.5,
min_tracking_confidence=0.5,
)
self.hand_landmarker = vision.HandLandmarker.create_from_options(options)
self.hand_landmarker: vision.HandLandmarker | None = (
vision.HandLandmarker.create_from_options(options)
)

def close(self):
"""Close MediaPipe hand landmarker and release resources."""
Expand All @@ -104,7 +106,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit - ensures cleanup."""
self.close()

def extract(self, image, image_format="rgb") -> Optional[HandFeatures | None]:
def extract(self, image, image_format="rgb") -> Optional[HandFeatures]:
"""
Extract features from image.

Expand Down Expand Up @@ -137,6 +139,12 @@ def extract(self, image, image_format="rgb") -> Optional[HandFeatures | None]:
image_array = np.array(image)
mp_image = Image(image_format=ImageFormat.SRGB, data=image_array)

if self.hand_landmarker is None:
raise RuntimeError(
"FeatureExtractor is closed. Create a new FeatureExtractor instance to extract "
"features."
)

# Detect hand landmarks
detection_result = self.hand_landmarker.detect(mp_image)

Expand Down
125 changes: 99 additions & 26 deletions handmotion/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
import argparse
import logging
from pathlib import Path
from typing import Optional, Tuple, Union

import numpy as np
from PIL import Image

from handmotion.data.feature_extractor import FeatureExtractor
Expand All @@ -39,28 +41,36 @@
logger = logging.getLogger(__name__)


def predict_image(classifier, extractor, image_path):
def _extract_and_predict(
classifier: HandGestureClassifier,
image: Union[Image.Image, str, Path],
image_format: str = "rgb",
) -> Tuple[Optional[str], Optional[np.ndarray], Optional[np.ndarray]]:
"""
Predict gesture for a single image.
Internal helper to extract features and get predictions.

Args:
classifier: Loaded HandGestureClassifier
extractor: FeatureExtractor instance
image_path: Path to image file
image: PIL Image, or path to image file
image_format: "rgb" or "bgr" (default: "rgb")

Returns:
Tuple of (predicted_label, confidence_dict) or (None, None) if no hand detected
Tuple of (prediction, probabilities, class_names) or (None, None, None) if no hand detected
"""
# Load image
image = Image.open(image_path)
if image.mode != "RGB":
image = image.convert("RGB")
# Load image if path provided
if isinstance(image, (str, Path)):
image = Image.open(image)
if image.mode != "RGB":
image = image.convert("RGB")
image_format = "rgb" # PIL images are always RGB

# Initialize feature extractor
extractor = FeatureExtractor()

# Extract features
features_dict = extractor.extract(image, image_format="rgb")
features_dict = extractor.extract(image, image_format=image_format)
if features_dict is None:
logger.warning(f"No hand detected in {image_path}")
return None, None
return None, None, None

# Prepare features for classifier
landmarks = features_dict["landmarks"].reshape(1, -1) # (1, 63)
Expand All @@ -70,9 +80,71 @@ def predict_image(classifier, extractor, image_path):
# Predict
prediction = classifier.predict(features)[0]
probabilities = classifier.predict_proba(features)[0]
class_names = classifier.label_encoder.classes_

return prediction, probabilities, class_names


def predict_image(
    classifier: HandGestureClassifier,
    image: Union[Image.Image, str, Path],
    threshold: float = 0.5,
    image_format: str = "rgb",
) -> Tuple[Optional[str], Optional[float]]:
    """
    Classify the hand gesture shown in an image.

    Args:
        classifier: Loaded HandGestureClassifier
        image: PIL Image, or path to image file
        threshold: Minimum confidence to return prediction (0.0-1.0).
            Predictions scoring below this are discarded and (None, None)
            is returned instead.
        image_format: "rgb" or "bgr" (default: "rgb"). Only used if image is PIL Image.

    Returns:
        Tuple of (label, confidence) or (None, None) if:
            - No hand detected
            - Confidence below threshold
    """
    result = _extract_and_predict(classifier, image, image_format)
    prediction, probabilities, class_names = result
    if any(part is None for part in result):
        logger.debug("No hand detected in image")
        return None, None

    # Look up the probability assigned to the winning class.
    pred_idx = next(i for i, name in enumerate(class_names) if name == prediction)
    confidence = float(probabilities[pred_idx])

    # Discard low-confidence predictions.
    if confidence < threshold:
        logger.debug(f"Confidence {confidence:.4f} below threshold {threshold} for {prediction}")
        return None, None

    return prediction, confidence


def predict_image_with_proba(
    classifier: HandGestureClassifier,
    image: Union[Image.Image, str, Path],
    image_format: str = "rgb",
) -> Tuple[Optional[str], Optional[dict]]:
    """
    Predict gesture from an image with full probability distribution.

    Args:
        classifier: Loaded HandGestureClassifier
        image: PIL Image, or path to image file
        image_format: "rgb" or "bgr" (default: "rgb")

    Returns:
        Tuple of (predicted_label, confidence_dict) or (None, None) if no hand detected
    """
    prediction, probabilities, class_names = _extract_and_predict(classifier, image, image_format)
    if prediction is None or probabilities is None or class_names is None:
        logger.warning("No hand detected in image")
        return None, None

    # class_names returned by _extract_and_predict is already aligned with
    # probabilities; re-reading classifier.label_encoder.classes_ here (as the
    # previous version did) was redundant and shadowed the validated value.
    confidence_dict = {class_names[i]: float(prob) for i, prob in enumerate(probabilities)}

    return prediction, confidence_dict
Expand Down Expand Up @@ -116,25 +188,26 @@ def main():
classifier = HandGestureClassifier()
classifier.load(model_path)

# Initialize feature extractor
extractor = FeatureExtractor()

# Predict
logger.info(f"Processing image: {image_path}")
prediction, confidence = predict_image(classifier, extractor, image_path)

if prediction is None:
print("No hand detected in image.")
return

# Print results
print(f"\nPrediction: {prediction}")
print(f"Confidence: {confidence[prediction]:.4f}")

if args.show_proba:
prediction, confidence = predict_image_with_proba(classifier, image_path)
if prediction is None:
print("No hand detected in image.")
return
assert confidence is not None
print(f"\nPrediction: {prediction}")
print(f"Confidence: {confidence[prediction]:.4f}")
print("\nAll class probabilities:")
for class_name, prob in sorted(confidence.items(), key=lambda x: x[1], reverse=True):
print(f" {class_name}: {prob:.4f}")
else:
prediction, confidence = predict_image(classifier, image_path)
if prediction is None:
print("No hand detected in image.")
return
print(f"\nPrediction: {prediction}")
print(f"Confidence: {confidence:.4f}")


if __name__ == "__main__":
Expand Down
Loading