# app.py

from PIL import Image
import io
import pandas as pd
import numpy as np
import math
from typing import Optional, List, Tuple
import aiohttp
import asyncio
import torch
import os
from dotenv import load_dotenv
import logging
import cv2
from scipy.interpolate import CubicSpline

from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors

# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables (via python-dotenv) before anything reads them.
load_dotenv()

# Initialize the model at import time; every function below that references
# the module-level ``model`` depends on this having succeeded.
model_path = "models/best_75epoch.pt"  # Path to the custom-trained weights

try:
    logger.info(f"Loading model from {model_path}")
    model = YOLO(model_path)
    model.to('cpu')  # Ensure model is on CPU
    
    # Force the task to 'pose' when the loaded model does not already report it,
    # since the rest of this module reads keypoints from the prediction results.
    if not hasattr(model, 'task') or model.task != 'pose':
        logger.warning("Model task not set to pose, setting it explicitly")
        model.task = 'pose'  # Set task explicitly
    
    # Log detailed model configuration to make deployment issues easier to diagnose
    logger.info(f"Model loaded successfully. Task: {model.task}")
    logger.info(f"Model info: {model.info()}")
    logger.info(f"Model configuration: {model.model.yaml}")  # Log model architecture
    logger.info(f"Model names (classes): {model.names}")
except Exception as e:
    # Log the failure, then re-raise so importing this module fails loudly
    # instead of leaving ``model`` undefined.
    logger.error(f"Error loading model: {str(e)}")
    raise

def get_image_from_bytes(binary_image: bytes) -> Image:
    """Decode raw image bytes into a PIL image in RGB mode.

    Args:
        binary_image (bytes): Encoded image data (any format PIL can open).

    Returns:
        PIL.Image: The decoded image converted to RGB.
    """
    # Wrap the bytes in a file-like object so PIL can read them.
    buffer = io.BytesIO(binary_image)
    decoded = Image.open(buffer)
    return decoded.convert("RGB")


def get_bytes_from_image(image: "Image.Image") -> io.BytesIO:
    """
    Encode a PIL image as JPEG into an in-memory buffer.

    Args:
        image (Image.Image): A PIL image instance.

    Returns:
        io.BytesIO: Buffer holding the image encoded as JPEG with quality 85,
            positioned at the start so it can be read immediately.
    """
    # Pillow's JPEG writer rejects modes such as RGBA, LA or P
    # ("cannot write mode ... as JPEG"), so convert anything it cannot
    # store to RGB first. Modes it accepts are left untouched.
    if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        image = image.convert("RGB")

    return_image = io.BytesIO()
    image.save(return_image, format='JPEG', quality=85)
    return_image.seek(0)  # rewind so callers read from the beginning
    return return_image

def transform_predict_to_df(results: list, labeles_dict: dict) -> pd.DataFrame:
    """
    Transform a YOLOv8 prediction into a pandas DataFrame of bounding boxes.

    Args:
        results (list): Prediction output from the model; only the first
            element (``results[0]``) is used.
        labeles_dict (dict): Mapping from class id to label name.

    Returns:
        predict_bbox (pd.DataFrame): One row per detected box with columns
            ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``confidence``, ``class``
            (integer id) and ``name`` (label from ``labeles_dict``; ids that
            are not in the mapping are left as-is).
    """
    # Move the result to CPU and convert to numpy once, then reuse the boxes
    # instead of repeating the conversion for every column.
    boxes = results[0].to("cpu").numpy().boxes

    predict_bbox = pd.DataFrame(boxes.xyxy, columns=['xmin', 'ymin', 'xmax', 'ymax'])
    predict_bbox['confidence'] = boxes.conf
    predict_bbox['class'] = boxes.cls.astype(int)
    # Replace the class number with the class name from labeles_dict
    predict_bbox['name'] = predict_bbox["class"].replace(labeles_dict)
    return predict_bbox

def _inference_size(orig_w: int, orig_h: int, max_side: int = 1024, stride: int = 32) -> Tuple[int, int]:
    """Return the (height, width) to pass as ``imgsz``: stride multiples that roughly keep the aspect ratio."""
    def _snap(value: float) -> int:
        # Round down to a multiple of the stride, but never below one stride:
        # images smaller than the stride (or with an extreme aspect ratio)
        # would otherwise produce a size of 0.
        return max(stride, (int(value) // stride) * stride)

    aspect = orig_w / orig_h
    if aspect > 1:
        # Landscape: the width is the limiting side.
        new_w = _snap(min(max_side, orig_w))
        new_h = _snap(new_w / aspect)
    else:
        # Portrait or square: the height is the limiting side.
        new_h = _snap(min(max_side, orig_h))
        new_w = _snap(new_h * aspect)
    return new_h, new_w


def _save_debug_visualization(input_image: Image.Image, keypoints_list: List[List[float]]) -> None:
    """Write a keypoint overlay to test_images/debug_keypoints.jpg; failures are logged, never raised."""
    try:
        os.makedirs("test_images", exist_ok=True)
        debug_image = draw_keypoints(input_image.copy(), keypoints_list, confidence_threshold=0.1)
        debug_path = os.path.join("test_images", "debug_keypoints.jpg")
        debug_image.save(debug_path)
        logger.info(f"Saved debug visualization to {debug_path}")
    except (OSError, ValueError) as e:
        # The debug image is a best-effort artifact; an unwritable directory
        # must not discard an otherwise valid prediction.
        logger.warning(f"Could not save debug visualization: {str(e)}")


def get_model_predict(model: YOLO, input_image: Image.Image, conf: float = 0.1) -> Tuple[List[List[float]], float]:
    """
    Get keypoint predictions for the first detection in an image.

    Args:
        model (YOLO): The YOLO pose model
        input_image (Image.Image): Input image
        conf (float): Confidence threshold passed to ``model.predict``

    Returns:
        Tuple[List[List[float]], float]: ``(keypoints, confidence)`` where
            ``keypoints`` is a list of ``[x, y, keypoint_confidence]`` for the
            first detection and ``confidence`` is that detection's box
            confidence. Returns ``([], 0.0)`` when nothing usable is detected
            or when prediction raises (the error is logged, not propagated).
    """
    try:
        # Convert PIL image to numpy array
        img_array = np.array(input_image)
        logger.info(f"Input image shape: {img_array.shape}")

        # Choose an inference size (multiple of 32, roughly preserving aspect ratio)
        orig_h, orig_w = img_array.shape[:2]
        new_h, new_w = _inference_size(orig_w, orig_h)
        logger.info(f"Resizing image from {orig_w}x{orig_h} to {new_w}x{new_h}")

        # Make predictions with the model
        with torch.no_grad():
            results = model.predict(
                source=img_array,
                conf=conf,
                verbose=True,
                imgsz=(new_h, new_w),
                augment=False,
                retina_masks=True,
                save=False
            )

        logger.info(f"Number of detections: {len(results)}")

        if len(results) > 0:
            result = results[0]

            # Box confidence of the first detection (0.0 when there are no boxes).
            confidence = 0.0
            boxes = getattr(result, 'boxes', None)
            if boxes is not None and len(boxes.conf) > 0:
                confidence = float(boxes.conf[0])
                logger.info(f"Detection confidence: {confidence}")

            keypoints_obj = getattr(result, 'keypoints', None)
            if keypoints_obj is not None:
                logger.info(f"Keypoints shape: {keypoints_obj.shape}")

                if len(keypoints_obj.data) > 0:
                    xy = keypoints_obj.xy[0]

                    if xy.shape[0] > 0:
                        # Use a separate name so the ``conf`` threshold
                        # parameter is not shadowed by the tensor.
                        if keypoints_obj.conf is not None:
                            kp_conf = keypoints_obj.conf[0]
                        else:
                            # No per-keypoint scores: fall back to the box
                            # confidence, on the same device as the coordinates.
                            kp_conf = torch.full((xy.shape[0],), confidence, device=xy.device)

                        keypoints = torch.cat([xy, kp_conf.unsqueeze(-1)], dim=1)
                        keypoints_list = keypoints.cpu().numpy().tolist()
                        logger.info(f"Successfully extracted {len(keypoints_list)} keypoints")

                        _save_debug_visualization(input_image, keypoints_list)

                        return keypoints_list, confidence

        logger.warning("No valid keypoints found in the prediction results")
        return [], 0.0

    except Exception as e:
        # Deliberate best-effort boundary: callers get an empty result rather
        # than an exception; the traceback is kept in the log.
        logger.exception(f"Error during prediction: {str(e)}")
        return [], 0.0


################################# BBOX Func #####################################

def add_bboxs_on_img(image: Image.Image, predict: pd.DataFrame) -> Image.Image:
    """
    Draw labelled bounding boxes on an image.

    Args:
        image (Image.Image): input image
        predict (pd.DataFrame): predictions with columns ``xmin``, ``ymin``,
            ``xmax``, ``ymax``, ``confidence``, ``class`` and ``name``

    Returns:
        Image.Image: a new image with the bounding boxes and labels drawn
    """
    # Create an annotator object
    annotator = Annotator(np.array(image))

    # Draw boxes in left-to-right order (sorted by xmin)
    predict = predict.sort_values(by=['xmin'], ascending=True)

    for _, row in predict.iterrows():
        # Label text: class name plus confidence as a whole percentage
        text = f"{row['name']}: {int(row['confidence']*100)}%"
        bbox = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]
        # Colour is picked per class id so each class is drawn consistently
        annotator.box_label(bbox, text, color=colors(row['class'], True))
    # Convert the annotated array back to a PIL image
    return Image.fromarray(annotator.result())


################################# Models #####################################


def detect_sample_model(input_image: Image.Image) -> Tuple[List[List[float]], float]:
    """
    Run the module-level model on an image with a 0.5 confidence threshold.

    Args:
        input_image (Image.Image): The input image.

    Returns:
        Tuple[List[List[float]], float]: The ``(keypoints, confidence)`` pair
            returned by ``get_model_predict``; ``([], 0.0)`` when nothing is
            detected.
    """
    return get_model_predict(
        model=model,
        input_image=input_image,
        conf=0.5,
    )

async def download_image(url: str) -> bytes:
    """Fetch the raw bytes of an image over HTTP.

    Args:
        url (str): Address of the image to fetch

    Returns:
        bytes: The response body

    Raises:
        ValueError: If the server answers with any status other than 200
    """
    async with aiohttp.ClientSession() as session, session.get(url) as response:
        if response.status == 200:
            return await response.read()
        raise ValueError(f"Failed to download image: {response.status}")

def calculate_angle(keypoints: List[List[float]]) -> Tuple[float, float]:
    """Compute the direction from the base midpoint to the tip midpoint.

    Args:
        keypoints (List[List[float]]): Keypoints as [x, y, confidence], in the
            order [base_left, base_right, tip_left, tip_right]. Only the first
            four are used for the angle.

    Returns:
        Tuple[float, float]: (angle, confidence) where angle is in degrees
            (atan2 of the base-to-tip vector) and confidence is the mean
            confidence over all supplied keypoints.
    """
    base_left, base_right = keypoints[0], keypoints[1]
    tip_left, tip_right = keypoints[2], keypoints[3]

    # Midpoint of the base pair and of the tip pair
    base_x = (base_left[0] + base_right[0]) / 2
    base_y = (base_left[1] + base_right[1]) / 2
    tip_x = (tip_left[0] + tip_right[0]) / 2
    tip_y = (tip_left[1] + tip_right[1]) / 2

    # Direction of the base -> tip vector
    heading = math.degrees(math.atan2(tip_y - base_y, tip_x - base_x))

    # Mean of the per-keypoint confidences
    mean_confidence = sum(point[2] for point in keypoints) / len(keypoints)

    return heading, mean_confidence

def draw_keypoints(image: Image, keypoints: List[List[float]], confidence_threshold: float = 0.0) -> Image:
    """Overlay a smooth curve through the keypoints and mark each keypoint.

    Args:
        image (Image): Input image
        keypoints (List[List[float]]): Keypoints as [x, y, confidence]
        confidence_threshold (float): Keypoints below this confidence are not
            marked (the curve still uses every keypoint)

    Returns:
        Image: The annotated image, or the original image if drawing fails
    """
    try:
        # cv2 works in BGR, PIL in RGB
        img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        # The curve goes first so the keypoint markers end up on top of it
        if len(keypoints) >= 4:
            points = np.array([[kp[0], kp[1]] for kp in keypoints])
            # Order the points top to bottom (by y)
            points = points[np.argsort(points[:, 1])]

            # Natural cubic spline through the ordered points, sampled densely
            cs = CubicSpline(range(len(points)), points, bc_type='natural')
            t = np.linspace(0, len(points) - 1, 200)
            curve_points = cs(t).astype(np.int32)

            # Paint the curve onto a separate layer as three nested strokes:
            # wide white, narrower black, then the blue core (BGR colours)
            mask = np.zeros_like(img)
            strokes = (
                ((255, 255, 255), 25),
                ((0, 0, 0), 21),
                ((255, 0, 0), 17),
            )
            for stroke_color, stroke_width in strokes:
                for seg_start, seg_end in zip(curve_points[:-1], curve_points[1:]):
                    cv2.line(mask, tuple(seg_start), tuple(seg_end), stroke_color, stroke_width)

            # Blend the layer into the image, weighted by the layer's intensity
            alpha = 0.7
            weight = alpha * (mask.astype(float) / 255)
            img = (img.astype(float) * (1 - weight) + weight * mask).astype(np.uint8)

            logger.info(f"Drew curve through {len(points)} points")
            logger.info(f"Curve points shape: {curve_points.shape}")

        # Mark each sufficiently confident keypoint with a dot and a label
        for i, kp in enumerate(keypoints):
            if not kp[2] >= confidence_threshold:
                continue
            x, y = int(kp[0]), int(kp[1])
            cv2.circle(img, (x, y), 8, (0, 255, 0), -1)
            label = f"{i}:{kp[2]:.2f}"
            cv2.putText(img, label, (x+10, y+10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        # Back to a PIL image in RGB
        return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    except Exception as e:
        import traceback
        logger.error(f"Error in draw_keypoints: {str(e)}")
        logger.error(f"Error type: {type(e)}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        # Visualization is optional: fall back to the untouched input
        return image

async def process_image(image_url: str) -> dict:
    """
    Download an image, run keypoint prediction and summarise the result.

    Args:
        image_url (str): URL of the image to process

    Returns:
        dict: On success, the keys "angle", "confidence", "keypoints" and
            "annotated_image". On failure (no keypoints, or any exception),
            the same keys with empty values plus an "error" message.
    """
    def _failure(message: str) -> dict:
        # Shared shape for every unsuccessful outcome
        return {
            "error": message,
            "angle": 0.0,
            "confidence": 0.0,
            "keypoints": [],
            "annotated_image": None
        }

    try:
        # Fetch and decode the image
        input_image = get_image_from_bytes(await download_image(image_url))

        # Run the model; the box confidence is not used here
        keypoints, _ = get_model_predict(model, input_image)
        if not keypoints:
            return _failure("No keypoints detected in the image")

        # Derive the angle, then render and encode the overlay
        angle, avg_confidence = calculate_angle(keypoints)
        annotated_image = draw_keypoints(input_image, keypoints, confidence_threshold=0.3)
        encoded = get_bytes_from_image(annotated_image)

        return {
            "angle": float(angle),
            "confidence": float(avg_confidence),
            "keypoints": keypoints,
            "annotated_image": encoded
        }

    except Exception as e:
        logger.error(f"Error processing image: {str(e)}")
        return _failure(str(e))