Skip to content

[Feature] Real2Sim Eval Digital Twins#536

Merged
StoneT2000 merged 56 commits intomainfrom
simplerenv-port
Sep 13, 2024
Merged

[Feature] Real2Sim Eval Digital Twins#536
StoneT2000 merged 56 commits intomainfrom
simplerenv-port

Conversation

@StoneT2000
Copy link
Member

@StoneT2000 StoneT2000 commented Aug 29, 2024

@StoneT2000
Copy link
Member Author

StoneT2000 commented Sep 12, 2024

image

MMRV and correlation coefficients of the MS3 real2sim digital twins. Good enough for use.

rom typing import Sequence
import matplotlib.pyplot as plt
import numpy as np
def pearson_correlation(perf_sim: Sequence[float], perf_real: Sequence[float]) -> float:
    perf_sim, perf_real = np.array(perf_sim), np.array(perf_real)
    assert perf_sim.shape == perf_real.shape
    perf_sim = perf_sim - np.mean(perf_sim)
    perf_real = perf_real - np.mean(perf_real)
    if np.all(perf_sim == perf_real):
        pearson = 1
    else:
        pearson = np.sum(perf_sim * perf_real) / (
            np.sqrt(np.sum(perf_sim**2) * np.sum(perf_real**2)) + 1e-8
        )
    return pearson
def mean_maximum_rank_violation(
    perf_sim: Sequence[float], perf_real: Sequence[float]
) -> float:
    perf_sim, perf_real = np.array(perf_sim), np.array(perf_real)
    assert perf_sim.shape == perf_real.shape
    rank_violations = []
    for i in range(len(perf_sim)):
        rank_violation = 0.0
        for j in range(len(perf_sim)):
            if (perf_sim[i] > perf_sim[j]) != (perf_real[i] > perf_real[j]):
                rank_violation = max(
                    rank_violation, np.abs(perf_real[i] - perf_real[j])
                )
        rank_violations.append(rank_violation)
    rank_violation = np.mean(rank_violations)
    return rank_violation
# Data from the table
real_eval_data = {
    "PutCarrotOnPlateInScene-v1": {
        "octo_base": {"success": 0.25, "grasp": 0.5},
        "octo_small": {"success": 0.083, "grasp": 0.208},
        "rt-1x": {"success": 0, "grasp": 0.167},
    },
    "PutSpoonOnTableClothInScene-v1": {
        "octo_base": {"success": 0.333, "grasp": 0.5},
        "octo_small": {"success": 0.417, "grasp": 0.542},
        "rt-1x": {"success": 0.0, "grasp": 0.042},
    },
    "StackGreenCubeOnYellowCubeInScene-v1": {
        "octo_base": {"success": 0.0, "grasp": 0.292},
        "octo_small": {"success": 0.125, "grasp": 0.583},
        "rt-1x": {"success": 0.0, "grasp": 0.0},
    },
    "PutEggplantInBasketInScene-v1": {
        "octo_base": {"success": 0.233, "grasp": 0.4},
        "octo_small": {"success": 0.433, "grasp": 0.7},
        "rt-1x": {"success": 0.0, "grasp": 0.033},
    }
}
sim_eval_data = {
    "PutCarrotOnPlateInScene-v1": {
        # "octo_base": {"success": 0.20833, "grasp": 0.4166}, # 1 seed
        "octo_base": {"success": 0.167, "grasp": 0.417}, # 3 seeds / 72 episodes
        # "octo_small": {"success": 0.125, "grasp": 0.29166}, # 1 seed
        "octo_small": {"success": 0.083, "grasp": 0.194}, # 3 seeds / 72 episodes
        "rt-1x": {"success": 0.125, "grasp": 0.208},
    },
    "PutSpoonOnTableClothInScene-v1": {
        # "octo_base": {"success": 0.125, "grasp": 0.333}, # 1 seed
        "octo_base": {"success": 0.069, "grasp": 0.347}, # 3 seeds / 72 episodes
        # "octo_small": {"success": 0.25, "grasp": 0.75}, # 1 seed
        "octo_small": {"success": 0.361, "grasp": 0.681}, # 3 seeds / 72 episodes
        "rt-1x": {"success": 0.125, "grasp": 0.166},
    },
    "StackGreenCubeOnYellowCubeInScene-v1": {
        # "octo_base": {"success": 0.0, "grasp": 0.25}, # 1 seed
        "octo_base": {"success": 0.0, "grasp": 0.194}, # 3 seeds / 72 episodes
        # "octo_small": {"success": 0.042, "grasp": 0.25}, # 1 seed
        "octo_small": {"success": 0.028, "grasp": 0.278}, # 3 seeds / 72 episodes
        "rt-1x": {"success": 0.0, "grasp": 0.042},
    },
    # results from commit d1f0893c58a4aaff787b20c5cd6fdf1a536fd3e5, which might look a bit bugged on occassion...
    "PutEggplantInBasketInScene-v1": {
        "octo_base": {"success": 0.396, "grasp": 0.75},
        "octo_small": {"success": 0.531, "grasp": 0.75},
        "rt-1x": {"success": 0.0, "grasp": 0.0},
    }
}
task_colors = ["blue", "red", "green", "purple"]
marker_styles = ["x", "o", "+"]
# Create scatter plot
plt.figure(figsize=(8, 6))
plt.grid(True)
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', label='y=x')
for i, task in enumerate(real_eval_data.keys()):
    for j, model in enumerate(real_eval_data[task].keys()):
        real_task_data = real_eval_data[task][model]
        sim_task_data = sim_eval_data[task][model]
        marker_style = marker_styles[j]
        if marker_style == '+':
            markersize = 60
        else:
            markersize = 40
        plt.scatter(real_task_data["success"], sim_task_data["success"], marker=marker_style, color=task_colors[i],alpha=0.5, s=markersize)
        plt.scatter(real_task_data["grasp"], sim_task_data["grasp"], marker=marker_style, color=task_colors[i], s=markersize)

# Compute Pearson correlation coefficient for each task
task_correlations = {}
task_mmrvs = {}
for task in real_eval_data.keys():
    real_values = []
    sim_values = []
    for data_type in ["success", "grasp"]:
        for model in real_eval_data[task].keys():
            real_values.extend([real_eval_data[task][model][data_type]])
            sim_values.extend([sim_eval_data[task][model][data_type]])
        task_correlation = pearson_correlation(real_values, sim_values)
        task_correlations[f"{task}_{data_type}"] = task_correlation
        task_mmrv = mean_maximum_rank_violation(real_values, sim_values)
        task_mmrvs[f"{task}_{data_type}"] = task_mmrv
        print(f"Pearson correlation coefficient for {task}_{data_type}: {task_correlation:.4f}")
        print(f"Mean maximum rank violation for {task}_{data_type}: {task_mmrv:.4f}")

# Compute average Pearson correlation coefficient
overall_correlation = np.mean([task_correlations[task] for task in task_correlations.keys()])
print(f"Overall Pearson correlation coefficient: {overall_correlation:.4f}")
overall_mmrv = np.mean([task_mmrvs[task] for task in task_mmrvs.keys()])
print(f"Overall mean maximum rank violation: {overall_mmrv:.4f}")

# Add text annotation for correlation coefficient
plt.text(0.05, 0.95, f"r = {overall_correlation:.4f}", transform=plt.gca().transAxes, 
         verticalalignment='top', fontsize=10, 
         bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
plt.text(0.05, 0.85, f"MMRV = {overall_mmrv:.4f}", transform=plt.gca().transAxes, 
         verticalalignment='top', fontsize=10, 
         bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
legend_elements = [
    plt.Line2D([0], [0], marker='x', color='none', label='octo_base', markeredgecolor='black', markersize=10),
    plt.Line2D([0], [0], marker='o', color='none', label='octo_small', markeredgecolor='black', markersize=10),
    plt.Line2D([0], [0], marker='+', color='none', label='rt-1x', markeredgecolor='black', markersize=10),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='blue', label='Put Carrot', alpha=0.5),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='blue', label='Grasp Carrot'),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='red', label='Put Spoon', alpha=0.5),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='red', label='Grasp Spoon'),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='green', label='Stack Green Cube', alpha=0.5),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='green', label='Grasp Green Cube'),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='purple', label='Put Eggplant', alpha=0.5),
    plt.Line2D([0], [0], linestyle='none', marker='o', color='purple', label='Grasp Eggplant'),
    # plt.Line2D([0], [0], linestyle='none', marker='o', color='black', label='Success', markersize=10),
    # plt.Line2D([0], [0], linestyle='none', marker='o', color='black', label='Grasp', markersize=10, alpha=0.5)
]

# Add the legend to the plot
plt.legend(handles=legend_elements, loc='lower center', bbox_to_anchor=(0.5, -0.4), ncol=3)
# Set labels for x and y axes
plt.xlabel('Real eval')
plt.ylabel('Sim eval')
plt.title('Real eval vs Sim eval (Success and Grasp)')

# Adjust layout to prevent legend from being cut off
plt.tight_layout()


plt.show()

code snippet to generate the plot

@StoneT2000 StoneT2000 merged commit 3543565 into main Sep 13, 2024
@StoneT2000 StoneT2000 deleted the simplerenv-port branch September 13, 2024 00:52
Yuan-Xinyi pushed a commit to Yuan-Xinyi/ManiSkill_xinyi that referenced this pull request Sep 22, 2025
* work

* work

* Update base_env.py

* greenscreen trick added

* code refactors

* work

* Update widowx.py

* fixes

* fixes

* updates

* bug fixes

* align sim configs

* fixes

* Update demo_octo_eval.py

* debugged

* work

* bug fixes

* attempt to support IK

* work

* cleanup

* work

* work

* cleaned up code

* evals

* fixes

* spoon task

* Update demo_octo_eval.py

* work

* update widowx model download link and cleanup code

* fixes

* work

* bug fixes

* rt1 inference example

* bug fixes

* less eggplant rolling

* code cleanup

* GPU IK no delta controller implemented

* gpu fixes

* bug fixes

* work

* fixes

* work

* w

* cleanup

* code cleanup, assets added

* docs

* Delete demo_real2sim_eval.py

* f

* Update base_env.py

* Update base_env.py

* Delete README.md

* Update index.md
ashvinar pushed a commit to jstmn/ManiSkill that referenced this pull request Dec 25, 2025
* work

* work

* Update base_env.py

* greenscreen trick added

* code refactors

* work

* Update widowx.py

* fixes

* fixes

* updates

* bug fixes

* align sim configs

* fixes

* Update demo_octo_eval.py

* debugged

* work

* bug fixes

* attempt to support IK

* work

* cleanup

* work

* work

* cleaned up code

* evals

* fixes

* spoon task

* Update demo_octo_eval.py

* work

* update widowx model download link and cleanup code

* fixes

* work

* bug fixes

* rt1 inference example

* bug fixes

* less eggplant rolling

* code cleanup

* GPU IK no delta controller implemented

* gpu fixes

* bug fixes

* work

* fixes

* work

* w

* cleanup

* code cleanup, assets added

* docs

* Delete demo_real2sim_eval.py

* f

* Update base_env.py

* Update base_env.py

* Delete README.md

* Update index.md
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant