diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index 6a67e8f6793ec9..15094a84ee8f5f 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -178,7 +178,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs diff --git a/.github/workflows/android_x64.yml b/.github/workflows/android_x64.yml index cab5239b4c45c0..cebaa9177b69b9 100644 --- a/.github/workflows/android_x64.yml +++ b/.github/workflows/android_x64.yml @@ -152,7 +152,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 53f3eba9a749bf..535be1e4e70457 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -77,13 +77,13 @@ jobs: echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - name: 'Upload sphinx.log' - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: sphinx_build_log_${{ env.PR_NUMBER }}.log path: build/docs/sphinx.log - name: 'Upload docs html' - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_docs_html_${{ env.PR_NUMBER }}.zip path: build/docs/openvino_docs_html.zip @@ -100,7 +100,7 @@ jobs: - name: 'Upload test results' if: failure() - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_docs_pytest path: build/docs/_artifacts/ diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 6a163fb5e50043..8a2338554faae3 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -144,7 +144,7 @@ jobs: run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-configure -c ${COVERITY_TOOL_DIR}/cov-analysis-linux64-2023.6.2/config/coverity_config.xml -lscc text - name: Upload Coverity build log - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: coverity_logs @@ -152,7 +152,7 @@ jobs: if-no-files-found: 'error' - name: Upload Coverity build archive - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: coverity_archive diff --git a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml index 83770900559bab..26e8400c22a04f 100644 --- a/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml +++ b/.github/workflows/dev_cpu_linux_snippets_libxsmm.yml @@ -169,7 +169,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: 
name: build_logs @@ -178,7 +178,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -186,7 +186,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -325,7 +325,7 @@ jobs: timeout-minutes: 25 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/job_build_linux.yml b/.github/workflows/job_build_linux.yml index b8eea4375e7e58..86545b6e9e7a43 100644 --- a/.github/workflows/job_build_linux.yml +++ b/.github/workflows/job_build_linux.yml @@ -249,7 +249,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -258,7 +258,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -266,7 +266,7 @@ jobs: - name: Upload openvino wheels if: ${{ inputs.os != 'debian_10' && inputs.arch != 'arm' }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -274,7 +274,7 @@ jobs: - name: Upload openvino js package if: ${{ fromJSON(inputs.affected-components).JS_API && inputs.build-js }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -282,7 +282,7 @@ jobs: - name: Upload openvino developer package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_developer_package path: ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz @@ -290,7 +290,7 @@ jobs: - name: Upload openvino RPM packages if: ${{ inputs.build-rpm-packages }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_rpm_packages path: ${{ env.BUILD_DIR }}/*.rpm @@ -298,7 +298,7 @@ jobs: - name: Upload openvino debian packages if: ${{ inputs.build-debian-packages }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_debian_packages path: ${{ env.BUILD_DIR }}/*.deb @@ -306,7 +306,7 @@ 
jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz diff --git a/.github/workflows/job_build_windows.yml b/.github/workflows/job_build_windows.yml index c8e249513a08f0..df2544d9d9e60a 100644 --- a/.github/workflows/job_build_windows.yml +++ b/.github/workflows/job_build_windows.yml @@ -218,21 +218,21 @@ jobs: # - name: Upload openvino package - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.zip if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.BUILD_DIR }}/wheels/*.whl if-no-files-found: 'error' - name: Upload openvino tests package - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -240,7 +240,7 @@ jobs: - name: Upload openvino js package if: ${{ fromJSON(inputs.affected-components).JS_API }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 6848871df6e81e..e197d581f290a4 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -116,7 +116,7 @@ jobs: key: ${{ runner.os }}-${{ runner.arch }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 99c363d04d23a7..3f871151ccd282 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -257,7 +257,7 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-cpp diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index 324e653c57ebab..b9862eac09cc05 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -128,7 +128,7 @@ jobs: - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: test-results-${{ 
inputs.test_type }}-${{ inputs.device }} diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 9956a27f234b36..ea2669071386dd 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -100,7 +100,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-jax-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 321aa88d614310..c879f0cb6a1efc 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -112,7 +112,7 @@ jobs: python3 -m pytest --backend="CPU" --model_zoo_dir="${MODELS_SHARE_PATH}" ${INSTALL_TEST_DIR}/onnx/tests/tests_python/test_zoo_models.py -v -n auto --forked -k 'not _cuda' --model_zoo_xfail - name: Upload logs from pytest - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: onnx_models_tests_logs diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index d63262c665d45c..1fafafd7623545 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -276,7 +276,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python diff --git a/.github/workflows/job_pytorch_layer_tests.yml b/.github/workflows/job_pytorch_layer_tests.yml index 95074dc84f1ff9..abf614c70cff4e 100644 --- a/.github/workflows/job_pytorch_layer_tests.yml +++ b/.github/workflows/job_pytorch_layer_tests.yml @@ -147,7 +147,7 @@ jobs: PYTORCH_TRACING_MODE: TORCHFX - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python-pytorch-layers diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index a77c1318f3a0c8..8f3699f6ab42a2 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -160,6 +160,17 @@ jobs: TEST_DEVICE: CPU USE_SYSTEM_CACHE: False + - name: TorchFX GPTQ Pattern Test + if: ${{ inputs.model_scope == 'precommit' }} + # install torch 2.3.1 as newer is not yet supported by openvino backend + run: | + export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH + python3 -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --upgrade --index-url https://download.pytorch.org/whl/cpu + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_gptq_torchfx_transformations.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_gptqpattern_tests.html --self-contained-html -v --tb=short + env: + TEST_DEVICE: CPU + USE_SYSTEM_CACHE: False + - name: Reformat unsupported ops file if: ${{ inputs.model_scope != 'precommit' && !cancelled()}} run: | @@ 
-171,7 +182,7 @@ jobs: df -h - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-torch-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index ae6e91a00d1497..977b2e4f96af73 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -150,7 +150,7 @@ jobs: TEST_PRECISION: FP16 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python-tf-layers diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index db34ec7b793551..0990eae3de6e7e 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -107,7 +107,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-tensorflow-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 238dbfec3a34eb..f7388eb98a2f3c 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -133,7 +133,7 @@ jobs: - name: Upload openvino tokenizers wheel if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tokenizers_wheel path: ${{ env.EXTENSION_BUILD_DIR }}/*.whl diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index 7b5467b01ad73e..42d7810b9f1663 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -223,7 +223,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: always() with: name: build_logs @@ -232,7 +232,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -240,7 +240,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.tar.gz @@ -248,7 +248,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: 
openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index b23e67a0f2b30e..e1a71fe92dc1a3 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -188,7 +188,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -196,7 +196,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -465,7 +465,7 @@ jobs: ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-cpp_${{ matrix.SANITIZER }} diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 6e3f344c6dd944..bddbaab134fa9c 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -205,14 +205,14 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - name: Upload openvino wheels - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -220,7 +220,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -228,7 +228,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 16658318de20d8..576eefde8c9b4a 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -206,14 +206,14 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - name: Upload openvino wheels - uses: 
actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl @@ -221,7 +221,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -229,7 +229,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index ae0625ce4a453c..2b0d3f2272787f 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -50,7 +50,7 @@ jobs: git diff > samples_diff.diff working-directory: samples/python - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: samples_diff @@ -68,7 +68,7 @@ jobs: git diff > pyopenvino_diff.diff working-directory: src/bindings/python/src/openvino - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: pyopenvino_diff @@ -86,7 +86,7 @@ jobs: git diff > wheel_diff.diff working-directory: src/bindings/python/wheel - - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: failure() with: name: wheel_diff diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 2ebca2b059fdd2..5e5ac3c3482624 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -227,7 +227,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-${{ env.TEST_DEVICE }} path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz @@ -253,7 +253,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ matrix.TEST_TYPE == 'API' }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-TEMPLATE path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 9c026f01e47233..30b2ce2f20df38 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -249,7 +249,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: 
actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip @@ -257,7 +257,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -402,7 +402,7 @@ jobs: timeout-minutes: 60 - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index 8cac2b88078d15..bce90165408815 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -391,7 +391,7 @@ jobs: run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-python @@ -502,7 +502,7 @@ jobs: key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/.gitmodules b/.gitmodules index a9cad1dee5f494..5feb7458da1801 100644 --- a/.gitmodules +++ b/.gitmodules @@ -78,6 +78,9 @@ [submodule "src/plugins/intel_npu/thirdparty/level-zero-ext"] path = src/plugins/intel_npu/thirdparty/level-zero-ext url = https://github.com/intel/level-zero-npu-extensions.git +[submodule "src/plugins/intel_npu/thirdparty/yaml-cpp"] + path = src/plugins/intel_npu/thirdparty/yaml-cpp + url = https://github.com/jbeder/yaml-cpp.git [submodule "thirdparty/telemetry"] path = thirdparty/telemetry url = https://github.com/openvinotoolkit/telemetry.git diff --git a/docs/sphinx_setup/assets/versions_raw.js b/docs/sphinx_setup/assets/versions_raw.js new file mode 100644 index 00000000000000..8045057450bf5f --- /dev/null +++ b/docs/sphinx_setup/assets/versions_raw.js @@ -0,0 +1 @@ +var data='[{"version": "2024"}, {"version": "2023.3"}, {"version": "2022.3"}, {"version": "nightly"}, {"version": "archives"}]'; \ No newline at end of file diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 73cdd57e508bdb..69ad9f460e357a 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -12,6 +12,7 @@ set(shellcheck_skip_list "${OpenVINO_SOURCE_DIR}/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_cpu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_gpu/thirdparty" + "${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/bindings/python/thirdparty/pybind11" "${TEMP}") diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 54fb6a972b7387..6f5166dfd26760 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp 
+++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -29,7 +29,7 @@ bool fuse_type_to_parameter(const std::shared_ptr& node, bool convert_input_precision); // this function inserts Convert operations to 'data' input and outputs of `node` -// to execute 'node' with the original type. +// to execute 'node' with the original type. This function supports nodes with single output. bool wrap_into_original_type(const std::shared_ptr& node, const precisions_map& precisions); bool store_original_type_as_attribute(const std::shared_ptr& node, const precisions_map& precisions); @@ -622,17 +622,20 @@ bool wrap_into_original_type(const std::shared_ptr& node, const precis const auto& to = it->second; const auto& from = it->first; - - auto convert_before = std::make_shared(node->input_value(0), from); - node->input(0).replace_source_output(convert_before); - auto consumers = node->output(0).get_target_inputs(); - auto convert_after = std::make_shared(node, to); - for (auto& input : consumers) { - const auto consumer = input.get_node(); - if (ov::is_type(consumer) || ov::is_type(consumer)) { - continue; + if (node->get_input_size()) { + auto convert_before = std::make_shared(node->input_value(0), from); + node->input(0).replace_source_output(convert_before); + } + if (node->get_output_size() == 1) { + auto consumers = node->output(0).get_target_inputs(); + auto convert_after = std::make_shared(node, to); + for (auto& input : consumers) { + const auto consumer = input.get_node(); + if (ov::is_type(consumer) || ov::is_type(consumer)) { + continue; + } + input.replace_source_output(convert_after); } - input.replace_source_output(convert_after); } return true; diff --git a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp index a533739b16fea1..caeeb8c557b380 100644 --- a/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/torchfx_gptq_pattern_replacer.cpp @@ -40,18 +40,6 @@ uint32_t read_u4_data(const void* array, size_t index) { return val; }; -void write_u4_data(void* array, size_t index, uint32_t data) { - auto arr_u32 = reinterpret_cast(array); - size_t idx_u32 = index / 8; - size_t offset_u32 = index % 8; - uint32_t old_val = arr_u32[idx_u32]; - data = data << (offset_u32 * 4); - uint32_t mask = 15; - mask = ~(mask << (offset_u32 * 4)); - uint32_t new_val = (old_val & mask) | data; - arr_u32[idx_u32] = new_val; -}; - GPTQDecompressionReplacer::GPTQDecompressionReplacer() { const auto& const_1 = wrap_type(); const auto& const_2 = wrap_type(); @@ -73,61 +61,157 @@ GPTQDecompressionReplacer::GPTQDecompressionReplacer() { const auto& convert_2 = wrap_type({const_6}); const auto& bitwise_and = wrap_type({add_or_convert, convert_2}); - ov::matcher_pass_callback callback = [unsqueeze_1](Matcher& m) { + ov::matcher_pass_callback callback = [=](Matcher& m) { auto bitwise_and = m.get_match_root(); if (!bitwise_and) { return false; } const auto& pattern_map = m.get_pattern_value_map(); - const auto& input_node = pattern_map.at(unsqueeze_1).get_node_shared_ptr(); - auto weights_u32 = std::dynamic_pointer_cast(input_node->get_input_node_shared_ptr(0)); - auto axis = std::dynamic_pointer_cast(input_node->get_input_node_shared_ptr(1)); - auto axis_data = axis->get_data_ptr(); - - auto u8_shape = weights_u32->get_shape(); - auto src = weights_u32->get_data_ptr(); - - ov::Shape u4_shape; - bool dim_added = false; - size_t stride = 1; - size_t 
size_y = 1; - for (size_t i = 0; i < u8_shape.size(); i++) { - if (axis_data[0] == i) { - u4_shape.push_back(8); - dim_added = true; - } - if (axis_data[0] <= i) { - stride *= u8_shape[i]; - } else { - size_y *= u8_shape[i]; - } - u4_shape.push_back(u8_shape[i]); + auto unsqueeze_1_node = pattern_map.at(unsqueeze_1).get_node_shared_ptr(); + auto unsqueeze_1_in0_const = + std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(0)); + auto unsqueeze_1_in1_const = + std::dynamic_pointer_cast(unsqueeze_1_node->get_input_node_shared_ptr(1)); + auto abs_node = pattern_map.at(abs).get_node_shared_ptr(); + auto abs_in_const = std::dynamic_pointer_cast(abs_node->get_input_node_shared_ptr(0)); + auto broadcast_node = pattern_map.at(broadcast).get_node_shared_ptr(); + auto unsqueeze_2_node = pattern_map.at(unsqueeze_2).get_node_shared_ptr(); + auto unsqueeze_2_in0_const = + std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(0)); + auto unsqueeze_2_in1_const = + std::dynamic_pointer_cast(unsqueeze_2_node->get_input_node_shared_ptr(1)); + + OutputVector outputs_1(unsqueeze_1_node->get_output_size()); + OutputVector unsqueeze_1_inputs(2); + unsqueeze_1_inputs[0] = unsqueeze_1_in0_const->outputs()[0]; + unsqueeze_1_inputs[1] = unsqueeze_1_in1_const->outputs()[0]; + if (!unsqueeze_1_node->constant_fold(outputs_1, unsqueeze_1_inputs)) { + return false; } - if (!dim_added) { - u4_shape.push_back(8); + + OutputVector outputs_2(abs_node->get_output_size()); + if (!abs_node->constant_fold(outputs_2, abs_in_const->outputs())) { + return false; } - auto new_const = std::make_shared(element::u4, u4_shape); - auto dst = const_cast(reinterpret_cast(new_const->get_data_ptr())); + OutputVector outputs_3(broadcast_node->get_output_size()); + OutputVector broadcast_inputs(2); + broadcast_inputs[0] = outputs_1[0]; + broadcast_inputs[1] = outputs_2[0]; + if (!broadcast_node->constant_fold(outputs_3, broadcast_inputs)) { + return false; + } + + OutputVector outputs_4(unsqueeze_2_node->get_output_size()); + OutputVector unsqueeze_2_inputs(2); + unsqueeze_2_inputs[0] = unsqueeze_2_in0_const->outputs()[0]; + unsqueeze_2_inputs[1] = unsqueeze_2_in1_const->outputs()[0]; + if (!unsqueeze_2_node->constant_fold(outputs_4, unsqueeze_2_inputs)) { + return false; + } + const int32_t* rs_in0 = + std::dynamic_pointer_cast(outputs_3[0].get_node_shared_ptr())->get_data_ptr(); + const int32_t* rs_in1 = + std::dynamic_pointer_cast(outputs_4[0].get_node_shared_ptr())->get_data_ptr(); + auto shifted_const = std::make_shared(element::i32, outputs_3[0].get_shape()); + auto dst = const_cast(reinterpret_cast(shifted_const->get_data_ptr())); if (!dst) return false; - size_t in_idx = 0; - for (size_t y = 0; y < size_y; y++) { - size_t offset = y * stride * 8; - for (size_t x = 0; x < stride; x++) { - for (size_t z = 0; z < 8; z++) { - uint32_t val = read_u4_data(src, in_idx); - write_u4_data(dst, (offset + x + stride * z), val); - in_idx++; - } + // TODO: Bitwise right shift operation below might need to be + // optimized to reduce FIL. 
+ size_t rs_in0_shape_size = shape_size(outputs_3[0].get_shape()); + const auto& rs_in0_shape = outputs_3[0].get_shape(); + const auto& rs_in1_shape = outputs_4[0].get_shape(); + int shift_dim = -1; + size_t shift_offset = 1; + for (size_t i = 0; i < rs_in1_shape.size(); ++i) { + size_t dim = rs_in1_shape[i]; + if (dim != 1 && dim != rs_in0_shape[i]) { + return false; + } + if (shift_dim != -1) { + shift_offset *= rs_in0_shape[i]; + } + if (dim == rs_in0_shape[i]) { + shift_dim = static_cast(i); + } + } + if (shift_dim == -1) + return false; + for (size_t k = 0; k < rs_in0_shape_size; ++k) { + size_t shift_idx = (k / shift_offset) % rs_in1_shape[shift_dim]; + int32_t shift_val = rs_in1[shift_idx]; + dst[k] = (rs_in0[k] >> shift_val); + } + + std::shared_ptr convert_1_node = nullptr; + OutputVector outputs_7; + if (pattern_map.find(convert_1) != pattern_map.end()) { + convert_1_node = pattern_map.at(convert_1).get_node_shared_ptr(); + outputs_7.resize(convert_1_node->get_output_size()); + if (!convert_1_node->constant_fold(outputs_7, shifted_const->outputs())) { + return false; + } + } else { + auto convert_3_node = pattern_map.at(convert_3).get_node_shared_ptr(); + auto convert_4_node = pattern_map.at(convert_4).get_node_shared_ptr(); + auto convert_4_in_const = + std::dynamic_pointer_cast(convert_4_node->get_input_node_shared_ptr(0)); + auto add_node = pattern_map.at(add).get_node_shared_ptr(); + OutputVector outputs_5(convert_3_node->get_output_size()); + if (!convert_3_node->constant_fold(outputs_5, shifted_const->outputs())) { + return false; + } + OutputVector outputs_6(convert_4_node->get_output_size()); + if (!convert_4_node->constant_fold(outputs_6, convert_4_in_const->outputs())) { + return false; + } + outputs_7.resize(add_node->get_output_size()); + OutputVector add_inputs(2); + add_inputs[0] = outputs_5[0]; + add_inputs[1] = outputs_6[0]; + if (!add_node->constant_fold(outputs_7, add_inputs)) { + return false; } } - copy_runtime_info_and_name(weights_u32, {new_const}, {weights_u32, bitwise_and}); + auto convert_2_node = pattern_map.at(convert_2).get_node_shared_ptr(); + auto convert_2_in_const = std::dynamic_pointer_cast(convert_2_node->get_input_node_shared_ptr(0)); + + OutputVector outputs_8(convert_2_node->get_output_size()); + if (!convert_2_node->constant_fold(outputs_8, convert_2_in_const->outputs())) { + return false; + } + + OutputVector outputs_9(bitwise_and->get_output_size()); + + const int8_t* and_in0 = + std::dynamic_pointer_cast(outputs_7[0].get_node_shared_ptr())->get_data_ptr(); + const int8_t* and_in1 = + std::dynamic_pointer_cast(outputs_8[0].get_node_shared_ptr())->get_data_ptr(); + auto masked_const = std::make_shared(element::i8, outputs_7[0].get_shape()); + auto masked_dst = const_cast(reinterpret_cast(masked_const->get_data_ptr())); + if (!masked_dst) + return false; + + size_t and_in0_shape_size = shape_size(outputs_7[0].get_shape()); + // TODO: Bitwise and operation below might need to be + // optimized to reduce FIL. 
+ int8_t mask = and_in1[0]; + for (size_t k = 0; k < and_in0_shape_size; ++k) { + masked_dst[k] = (and_in0[k] & mask); + } + + auto convert_to_u4 = std::make_shared(masked_const, element::u4); + OutputVector outputs_10(convert_to_u4->get_output_size()); + if (!convert_to_u4->constant_fold(outputs_10, masked_const->outputs())) { + return false; + } - auto new_convert = std::make_shared(new_const, bitwise_and->get_output_element_type(0)); - copy_runtime_info_and_name(bitwise_and, {new_convert}, {input_node}); + auto new_convert = + std::make_shared(outputs_10[0].get_node_shared_ptr(), bitwise_and->get_output_element_type(0)); + copy_runtime_info_and_name(bitwise_and, {new_convert}, {unsqueeze_1_node}); replace_node(bitwise_and, new_convert); return true; }; diff --git a/src/plugins/intel_gpu/src/graph/debug_helper.cpp b/src/plugins/intel_gpu/src/graph/debug_helper.cpp index 7f7071e704683e..c2c41fdfab2373 100644 --- a/src/plugins/intel_gpu/src/graph/debug_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/debug_helper.cpp @@ -295,7 +295,7 @@ NodeDebugHelper::NodeDebugHelper(const primitive_inst& inst) debug_config->dump_layers_dst_only == 0 && debug_config->is_layer_for_dumping(layer_name)) { std::string debug_str_for_bin_load = " Command for loading : OV_GPU_LoadDumpRawBinary=\"" + layer_name + ":"; for (size_t i = 0; i < m_inst.dependencies().size(); i++) { - std::string name = get_file_prefix() + layer_name + "_src" + std::to_string(i); + std::string name = get_file_prefix() + "_src" + std::to_string(i); auto input_mem = m_inst.dep_memory_ptr(i); if (input_mem == nullptr) { GPU_DEBUG_COUT << " input_mem_" << i << " is nullptr. Nothing to dump." << std::endl; diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 07efefd4452403..8a9dce04f071b9 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -20,3 +20,5 @@ if(NOT BUILD_SHARED_LIBS AND NOT ENABLE_MLIR_COMPILER AND NOT ENABLE_DRIVER_COMP endif() ov_dependent_option(ENABLE_IMD_BACKEND "Enable InferenceManagerDemo based NPU AL backend" OFF "NOT WIN32;NOT CMAKE_CROSSCOMPILING" OFF) + +ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) diff --git a/src/plugins/intel_npu/thirdparty/CMakeLists.txt b/src/plugins/intel_npu/thirdparty/CMakeLists.txt index 4d0c66beeb7520..b064b5c7b9acd5 100644 --- a/src/plugins/intel_npu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_npu/thirdparty/CMakeLists.txt @@ -12,3 +12,15 @@ if(ENABLE_ZEROAPI_BACKEND) add_library(LevelZero::NPUExt ALIAS level-zero-ext) install(TARGETS level-zero-ext EXPORT "${PROJECT_NAME}Targets") endif() + +# +# yaml-cpp +# + +if(ENABLE_INTEL_NPU_PROTOPIPE) + add_subdirectory(yaml-cpp EXCLUDE_FROM_ALL) + # NB: Suppress warnings in yaml-cpp + if(SUGGEST_OVERRIDE_SUPPORTED) + target_compile_options(yaml-cpp PRIVATE -Wno-suggest-override) + endif() +endif() diff --git a/src/plugins/intel_npu/thirdparty/yaml-cpp b/src/plugins/intel_npu/thirdparty/yaml-cpp new file mode 160000 index 00000000000000..da82fd982c260e --- /dev/null +++ b/src/plugins/intel_npu/thirdparty/yaml-cpp @@ -0,0 +1 @@ +Subproject commit da82fd982c260e7f335ce5acbceff24b270544d1 diff --git a/src/plugins/intel_npu/tools/CMakeLists.txt b/src/plugins/intel_npu/tools/CMakeLists.txt index c0e620981952e1..ac1a51f74519c8 100644 --- a/src/plugins/intel_npu/tools/CMakeLists.txt +++ b/src/plugins/intel_npu/tools/CMakeLists.txt @@ -6,3 +6,7 @@ 
add_subdirectory(common) add_subdirectory(compile_tool) add_subdirectory(single-image-test) + +if (ENABLE_INTEL_NPU_PROTOPIPE) + add_subdirectory(protopipe) +endif() diff --git a/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt new file mode 100644 index 00000000000000..9ba76d89ca8445 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt @@ -0,0 +1,72 @@ +# +# Copyright (C) 2023-2024 Intel Corporation. +# SPDX-License-Identifier: Apache 2.0 +# + +set(TARGET_NAME protopipe) + +if (NOT DEFINED PROJECT_NAME) + cmake_minimum_required(VERSION 3.13 FATAL_ERROR) + project(protopipe_standalone) + include("cmake/standalone.cmake") + return() +endif() + +# +# Dependencies +# + +find_package(OpenCV QUIET COMPONENTS gapi) +if(OpenCV_VERSION VERSION_LESS 4.9) + message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: gapi from OpenCV >= 4.9.") + return() +endif() + +if (WIN32) + # WA: add_tool_target expects to have all dependencies as cmake targets. + add_library(winmm INTERFACE) + target_link_libraries(winmm INTERFACE "winmm.lib") +endif() + +# +# Define the target +# + +set(PROTOPIPE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) + +ov_add_target(ADD_CPPLINT + TYPE EXECUTABLE + NAME ${TARGET_NAME} + ROOT ${CMAKE_CURRENT_SOURCE_DIR} + ADDITIONAL_SOURCE_DIRS ${PROTOPIPE_SOURCE_DIR} + INCLUDES ${PROTOPIPE_SOURCE_DIR} + LINK_LIBRARIES + PRIVATE + Threads::Threads + gflags + yaml-cpp + openvino::runtime + opencv_gapi + winmm) + + + +set_target_properties(${TARGET_NAME} PROPERTIES + FOLDER ${CMAKE_CURRENT_SOURCE_DIR} + CXX_STANDARD 17) + +# +# Install +# + +install(TARGETS ${TARGET_NAME} + RUNTIME DESTINATION "tools/${TARGET_NAME}" + COMPONENT ${NPU_INTERNAL_COMPONENT} + ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL}) + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md") + install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md" + DESTINATION "tools/${TARGET_NAME}" + COMPONENT ${NPU_INTERNAL_COMPONENT} + ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL}) +endif() diff --git a/src/plugins/intel_npu/tools/protopipe/README.md b/src/plugins/intel_npu/tools/protopipe/README.md new file mode 100644 index 00000000000000..afe6e8cffbc8c3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/README.md @@ -0,0 +1,608 @@ +# Protopipe +Protopipe is a C++ tool for simulating the performance and validating the accuracy of various AI scenarios. + +Protopipe is built on top of [OpenCV G-API](https://github.com/opencv/opencv/wiki/Graph-API) and supports running inference through the [OpenVINO](https://github.com/openvinotoolkit/openvino) and [ONNXRuntime](https://github.com/microsoft/onnxruntime) frameworks. + +## Table of Contents +* [Quick start](#quick-start) +* [How to configure](#how-to-configure) + * [Global parameters](#global-parameters) + * [Model parameters](#model-parameters) + * [Graph structure](#graph-structure) + * [Dependency Graph](#dependency-graph) + * [Network sequence](#network-sequence) + * [Scenario parameters](#scenario-parameters) + * [Config example](#config-example) +* [How to run](#how-to-run) +* [Use cases](#use-cases) + * [Measure Performance](#measure-performance) + * [Generate Reference](#generate-reference) + * [Validate Accuracy](#validate-accuracy) +* [How to build](#how-to-build) + +## Quick start +Consider the following [Config example](#config-example) to start using Protopipe.
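+For instance, a minimal single-model scenario (an illustrative sketch only; it assumes an OpenVINO IR named `model.xml` in the current directory and a CPU device) could look like this:
+```
+model_dir:
+  local: .
+device_name: CPU
+
+multi_inference:
+- input_stream_list:
+  - network:
+    - { name: model.xml }
+    target_fps: 30
+    exec_time_in_secs: 15
+```
+Saved as `config.yaml`, it can be launched with `./protopipe --cfg config.yaml`.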
+ +Learn more about available config parameters (see: [How to configure](#how-to-configure)) and explore different execution modes (see: [Use-cases](#use-cases)) for more advanced usage. + +## How to configure +Protopipe uses a **YAML** config file to describe the AI scenario structure and its parameters. + +### Global parameters +The **YAML** config starts with specifying several global parameters: +- `model_dir` - **Optional**. Path to the models location. (**Default**: ".") +- `blob_dir` - **Optional**. Path to the blobs location. (**Default**: ".") +- `device_name` - **Optional**. OpenVINO device name: _CPU_, _GPU_, etc. (**Default**: _NPU_) +- `compiler_type` - **Optional**. NPU compiler type: _DRIVER_, _MLIR_. (**Default**: _DRIVER_) +- `log_level` - **Optional**. Log level: _NONE_, _INFO_, _DEBUG_. (**Default**: _NONE_) +- `disable_high_resolution_waitable_timer` - **Optional**. Disables high resolution timer used to perform delays on Windows. (**Default**: false) + +Example: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO +``` +### Model parameters +#### Common parameters +- `name` or `path` - **Required**. Path to the model file. +- `framework` - **Optional**. Framework to use for inference: *onnxrt*, *openvino*. (**Default**: *openvino*) +- `input_data`, `output_data`, `metric`, `random` - **Optional**. Follow [Use-cases](#use-cases) to learn the details. +#### OpenVINO parameters +- `priority` - **Optional**. Model priority: _HIGH_, _MEDIUM_, _LOW_. (Default: _MEDIUM_) +- `config` - **Optional**. OpenVINO Plugin specific parameters. +- `device` - **Optional**. OpenVINO device name. +- `ip` - **Optional**. Input layer precision: _FP16_, _FP32_, _U8_, _I32_. +- `op` - **Optional**. Output layer precision: _FP16_, _FP32_, _U8_, _I32_. +- `il` - **Optional**. Input layer layout. +- `ol` - **Optional**. Output layer layout. +- `iml` - **Optional**. Input model layout. +- `oml` - **Optional**. Output model layout. + +Examples: +``` +- { name: model.xml, ip: FP16, iml: NHWC, il: NCHW } +- { name: model.xml, ip: { data: FP16 }, priority: HIGH } +- { name: model.xml, device: NPU, config: { PERFORMANCE_HINT: THROUGHPUT } } +``` +#### ONNXRT parameters +- `ep` - **Optional**. Specifies the parameters for a particular execution provider. +- `session_options` - **Optional**. Set various session options for the ONNX Runtime. + +##### Supported Execution Providers +- [OpenVINO Execution Provider](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html) + - `name: OV` - **Required**. Enables OpenVINO Execution Provider. + - `device_type` - **Optional**. The device type: _NPU_U8_, _CPU_FP32_, etc. + - `params` - **Optional**. Accepts a map of options and their corresponding values that can be passed to OV EP. + +**Note**: If none of the supported execution providers are specified, the default `MLAS` will be used. + +Examples: +``` +- { name: model.onnx, framework: onnxrt } # Default (MLAS) EP will be used +- { name: model.onnx, framework: onnxrt, session_options: { session.disable_cpu_ep_fallback: 1 } } # Default (MLAS) EP with the session options will be used +- { name: model.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8, params: { enable_qdq_optimizer: False, model_priority: LOW } } } # OpenVINO EP will be used +``` + +### Graph structure +There are two ways to describe the execution graph structure in Protopipe: +1. Using [Dependency Graph](#dependency-graph) (preferable) +2.
Using [Network Sequence](#network-sequence) (old) + +#### Dependency Graph +The dependency graph in Protopipe is specified by: +- `op_desc` - The list of operations; every operation has the following parameters: + - `tag` - **Required**. The unique name of the operation. + - `type` - **Optional**. The operation type: _Infer_, _CPU_, _Compound_ (**Default**: _Infer_) + - `repeat_count` - **Optional**. Runs the operation over the specified number of iterations. +- `connections` - The list of connections between operations. + +Supported operation types: +1. `Infer` - Performs model inference. Follow [Model parameters](#model-parameters) for the details. +2. `CPU` - Simulates CPU load by performing a busy wait for `time_in_us` microseconds. +3. `Compound` - Defines a subgraph that consists of `Infer` and `CPU` node types. + +``` +op_desc: + - { tag: A, path: Model-A.xml, ip: FP16, op: FP16 } + - { tag: B, path: Model-B.onnx, framework: onnxrt, ep: { name: OV, device_type: CPU_FP32 } } + - { tag: C, type: CPU, time_in_us: 5000 } + - { tag: D, path: Model-D.onnx, framework: onnxrt } + - { tag: E, path: Model-E.xml, il: NCHW, device: NPU, config: { PERFORMANCE_HINT: LATENCY } } + - { tag: F, path: Model-F.xml } +connections: + - [A, C, E, F] + - [A, B, D, F] + - [B, F] +``` +```mermaid + graph LR; + A-->B + A-->C + B-->D + B-->F + C-->E + E-->F + D-->F +``` + +The source **is not** reflected in the graph structure; assume that all operations that don't have input connections are implicitly linked with the source, e.g. for the graph above: +```mermaid + graph LR; + Source-->A + A-->B + A-->C + B-->D + B-->F + C-->E + E-->F + D-->F +``` +**Note:** The situation when no node has input connections is also possible, consider: +``` +op_desc: + - { tag: A, path: Model-A.xml } + - { tag: B, path: Model-B.xml } + - { tag: C, path: Model-C.xml } +``` + +```mermaid + graph LR; + Source-->A + Source-->B + Source-->C +``` +In this case the section `connections` **can be omitted**. + +**Note:** The graph must remain a `DAG`, so any loops in the graph are prohibited, including self-loops as well as double edges. These are examples of incorrect graphs: +``` +#1: Invalid - The list must contain at least two operations to connect +- [A] +#2: Invalid - Self-loop is prohibited +- [A, A] +#3: Invalid - Loop is prohibited +- [A, B, C, A] +#4: Invalid - Double edge [B->C] is prohibited +- [A, B, C] +- [B, C] +``` +**Example of repeat_count usage** +``` +- op_desc: + - { tag: A, path: Model_A.xml, ... } + - { tag: B, path: Model_B.xml, repeat_count: 20 } + - { tag: C, path: Model_C.xml, ... } + connections: + - [A, B, C] +``` +This defines the following pipeline: +```mermaid +graph LR; + A-->B + B-->C + B--->|20 iterations|B + +``` +**Example of "Compound" type operation**. +``` +op_desc: + - { tag: A, path: Model-A.xml } + - tag: B, + type: Compound, + repeat_count: 10, + op_desc: + - { tag: D, path: Model-D.xml } + - { tag: E, path: Model-E.xml } + - { tag: F, path: Model-F.xml } + connections: + - [D, E] + - [D, F] + - { tag: C, path: Model-C.xml } +connections: + - [A, B, C] +``` +This defines the following pipeline: +```mermaid +graph LR; + A[Model-A.xml] + C[Model-C.xml] + + subgraph B[Repeats 10 iterations] + direction LR + D[Model-D.xml] + E[Model-E.xml] + F[Model-F.xml] + + D --> E + D --> F + + end + + A --> B + B --> C +``` + +#### Network Sequence +There is also a way to describe the graph by using a chain-like structure: +`network` - **Required**. List or list of lists of model parameters.
Follow [Model Parameters](#model-parameters) for the details. `delay_in_us` - **Optional**. Delay between models in microseconds. + +``` +input_stream_list: +- network: + - { name: A.xml, ip: FP16, il: NCHW, device: CPU } + - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }] + - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } } + delay_in_us: 5000 +``` + +```mermaid + graph LR; + A-->Delay1; + Delay1-->B; + Delay1-->C; + B-->Delay2; + C-->Delay2; + Delay2-->D +``` + +### Scenario parameters +The list of scenarios is specified by using the `multi_inference` parameter; every scenario has the following parameters: +- `name` - **Optional**. The name of the execution scenario. +- `input_stream_list` - **Required**. The list of the streams that will be run in parallel. + +Every stream has the following execution parameters: +- `name` - **Optional**. The name of the stream. +- `iteration_count` - **Optional**. Number of iterations to execute. +- `exec_time_in_secs` - **Optional**. Execute until the specified timeout. +- `frames_interval_in_ms` - **Optional**. Execution frequency of the stream (**Default**: 0 - Unbounded) +- `target_fps` - **Optional**. Execution frequency of the stream. `target_fps = 1000 / frames_interval_in_ms`. `target_fps` and `frames_interval_in_ms` are mutually exclusive and cannot be provided together. +- `target_latency_in_ms` - **Optional**. When an iteration isn't finished within the specified interval, the next frame will be dropped from execution. (**Default**: Disabled) +- `op_desc`/`connections` or `network` - **Required**. Execution graph structure. Follow [Graph structure](#graph-structure) for the details. + +### Config example +Consider the following scenario that consists of two parallel streams specified in `config.yaml`: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO + +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, il: NCHW, device: CPU } + - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }] + - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } } + target_fps: 30 + exec_time_in_secs: 15 + - op_desc: + - { tag: E, path: E.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8 } } + - { tag: F, type: CPU, time_in_us: 5000 } + - { tag: G, path: G.xml, ip: FP16, op: FP16, priority: HIGH } + connections: + - [E, F, G] + target_fps: 100 + exec_time_in_secs: 15 +``` +- The first `stream` is defined by using [Network sequence](#network-sequence) syntax and will execute the following graph with `30` FPS cadence: + ```mermaid + graph LR; + A-->B; + A-->C; + B-->D; + C-->D; + ``` +- The second `stream` is defined by using [Dependency graph](#dependency-graph) syntax and will execute the following graph with `100` FPS cadence. + ```mermaid + graph LR; + E-->F; + F-->G; + ``` + +Run: +``` +./protopipe -cfg config.yaml --drop_frames +``` +Both streams will be executed simultaneously in different threads for `15` seconds. + +Output format: +``` +stream 0: throughput: FPS, latency: min: ms, avg: ms, max: ms, frames dropped: / +stream 1: throughput: FPS, latency: min: ms, avg: ms, max: ms, frames dropped: / +``` + +## How to run +Protopipe has the following `CLI` options to configure the execution behaviour: + +`--cfg ` - Path to the configuration file. +`--drop_frames` - **Optional**. Drop frames if they come earlier than the stream iteration is completed.
E.g. if a `stream` works with `target_fps: 10` (~`100ms` latency) but a stream iteration takes `150ms`, the next iteration will be triggered only in `50ms` if the option is enabled. +`--pipeline` - **Optional**. Enables pipelined execution for all scenarios/streams. +`--niter ` - **Optional**. Number of iterations. If specified, overwrites the termination criterion specified in the configuration file for all scenarios/streams. +`-t ` - **Optional**. Time in seconds. If specified, overwrites the termination criterion specified in the configuration file for all scenarios/streams. +`--mode ` - **Optional**. Execution mode: *performance*, *reference*, *validation* (**Default**: *performance*) +`--exec_filter ` - **Optional**. Run only the scenarios that match the provided string pattern. +`--inference_only` - **Optional**. Run only inference execution for every model, excluding i/o data transfer (**Default**: true) + +### Filtering +Sometimes it's needed to run only a particular set of the scenarios specified in the config file rather than all of them. +For example, consider the following config file with three scenarios specified in `scenarios.yaml`: +``` +model_dir: + local: /models/ +device_name: CPU +multi_inference: +- input_stream_list: + - network: + - { name: A.xml } +- input_stream_list: + - network: + - { name: B.xml } +- input_stream_list: + - network: + - { name: C.xml } +``` +By default, all scenarios are assigned unique names according to the `multi_inference_` pattern. +E.g. the scenario with model `A.xml` has the default name `multi_inference_0`. +Use the `-exec_filter ` CLI option to control which scenarios from the config should be executed: +``` +./protopipe -cfg scenarios.yaml -niter 100 -exec_filter=".*[0-1]" +``` +Only `multi_inference_0` and `multi_inference_1` scenarios will be executed. + +It's also possible to overwrite the default names in the config file: +``` +model_dir: + local: /models/ +device_name: CPU +multi_inference: +- name: Model-A-Scenario + input_stream_list: + - network: + - { name: A.xml } +- name: Model-B-Scenario + input_stream_list: + - network: + - { name: B.xml } +- name: Model-C-Scenario + input_stream_list: + - network: + - { name: C.xml } +``` +and use them for filtering: +``` +./protopipe --cfg scenarios.yaml --niter 100 --exec_filter ".*-[AB].*" +``` +Only `Model-A-Scenario` and `Model-B-Scenario` scenarios will be executed. + +**Note**: Protopipe uses [std::regex](https://en.cppreference.com/w/cpp/regex) rules for pattern matching. + +## Use cases +Once the scenario configuration is defined (see: [How to configure](#how-to-configure)) it can be used for various use cases. +### Measure performance +`Protopipe` can report performance statistics; consider the following run example: +``` +./protopipe --cfg config.yaml --drop_frames -t 30 +``` +Example of output: +``` +stream 0: throughput: 7.62659 FPS, latency: min: 93.804 ms, avg: 111.31 ms, max: 145.178 ms, frames dropped: 290/390 +``` +It might also be interesting to play with the following `CLI` options: +- `--drop_frames=false` - Disables frame drop. By default, if an iteration doesn't fit into the 1000 / `target_fps` latency interval, the next iteration will be skipped. +- `--inference_only=false` - Enables i/o data transfer for inference. By default, only inference time is captured in performance statistics. +- `--pipeline` - Enables ***pipelined*** execution. + +### Generate reference +As a prerequisite for accuracy validation, it's useful to have a mechanism to generate the reference output data to compare with.
In Protopipe it can be done by using the `reference` mode. +Use additional parameters to configure the `reference` mode: +- `input_data` - **Required**. Path that contains input data for the model. If the entity under the path is empty, input data will be generated randomly and dumped into the specified path. +- `output_data` - **Required**. Path where reference output data will be dumped. +- `random` - **Optional**. Initializer to generate input data randomly. (Default: ` { dist: uniform, low: 0.0, high: 255 }`) + +Examples: +``` +random: { dist: uniform, low: -1.0, high: 1.0 } # specified globally for all models +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: B-inputs/ } + # overwrites global initializer for the model B.xml + - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, random: { dist: uniform, low: 0, high: 255.0 } } +``` + +Run `Protopipe` in `reference` mode: +``` +./protopipe -cfg config.yaml -mode reference -niter 10 +``` +Output: +``` +stream 0: Reference data has been generated for 10 iteration(s) +``` + +### Validate accuracy +Protopipe has the dedicated `validation` mode to perform accuracy validation. The existing configuration file can simply be extended to perform accuracy validation: + +- `save_validation_outputs` - **Optional**. Accepts the path where actual execution outputs will be dumped. (Default: disabled) +- `metric` - **Optional**. Accuracy metric to compare actual vs reference outputs. (Default: `{ name: norm, tolerance: 0.0 }`) +- `input_data` - **Required**. Path that contains input data for the model. +- `output_data` - **Required**. Path that contains **reference** data to compare with. + +**Note**: If a folder is provided for either **input_data** or **output_data**, it must be in the following format: +``` +input_data/ + / + input_0.bin + input_1.bin + ... + input_N.bin + +output_data/ + / + output_0.bin + output_1.bin + ... + output_N.bin +``` +**Note**: Input and output data can be generated automatically by using `Protopipe` in **reference** mode. (see: [Generate reference](#generate-reference)) + +Examples: +``` +- { name: model.xml, ip: FP16, input_data: input_data/, output_data: output_data/ } +- { name: model.xml, ip: FP16, input_data: input.bin, output_data: output.bin } +- { name: model.xml, ip: FP16, input_data: { data: input.bin }, output_data: { result: output.bin } } +``` + +### Supported metrics +1. L2 Norm: $$\text{Norm}(\mathbf{A}, \mathbf{B}) = \sqrt{\sum_{i,j} (A_{i,j} - B_{i,j})^2}$$ +Parameters: + - `name: norm` - **Required**. Enables the L2 Norm metric. + - `tolerance` - **Required**. If the metric value is greater than **tolerance**, the check is treated as **FAIL**. +2. Cosine similarity: $$\text{Cosine}(\mathbf{A}, \mathbf{B}) = \frac{\mathbf{A} \cdot \mathbf{B}}{\| \mathbf{A} \|_2 \| \mathbf{B} \|_2}$$ +Parameters: + - `name: cosine` - **Required**. Enables the cosine similarity metric. + - `threshold` - **Required**. If the metric value is lower than **threshold**, the check is treated as **FAIL**. +3. NRMSE: $$\text{NRMSE}(\mathbf{A}, \mathbf{B}) = \frac{1}{D}\sqrt{\frac{1}{N}\sum_{i=1}^N(A_i - B_i)^2}$$ +where +$$D = \text{max}(0.001, \text{max}(A_{max}-A_{min}\text{, } B_{max}-B_{min}))$$ +Parameters: + - `name: nrmse` - **Required**. Enables the NRMSE metric. + - `tolerance` - **Required**. If the metric value is greater than **tolerance**, the check is treated as **FAIL**.
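+ +A small worked example (not part of the original document; the values are chosen purely for illustration) may help relate the metrics to the `tolerance`/`threshold` settings. For $A = [1, 2]$ and $B = [1, 4]$: +$$\text{Norm}(A, B) = \sqrt{0^2 + 2^2} = 2, \qquad \text{Cosine}(A, B) = \frac{9}{\sqrt{5}\,\sqrt{17}} \approx 0.976, \qquad \text{NRMSE}(A, B) = \frac{1}{3}\sqrt{\frac{0^2 + 2^2}{2}} \approx 0.47$$ +Here $D = \max(0.001, \max(2 - 1, 4 - 1)) = 3$, so `norm` passes only when `tolerance` $\geq 2$, `cosine` passes only when `threshold` $\leq 0.976$, and `nrmse` passes only when `tolerance` $\geq 0.47$.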
+ +### Example +Consider the following `config.yaml`: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO + +save_validation_outputs: actual-outputs/ +metric: { name: norm, tolerance: 0.01 } + +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: A-outputs/ } + # overwrites the global metric for the model B.xml + - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, metric: { name: norm, tolerance: 0.0 } } +``` + +Use `reference` mode to generate random input data for every model and calculate the reference outputs. +**Note**: If the reference device is different, it can be changed in the config file (`device_name`) accordingly. +``` +./protopipe --cfg config.yaml --mode reference -niter 10 +``` +Use `validation` mode to perform accuracy validation: +``` +./protopipe --cfg config.yaml --mode validation -t 15 +``` +Example of successful validation: +``` +stream 0: Validation has passed for iteration(s) +``` +In case of accuracy issues, the output will be the following: +``` +stream 0: Accuracy check failed on iteration(s) (first 10): +Iteration : + Model: A, Layer: , Metric: Norm{tolerance: 0.01}, Reason: > 0.01; +``` + +## How to build +### Prerequisites +1. Clone the `npu-plugin` repository +2. Build OpenCV G-API with OpenVINO/ONNXRT support +#### Build OpenCV G-API with OpenVINO/ONNXRT support +1. Clone OpenCV repo: + ``` + git clone https://github.com/opencv/opencv + cd opencv && git checkout 78195bc3df + ``` +2. Build OpenCV G-API: + ``` + mkdir -p build && cd build + cmake ../ -DBUILD_LIST=gapi \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_OPENVINO=ON \ + -DOpenVINO_DIR= \ + -DWITH_ONNX=ON \ + -DORT_INSTALL_DIR= + cmake --build . --config Release --target opencv_gapi --parallel + ``` +### In-plugin build + +1. Clone and build [OpenVINO](https://github.com/openvinotoolkit/openvino) from sources +2. Build OpenCV G-API with OpenVINO / ONNXRT support +3. Clone the `npu-plugin` repository + ``` + git clone https://github.com/openvinotoolkit/npu_plugin + git submodule update --init --recursive + ``` +4. Build `Protopipe` as part of the `npu-plugin` build: + ``` + mkdir build && cd build + cmake ../ -DOpenCV_DIR= -DOpenVINODeveloperPackage_DIR= + cmake --build . --config Release --target protopipe --parallel + ``` + +### Standalone build +1. Build `yaml-cpp` + ``` + mkdir -p yaml-cpp_build && cd yaml-cpp_build + cmake ..//thirdparty/yaml-cpp -DCMAKE_INSTALL_PREFIX=install + cmake --build . --config Release --target install --parallel + ``` +2. Build `gflags` + ``` + git clone https://github.com/gflags/gflags + cd gflags + mkdir -p gflags_build && cd gflags_build + cmake ../ -DCMAKE_INSTALL_PREFIX=install + cmake --build . --config Release --target install --parallel + ``` +3. Build `Protopipe` + ``` + mkdir -p protopipe_build && cd protopipe_build + cmake /tools/protopipe/ \ + -DOpenCV_DIR= \ + -Dgflags_DIR= \ + -DOpenVINO_DIR= \ + + cmake --build . --config Release --target protopipe --parallel + ``` +### Verify the installation +**Note**: Make sure the `opencv_*` libraries are visible in the environment: +- Windows: + ``` + set PATH=\build\bin\Release\;%PATH% + ``` +- Linux: + ``` + export LD_LIBRARY_PATH=/build/lib/:$LD_LIBRARY_PATH + ``` +**Note**: If `OpenCV` has been built with `ONNXRT` support, all `ONNXRT` related libraries must be located in the same folder as the `protopipe` executable.
+ +Run `Protopipe` with -h flag to verify installation: +``` +> protopipe.exe -h +``` +Successful build will show the information about `Protopipe` CLI options: +``` +protopipe [OPTIONS] + + Common options: + -h Optional. Print the usage message. + -cfg Path to the configuration file. + -pipeline Optional. Enable pipelined execution. + -drop_frames Optional. Drop frames if they come earlier than pipeline is completed. + -mode Optional. Simulation mode: performance (default), reference, validation. + -niter Optional. Number of iterations. If specified overwrites termination criterion for all scenarios in configuration file. + -t Optional. Time in seconds. If specified overwrites termination criterion for all scenarios in configuration file. + -inference_only Optional. Run only inference execution for every model excluding i/o data transfer. Applicable only for "performance" mode. (default: true). + -exec_filter Optional. Run the scenarios that match provided string pattern. +``` diff --git a/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake new file mode 100644 index 00000000000000..090756f86c44c0 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake @@ -0,0 +1,63 @@ +# +# Copyright (C) 2024 Intel Corporation. +# SPDX-License-Identifier: Apache 2.0 +# + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +if("${CMAKE_BUILD_TYPE}" STREQUAL "") + set(CMAKE_BUILD_TYPE "Release") +endif() + +find_package(OpenVINO REQUIRED COMPONENTS Runtime) +find_package(Threads REQUIRED) +find_package(OpenCV 4.9.0 REQUIRED COMPONENTS gapi) + +find_package(yaml-cpp QUIET) +find_package(gflags QUIET) + +if (NOT yaml-cpp_FOUND) + set(YAML_CPP_SOURCES_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/yaml-cpp") + message(STATUS "yaml-cpp package was not found. Trying to find source package in ${YAML_CPP_SOURCES_PATH}.") + if(EXISTS ${YAML_CPP_SOURCES_PATH}) + message(STATUS "yaml-cpp source package found. yaml-cpp will be built from sources.") + add_subdirectory(${YAML_CPP_SOURCES_PATH} yaml-cpp EXCLUDE_FROM_ALL) + else() + message(FATAL_ERROR "yaml-cpp package and sources were not found. CMake will exit." ) + endif() +endif() + +if (NOT gflags_FOUND) + set(GFLAGS_SOURCES_PATH "${PACKAGE_PREFIX_DIR}/samples/cpp/thirdparty/gflags") + message(STATUS "gflags package was not found. Trying to find source package in ${GFLAGS_SOURCES_PATH}.") + if(EXISTS ${GFLAGS_SOURCES_PATH}) + message(STATUS "gflags source package found. gflags will be built from sources.") + add_subdirectory(${GFLAGS_SOURCES_PATH} gflags EXCLUDE_FROM_ALL) + else() + message(FATAL_ERROR "gflags was not found. CMake will exit." 
) + endif() +endif() + +set(DEPENDENCIES + Threads::Threads + gflags + yaml-cpp + openvino::runtime + opencv_gapi +) + +if (WIN32) + list(APPEND DEPENDENCIES "winmm.lib") +endif() + +file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp") +list(APPEND SOURCES main.cpp) + +add_executable(${TARGET_NAME} ${SOURCES}) +target_link_libraries(${TARGET_NAME} PRIVATE ${DEPENDENCIES}) +target_include_directories(${TARGET_NAME} PUBLIC "${PROJECT_SOURCE_DIR}/src/") + +install(TARGETS ${TARGET_NAME} + DESTINATION "tools/${TARGET_NAME}" + COMPONENT npu_tools) diff --git a/src/plugins/intel_npu/tools/protopipe/main.cpp b/src/plugins/intel_npu/tools/protopipe/main.cpp new file mode 100644 index 00000000000000..8596ba864335ca --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/main.cpp @@ -0,0 +1,266 @@ +// +// Copyright (C) 2023-2024 Intel Corporation. +// SPDX-License-Identifier: Apache 2.0 +// + +#include +#include +#include + +#include + +#include "parser/parser.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/performance_mode.hpp" +#include "simulation/reference_mode.hpp" +#include "simulation/validation_mode.hpp" + +#include "utils/error.hpp" +#include "utils/logger.hpp" + +static constexpr char help_message[] = "Optional. Print the usage message."; +static constexpr char cfg_message[] = "Path to the configuration file."; +static constexpr char device_message[] = + "Optional. Device name. If specified overwrites device specified in config file."; +static constexpr char pipeline_message[] = "Optional. Enable pipelined execution."; +static constexpr char drop_message[] = "Optional. Drop frames if they come earlier than pipeline is completed."; +static constexpr char mode_message[] = "Optional. Simulation mode: performance (default), reference, validation."; +static constexpr char niter_message[] = "Optional. Number of iterations. If specified overwrites termination criterion" + " for all scenarios in configuration file."; +static constexpr char exec_time_message[] = "Optional. Time in seconds. If specified overwrites termination criterion" + " for all scenarios in configuration file."; +static constexpr char inference_only_message[] = + "Optional. Run only inference execution for every model excluding i/o data transfer." + " Applicable only for \"performance\" mode. (default: true)."; + +static constexpr char exec_filter_msg[] = "Optional. 
Run the scenarios that match provided string pattern."; + +DEFINE_bool(h, false, help_message); +DEFINE_string(cfg, "", cfg_message); +DEFINE_string(d, "", device_message); +DEFINE_bool(pipeline, false, pipeline_message); +DEFINE_bool(drop_frames, false, drop_message); +DEFINE_string(mode, "performance", mode_message); +DEFINE_uint64(niter, 0, niter_message); +DEFINE_uint64(t, 0, exec_time_message); +DEFINE_bool(inference_only, true, inference_only_message); +DEFINE_string(exec_filter, ".*", exec_filter_msg); + +static void showUsage() { + std::cout << "protopipe [OPTIONS]" << std::endl; + std::cout << std::endl; + std::cout << " Common options: " << std::endl; + std::cout << " -h " << help_message << std::endl; + std::cout << " -cfg " << cfg_message << std::endl; + std::cout << " -pipeline " << pipeline_message << std::endl; + std::cout << " -drop_frames " << drop_message << std::endl; + std::cout << " -d " << device_message << std::endl; + std::cout << " -mode " << mode_message << std::endl; + std::cout << " -niter " << niter_message << std::endl; + std::cout << " -t " << exec_time_message << std::endl; + std::cout << " -inference_only " << inference_only_message << std::endl; + std::cout << " -exec_filter " << exec_filter_msg << std::endl; + std::cout << std::endl; +} + +bool parseCommandLine(int* argc, char*** argv) { + gflags::ParseCommandLineNonHelpFlags(argc, argv, true); + + if (FLAGS_h) { + showUsage(); + return false; + } + + if (FLAGS_cfg.empty()) { + throw std::invalid_argument("Path to config file is required"); + } + + std::cout << "Parameters:" << std::endl; + std::cout << " Config file: " << FLAGS_cfg << std::endl; + std::cout << " Pipelining is enabled: " << std::boolalpha << FLAGS_pipeline << std::endl; + std::cout << " Simulation mode: " << FLAGS_mode << std::endl; + std::cout << " Inference only: " << std::boolalpha << FLAGS_inference_only << std::endl; + std::cout << " Device: " << FLAGS_d << std::endl; + return true; +} + +static ICompiled::Ptr compileSimulation(Simulation::Ptr simulation, const bool pipelined, const bool drop_frames) { + LOG_INFO() << "Compile simulation" << std::endl; + if (pipelined) { + return simulation->compilePipelined(drop_frames); + } + return simulation->compileSync(drop_frames); +}; + +class ThreadRunner { +public: + using F = std::function; + void add(F&& func) { + m_funcs.push_back(std::move(func)); + } + void run(); + +private: + std::vector m_funcs; +}; + +void ThreadRunner::run() { + std::vector> futures; + futures.reserve(m_funcs.size()); + for (auto&& func : m_funcs) { + futures.push_back(std::async(std::launch::async, std::move(func))); + } + for (auto& future : futures) { + future.get(); + }; +}; + +class Task { +public: + Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion); + + void operator()(); + const Result& result() const; + const std::string& name() const; + +private: + ICompiled::Ptr m_compiled; + std::string m_name; + ITermCriterion::Ptr m_criterion; + + Result m_result; +}; + +Task::Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion) + : m_compiled(std::move(compiled)), m_name(std::move(name)), m_criterion(std::move(criterion)) { +} + +void Task::operator()() { + try { + m_result = m_compiled->run(m_criterion); + } catch (const std::exception& e) { + m_result = Error{e.what()}; + } +} + +const Result& Task::result() const { + return m_result; +} + +const std::string& Task::name() const { + return m_name; +} + +static Simulation::Ptr createSimulation(const 
std::string& mode, StreamDesc&& stream, const bool inference_only, + const Config& config) { + Simulation::Ptr simulation; + // NB: Common parameters for all simulations + Simulation::Config cfg{stream.name, stream.frames_interval_in_us, config.disable_high_resolution_timer, + std::move(stream.graph), std::move(stream.infer_params_map)}; + if (mode == "performance") { + PerformanceSimulation::Options opts{config.initializer, std::move(stream.initializers_map), + std::move(stream.input_data_map), inference_only, + std::move(stream.target_latency)}; + simulation = std::make_shared(std::move(cfg), std::move(opts)); + } else if (mode == "reference") { + CalcRefSimulation::Options opts{config.initializer, std::move(stream.initializers_map), + std::move(stream.input_data_map), std::move(stream.output_data_map)}; + simulation = std::make_shared(std::move(cfg), std::move(opts)); + } else if (mode == "validation") { + ValSimulation::Options opts{config.metric, std::move(stream.metrics_map), std::move(stream.input_data_map), + std::move(stream.output_data_map), std::move(stream.per_iter_outputs_path)}; + simulation = std::make_shared(std::move(cfg), std::move(opts)); + } else { + throw std::logic_error("Unsupported simulation mode: " + mode); + } + ASSERT(simulation); + return simulation; +} + +int main(int argc, char* argv[]) { + // NB: Intentionally wrapped into try-catch to display exceptions occur on windows. + try { + if (!parseCommandLine(&argc, &argv)) { + return 0; + } + ReplaceBy replace_by{FLAGS_d}; + + auto parser = std::make_shared(FLAGS_cfg); + + LOG_INFO() << "Parse scenarios from " << FLAGS_cfg << " config file" << std::endl; + auto config = parser->parseScenarios(replace_by); + LOG_INFO() << "Found " << config.scenarios.size() << " scenario(s)" << std::endl; + + // NB: Overwrite termination criteria for all scenarios if specified via CLI + ITermCriterion::Ptr global_criterion; + if (FLAGS_niter != 0u) { + LOG_INFO() << "Termination criterion of " << FLAGS_niter << " iteration(s) will be used for all scenarios" + << std::endl; + global_criterion = std::make_shared(FLAGS_niter); + } + if (FLAGS_t != 0u) { + if (global_criterion) { + // TODO: In fact, it make sense to have them both enabled. 
+ THROW_ERROR("-niter and -t options can't be specified together!"); + } + LOG_INFO() << "Termination criterion of " << FLAGS_t << " second(s) will be used for all scenarios" + << std::endl; + // NB: TimeOut accepts microseconds + global_criterion = std::make_shared(FLAGS_t * 1'000'000); + } + + std::regex filter_regex{FLAGS_exec_filter}; + bool any_scenario_failed = false; + for (auto&& scenario : config.scenarios) { + // NB: Skip the scenarios that don't match provided filter pattern + if (!std::regex_match(scenario.name, filter_regex)) { + LOG_INFO() << "Skip the scenario " << scenario.name << " as it doesn't match the -exec_filter=\"" + << FLAGS_exec_filter << "\" pattern" << std::endl; + continue; + } + LOG_INFO() << "Start processing " << scenario.name << std::endl; + + ThreadRunner runner; + std::vector tasks; + tasks.reserve(scenario.streams.size()); + for (auto&& stream : scenario.streams) { + auto criterion = stream.criterion; + auto stream_name = stream.name; + if (global_criterion) { + if (criterion) { + LOG_INFO() << "Stream: " << stream_name + << " termination criterion is overwritten by CLI parameter" << std::endl; + } + criterion = global_criterion->clone(); + } + auto simulation = createSimulation(FLAGS_mode, std::move(stream), FLAGS_inference_only, config); + auto compiled = compileSimulation(simulation, FLAGS_pipeline, FLAGS_drop_frames); + tasks.emplace_back(std::move(compiled), std::move(stream_name), std::move(criterion)); + runner.add(std::ref(tasks.back())); + } + + LOG_INFO() << "Run " << tasks.size() << " stream(s) asynchronously" << std::endl; + runner.run(); + LOG_INFO() << "Execution has finished" << std::endl; + + for (const auto& task : tasks) { + if (!task.result()) { + // NB: Scenario failed if any of the streams failed + any_scenario_failed = true; + } + std::cout << "stream " << task.name() << ": " << task.result().str() << std::endl; + } + std::cout << "\n"; + } + if (any_scenario_failed) { + return EXIT_FAILURE; + } + } catch (const std::exception& e) { + std::cout << e.what() << std::endl; + throw; + } catch (...) 
{ + std::cout << "Unknown error" << std::endl; + throw; + } + return 0; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.cpp b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp new file mode 100644 index 00000000000000..d13d2954a21b12 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp @@ -0,0 +1,140 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "graph.hpp" + +Nodes Node::srcNodes() const { + Nodes src_nodes; + src_nodes.reserve(m_src_edges.size()); + std::transform(m_src_edges.begin(), m_src_edges.end(), std::back_inserter(src_nodes), [](EdgeHandle edge) { + return edge->srcNode(); + }); + return src_nodes; +} + +Nodes Node::dstNodes() const { + Nodes dst_nodes; + dst_nodes.reserve(m_dst_edges.size()); + std::transform(m_dst_edges.begin(), m_dst_edges.end(), std::back_inserter(dst_nodes), [](EdgeHandle edge) { + return edge->dstNode(); + }); + return dst_nodes; +} + +Edges Node::srcEdges() const { + return {m_src_edges.begin(), m_src_edges.end()}; +} + +Edges Node::dstEdges() const { + return {m_dst_edges.begin(), m_dst_edges.end()}; +} + +NodeHandle Graph::create() { + auto node = std::make_shared(); + NodeHandle nh(node); + m_nodes.emplace(node.get(), MetaPtr{node, Meta{}}); + return nh; +} + +void Graph::remove(NodeHandle nh) { + auto src_edges = nh->srcEdges(); + for (size_t i = 0; i < src_edges.size(); ++i) { + remove(src_edges[i]); + } + auto dst_edges = nh->dstEdges(); + for (size_t i = 0; i < dst_edges.size(); ++i) { + remove(dst_edges[i]); + } + m_nodes.erase(nh.get()); +} + +void Graph::remove(EdgeHandle eh) { + auto src = eh->srcNode(); + auto dst = eh->dstNode(); + src->m_dst_edges.erase(eh); + dst->m_src_edges.erase(eh); + m_edges.erase(eh.get()); +}; + +EdgeHandle Graph::link(NodeHandle src, NodeHandle dst) { + auto edge = std::make_shared(src, dst); + EdgeHandle eh{edge}; + m_edges.emplace(edge.get(), MetaPtr{edge, Meta{}}); + src->m_dst_edges.insert(eh); + dst->m_src_edges.insert(eh); + return eh; +} + +Meta& Graph::meta(NodeHandle handle) { + const auto it = m_nodes.find(handle.get()); + ASSERT(it != m_nodes.end()); + return it->second.meta; +} + +const Meta& Graph::meta(NodeHandle handle) const { + const auto it = m_nodes.find(handle.get()); + ASSERT(it != m_nodes.end()); + return it->second.meta; +} + +Meta& Graph::meta(EdgeHandle handle) { + const auto it = m_edges.find(handle.get()); + ASSERT(it != m_edges.end()); + return it->second.meta; +} + +const Meta& Graph::meta(EdgeHandle handle) const { + const auto it = m_edges.find(handle.get()); + ASSERT(it != m_edges.end()); + return it->second.meta; +} + +std::vector Graph::nodes() const { + std::vector ret; + std::transform(m_nodes.begin(), m_nodes.end(), std::back_inserter(ret), [](const auto& p) { + return NodeHandle{p.second.ptr}; + }); + return ret; +} + +static void dfs(NodeHandle& nh, std::unordered_set& visited, std::stack& stack) { + visited.insert(nh); + auto dst_nodes = nh->dstNodes(); + for (auto dst_nh : dst_nodes) { + auto it = visited.find(dst_nh); + if (it == visited.end()) { + dfs(dst_nh, visited, stack); + } + } + stack.push(nh); +}; + +std::vector Graph::sorted() const { + std::unordered_set visited; + std::stack stack; + const auto nodes = this->nodes(); + for (auto nh : nodes) { + auto it = visited.find(nh); + if (it == visited.end()) { + dfs(nh, visited, stack); + } + } + std::vector sorted; + while (!stack.empty()) { + sorted.push_back(stack.top()); + stack.pop(); + } + return 
sorted; +} + +Meta& Meta::operator+=(const Meta& other) { + for (const auto& p : other.store) { + ASSERT(store.emplace(p.first, p.second).second); + } + return *this; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.hpp b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp new file mode 100644 index 00000000000000..66aeccbe156d09 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp @@ -0,0 +1,168 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "utils/error.hpp" + +template +class WeakHandle { +public: + explicit WeakHandle(std::shared_ptr obj): m_obj(obj) { + } + T* get() const { + return m_obj.lock().get(); + } + T* operator->() const { + return get(); + } + bool operator==(const WeakHandle& other) const { + return get() == other.get(); + } + +private: + std::weak_ptr m_obj; +}; + +namespace std { +template +struct hash> { + uint64_t operator()(const WeakHandle& handle) const { + return std::hash()(handle.get()); + } +}; +} // namespace std + +class Graph; +class Node; +class Edge; + +using NodeHandle = WeakHandle; +using EdgeHandle = WeakHandle; +using Nodes = std::vector; +using Edges = std::vector; +using NodeSet = std::unordered_set; +using EdgeSet = std::unordered_set; + +class Node { + friend class Graph; + using Ptr = std::shared_ptr; + +public: + Nodes srcNodes() const; + Nodes dstNodes() const; + Edges srcEdges() const; + Edges dstEdges() const; + +private: + EdgeSet m_src_edges; + EdgeSet m_dst_edges; +}; + +class Edge { + friend class Graph; + using Ptr = std::shared_ptr; + +public: + Edge(NodeHandle src, NodeHandle dst): m_src(src), m_dst(dst) { + } + NodeHandle srcNode() const { + return m_src; + } + NodeHandle dstNode() const { + return m_dst; + } + +private: + NodeHandle m_src; + NodeHandle m_dst; +}; + +class Meta { +public: + template + void set(T&& meta); + template + const T& get() const; + template + T& get(); + template + bool has() const; + Meta& operator+=(const Meta& other); + +private: + using MetaStore = std::unordered_map; + MetaStore store; +}; + +template +void Meta::set(T&& meta) { + // NB: Check if there is no such meta yet. 
+ ASSERT(store.emplace(std::type_index(typeid(T)), std::forward(meta)).second); +} + +template +bool Meta::has() const { + auto it = store.find(std::type_index(typeid(T))); + return it != store.end(); +} + +template +const T& Meta::get() const { + const auto it = store.find(std::type_index(typeid(T))); + ASSERT(it != store.end()); + return *std::any_cast(&it->second); +} + +template +T& Meta::get() { + auto it = store.find(std::type_index(typeid(T))); + ASSERT(it != store.end()); + return *std::any_cast(&it->second); +} + +class Graph { +public: + NodeHandle create(); + void remove(NodeHandle nh); + void remove(EdgeHandle eh); + EdgeHandle link(NodeHandle src, NodeHandle dst); + + Meta& meta() { + return m_graph_meta; + } + const Meta& meta() const { + return m_graph_meta; + } + + Meta& meta(NodeHandle handle); + const Meta& meta(NodeHandle handle) const; + Meta& meta(EdgeHandle handle); + const Meta& meta(EdgeHandle handle) const; + + std::vector nodes() const; + std::vector sorted() const; + +private: + template + struct MetaPtr { + std::shared_ptr ptr; + Meta meta; + }; + template + using MetaMap = std::unordered_map>; + + Meta m_graph_meta; + MetaMap m_nodes; + MetaMap m_edges; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp new file mode 100644 index 00000000000000..34099d36a69fdb --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp @@ -0,0 +1,872 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "parser/config.hpp" + +#include "utils/error.hpp" +#include "utils/logger.hpp" + +#include +#include +#include +#include + +#include // depth + +namespace fs = std::filesystem; + +struct GlobalOptions { + std::string blob_dir = "."; + std::string model_dir = "."; + std::string device_name = "NPU"; + std::string log_level = "NONE"; + std::string compiler_type = "DRIVER"; + std::optional save_validation_outputs; +}; + +struct Network { + std::string tag; + InferenceParams params; + LayerVariantAttr input_data; + LayerVariantAttr output_data; + LayerVariantAttr initializers; + LayerVariantAttr accuracy_metrics; +}; + +struct InferOp { + InferenceParams params; + LayerVariantAttr input_data; + LayerVariantAttr output_data; + LayerVariantAttr initializers; + LayerVariantAttr accuracy_metrics; +}; + +struct CPUOp { + uint64_t time_in_us; +}; + +struct CompoundOp { + uint64_t repeat_count; + InferenceParamsMap params; + ScenarioGraph subgraph; +}; + +struct OpDesc { + std::string tag; + using OpType = std::variant; + OpType op; +}; + +// NB: Handles duplicating tags. 
+class TagsManager { +public: + std::string add(const std::string& tag); + +private: + std::unordered_multiset m_tags; +}; + +std::string TagsManager::add(const std::string& tag) { + std::string t = tag; + m_tags.insert(t); + const auto c = m_tags.count(t); + if (c > 1) { + t += "-" + std::to_string(c); + } + return t; +} + +static LogLevel toLogLevel(const std::string& lvl) { + if (lvl == "NONE") + return LogLevel::None; + if (lvl == "INFO") + return LogLevel::Info; + if (lvl == "DEBUG") + return LogLevel::Debug; + THROW_ERROR("Unsupported log level: " << lvl); +} + +static int toDepth(const std::string& prec) { + if (prec == "FP32") + return CV_32F; + if (prec == "FP16") + return CV_16F; + if (prec == "U8") + return CV_8U; + if (prec == "I32") + return CV_32S; + throw std::logic_error("Unsupported precision type: " + prec); +} + +static AttrMap toDepth(const AttrMap& attrmap) { + AttrMap depthmap; + for (const auto& [name, str_depth] : attrmap) { + depthmap.emplace(name, toDepth(str_depth)); + } + return depthmap; +} + +static LayerVariantAttr toDepth(const LayerVariantAttr& attr) { + LayerVariantAttr depthattr; + if (std::holds_alternative(attr)) { + depthattr = toDepth(std::get(attr)); + } else { + depthattr = toDepth(std::get>(attr)); + } + return depthattr; +} + +static std::string toPriority(const std::string& priority) { + if (priority == "LOW") { + return "LOW"; + } + if (priority == "NORMAL") { + return "MEDIUM"; + } + if (priority == "HIGH") { + return "HIGH"; + } + throw std::logic_error("Unsupported model priority: " + priority); +} + +static ScenarioGraph buildGraph(const std::vector& op_descs, + const std::vector>& connections); + +namespace YAML { + +template +struct convert> { + static bool decode(const Node& node, std::vector& vec) { + if (!node.IsSequence()) { + return false; + } + + for (auto& child : node) { + vec.push_back(child.as()); + } + return true; + } +}; + +template +struct convert> { + static bool decode(const Node& node, std::map& map) { + if (!node.IsMap()) { + return false; + } + for (const auto& itr : node) { + map.emplace(itr.first.as(), itr.second.as()); + } + return true; + } +}; + +template +struct convert> { + static bool decode(const Node& node, LayerVariantAttr& layer_attr) { + if (node.IsMap()) { + layer_attr = node.as>(); + } else { + layer_attr = node.as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, UniformGenerator::Ptr& generator) { + if (!node["low"]) { + THROW_ERROR("Uniform distribution must have \"low\" attribute"); + } + if (!node["high"]) { + THROW_ERROR("Uniform distribution must have \"high\" attribute"); + } + generator = std::make_shared(node["low"].as(), node["high"].as()); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, IRandomGenerator::Ptr& generator) { + if (!node["dist"]) { + THROW_ERROR("\"random\" must have \"dist\" attribute!"); + } + const auto dist = node["dist"].as(); + if (dist == "uniform") { + generator = node.as(); + } else { + THROW_ERROR("Unsupported random distribution: \"" << dist << "\""); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Norm::Ptr& metric) { + // NB: If bigger than tolerance - fail. 
+ if (!node["tolerance"]) { + THROW_ERROR("Metric \"norm\" must have \"tolerance\" attribute!"); + } + const auto tolerance = node["tolerance"].as(); + metric = std::make_shared(tolerance); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Cosine::Ptr& metric) { + // NB: If lower than threshold - fail. + if (!node["threshold"]) { + THROW_ERROR("Metric \"cosine\" must have \"threshold\" attribute!"); + } + const auto threshold = node["threshold"].as(); + metric = std::make_shared(threshold); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, NRMSE::Ptr& metric) { + // NB: If bigger than tolerance - fail. + if (!node["tolerance"]) { + THROW_ERROR("Metric \"nrmse\" must have \"tolerance\" attribute!"); + } + const auto tolerance = node["tolerance"].as(); + metric = std::make_shared(tolerance); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, IAccuracyMetric::Ptr& metric) { + const auto type = node["name"].as(); + if (type == "norm") { + metric = node.as(); + } else if (type == "cosine") { + metric = node.as(); + } else if (type == "nrmse") { + metric = node.as(); + } else { + THROW_ERROR("Unsupported metric type: " << type); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, GlobalOptions& opts) { + if (node["model_dir"]) { + if (!node["model_dir"]["local"]) { + THROW_ERROR("\"model_dir\" must contain \"local\" key!"); + } + opts.model_dir = node["model_dir"]["local"].as(); + } + + if (node["blob_dir"]) { + if (!node["blob_dir"]["local"]) { + THROW_ERROR("\"blob_dir\" must contain \"local\" key!"); + } + opts.blob_dir = node["blob_dir"]["local"].as(); + } + + if (node["device_name"]) { + opts.device_name = node["device_name"].as(); + } + + if (node["log_level"]) { + opts.log_level = node["log_level"].as(); + } + + if (node["compiler_type"]) { + opts.compiler_type = node["compiler_type"].as(); + } + + if (node["save_validation_outputs"]) { + const auto path = node["save_validation_outputs"].as(); + opts.save_validation_outputs = std::make_optional(std::filesystem::path{path}); + } + + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, OpenVINOParams& params) { + // FIXME: Worth to separate these two + const auto name = node["name"] ? 
node["name"].as() : node["path"].as(); + fs::path path{name}; + if (path.extension() == ".xml") { + auto bin_path = path; + bin_path.replace_extension(".bin"); + params.path = OpenVINOParams::ModelPath{path.string(), bin_path.string()}; + } else if (path.extension() == ".blob") { + params.path = OpenVINOParams::BlobPath{path.string()}; + } else { + // NB: *.onnx, *.pdpd, and any other format supported in future + params.path = OpenVINOParams::ModelPath{path.string(), "" /*weights*/}; + } + // NB: If "device" isn't presented in config for network, + // the device specified globally will be substitued later on + if (node["device"]) { + params.device = node["device"].as(); + } + + if (node["ip"]) { + params.input_precision = toDepth(node["ip"].as>()); + } + + if (node["op"]) { + params.output_precision = toDepth(node["op"].as>()); + } + + if (node["il"]) { + params.input_layout = node["il"].as>(); + } + + if (node["ol"]) { + params.output_layout = node["ol"].as>(); + } + + if (node["iml"]) { + params.input_model_layout = node["iml"].as>(); + } + + if (node["oml"]) { + params.output_model_layout = node["oml"].as>(); + } + + if (node["config"]) { + params.config = node["config"].as>(); + } + + // NB: Note, it should be handled after "config" is set above + if (node["priority"]) { + params.config.emplace("MODEL_PRIORITY", toPriority(node["priority"].as())); + } + + if (node["nireq"]) { + params.nireq = node["nireq"].as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams::OpenVINO& ov_ep) { + if (node["params"]) { + ov_ep.params_map = node["params"].as>(); + } + if (node["device_type"]) { + std::string device_type = node["device_type"].as(); + // Check if device_type already exists in params_map (collision check) + if (ov_ep.params_map.count("device_type") > 0) { + THROW_ERROR("Configuration error: 'device_type' has already been specified in the params."); + } else { + ov_ep.params_map["device_type"] = device_type; + } + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams::EP& ep) { + const auto ep_name = node["name"].as(); + if (ep_name == "OV") { + ep = node.as(); + } else { + THROW_ERROR("Unsupported \"ep name\" value: " << ep_name); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams& params) { + // FIXME: Worth to separate these two + params.model_path = node["name"] ? node["name"].as() : node["path"].as(); + if (node["session_options"]) { + params.session_options = node["session_options"].as>(); + } + if (node["ep"]) { + params.ep = node["ep"].as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Network& network) { + // NB: Take path stem as network tag + // Note that at this point, it's fine if names aren't unique + const auto name = node["name"].as(); + network.tag = std::filesystem::path{name}.stem().string(); + // NB: OpenVINO is default to keep back compatibility for config syntax + const auto framework = node["framework"] ? 
node["framework"].as() : "openvino"; + if (framework == "openvino") { + // NB: Parse OpenVINO model parameters such as path, device, precision, etc + network.params = node.as(); + } else if (framework == "onnxrt") { + network.params = node.as(); + } else { + THROW_ERROR("Unsupported \"framework:\" value: " << framework); + } + + if (node["random"]) { + network.initializers = node["random"].as>(); + } + if (node["metric"]) { + network.accuracy_metrics = node["metric"].as>(); + } + if (node["input_data"]) { + network.input_data = node["input_data"].as>(); + } + + if (node["output_data"]) { + network.output_data = node["output_data"].as>(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, CPUOp& op) { + // TODO: Assert there are no more options provided + op.time_in_us = node["time_in_us"] ? node["time_in_us"].as() : 0u; + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, InferOp& op) { + const auto framework = node["framework"] ? node["framework"].as() : "openvino"; + if (framework == "openvino") { + // NB: Parse OpenVINO model parameters such as path, device, precision, etc + op.params = node.as(); + } else if (framework == "onnxrt") { + op.params = node.as(); + } else { + THROW_ERROR("Unsupported \"framework:\" value: " << framework); + } + + if (node["random"]) { + op.initializers = node["random"].as>(); + } + if (node["metric"]) { + op.accuracy_metrics = node["metric"].as>(); + } + if (node["input_data"]) { + op.input_data = node["input_data"].as>(); + } + + if (node["output_data"]) { + op.output_data = node["output_data"].as>(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, OpDesc& opdesc) { + opdesc.tag = node["tag"].as(); + auto type = node["type"] ? node["type"].as() : "Infer"; + auto repeat_count = node["repeat_count"] ? node["repeat_count"].as() : 1u; + ASSERT(repeat_count > 0) + if (repeat_count > 1u) { + // NB: repeat_count > 1u assume that "Compound" operation will be used + type = "Compound"; + } + if (type == "Infer") { + opdesc.op = node.as(); + } else if (type == "CPU") { + opdesc.op = node.as(); + } else if (type == "Compound") { + std::vector> connections; + if (node["connections"]) { + connections = node["connections"].as>>(); + } + auto op_descs = node["op_desc"].as>(); + InferenceParamsMap inference_params; + for (const auto& op_desc : op_descs) { + if (std::holds_alternative(op_desc.op)) { + inference_params.emplace(op_desc.tag, std::get(op_desc.op).params); + } + } + opdesc.op = CompoundOp{repeat_count, std::move(inference_params), buildGraph(op_descs, connections)}; + } else { + THROW_ERROR("Unsupported operation type: \"" << type << "\"!"); + } + return true; + } +}; + +} // namespace YAML + +static std::vector> parseNetworks(const YAML::Node& node) { + ASSERT(node.IsSequence()); + TagsManager tgs_mngr; + std::vector> networks_list; + for (const auto& subnode : node) { + if (subnode.IsSequence()) { + networks_list.push_back(subnode.as>()); + } else { + networks_list.push_back({subnode.as()}); + } + // NB: Ensure all network tags are unique! 
+ for (auto& network : networks_list.back()) { + network.tag = tgs_mngr.add(network.tag); + } + } + return networks_list; +} + +static ScenarioGraph buildGraph(const std::vector>& networks_list, const uint32_t delay_in_us) { + ScenarioGraph graph; + auto src = graph.makeSource(); + std::vector producers = {src}; + for (uint32_t list_idx = 0; list_idx < networks_list.size(); ++list_idx) { + auto& networks = networks_list[list_idx]; + // NB: Delay if specified, will not be added to the beginning + // and end of the stream, ONLY between models + if (list_idx != 0u && delay_in_us != 0u) { + auto delay = graph.makeDelay(delay_in_us); + for (auto p : producers) { + graph.link(p, delay); + } + producers = {delay.out()}; + } + std::vector curr_outs; + curr_outs.reserve(networks.size()); + for (uint32_t net_idx = 0; net_idx < networks.size(); ++net_idx) { + auto infer = graph.makeInfer(networks[net_idx].tag); + for (auto p : producers) { + graph.link(p, infer); + } + curr_outs.push_back(infer.out()); + } + producers = std::move(curr_outs); + } + return graph; +} + +static InferenceParams adjustParams(OpenVINOParams&& params, const GlobalOptions& opts, const ReplaceBy& replace_by) { + // NB: Adjust the model path according to base directories provided for blobs & models + auto& path = params.path; + if (std::holds_alternative(path)) { + auto& model_path = std::get(path); + fs::path model_file_path{model_path.model}; + fs::path bin_file_path{model_path.bin}; + if (model_file_path.is_relative()) { + model_path.model = (opts.model_dir / model_file_path).string(); + } + if (!model_path.bin.empty() && bin_file_path.is_relative()) { + model_path.bin = (opts.model_dir / bin_file_path).string(); + } + } else { + ASSERT(std::holds_alternative(path)); + auto& blob_path = std::get(path); + fs::path blob_file_path{blob_path.blob}; + if (blob_file_path.is_relative()) { + blob_path.blob = (opts.blob_dir / blob_file_path).string(); + } + } + // NB: Adjust device property based on opts.device_name or replace_by + + if (!replace_by.device.empty()) { + // NB: ReplaceBy has priority - overwrite + params.device = replace_by.device; + } else if (params.device.empty()) { + // NB: Otherwise, if empty - take the value from global device name + params.device = opts.device_name; + } + + // NB: Compiler type is only relevant for NPU device + if (params.device == "NPU") { + // NB: Don't overwrite compiler type if it already has been + // specified explicitly for particular model + if (const auto it = params.config.find("NPU_COMPILER_TYPE"); it == params.config.end()) { + params.config.emplace("NPU_COMPILER_TYPE", opts.compiler_type); + } + } + return std::move(params); +} + +static InferenceParams adjustParams(ONNXRTParams&& params, const GlobalOptions& opts) { + fs::path model_file_path{params.model_path}; + if (model_file_path.is_relative()) { + params.model_path = (opts.model_dir / model_file_path).string(); + } + return std::move(params); +} + +static InferenceParams adjustParams(InferenceParams&& params, const GlobalOptions& opts, const ReplaceBy& replace_by) { + if (std::holds_alternative(params)) { + return adjustParams(std::get(std::move(params)), opts, replace_by); + } + ASSERT(std::holds_alternative(params)); + return adjustParams(std::get(std::move(params)), opts); +} + +static StreamDesc parseStream(const YAML::Node& node, const GlobalOptions& opts, const std::string& default_name, + const ReplaceBy& replace_by) { + StreamDesc stream; + + // FIXME: Create a function for the duplicate code below + stream.name = 
node["name"] ? node["name"].as() : default_name; + stream.frames_interval_in_us = 0u; + if (node["frames_interval_in_ms"]) { + stream.frames_interval_in_us = node["frames_interval_in_ms"].as() * 1000u; + if (node["target_fps"]) { + THROW_ERROR("Both \"target_fps\" and \"frames_interval_in_ms\" are defined for the stream: \"" + << stream.name << "\"! Please specify only one of them as they are mutually exclusive."); + } + } else if (node["target_fps"]) { + uint32_t target_fps = node["target_fps"].as(); + stream.frames_interval_in_us = (target_fps != 0) ? (1000u * 1000u / target_fps) : 0; + } + + if (node["target_latency_in_ms"]) { + stream.target_latency = std::make_optional(node["target_latency_in_ms"].as()); + if (stream.target_latency < 0) { + THROW_ERROR("\"target_latency_in_ms\" is negative for the stream: \"" << stream.name << "\"!"); + } + } + if (node["exec_time_in_secs"]) { + const auto exec_time_in_secs = node["exec_time_in_secs"].as(); + stream.criterion = std::make_shared(exec_time_in_secs * 1'000'000); + } + if (node["iteration_count"]) { + const auto iteration_count = node["iteration_count"].as(); + stream.criterion = std::make_shared(iteration_count); + } + + auto networks_list = parseNetworks(node["network"]); + const auto delay_in_us = node["delay_in_us"] ? node["delay_in_us"].as() : 0u; + stream.graph = buildGraph(networks_list, delay_in_us); + // NB: Collect network parameters + for (auto& networks : networks_list) { + for (auto& network : networks) { + stream.metrics_map.emplace(network.tag, std::move(network.accuracy_metrics)); + stream.initializers_map.emplace(network.tag, std::move(network.initializers)); + stream.input_data_map.emplace(network.tag, std::move(network.input_data)); + stream.output_data_map.emplace(network.tag, std::move(network.output_data)); + stream.infer_params_map.emplace(network.tag, adjustParams(std::move(network.params), opts, replace_by)); + } + } + return stream; +} + +using DependencyMap = std::unordered_map>; + +static ScenarioGraph buildGraph(const std::vector& op_descs, + const std::vector>& connections) { + // NB: Build the graph based on list of operations and connections between them + // + // The algorithm is straightforward: + // 1) For every operation create corresponding graph node + // 2) Go though connections and create the dependency map + // 3) Go through every operation and connect with its dependencies + // 3.1) If operation has no dependencies, connect it directly with the source + + // NB: For the fast access to operation node by name + std::unordered_map op_node_map; + // NB: To store the list of dependencies for every operation + std::unordered_map> dependency_map; + + // (1) For every operation create corresponding graph node + ScenarioGraph graph; + for (const auto& desc : op_descs) { + // NB: Initialize dependency list for every operation + dependency_map[desc.tag]; + // FIXME: Implement visitor + if (std::holds_alternative(desc.op)) { + op_node_map.emplace(desc.tag, graph.makeInfer(desc.tag)); + } else if (std::holds_alternative(desc.op)) { + const auto& compound = std::get(desc.op); + op_node_map.emplace( + desc.tag, graph.makeCompound(compound.repeat_count, compound.subgraph, compound.params, desc.tag)); + } else { + ASSERT(std::holds_alternative(desc.op)); + const auto& cpu = std::get(desc.op); + op_node_map.emplace(desc.tag, graph.makeDelay(cpu.time_in_us)); + } + } + + // (2) Go though connections and create the dependency map + for (const auto& tags : connections) { + if (tags.size() < 2) { + 
THROW_ERROR("Connections list must be at least size of 2!"); + } + for (uint32_t i = 1; i < tags.size(); ++i) { + // [A, B, C] - means B depends on A, and C depends on B + auto deps_it = dependency_map.find(tags[i]); + if (deps_it == dependency_map.end()) { + THROW_ERROR("Operation \"" << tags[i] << "\" hasn't been registered in op_desc list!"); + } + if (tags[i - 1] == tags[i]) { + THROW_ERROR("Operation \"" << tags[i] << "\" cannot be connected with itself!"); + } + auto& dep_set = deps_it->second; + // NB: Check if such connection already exists + auto is_inserted = deps_it->second.emplace(tags[i - 1]).second; + if (!is_inserted) { + THROW_ERROR("Connection between \"" << tags[i - 1] << "\" and \"" << tags[i] + << "\" operations already exists!"); + } + } + } + + // (3) Go through every operation and connect with its dependencies + auto src = graph.makeSource(); + for (const auto& [tag, deps] : dependency_map) { + auto op = op_node_map.at(tag); + // (3.1) If operation has no dependencies, connect it directly to the source + if (deps.empty()) { + graph.link(src, op); + } else { + for (auto dep_tag : deps) { + auto dep = op_node_map.at(dep_tag); + graph.link(dep.out(), op); + } + } + } + return graph; +} + +static StreamDesc parseAdvancedStream(const YAML::Node& node, const GlobalOptions& opts, + const std::string& default_name, const ReplaceBy& replace_by) { + StreamDesc stream; + + // FIXME: Create a function for the duplicate code below + stream.name = node["name"] ? node["name"].as() : default_name; + stream.frames_interval_in_us = 0u; + if (node["frames_interval_in_ms"]) { + stream.frames_interval_in_us = node["frames_interval_in_ms"].as() * 1000u; + if (node["target_fps"]) { + THROW_ERROR("Both \"target_fps\" and \"frames_interval_in_ms\" are defined for the stream: \"" + << stream.name << "\"! Please specify only one of them as they are mutually exclusive."); + } + } else if (node["target_fps"]) { + uint32_t target_fps = node["target_fps"].as(); + stream.frames_interval_in_us = (target_fps != 0) ? 
(1000u * 1000u / target_fps) : 0; + } + + if (node["target_latency_in_ms"]) { + stream.target_latency = std::make_optional(node["target_latency_in_ms"].as()); + if (stream.target_latency < 0) { + THROW_ERROR("\"target_latency_in_ms\" is negative for the stream: \"" << stream.name << "\"!"); + } + } + if (node["exec_time_in_secs"]) { + const auto exec_time_in_secs = node["exec_time_in_secs"].as(); + stream.criterion = std::make_shared(exec_time_in_secs * 1'000'000); + } + if (node["iteration_count"]) { + const auto iteration_count = node["iteration_count"].as(); + stream.criterion = std::make_shared(iteration_count); + } + + auto op_descs = node["op_desc"].as>(); + std::vector> connections; + if (node["connections"]) { + connections = node["connections"].as>>(); + } + + for (auto& desc : op_descs) { + if (std::holds_alternative(desc.op)) { + auto&& infer = std::get(desc.op); + stream.metrics_map.emplace(desc.tag, std::move(infer.accuracy_metrics)); + stream.initializers_map.emplace(desc.tag, std::move(infer.initializers)); + stream.input_data_map.emplace(desc.tag, std::move(infer.input_data)); + stream.output_data_map.emplace(desc.tag, std::move(infer.output_data)); + stream.infer_params_map.emplace(desc.tag, adjustParams(std::move(infer.params), opts, replace_by)); + } + if (std::holds_alternative(desc.op)) { + auto& compound = std::get(desc.op); + InferenceParamsMap& params_map = compound.params; + for (auto& pair : params_map) { + pair.second = adjustParams(std::move(pair.second), opts, replace_by); + } + } + } + + stream.graph = buildGraph(op_descs, connections); + return stream; +} + +static std::vector parseStreams(const YAML::Node& node, const GlobalOptions& opts, + const ReplaceBy& replace_by) { + std::vector streams; + uint32_t stream_idx = 0; + for (const auto& subnode : node) { + const auto default_name = std::to_string(stream_idx); + auto stream = subnode["op_desc"] ? parseAdvancedStream(subnode, opts, default_name, replace_by) + : parseStream(subnode, opts, default_name, replace_by); + streams.push_back(std::move(stream)); + ++stream_idx; + } + return streams; +} + +static std::vector parseScenarios(const YAML::Node& node, const GlobalOptions& opts, + const ReplaceBy& replace_by) { + std::vector scenarios; + for (const auto& subnode : node) { + ScenarioDesc scenario; + scenario.name = subnode["name"] ? subnode["name"].as() + : "multi_inference_" + std::to_string(scenarios.size()); + scenario.streams = parseStreams(subnode["input_stream_list"], opts, replace_by); + + if (opts.save_validation_outputs) { + for (auto& stream : scenario.streams) { + const auto& root_path = opts.save_validation_outputs.value(); + std::string stream_dir = "stream_" + stream.name; + std::filesystem::path stream_outputs_path = root_path / scenario.name / stream_dir; + stream.per_iter_outputs_path = std::make_optional(std::move(stream_outputs_path)); + } + } + scenarios.push_back(std::move(scenario)); + } + return scenarios; +} + +Config parseConfig(const YAML::Node& node, const ReplaceBy& replace_by) { + const auto global_opts = node.as(); + + // FIXME: Perhaps should be done somewhere else... 
+ Logger::global_lvl = toLogLevel(global_opts.log_level); + + Config config; + config.scenarios = parseScenarios(node["multi_inference"], global_opts, replace_by); + + ASSERT(!config.scenarios.empty()); + if (node["metric"]) { + config.metric = node["metric"].as(); + } + if (node["random"]) { + config.initializer = node["random"].as(); + } + + config.disable_high_resolution_timer = false; + if (node["disable_high_resolution_waitable_timer"]) { + config.disable_high_resolution_timer = node["disable_high_resolution_waitable_timer"].as(); + } + return config; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp new file mode 100644 index 00000000000000..1dec64ece423b6 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp @@ -0,0 +1,12 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "parser/parser.hpp" + +#include + +Config parseConfig(const YAML::Node& root, const ReplaceBy& replace_by); diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp new file mode 100644 index 00000000000000..b4f48b7415615c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp @@ -0,0 +1,20 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "parser/parser.hpp" +#include "parser/config.hpp" + +#include "utils/error.hpp" + +#include + +ScenarioParser::ScenarioParser(const std::string& filepath): m_filepath(filepath) { +} + +Config ScenarioParser::parseScenarios(const ReplaceBy& replace_by) { + const auto root = YAML::LoadFile(m_filepath); + // TODO: Extend to any other config syntax + return parseConfig(root, replace_by); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp new file mode 100644 index 00000000000000..ec228ee8070fd3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp @@ -0,0 +1,61 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "scenario/criterion.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" + +struct StreamDesc { + // NB: Commons parameters for all modes + std::string name; + uint64_t frames_interval_in_us; + ScenarioGraph graph; + InferenceParamsMap infer_params_map; + ITermCriterion::Ptr criterion; + // Mode specific params + ModelsAttrMap metrics_map; + ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + std::optional target_latency; + std::optional per_iter_outputs_path; +}; + +struct ScenarioDesc { + std::string name; + std::vector streams; + bool disable_high_resolution_timer; +}; + +struct Config { + IRandomGenerator::Ptr initializer; + IAccuracyMetric::Ptr metric; + bool disable_high_resolution_timer; + std::vector scenarios; +}; + +struct ReplaceBy { + std::string device; +}; + +struct IScenarioParser { + virtual Config parseScenarios(const ReplaceBy& replace_by) = 0; + virtual ~IScenarioParser() = default; +}; + +class ScenarioParser : public IScenarioParser { +public: + ScenarioParser(const std::string& filepath); + Config parseScenarios(const ReplaceBy& replace_by) override; + +private: + std::string m_filepath; +}; diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/result.cpp b/src/plugins/intel_npu/tools/protopipe/src/result.cpp new file mode 100644 index 00000000000000..23c6c315eaf123 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/result.cpp @@ -0,0 +1,22 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "result.hpp" +#include "utils/error.hpp" + +Result::Result(const Error& error): m_status(error){}; +Result::Result(const Success& success): m_status(success){}; + +Result::operator bool() const { + return std::holds_alternative(m_status); +} + +std::string Result::str() const { + if (std::holds_alternative(m_status)) { + return std::get(m_status).msg; + } + ASSERT(std::holds_alternative(m_status)); + return std::get(m_status).reason; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/result.hpp b/src/plugins/intel_npu/tools/protopipe/src/result.hpp new file mode 100644 index 00000000000000..08cbd7b06fc940 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/result.hpp @@ -0,0 +1,30 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +struct Success { + std::string msg; +}; +struct Error { + std::string reason; +}; + +class Result { +public: + Result() = default; // monostate (empty) + Result(const Error& error); + Result(const Success& success); + + operator bool() const; + std::string str() const; + +private: + using Status = std::variant; + Status m_status; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp new file mode 100644 index 00000000000000..9f779b8dab8cfd --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp @@ -0,0 +1,121 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/accuracy_metrics.hpp" + +#include "utils/error.hpp" + +Norm::Norm(const double tolerance): m_tolerance(tolerance){}; + +Result Norm::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + ASSERT(lhsf32.total() == rhsf32.total()); + auto value = cv::norm(lhsf32, rhsf32); + + if (value > m_tolerance) { + std::stringstream ss; + ss << value << " > " << m_tolerance; + return Error{ss.str()}; + } + return Success{}; +} + +std::string Norm::str() { + std::stringstream ss; + ss << "Norm{tolerance: " << m_tolerance << "}"; + return ss.str(); +} + +Cosine::Cosine(const double threshold): m_threshold(threshold){}; + +Result Cosine::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + ASSERT(lhsf32.total() == rhsf32.total()); + const auto* lhsptr = lhsf32.ptr(); + const auto* rhsptr = rhsf32.ptr(); + + double lhsdot = 0.0, rhsdot = 0.0, numr = 0.0; + for (size_t i = 0; i < lhsf32.total(); ++i) { + numr += lhsptr[i] * rhsptr[i]; + lhsdot += lhsptr[i] * lhsptr[i]; + rhsdot += rhsptr[i] * rhsptr[i]; + } + + const double eps = 1e-9; + if (lhsdot < eps || rhsdot < eps) { + return Error{"Division by zero!"}; + } + + const double similarity = numr / (std::sqrt(lhsdot) * std::sqrt(rhsdot)); + if (similarity > (1.0 + eps) || similarity < -(1.0 + eps)) { + std::stringstream ss; + ss << "Invalid result " << similarity << " (valid range [-1 : +1])"; + return Error{ss.str()}; + } + + 
+    if (m_threshold - eps > similarity) {
+        std::stringstream ss;
+        ss << similarity << " < " << m_threshold;
+        return Error{ss.str()};
+    }
+    return Success{};
+}
+
+std::string Cosine::str() {
+    std::stringstream ss;
+    ss << "Cosine{threshold: " << m_threshold << "}";
+    return ss.str();
+}
+
+NRMSE::NRMSE(const double tolerance): m_tolerance(tolerance){};
+
+Result NRMSE::compare(const cv::Mat& lhs, const cv::Mat& rhs) {
+    cv::Mat lhsf32, rhsf32;
+    lhs.convertTo(lhsf32, CV_32F);
+    rhs.convertTo(rhsf32, CV_32F);
+
+    const auto size = lhsf32.total();
+    if (size == 0) {
+        std::stringstream ss;
+        ss << "Empty output and reference tensors, nrmse loss set to 0" << std::endl;
+        return Success{};
+    }
+
+    const auto* lhsptr = lhsf32.ptr<float>();
+    const auto* rhsptr = rhsf32.ptr<float>();
+
+    double error = 0.0;
+    float lhsmax = 0.0, rhsmax = 0.0, lhsmin = 0.0, rhsmin = 0.0;
+
+    for (size_t i = 0; i < size; ++i) {
+        const auto diff = lhsptr[i] - rhsptr[i];
+        error += diff * diff;
+        lhsmax = std::max(lhsptr[i], lhsmax);
+        rhsmax = std::max(rhsptr[i], rhsmax);
+        lhsmin = std::min(lhsptr[i], lhsmin);
+        rhsmin = std::min(rhsptr[i], rhsmin);
+    }
+
+    double nrmse = sqrt(error / size) / std::max(0.001f, std::max(lhsmax - lhsmin, rhsmax - rhsmin));
+
+    if (m_tolerance < nrmse) {
+        std::stringstream ss;
+        ss << nrmse << " > " << m_tolerance;
+        return Error{ss.str()};
+    }
+    return Success{};
+}
+
+std::string NRMSE::str() {
+    std::stringstream ss;
+    ss << "nrmse{tolerance: " << m_tolerance << "}";
+    return ss.str();
+}
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp
new file mode 100644
index 00000000000000..010039360ecb9b
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp
@@ -0,0 +1,52 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <opencv2/core.hpp>
+
+#include "result.hpp"
+
+struct IAccuracyMetric {
+    using Ptr = std::shared_ptr<IAccuracyMetric>;
+    virtual Result compare(const cv::Mat& lhs, const cv::Mat& rhs) = 0;
+    virtual std::string str() = 0;
+    virtual ~IAccuracyMetric() = default;
+};
+
+class Norm : public IAccuracyMetric {
+public:
+    using Ptr = std::shared_ptr<Norm>;
+    explicit Norm(const double tolerance);
+    Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override;
+    std::string str() override;
+
+private:
+    double m_tolerance;
+};
+
+class Cosine : public IAccuracyMetric {
+public:
+    using Ptr = std::shared_ptr<Cosine>;
+    explicit Cosine(const double threshold);
+    Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override;
+    std::string str() override;
+
+private:
+    double m_threshold;
+};
+
+class NRMSE : public IAccuracyMetric {
+public:
+    using Ptr = std::shared_ptr<NRMSE>;
+    explicit NRMSE(const double tolerance);
+    Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override;
+    std::string str() override;
+
+private:
+    double m_tolerance;
+};
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp
new file mode 100644
index 00000000000000..b348fe92e811cb
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp
@@ -0,0 +1,72 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "criterion.hpp"
+
+#include <memory>
+
+#include "utils/utils.hpp"
+
+Iterations::Iterations(uint64_t num_iters): m_num_iters(num_iters), m_counter(0) {
+}
+
+bool Iterations::check() const {
+    return m_counter != m_num_iters;
+}
+
+void Iterations::update() {
+    ++m_counter;
+}
+
+void Iterations::init() {
+    m_counter = 0;
+}
+
+ITermCriterion::Ptr Iterations::clone() const {
+    return std::make_shared<Iterations>(*this);
+}
+
+TimeOut::TimeOut(uint64_t time_in_us): m_time_in_us(time_in_us), m_start_ts(-1) {
+}
+
+bool TimeOut::check() const {
+    return utils::timestamp() - m_start_ts < m_time_in_us;
+}
+
+void TimeOut::update(){/* do nothing */};
+
+void TimeOut::init() {
+    m_start_ts = utils::timestamp();
+}
+
+ITermCriterion::Ptr TimeOut::clone() const {
+    return std::make_shared<TimeOut>(*this);
+}
+
+CombinedCriterion::CombinedCriterion(ITermCriterion::Ptr lhs, ITermCriterion::Ptr rhs): m_lhs(lhs), m_rhs(rhs) {
+}
+
+CombinedCriterion::CombinedCriterion(const CombinedCriterion& other) {
+    m_lhs = other.m_lhs->clone();
+    m_rhs = other.m_rhs->clone();
+}
+
+bool CombinedCriterion::check() const {
+    return m_lhs->check() && m_rhs->check();
+}
+
+void CombinedCriterion::update() {
+    m_lhs->update();
+    m_rhs->update();
+};
+
+void CombinedCriterion::init() {
+    m_lhs->init();
+    m_rhs->init();
+}
+
+ITermCriterion::Ptr CombinedCriterion::clone() const {
+    return std::make_shared<CombinedCriterion>(*this);
+}
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp
new file mode 100644
index 00000000000000..28b440a7b3b0a3
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp
@@ -0,0 +1,58 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+
+struct ITermCriterion {
+    using Ptr = std::shared_ptr<ITermCriterion>;
+    virtual void init() = 0;
+    virtual void update() = 0;
+    virtual bool check() const = 0;
+    virtual ITermCriterion::Ptr clone() const = 0;
+};
+
+class Iterations : public ITermCriterion {
+public:
+    Iterations(uint64_t num_iters);
+
+    void init() override;
+    void update() override;
+    bool check() const override;
+    ITermCriterion::Ptr clone() const override;
+
+private:
+    uint64_t m_num_iters;
+    uint64_t m_counter;
+};
+
+class TimeOut : public ITermCriterion {
+public:
+    TimeOut(uint64_t time_in_us);
+
+    void init() override;
+    void update() override;
+    bool check() const override;
+    ITermCriterion::Ptr clone() const override;
+
+private:
+    uint64_t m_time_in_us;
+    uint64_t m_start_ts;
+};
+
+class CombinedCriterion : public ITermCriterion {
+public:
+    CombinedCriterion(ITermCriterion::Ptr lhs, ITermCriterion::Ptr rhs);
+    CombinedCriterion(const CombinedCriterion& other);
+
+    void init() override;
+    void update() override;
+    bool check() const override;
+    ITermCriterion::Ptr clone() const override;
+
+private:
+    ITermCriterion::Ptr m_lhs, m_rhs;
+};
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp
new file mode 100644
index 00000000000000..c1648f3755cbfd
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "scenario/inference.hpp"
+
+#include <algorithm>
+#include <iterator>
+
+std::vector<std::string> extractLayerNames(const std::vector<LayerInfo>& layers) {
+    std::vector<std::string> names;
+    std::transform(layers.begin(), layers.end(), std::back_inserter(names), [](const auto& layer) {
+        return layer.name;
+    });
+    return names;
+}
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp
new file mode 100644
index 00000000000000..c4fd85aa26721a
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp
@@ -0,0 +1,111 @@
+//
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <variant>
+#include <vector>
+
+template <typename T>
+using AttrMap = std::map<std::string, T>;
+// NB: This type is supposed to be used to hold in/out layers
+// attributes such as precision, layout, shape etc.
+//
+// User can provide attributes either:
+// 1. std::monostate - No value specified explicitly.
+// 2. Attr - value specified explicitly that should be broadcasted to all layers.
+// 3. AttrMap[str->T] - map specifies value for particular layer.
+template <typename Attr>
+using LayerVariantAttr = std::variant<std::monostate, AttrMap<Attr>, Attr>;
+
+// NB: Map of model tag -> LayerVariantAttr
+template <typename Attr>
+using ModelsAttrMap = std::unordered_map<std::string, LayerVariantAttr<Attr>>;
+
+struct LayerInfo {
+    std::string name;
+    std::vector<int> dims;
+    int prec;
+};
+using LayersInfo = std::vector<LayerInfo>;
+
+std::vector<std::string> extractLayerNames(const std::vector<LayerInfo>& layers);
+
+template <typename K, typename V>
+std::optional<V> lookUp(const std::map<K, V>& map, const K& key) {
+    const auto it = map.find(key);
+    if (it == map.end()) {
+        return {};
+    }
+    return std::make_optional(std::move(it->second));
+}
+
+template <typename T>
+static AttrMap<T> unpackLayerAttr(const LayerVariantAttr<T>& attr, const std::vector<std::string>& layer_names,
+                                  const std::string& attrname) {
+    AttrMap<T> attrmap;
+    if (std::holds_alternative<T>(attr)) {
+        auto value = std::get<T>(attr);
+        for (const auto& name : layer_names) {
+            attrmap.emplace(name, value);
+        }
+    } else if (std::holds_alternative<AttrMap<T>>(attr)) {
+        attrmap = std::get<AttrMap<T>>(attr);
+        std::unordered_set<std::string> layers_set{layer_names.begin(), layer_names.end()};
+        for (const auto& [name, attr] : attrmap) {
+            const auto it = layers_set.find(name);
+            if (it == layers_set.end()) {
+                throw std::logic_error("Failed to find layer \"" + name + "\" to specify " + attrname);
+            }
+        }
+    }
+    return attrmap;
+}
+
+struct OpenVINOParams {
+    struct ModelPath {
+        std::string model;
+        std::string bin;
+    };
+    struct BlobPath {
+        std::string blob;
+    };
+    using Path = std::variant<ModelPath, BlobPath>;
+
+    // NB: Mandatory parameters
+    Path path;
+    std::string device;
+    // NB: Optional parameters
+    LayerVariantAttr<int> input_precision;
+    LayerVariantAttr<int> output_precision;
+    LayerVariantAttr<std::string> input_layout;
+    LayerVariantAttr<std::string> output_layout;
+    LayerVariantAttr<std::string> input_model_layout;
+    LayerVariantAttr<std::string> output_model_layout;
+    std::map<std::string, std::string> config;
+    size_t nireq = 1u;
+};
+
+struct ONNXRTParams {
+    std::string model_path;
+    std::map<std::string, std::string> session_options;
+    // TODO: Extend for other available ONNXRT EP (e.g DML, CoreML, TensorRT, etc)
+    struct OpenVINO {
+        std::map<std::string, std::string> params_map;
+    };
+    // NB: std::monostate stands for the default MLAS Execution provider
+    using EP = std::variant<std::monostate, OpenVINO>;
+    EP ep;
+};
+
+using InferenceParams = std::variant<std::monostate, OpenVINOParams, ONNXRTParams>;
+using InferenceParamsMap = std::unordered_map<std::string, InferenceParams>;
diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp
new file mode 100644
index 00000000000000..96984966fbc6fc
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "scenario/scenario_graph.hpp"
+
+DataNode::DataNode(Graph* graph, NodeHandle nh): m_nh(nh) {
+    graph->meta(nh).set(Data{});
+};
+
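// Aside (annotation, not part of the patch): the LayerVariantAttr<T> broadcasting rules
// implemented by unpackLayerAttr() in inference.hpp above, shown on a hypothetical model
// with layers "data" and "prob":
//
//     LayerVariantAttr<int> all = CV_32F;                          // one value for every layer
//     LayerVariantAttr<int> one = AttrMap<int>{{"prob", CV_8U}};   // per-layer map
//     LayerVariantAttr<int> none;                                  // std::monostate, nothing set
//
//     const std::vector<std::string> names = {"data", "prob"};
//     auto a = unpackLayerAttr(all, names, "input precision");     // {data: CV_32F, prob: CV_32F}
//     auto b = unpackLayerAttr(one, names, "input precision");     // {prob: CV_8U}
//     auto c = unpackLayerAttr(none, names, "input precision");    // {} (empty map)
//
// A name in the map that does not belong to `names` makes unpackLayerAttr() throw std::logic_error.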
+OpNode::OpNode(NodeHandle nh, DataNode out_data): m_nh(nh), m_out_data(out_data) { +} + +DataNode OpNode::out() { + return m_out_data; +} + +DataNode ScenarioGraph::makeSource() { + NodeHandle nh = m_graph.create(); + m_graph.meta(nh).set(Source{}); + return DataNode(&m_graph, nh); +} + +void ScenarioGraph::link(DataNode data, OpNode op) { + m_graph.link(data.m_nh, op.m_nh); +} + +OpNode ScenarioGraph::makeInfer(const std::string& tag) { + return makeOp(Infer{tag}); +} + +OpNode ScenarioGraph::makeDelay(uint64_t time_in_us) { + return makeOp(Delay{time_in_us}); +} + +OpNode ScenarioGraph::makeCompound(uint64_t repeat_count, ScenarioGraph subgraph, InferenceParamsMap infer_params, + const std::string& tag) { + return makeOp(Compound{repeat_count, subgraph, infer_params, tag}); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp new file mode 100644 index 00000000000000..a9b6523a6be52d --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp @@ -0,0 +1,102 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "graph.hpp" +#include "scenario/accuracy_metrics.hpp" +#include "scenario/inference.hpp" +#include "utils/data_providers.hpp" + +struct Source {}; +struct Data {}; + +class DataNode { +public: + DataNode(Graph* graph, NodeHandle nh); + +private: + friend class ScenarioGraph; + NodeHandle m_nh; +}; + +class OpNode; +template <> +struct std::hash; + +class OpNode { +public: + OpNode(NodeHandle nh, DataNode out_data); + DataNode out(); + +private: + friend class ScenarioGraph; + friend struct std::hash; + NodeHandle m_nh; + DataNode m_out_data; +}; + +namespace std { +template <> +struct hash { + uint64_t operator()(const OpNode& op_node) const { + return std::hash()(op_node.m_nh); + } +}; +} // namespace std + +class ScenarioGraph { +public: + DataNode makeSource(); + OpNode makeInfer(const std::string& tag); + OpNode makeDelay(uint64_t time_in_us); + OpNode makeCompound(uint64_t repeat_count, ScenarioGraph subgraph, InferenceParamsMap infer_params, + const std::string& tag); + + void link(DataNode data, OpNode op); + + template + void pass(F&& f) { + f(m_graph); + } + +private: + template + OpNode makeOp(Kind&& kind); + +private: + Graph m_graph; +}; + +struct Infer { + std::string tag; +}; + +struct Delay { + uint64_t time_in_us; +}; + +struct Compound { + uint64_t repeat_count; + ScenarioGraph subgraph; + InferenceParamsMap infer_params; + std::string tag; +}; + +struct Op { + using Kind = std::variant; + Kind kind; +}; + +template +OpNode ScenarioGraph::makeOp(Kind&& kind) { + auto op_nh = m_graph.create(); + auto out_nh = m_graph.create(); + m_graph.meta(op_nh).set(Op{std::forward(kind)}); + m_graph.link(op_nh, out_nh); + return OpNode(op_nh, DataNode(&m_graph, out_nh)); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp new file mode 100644 index 00000000000000..ad0abc7fe89f9b --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp @@ -0,0 +1,42 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/computation.hpp" + +Computation::Computation(cv::GComputation&& comp, cv::GCompileArgs&& args, std::vector&& metas, GraphDesc&& desc) + : m_comp(std::move(comp)), + 
m_compile_args(std::move(args)), + m_out_meta(std::move(metas)), + m_desc(std::move(desc)) { +} + +uint32_t Computation::getMaxParallelBranches() const { + return m_desc.max_parallel_branches; +} + +const std::vector& Computation::getOutMeta() const { + return m_out_meta; +} + +cv::GCompiled Computation::compile(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args) { + auto compile_args = m_compile_args; + compile_args += std::move(args); + return m_comp.compile(std::move(in_meta), std::move(compile_args)); +} + +cv::GStreamingCompiled Computation::compileStreaming(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args) { + auto compile_args = m_compile_args; + compile_args += std::move(args); + return m_comp.compileStreaming(std::move(in_meta), std::move(compile_args)); +} + +cv::GMetaArgs descr_of(const std::vector& sources) { + cv::GMetaArgs meta; + meta.reserve(sources.size()); + for (auto src : sources) { + meta.push_back(src->descr_of()); + } + return meta; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp new file mode 100644 index 00000000000000..f9eba3b8c95a5f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp @@ -0,0 +1,36 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "graph.hpp" +#include "simulation/dummy_source.hpp" + +#include +#include + +class Computation { +public: + // NB: Holds information about Graph structure + struct GraphDesc { + const uint32_t max_parallel_branches; + }; + + Computation(cv::GComputation&& comp, cv::GCompileArgs&& args, std::vector&& metas, GraphDesc&& desc); + + uint32_t getMaxParallelBranches() const; + const std::vector& getOutMeta() const; + + cv::GCompiled compile(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args = {}); + cv::GStreamingCompiled compileStreaming(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args = {}); + +private: + cv::GComputation m_comp; + cv::GCompileArgs m_compile_args; + std::vector m_out_meta; + GraphDesc m_desc; +}; + +cv::GMetaArgs descr_of(const std::vector& sources); diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp new file mode 100644 index 00000000000000..d43a84ef5fe3a8 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp @@ -0,0 +1,462 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/computation_builder.hpp" +#include "simulation/layers_reader.hpp" +#include "simulation/operations.hpp" +#include "simulation/performance_mode.hpp" +#include "simulation/simulation.hpp" + +#include "utils/error.hpp" + +#include + +struct OpBuilder { + void build(NodeHandle nh, const Infer& infer); + void build(NodeHandle nh, const Delay& delay); + void build(NodeHandle nh, const Compound& compound); + + Graph& graph; + IBuildStrategy::Ptr strategy; + const InferenceParamsMap& params_map; +}; + +void OpBuilder::build(NodeHandle nh, const Compound& compound) { + // Retrieving destination nodes of the current node nh + auto out_nhs = nh->dstNodes(); + + // NB: The Dummy node ensures proper handling of multiple inputs + auto dummy_nh = graph.create(); + auto provider = std::make_shared(utils::createRandom({1}, CV_8U)); + DummyCall dummy_call{{provider}, 0}; + 
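// Aside (annotation, not part of the patch): a minimal sketch of how the Computation class
// defined above is meant to be consumed; `computation` is assumed to come from
// ComputationBuilder::build(...):
//
//     std::vector<DummySource::Ptr> sources;
//     sources.push_back(std::make_shared<DummySource>(0u /* no fps limit */, false, false));
//     // descr_of() collects one cv::GMetaArg per source.
//     cv::GMetaArgs metas = descr_of(sources);
//     // The per-call compile args are merged with the args stored inside the Computation.
//     cv::GCompiled sync = computation.compile(std::move(metas));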
graph.meta(dummy_nh).set(GOperation{std::move(dummy_call)}); + auto in_nhs = nh->srcNodes(); + + // removing input edges to go through dummy node and not to compound node + auto src_edges = nh->srcEdges(); + for (size_t i = 0; i < src_edges.size(); ++i) { + graph.remove(src_edges[i]); + } + + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], dummy_nh)).set(InputIdx{i}); // Linking in_nhs with dummy_nh + } + + auto dummy_out_nh = graph.create(); // Creating output dunmmy node + graph.meta(graph.link(dummy_nh, dummy_out_nh)) + .set(OutputIdx{0u}); // linking dummy node handle and output dummy node handle + graph.meta(dummy_out_nh).set(GData{}); + graph.meta(graph.link(dummy_out_nh, nh)).set(InputIdx{0u}); + + ASSERT(nh->dstEdges().size() == 1u); + auto dst_edge = nh->dstEdges().front(); + graph.meta(dst_edge).set(OutputIdx{0u}); + + graph.meta(graph.link(nh, out_nhs.front())).set(OutputIdx{0u}); + + ModelsAttrMap input_data_map; + ModelsAttrMap initializers_map; + + for (const auto& [tag, params] : compound.infer_params) { + input_data_map[tag]; + initializers_map[tag]; + } + + PerformanceSimulation::Options opts{ + nullptr, // global_initializer + initializers_map, + input_data_map, + true, // inference_only + {} // target latency + }; + + Simulation::Config cfg{compound.tag, + 0u, // frames_interval_in_ms + false, // disable_high_resolution_timer + compound.subgraph, compound.infer_params}; + + auto compiled = std::make_shared(std::move(cfg), std::move(opts)) + ->compileSync(false /*drop_frames*/); + auto term_criterion = std::make_shared(compound.repeat_count); + auto f = [compiled, term_criterion]() { + compiled->run(term_criterion); + }; + + CompoundCall compound_call{f}; + graph.meta(nh).set(GOperation{std::move(compound_call)}); +} + +void OpBuilder::build(NodeHandle nh, const Delay& delay) { + auto in_nhs = nh->srcNodes(); + auto out_nhs = nh->dstNodes(); + // FIXME: Once nh is removed, delay info is no longer alive!!! 
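// (annotation, not part of the patch) This is why the next line copies time_in_us into a
// local before graph.remove(nh) is called: `delay` refers to metadata owned by the node,
// and removing the node would leave that reference dangling.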
+ const auto time_in_us = delay.time_in_us; + graph.remove(nh); + + auto delay_nh = graph.create(); + auto provider = std::make_shared(utils::createRandom({1}, CV_8U)); + graph.meta(delay_nh).set(GOperation{DummyCall{{provider}, time_in_us}}); + + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], delay_nh)).set(InputIdx{i}); + } + graph.meta(graph.link(delay_nh, out_nhs.front())).set(OutputIdx{0u}); +} + +void OpBuilder::build(NodeHandle nh, const Infer& infer) { + const auto& params = params_map.at(infer.tag); + auto [in_layers, out_layers] = LayersReader::readLayers(params); + InferDesc desc{infer.tag, std::move(in_layers), std::move(out_layers)}; + + auto out_nhs = nh->dstNodes(); + ASSERT(out_nhs.size() == 1); + + auto [providers, in_meta, out_meta, disable_copy] = strategy->build(desc); + ASSERT(providers.size() == desc.input_layers.size()); + ASSERT(in_meta.size() == desc.input_layers.size()); + ASSERT(out_meta.size() == desc.output_layers.size()); + + // NB: Check if some of the Delay's was fused to this Infer + uint64_t delay_in_us = 0u; + if (graph.meta(nh).has()) { + delay_in_us = graph.meta(nh).get().time_in_us; + } + + auto dummy_nh = graph.create(); + DummyCall dummy_call{providers, delay_in_us, disable_copy}; + graph.meta(dummy_nh).set(GOperation{std::move(dummy_call)}); + auto in_nhs = nh->srcNodes(); + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], dummy_nh)).set(InputIdx{i}); + } + + graph.remove(nh); + + auto infer_nh = graph.create(); + for (uint32_t layer_idx = 0; layer_idx < desc.input_layers.size(); ++layer_idx) { + // NB: Create dummy out node and link with dummy. + auto dummy_out_nh = graph.create(); + graph.meta(dummy_out_nh) += std::move(in_meta[layer_idx]); + graph.meta(graph.link(dummy_nh, dummy_out_nh)).set(OutputIdx{layer_idx}); + graph.meta(dummy_out_nh).set(GData{}); + // NB: Finally link dummy out with infer + graph.meta(graph.link(dummy_out_nh, infer_nh)).set(InputIdx{layer_idx}); + } + + auto out_nh = out_nhs.front(); + graph.meta(graph.link(infer_nh, out_nh)).set(OutputIdx{0u}); + graph.meta(out_nh) += out_meta.front(); + for (uint32_t layer_idx = 1; layer_idx < desc.output_layers.size(); ++layer_idx) { + auto infer_out_nh = graph.create(); + graph.meta(infer_out_nh) = std::move(out_meta[layer_idx]); + graph.meta(infer_out_nh).set(GData{}); + graph.meta(graph.link(infer_nh, infer_out_nh)).set(OutputIdx{layer_idx}); + } + + InferCall infer_call{desc.tag, extractLayerNames(desc.input_layers), extractLayerNames(desc.output_layers)}; + graph.meta(infer_nh).set(GOperation{std::move(infer_call)}); +}; + +static bool fuseDelay(Graph& graph, NodeHandle nh, const Delay& delay) { + // NB: Current fusing is trivial and applied only for the following case: + // 1) Delay has only single Infer reader + // 2) Infer doesn't have any other writers except Delay + // e.g: [Delay] -> (out) -> [Infer] + + // NB: Access readers of delay output data node. + auto delay_out_nh = nh->dstNodes().front(); + auto out_edges = delay_out_nh->dstEdges(); + // NB: Don't fuse Delay either if it has multiple readers + // or doesn't have readers at all (1) + if (out_edges.size() != 1u) { + return false; + } + + auto out_edge = out_edges.front(); + auto op_nh = out_edge->dstNode(); + auto op = graph.meta(op_nh).get().kind; + // NB: Don't fuse Delay if reader either not an Infer (1) + // or it has other writers except Delay (2). 
+ if (!std::holds_alternative(op) || op_nh->srcEdges().size() != 1u) { + // TODO: Can be also fused to another "delay". + return false; + } + + // NB: Fuse the Delay into Infer: + // 1) Assign Delay meta directly to Infer + // 2) Remove Delay node + // 3) Redirect Delay writers to Infer + graph.meta(op_nh).set(delay); + for (auto in_nh : nh->srcNodes()) { + graph.link(in_nh, op_nh); + } + graph.remove(nh); + graph.remove(delay_out_nh); + + return true; +} + +struct Protocol { + cv::GProtoArgs graph_inputs; + cv::GProtoArgs graph_outputs; +}; + +enum class NodeState { EXPLORING, VISITED }; + +static void visit(NodeHandle nh, std::unordered_map& state) { + auto curr_node_it = state.emplace(nh, NodeState::EXPLORING).first; + for (const auto& dst_nh : nh->dstNodes()) { + const auto dst_it = state.find(dst_nh); + if (dst_it == state.end()) { + visit(dst_nh, state); + } else if (dst_it->second == NodeState::EXPLORING) { + THROW_ERROR("Scenario graph has a cycle!"); + } + } + curr_node_it->second = NodeState::VISITED; +}; + +namespace passes { + +// NB: Throw an exception if there is a cycle in graph +void throwIfCycle(Graph& graph) { + std::unordered_map state; + for (const auto& nh : graph.nodes()) { + if (state.find(nh) == state.end()) { + visit(nh, state); + } + } +} + +// NB: Determines what would be the computation graph +// inputs and outputs and marks intermediate data nodes +void init(Graph& graph) { + ASSERT(!graph.nodes().empty()); + uint32_t num_sources = 0; + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + ++num_sources; + graph.meta(nh).set(GraphInput{}); + } else { + // NB: Check that graph is connected + ASSERT(!nh->srcNodes().empty()); + } + if (nh->dstNodes().empty()) { + ASSERT(graph.meta(nh).has()); + graph.meta(nh).set(GraphOutput{}); + } + if (!graph.meta(nh).has()) { + ASSERT(graph.meta(nh).has()); + graph.meta(nh).set(GData{}); + } + } + ASSERT(num_sources != 0); +}; + +// NB: Fuses delay to the inference nodes as the delay can be performed +// as part of the model dummy preprocessing +void fuseDelays(Graph& graph) { + // NB: Iterate over graph nodes until all delays are fused. + while (true) { + bool is_fused = false; + for (auto nh : graph.nodes()) { + if (!graph.meta(nh).has()) { + continue; + } + auto op = graph.meta(nh).get().kind; + if (std::holds_alternative(op)) { + auto delay = std::get(op); + if (fuseDelay(graph, nh, delay)) { + is_fused = true; + break; + } + } + } + // NB: If delay was fused, some of the nodes were removed + // Iterate one more time... 
+ if (!is_fused) { + break; + } + } +}; + +// NB: Finds the maximum parallelism depth to tell concurrent executor +// how many threads should be used for execution +void findMaxParallelBranches(Graph& graph, uint32_t& max_parallel_branches) { + // NB: Basically the maximum parallelism in computational graph + // is the maximum width of its level in BFS traversal, taking into + // account that dependencies for the node are resolved + std::unordered_set curr_lvl; + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + for (auto op_nh : nh->dstNodes()) { + curr_lvl.emplace(op_nh); + } + } + } + + std::unordered_set visited; + + auto get_all_deps = [&](auto nh) { + std::unordered_set deps; + for (auto in_nhs : nh->srcNodes()) { + for (auto op_nhs : in_nhs->srcNodes()) { + deps.emplace(op_nhs); + } + } + return deps; + }; + + auto all_deps_resolved = [&](auto nh) { + auto deps = get_all_deps(nh); + return std::all_of(deps.begin(), deps.end(), [&](auto dep) { + return visited.find(dep) != visited.end(); + }); + }; + + max_parallel_branches = static_cast(curr_lvl.size()); + while (!curr_lvl.empty()) { + std::unordered_set next_lvl; + for (auto nh : curr_lvl) { + visited.emplace(nh); + ASSERT(nh->dstNodes().size() == 1u); + auto data_nh = nh->dstNodes().front(); + for (auto op_nh : data_nh->dstNodes()) { + if (all_deps_resolved(op_nh)) { + next_lvl.emplace(op_nh); + } + } + } + if (next_lvl.size() > max_parallel_branches) { + max_parallel_branches = static_cast(next_lvl.size()); + } + curr_lvl = std::move(next_lvl); + } +} + +// NB: Build "G" operations according to scenario graph nodes +void buildOperations(Graph& graph, IBuildStrategy::Ptr strategy, const InferenceParamsMap& params_map) { + OpBuilder builder{graph, strategy, params_map}; + for (auto nh : graph.nodes()) { + // NB: Skip data nodes + if (!graph.meta(nh).has()) { + continue; + } + std::visit( + [nh, &builder](const auto& op) { + builder.build(nh, op); + }, + graph.meta(nh).get().kind); + } + + for (auto nh : graph.nodes()) { + // NB: Make sure all data nodes that needs to be + // dumped or validated are graph outputs. + if (!graph.meta(nh).has() && (graph.meta(nh).has() || graph.meta(nh).has())) { + graph.meta(nh).set(GraphOutput{}); + } + } +}; + +void buildComputation(Graph& graph, Protocol& proto) { + cv::GProtoArgs graph_inputs; + cv::GProtoArgs graph_outputs; + + std::unordered_map all_data; + auto sorted = graph.sorted(); + + // NB: Initialize "G" inputs + for (auto nh : sorted) { + if (graph.meta(nh).has()) { + auto it = all_data.emplace(nh, cv::GProtoArg{cv::GMat()}).first; + graph_inputs.push_back(it->second); + } + } + // NB: Apply "G" operations in topological order + for (auto nh : sorted) { + if (graph.meta(nh).has()) { + const auto& operation = graph.meta(nh).get(); + // NB: Map input args to the correct input index. + std::unordered_map idx_to_arg; + auto in_ehs = nh->srcEdges(); + for (auto in_eh : in_ehs) { + ASSERT(graph.meta(in_eh).has()); + const uint32_t in_idx = graph.meta(in_eh).get().idx; + auto arg = all_data.at(in_eh->srcNode()); + idx_to_arg.emplace(in_idx, arg); + } + cv::GProtoArgs in_args; + for (uint32_t idx = 0; idx < idx_to_arg.size(); ++idx) { + in_args.push_back(idx_to_arg.at(idx)); + } + // NB: Link G-API operation with its io data. + auto out_args = operation.on(in_args); + // TODO: Validation in/out amount and types... + // NB: Map output args to the correct index. 
+ auto out_ehs = nh->dstEdges(); + for (auto out_eh : out_ehs) { + ASSERT(graph.meta(out_eh).has()); + const uint32_t out_idx = graph.meta(out_eh).get().idx; + auto out_nh = out_eh->dstNode(); + all_data.emplace(out_nh, out_args[out_idx]); + } + } + } + + // NB: Collect "G" outputs + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + graph_outputs.push_back(all_data.at(nh)); + } + } + + ASSERT(!graph_inputs.empty()) + ASSERT(!graph_outputs.empty()) + // NB: Finally save computation i/o to build GComputation later on + proto = Protocol{std::move(graph_inputs), std::move(graph_outputs)}; +} + +static void collectOutputMeta(Graph& graph, std::vector& out_meta) { + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + out_meta.push_back(graph.meta(nh)); + } + } +} + +} // namespace passes + +ComputationBuilder::ComputationBuilder(IBuildStrategy::Ptr strategy): m_strategy(strategy) { +} + +Computation ComputationBuilder::build(ScenarioGraph& graph, const InferenceParamsMap& infer_params, + const ComputationBuilder::Options& opts) { + uint32_t max_parallel_branches = 1u; + auto compile_args = cv::compile_args(cv::gapi::kernels()); + std::vector outputs_meta; + Protocol proto; + + using namespace std::placeholders; + graph.pass(passes::throwIfCycle); + graph.pass(passes::init); + graph.pass(passes::fuseDelays); + graph.pass(std::bind(passes::findMaxParallelBranches, _1, std::ref(max_parallel_branches))); + graph.pass(std::bind(passes::buildOperations, _1, m_strategy, std::cref(infer_params))); + graph.pass(std::bind(passes::buildComputation, _1, std::ref(proto))); + graph.pass(std::bind(passes::collectOutputMeta, _1, std::ref(outputs_meta))); + + if (opts.add_perf_meta) { + // FIXME: Must work with any G-Type! + ASSERT(cv::util::holds_alternative(proto.graph_outputs.front())); + cv::GMat g = cv::util::get(proto.graph_outputs.front()); + proto.graph_outputs.emplace_back(cv::gapi::streaming::timestamp(g).strip()); + proto.graph_outputs.emplace_back(cv::gapi::streaming::seq_id(g).strip()); + } + + cv::GComputation comp(cv::GProtoInputArgs{std::move(proto.graph_inputs)}, + cv::GProtoOutputArgs{std::move(proto.graph_outputs)}); + + return Computation{std::move(comp), std::move(compile_args), std::move(outputs_meta), {max_parallel_branches}}; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp new file mode 100644 index 00000000000000..6a51b068065284 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp @@ -0,0 +1,74 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "result.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/computation.hpp" +#include "utils/data_providers.hpp" + +#include +#include +#include + +struct InputIdx { + uint32_t idx; +}; + +struct OutputIdx { + uint32_t idx; +}; + +struct GraphInput {}; +struct GraphOutput {}; +struct GData {}; +struct GOperation { + using F = std::function; + F on; +}; + +struct Dump { + std::filesystem::path path; +}; + +struct Validate { + using F = std::function; + F validator; + std::vector reference; +}; + +struct InferDesc { + std::string tag; + LayersInfo input_layers; + LayersInfo output_layers; +}; + +struct IBuildStrategy { + using Ptr = std::shared_ptr; + struct InferBuildInfo { + std::vector providers; + std::vector inputs_meta; + std::vector 
outputs_meta; + const bool disable_copy; + }; + // NB: Extend for any further node types needed + virtual InferBuildInfo build(const InferDesc& infer) = 0; +}; + +class ComputationBuilder { +public: + explicit ComputationBuilder(IBuildStrategy::Ptr strategy); + + struct Options { + bool add_perf_meta; + }; + + Computation build(ScenarioGraph& graph, const InferenceParamsMap& infer_params, const Options& opts); + +private: + IBuildStrategy::Ptr m_strategy; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp new file mode 100644 index 00000000000000..3b10767b34135f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp @@ -0,0 +1,89 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "dummy_source.hpp" + +#include + +#include "utils/utils.hpp" + +DummySource::DummySource(const uint64_t frames_interval_in_us, const bool drop_frames, + const bool disable_high_resolution_timer) + // NB: 0 is special value means no limit fps for source. + : m_latency_in_us(frames_interval_in_us), + m_drop_frames(drop_frames), + m_timer(SleepTimer::create(disable_high_resolution_timer)), + // NB: Used for simulation, just return 1 byte. + m_mat(utils::createRandom({1}, CV_8U)) { +} + +bool DummySource::pull(cv::gapi::wip::Data& data) { + using namespace std::chrono; + using namespace cv::gapi::streaming; + using ts_t = microseconds; + + // NB: Wait m_latency_in_us before return the first frame. + if (m_next_tick_ts == -1) { + m_next_tick_ts = utils::timestamp() + m_latency_in_us; + } + + int64_t curr_ts = utils::timestamp(); + if (curr_ts < m_next_tick_ts) { + /* + * curr_ts + * | + * ------|----*-----|-------> + * ^ + * m_next_tick_ts + * + * + * NB: New frame will be produced at the m_next_tick_ts point. + */ + m_timer->wait(ts_t{m_next_tick_ts - curr_ts}); + } else if (m_latency_in_us != 0) { + /* + * curr_ts + * +1 +2 | + * |----------|----------|----------|----*-----|-------> + * ^ ^ + * m_next_tick_ts -------------> + * + */ + + // NB: Count how many frames have been produced since last pull (m_next_tick_ts). + int64_t num_frames = static_cast((curr_ts - m_next_tick_ts) / m_latency_in_us); + // NB: Shift m_next_tick_ts to the nearest tick before curr_ts. + m_next_tick_ts += num_frames * m_latency_in_us; + // NB: if drop_frames is enabled, update current seq_id and wait for the next tick, otherwise + // return last written frame (+2 at the picture above) immediately. + if (m_drop_frames) { + // NB: Shift tick to the next frame. + m_next_tick_ts += m_latency_in_us; + // NB: Wait for the next frame. + m_timer->wait(ts_t{m_next_tick_ts - curr_ts}); + // NB: Drop already produced frames + update seq_id for the current. + m_curr_seq_id += num_frames + 1; + } + } + // NB: Just increase reference counter not to release mat memory + // after assigning it to the data. 
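// Worked example of the scheduling math above (annotation, not part of the patch), taking
// m_latency_in_us = 10000 and a pull() that arrives late at curr_ts = 35000 with
// m_next_tick_ts = 10000:
//   num_frames     = (35000 - 10000) / 10000 = 2 missed ticks;
//   m_next_tick_ts = 10000 + 2 * 10000       = 30000 (nearest tick before curr_ts);
//   drop_frames on:  m_next_tick_ts becomes 40000, the source sleeps 5000 us and
//                    m_curr_seq_id jumps by num_frames + 1 = 3;
//   drop_frames off: the frame is returned immediately and seq_id advances by 1 only.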
+ cv::Mat mat = m_mat; + + data.meta[meta_tag::timestamp] = utils::timestamp(); + data.meta[meta_tag::seq_id] = m_curr_seq_id++; + data = mat; + m_next_tick_ts += m_latency_in_us; + + return true; +} + +cv::GMetaArg DummySource::descr_of() const { + return cv::GMetaArg{cv::descr_of(m_mat)}; +} + +void DummySource::reset() { + m_next_tick_ts = -1; + m_curr_seq_id = 0; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp new file mode 100644 index 00000000000000..304e4e7ef2f512 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp @@ -0,0 +1,37 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include // cv::gapi::wip::IStreamSource + +#include "utils/timer.hpp" +#include "utils/utils.hpp" + +class DummySource final : public cv::gapi::wip::IStreamSource { +public: + using Ptr = std::shared_ptr; + + explicit DummySource(const uint64_t frames_interval_in_us, const bool drop_frames, + const bool disable_high_resolution_timer); + + bool pull(cv::gapi::wip::Data& data) override; + cv::GMetaArg descr_of() const override; + void reset(); + +private: + uint64_t m_latency_in_us; + bool m_drop_frames; + IWaitable::Ptr m_timer; + + cv::Mat m_mat; + int64_t m_next_tick_ts = -1; + int64_t m_curr_seq_id = 0; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp new file mode 100644 index 00000000000000..4a0fa451dace91 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp @@ -0,0 +1,66 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "executor.hpp" +#include "utils/error.hpp" + +#include + +PipelinedExecutor::PipelinedExecutor(cv::GStreamingCompiled&& compiled): m_compiled(std::move(compiled)) { +} + +PipelinedExecutor::Output PipelinedExecutor::runLoop(cv::GRunArgs&& inputs, Callback callback, + ITermCriterion::Ptr criterion) { + if (!criterion) { + THROW_ERROR("Termination criterion hasn't been specified!"); + } + + using namespace std::chrono; + using clock_t = high_resolution_clock; + + m_compiled.setSource(std::move(inputs)); + criterion->init(); + + const auto start_tick = clock_t::now(); + m_compiled.start(); + while (criterion->check()) { + if (!callback(m_compiled)) { + break; + } + criterion->update(); + } + const auto end_tick = clock_t::now(); + // NB: Some frames might be in queue just wait until they processed. + // They shouldn't be taken into account since execution is over. 
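// Usage sketch (annotation, not part of the patch) of the loop above; `computation`,
// `sources` and `inputs` are assumed to exist, and the callback must pull as many outputs
// as the compiled graph produces:
//
//     PipelinedExecutor exec(computation.compileStreaming(descr_of(sources)));
//     auto criterion = std::make_shared<CombinedCriterion>(std::make_shared<Iterations>(100),
//                                                          std::make_shared<TimeOut>(10 * 1000000));
//     auto out = exec.runLoop(std::move(inputs),
//                             [](cv::GStreamingCompiled& pipeline) {
//                                 cv::Mat out_mat;
//                                 return pipeline.pull(cv::gout(out_mat));
//                             },
//                             criterion);
//     // out.elapsed_us holds the wall-clock duration of the measured loop.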
+ m_compiled.stop(); + return Output{static_cast(duration_cast(end_tick - start_tick).count())}; +} + +SyncExecutor::SyncExecutor(cv::GCompiled&& compiled): m_compiled(std::move(compiled)) { +} + +SyncExecutor::Output SyncExecutor::runLoop(Callback callback, ITermCriterion::Ptr criterion) { + if (!criterion) { + THROW_ERROR("Termination criterion hasn't been specified!"); + } + + using namespace std::chrono; + using clock_t = high_resolution_clock; + + const auto start_tick = clock_t::now(); + criterion->init(); + while (criterion->check()) { + if (!callback(m_compiled)) { + break; + } + criterion->update(); + } + const auto end_tick = clock_t::now(); + return Output{static_cast(duration_cast(end_tick - start_tick).count())}; +} + +void SyncExecutor::reset() { + m_compiled.prepareForNewStream(); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp new file mode 100644 index 00000000000000..17d32937b8ba54 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp @@ -0,0 +1,42 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include // cv::GCompiled +#include // cv::GStreamingCompiled + +#include "scenario/criterion.hpp" + +class PipelinedExecutor { +public: + explicit PipelinedExecutor(cv::GStreamingCompiled&& compiled); + + struct Output { + uint64_t elapsed_us; + }; + using Callback = std::function; + + Output runLoop(cv::GRunArgs&& inputs, Callback callback, ITermCriterion::Ptr criterion); + +private: + cv::GStreamingCompiled m_compiled; +}; + +class SyncExecutor { +public: + explicit SyncExecutor(cv::GCompiled&& compiled); + + struct Output { + uint64_t elapsed_us; + }; + using Callback = std::function; + + Output runLoop(Callback callback, ITermCriterion::Ptr criterion); + void reset(); + +private: + cv::GCompiled m_compiled; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp new file mode 100644 index 00000000000000..f3b621c68e8f99 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp @@ -0,0 +1,155 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layers_data.hpp" + +#include +#include + +#include "utils/error.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +std::string normalizeLayerName(const std::string& layer_name) { + std::string normalized = layer_name; + std::unordered_set prohibited = {'\\', '/', ':', '*', '?', '"', '<', '>'}; + std::replace_if( + normalized.begin(), normalized.end(), + [&prohibited](char ch) { + return prohibited.find(ch) != prohibited.end(); + }, + '_'); + return normalized; +}; + +std::vector uploadLayerData(const std::filesystem::path& path, const std::string& tag, + const LayerInfo& layer) { + if (!std::filesystem::exists(path) || !std::filesystem::is_directory(path)) { + THROW_ERROR("Failed to find data folder: " << path << " for model: " << tag << ", layer: " << layer.name); + } + std::string iter_file_pattern = "iter_(\\d+)\\.bin"; + std::regex regex(iter_file_pattern); + std::unordered_map iter_files_map; + for (const auto& entry : std::filesystem::directory_iterator{path}) { + std::smatch match; + const auto& filename = entry.path().filename().string(); + if (std::regex_match(filename, match, regex)) { + const auto iter_idx = 
std::stoi(match[1].str()); + iter_files_map.emplace(iter_idx, entry); + } + } + std::vector out_mats; + for (int i = 0; i < iter_files_map.size(); ++i) { + if (auto it = iter_files_map.find(i); it != iter_files_map.end()) { + cv::Mat mat; + utils::createNDMat(mat, layer.dims, layer.prec); + utils::readFromBinFile(it->second.string(), mat); + out_mats.push_back(std::move(mat)); + } else { + THROW_ERROR("Failed to find data for iteration: " << i << ", model: " << tag << ", layer: " << layer.name); + } + } + return out_mats; +} + +using LayersDataMap = std::unordered_map>; +LayersDataMap uploadFromDirectory(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers) { + LayersDataMap layers_data; + for (const auto& layer : layers) { + auto normalized = normalizeLayerName(layer.name); + auto data = uploadLayerData(path / normalized, tag, layer); + if (data.empty()) { + THROW_ERROR("No iterations data found for model: " << tag << ", layer: " << layer.name); + } + LOG_INFO() << " - Found " << data.size() << " iteration(s) for layer: " << layer.name << std::endl; + layers_data.emplace(layer.name, std::move(data)); + } + return layers_data; +} + +LayersDataMap uploadData(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers, + LayersType type) { + ASSERT(!layers.empty()); + const std::string kLayersTypeStr = type == LayersType::INPUT ? "input" : "output"; + if (!std::filesystem::exists(path)) { + THROW_ERROR("" << path << " must exist to upload layers data!") + } + LayersDataMap layers_data; + if (std::filesystem::is_directory(path)) { + layers_data = uploadFromDirectory(path, tag, layers); + } else { + if (layers.size() > 1u) { + THROW_ERROR("Model: " << tag << " must have exactly one " << kLayersTypeStr + << " layer in order to upload data from: " << path); + } + const auto& layer = layers.front(); + cv::Mat mat; + utils::createNDMat(mat, layer.dims, layer.prec); + utils::readFromBinFile(path.string(), mat); + LOG_INFO() << " - Found single iteration data for model: " << tag << ", layer: " << layer.name << std::endl; + layers_data = {{layer.name, std::vector{mat}}}; + } + // NB: layers_data can't be empty as long as layers vector is non-empty. + const auto kNumPerLayerIterations = layers_data.begin()->second.size(); + // NB: All i/o layers for model must have the equal amount of data. 
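// Expected on-disk layout (annotation, not part of the patch), as consumed by
// uploadLayerData()/uploadFromDirectory() above: one sub-directory per normalized layer
// name holding per-iteration dumps named iter_<N>.bin, e.g.
//
//     <data_path>/
//         input_tensor/        <- normalizeLayerName("input:tensor")
//             iter_0.bin
//             iter_1.bin
//         prob/
//             iter_0.bin
//             iter_1.bin
//
// A single regular file instead of a directory is accepted only for models with exactly
// one input (or output) layer and is treated as one iteration.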
+ for (const auto& [layer_name, data_vec] : layers_data) { + if (data_vec.size() != kNumPerLayerIterations) { + THROW_ERROR("Model: " << tag << " has different amount of data for " << kLayersTypeStr + << " layer: " << layer_name << "(" << data_vec.size() << ") and layer: " + << layers_data.begin()->first << "(" << kNumPerLayerIterations << ")"); + } + } + return layers_data; +} + +bool isDirectory(const std::filesystem::path& path) { + if (std::filesystem::exists(path)) { + return std::filesystem::is_directory(path); + } + return path.extension().empty(); +} + +std::vector createConstantProviders(LayersDataMap&& layers_data, + const std::vector& layer_names) { + std::vector providers; + for (const auto& layer_name : layer_names) { + auto layer_data = layers_data.at(layer_name); + providers.push_back(std::make_shared(std::move(layer_data))); + } + return providers; +} + +std::vector createRandomProviders(const LayersInfo& layers, + const std::map& generators) { + std::vector providers; + for (const auto& layer : layers) { + auto generator = generators.at(layer.name); + auto provider = std::make_shared(generator, layer.dims, layer.prec); + LOG_INFO() << " - Random generator: " << generator->str() << " will be used for layer: " << layer.name + << std::endl; + providers.push_back(std::move(provider)); + } + return providers; +} + +std::vector createDirectoryLayout(const std::filesystem::path& path, + const std::vector& layer_names) { + std::vector dirs_path; + std::filesystem::create_directories(path); + for (const auto& layer_name : layer_names) { + // NB: Use normalized layer name to create dir + // to store reference data for particular layer. + std::filesystem::path curr_dir = path / normalizeLayerName(layer_name); + dirs_path.push_back(curr_dir); + std::filesystem::create_directory(curr_dir); + { + // NB: Save the original layer name; + std::ofstream file{curr_dir / "layer_name.txt"}; + ASSERT(file.is_open()); + file << layer_name; + } + } + return dirs_path; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp new file mode 100644 index 00000000000000..6d2b9bc6716212 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "scenario/inference.hpp" +#include "utils/data_providers.hpp" + +std::string normalizeLayerName(const std::string& layer_name); +std::vector uploadLayerData(const std::filesystem::path& path, const std::string& tag, const LayerInfo& layer); + +enum class LayersType { INPUT = 0, OUTPUT }; +using LayersDataMap = std::unordered_map>; +LayersDataMap uploadFromDirectory(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers); + +LayersDataMap uploadData(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers, + LayersType type); + +bool isDirectory(const std::filesystem::path& path); + +std::vector createConstantProviders(LayersDataMap&& layers_data, + const std::vector& layer_names); + +std::vector createRandomProviders(const LayersInfo& layers, + const std::map& generators); + +std::vector createDirectoryLayout(const std::filesystem::path& path, + const std::vector& layer_names); +template +std::map unpackWithDefault(const LayerVariantAttr& attr, const std::vector& layer_names, + const T& def_value) { + std::map result; + if 
(std::holds_alternative(attr)) { + for (const auto& layer_name : layer_names) { + result.emplace(layer_name, def_value); + } + } else if (std::holds_alternative(attr)) { + auto val = std::get(attr); + for (const auto& layer_name : layer_names) { + result.emplace(layer_name, val); + } + } else { + auto map = std::get>(attr); + for (const auto& layer_name : layer_names) { + if (auto it = map.find(layer_name); it != map.end()) { + result.emplace(layer_name, it->second); + } else { + result.emplace(layer_name, def_value); + } + } + } + return result; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp new file mode 100644 index 00000000000000..72c1e9539773e3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp @@ -0,0 +1,46 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/layers_reader.hpp" +#include "scenario/inference.hpp" +#include "utils/error.hpp" +#include "utils/logger.hpp" + +OpenVINOLayersReader& getOVReader() { + static OpenVINOLayersReader reader; + return reader; +} + +static std::string getModelFileName(const InferenceParams& params) { + if (std::holds_alternative(params)) { + const auto& ov_params = std::get(params); + if (std::holds_alternative(ov_params.path)) { + return std::get(ov_params.path).model; + } else { + ASSERT(std::holds_alternative(ov_params.path)); + return std::get(ov_params.path).blob; + } + } else if (std::holds_alternative(params)) { + return std::get(params).model_path; + } else { + THROW_ERROR("Unsupported model parameters type!"); + } + // NB: Unreachable + ASSERT(false); +} + +InOutLayers LayersReader::readLayers(const InferenceParams& params) { + LOG_INFO() << "Reading model " << getModelFileName(params) << std::endl; + if (std::holds_alternative(params)) { + const auto& ov = std::get(params); + return getOVReader().readLayers(ov); + } + ASSERT(std::holds_alternative(params)); + const auto& ort = std::get(params); + // NB: Using OpenVINO to read the i/o layers information for *.onnx model + OpenVINOParams ov; + ov.path = OpenVINOParams::ModelPath{ort.model_path, ""}; + return getOVReader().readLayers(ov, true /* use_results_names */); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp new file mode 100644 index 00000000000000..1d701272255fb0 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp @@ -0,0 +1,27 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/inference.hpp" + +#include + +struct InOutLayers { + LayersInfo in_layers; + LayersInfo out_layers; +}; + +class OpenVINOLayersReader { +public: + OpenVINOLayersReader(); + InOutLayers readLayers(const OpenVINOParams& params, const bool use_results_names = false); + +private: + class Impl; + std::shared_ptr m_impl; +}; + +namespace LayersReader { +InOutLayers readLayers(const InferenceParams& params); +} // namespace LayersReader diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp new file mode 100644 index 00000000000000..1b353dbf6e7288 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp @@ -0,0 +1,131 @@ +// +// Copyright (C) 2023-2024 Intel Corporation 
+// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/operations.hpp" +#include "utils/error.hpp" + +cv::GProtoArgs InferCall::operator()(const cv::GProtoArgs& inputs) { + cv::GInferInputs infer_inputs; + for (int i = 0; i < inputs.size(); ++i) { + auto gmat = cv::util::get(inputs[i]); + infer_inputs[input_names[i]] = gmat; + } + auto infer_outputs = cv::gapi::infer(tag, infer_inputs); + cv::GProtoArgs outputs; + for (int i = 0; i < output_names.size(); ++i) { + outputs.emplace_back(infer_outputs.at(output_names[i])); + } + return outputs; +} + +std::vector GDummyM::on(const std::vector& ins, const uint64_t delay_in_us, + const std::vector& providers, const bool disable_copy) { + std::vector shapes; + std::vector op_kinds; + std::vector host_ctors; + std::vector gargs; + std::vector out_kinds; + + gargs.emplace_back(providers); + gargs.emplace_back(delay_in_us); + gargs.emplace_back(disable_copy); + + for (int i = 0; i < ins.size(); ++i) { + auto shape = cv::detail::GTypeTraits::shape; + shapes.push_back(shape); + auto op_kind = cv::detail::GTypeTraits::op_kind; + op_kinds.push_back(op_kind); + host_ctors.push_back(cv::detail::GObtainCtor::get()); + gargs.emplace_back(ins[i]); + } + + const size_t num_outputs = providers.size(); + for (int i = 0; i < num_outputs; ++i) { + auto op_kind = cv::detail::GTypeTraits::op_kind; + out_kinds.push_back(op_kind); + } + + using namespace std::placeholders; + cv::GKernel k{GDummyM::id(), + "", + std::bind(&GDummyM::getOutMeta, _1, _2), + std::move(shapes), + std::move(op_kinds), + std::move(host_ctors), + std::move(out_kinds)}; + + cv::GCall call(std::move(k)); + call.setArgs(std::move(gargs)); + + std::vector outs; + outs.reserve(num_outputs); + for (int i = 0; i < num_outputs; ++i) { + outs.push_back(call.yield(i)); + } + + return outs; +} + +cv::GMetaArgs GDummyM::getOutMeta(const cv::GMetaArgs&, const cv::GArgs& args) { + const auto& providers = args.front().get>(); + cv::GMetaArgs out_metas; + out_metas.reserve(providers.size()); + for (auto provider : providers) { + out_metas.emplace_back(provider->desc()); + } + return out_metas; +} + +cv::gapi::GBackend GCPUDummyM::backend() { + return cv::gapi::cpu::backend(); +} + +cv::GCPUKernel GCPUDummyM::kernel() { + return cv::GCPUKernel(&GCPUDummyM::call, &GCPUDummyM::setup); +} + +void GCPUDummyM::setup(const cv::GMetaArgs& metas, cv::GArgs gargs, cv::GArg& state, const cv::GCompileArgs& args) { + state = cv::GArg(std::make_shared()); + auto providers = gargs.front().get>(); + for (auto& provider : providers) { + provider->reset(); + } +} + +void GCPUDummyM::call(cv::GCPUContext& ctx) { + using namespace std::chrono; + const bool disable_copy = ctx.inArg(2u); + uint64_t elapsed = disable_copy ? 
0u : utils::measure([&]() { + auto& providers = ctx.inArg>(0u); + for (size_t i = 0; i < providers.size(); ++i) { + providers[i]->pull(ctx.outMatR(static_cast(i))); + } + }); + const auto delay_in_us = ctx.inArg(1u); + utils::busyWait(microseconds{std::max(delay_in_us - elapsed, uint64_t{0})}); +} + +cv::GProtoArgs DummyCall::operator()(const cv::GProtoArgs& inputs) { + std::vector gmats; + gmats.reserve(inputs.size()); + for (auto& in : inputs) { + gmats.emplace_back(cv::util::get(in)); + } + auto outputs = GDummyM::on(gmats, delay_in_us, providers, disable_copy); + cv::GProtoArgs proto_outputs; + for (auto& out : outputs) { + proto_outputs.emplace_back(cv::GProtoArg{out}); + } + return proto_outputs; +} + +cv::GProtoArgs CompoundCall::operator()(const cv::GProtoArgs& inputs) { + ASSERT(inputs.size() == 1) + cv::GMat in = cv::util::get(inputs[0]); + + cv::GProtoArgs proto_outputs; + proto_outputs.emplace_back(GCompound::on(in, function)); + return proto_outputs; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp new file mode 100644 index 00000000000000..cce38c9d83d07f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp @@ -0,0 +1,77 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include // GAPI_OCV_KERNEL +#include // G_API_OP +#include + +#include "utils/data_providers.hpp" +#include "utils/utils.hpp" + +// clang-format off +struct InferCall { + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); + + std::string tag; + std::vector input_names; + std::vector output_names; +}; + +struct DummyState { }; +struct GDummyM { + static const char *id() { return "custom.dummym"; } + static std::vector on(const std::vector &ins, + const uint64_t delay_in_us, + const std::vector &providers, + const bool disable_copy); + static cv::GMetaArgs getOutMeta(const cv::GMetaArgs&, const cv::GArgs &args); +}; + +struct GCPUDummyM: public cv::detail::KernelTag { + using API = GDummyM; + using State = DummyState; + + static cv::gapi::GBackend backend(); + static cv::GCPUKernel kernel(); + static void setup(const cv::GMetaArgs &metas, + cv::GArgs gargs, + cv::GArg &state, + const cv::GCompileArgs &args); + static void call(cv::GCPUContext &ctx); +}; + +struct DummyCall { + std::vector providers; + uint64_t delay_in_us; + // NB: Don't pull data from providers if enabled + bool disable_copy = false; + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); +}; + +using F = std::function; + +G_TYPED_KERNEL(GCompound, , "custom.compound") +{ + static cv::GMatDesc outMeta(cv::GMatDesc in, F){ + return in; + } +}; + +GAPI_OCV_KERNEL(GCPUCompound, GCompound) +{ + static void run(const cv::Mat& in, + F function, + cv::Mat& out) + { + function(); + } +}; + +struct CompoundCall { + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); + F function; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp new file mode 100644 index 00000000000000..57527cef0cc4aa --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp @@ -0,0 +1,215 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/layers_reader.hpp" + +#include // CV_* +#include + +#include "utils/error.hpp" + +#include + +class 
OpenVINOLayersReader::Impl { +public: + InOutLayers readLayers(const OpenVINOParams& params, const bool use_results_names); + +private: + InOutLayers readFromBlob(const std::string& blob, const std::string& device, + const std::map& config); + + InOutLayers readFromModel(const std::string& xml, const std::string& bin, const OpenVINOParams& params, + const bool use_results_names); + +private: + ov::Core m_core; +}; + +OpenVINOLayersReader::OpenVINOLayersReader(): m_impl(new OpenVINOLayersReader::Impl{}) { +} + +static ov::element::Type toElementType(int cvdepth) { + switch (cvdepth) { + case CV_8U: + return ov::element::u8; + case CV_32S: + return ov::element::i32; + case CV_32F: + return ov::element::f32; + case CV_16F: + return ov::element::f16; + } + throw std::logic_error("Failed to convert opencv depth to ov::element::Type"); +} + +static std::vector toDims(const std::vector& sz_vec) { + std::vector result; + result.reserve(sz_vec.size()); + for (auto sz : sz_vec) { + // FIXME: Probably requires some check... + result.push_back(static_cast(sz)); + } + return result; +} + +static int toPrecision(ov::element::Type prec) { + switch (prec) { + case ov::element::u8: + return CV_8U; + case ov::element::i32: + return CV_32S; + case ov::element::f32: + return CV_32F; + case ov::element::f16: + return CV_16F; + case ov::element::i64: + return CV_32S; + } + throw std::logic_error("Unsupported OV precision"); +} + +template +std::vector ovToLayersInfo(const InfoVec& vec) { + std::vector layers; + layers.reserve(vec.size()); + std::transform(vec.begin(), vec.end(), std::back_inserter(layers), [](const auto& node) { + return LayerInfo{node.get_any_name(), toDims(node.get_shape()), toPrecision(node.get_element_type())}; + }); + return layers; +}; + +static void cfgInputPreproc(ov::preprocess::PrePostProcessor& ppp, const std::shared_ptr& model, + const AttrMap& input_precision, const AttrMap& input_layout, + const AttrMap& input_model_layout) { + for (const auto& input : model->inputs()) { + const auto& name = input.get_any_name(); + auto& ii = ppp.input(name); + + const auto ip = lookUp(input_precision, name); + if (ip.has_value()) { + ii.tensor().set_element_type(toElementType(*ip)); + } + + const auto il = lookUp(input_layout, name); + if (il.has_value()) { + ii.tensor().set_layout(ov::Layout(*il)); + } + + const auto iml = lookUp(input_model_layout, name); + if (iml.has_value()) { + ii.model().set_layout(ov::Layout(*iml)); + } + } +} + +static void cfgOutputPostproc(ov::preprocess::PrePostProcessor& ppp, const std::shared_ptr& model, + const AttrMap& output_precision, const AttrMap& output_layout, + const AttrMap output_model_layout) { + for (const auto& output : model->outputs()) { + const auto& name = output.get_any_name(); + auto& oi = ppp.output(name); + + const auto op = lookUp(output_precision, name); + if (op.has_value()) { + oi.tensor().set_element_type(toElementType(*op)); + } + + const auto ol = lookUp(output_layout, name); + if (ol.has_value()) { + oi.tensor().set_layout(ov::Layout(*ol)); + } + + const auto oml = lookUp(output_model_layout, name); + if (oml.has_value()) { + oi.model().set_layout(ov::Layout(*oml)); + } + } +} + +static std::vector extractLayerNames(const std::vector>& nodes) { + std::vector names; + std::transform(nodes.begin(), nodes.end(), std::back_inserter(names), [](const auto& node) { + return node.get_any_name(); + }); + return names; +} + +InOutLayers OpenVINOLayersReader::Impl::readFromModel(const std::string& model_path, const std::string& bin_path, + const 
OpenVINOParams& params, const bool use_results_names) { + auto model = m_core.read_model(model_path, bin_path); + { + ov::preprocess::PrePostProcessor ppp(model); + + const auto& input_names = extractLayerNames(model->inputs()); + const auto ip_map = unpackLayerAttr(params.input_precision, input_names, "input precision"); + const auto il_map = unpackLayerAttr(params.input_layout, input_names, "input layout"); + const auto iml_map = unpackLayerAttr(params.input_model_layout, input_names, "input model layout"); + cfgInputPreproc(ppp, model, ip_map, il_map, iml_map); + + const auto& output_names = extractLayerNames(model->outputs()); + const auto op_map = unpackLayerAttr(params.output_precision, output_names, "output precision"); + const auto ol_map = unpackLayerAttr(params.output_layout, output_names, "output layout"); + const auto oml_map = unpackLayerAttr(params.output_model_layout, output_names, "output model layout"); + cfgOutputPostproc(ppp, model, op_map, ol_map, oml_map); + + model = ppp.build(); + } + + auto input_layers = ovToLayersInfo(model->inputs()); + auto output_layers = ovToLayersInfo(model->outputs()); + + // FIXME: UGLY WA in order to use layer names obtained by OV reader in ONNXRT. + // Ideally there should be corresponding ONNXRT reader instead!!! + // Result nodes friendly names preserve the names from original model, + // so the could be used in different framework (not only OpenVINO) + if (use_results_names) { + const auto& results = model->get_results(); + for (int i = 0; i < results.size(); ++i) { + auto result_name = results[i]->get_friendly_name(); + // This suffix is hardcoded at the OpenVINO side + const std::string suffix = "/sink_port_0"; + const auto kSuffixStartPos = result_name.length() - suffix.length(); + // Check that suffix is still presented at the OpenVINO side + ASSERT(result_name.substr(kSuffixStartPos) == suffix); + // Drop the suffix as it's not needed and update the name + result_name = result_name.substr(0, kSuffixStartPos); + output_layers[i].name = result_name; + } + } + + return {std::move(input_layers), std::move(output_layers)}; +} + +InOutLayers OpenVINOLayersReader::Impl::readFromBlob(const std::string& blob, const std::string& device, + const std::map& config) { + std::ifstream file(blob, std::ios_base::in | std::ios_base::binary); + if (!file.is_open()) { + THROW_ERROR("Failed to import model from: " << blob); + } + + auto compiled_model = m_core.import_model(file, device, {config.begin(), config.end()}); + + auto input_layers = ovToLayersInfo(compiled_model.inputs()); + auto output_layers = ovToLayersInfo(compiled_model.outputs()); + + return {std::move(input_layers), std::move(output_layers)}; +} + +InOutLayers OpenVINOLayersReader::Impl::readLayers(const OpenVINOParams& params, const bool use_results_names) { + if (std::holds_alternative(params.path)) { + const auto& path = std::get(params.path); + return readFromModel(path.model, path.bin, params, use_results_names); + } + ASSERT(std::holds_alternative(params.path)); + // NB: use_results_names is WA for reading layer names for the further usage in ONNXRT + // since ONNXRT is always ModelPath case (*.onnx format), no need to handle this for *.blob's + ASSERT(!use_results_names); + const auto& path = std::get(params.path); + return readFromBlob(path.blob, params.device, params.config); +} + +InOutLayers OpenVINOLayersReader::readLayers(const OpenVINOParams& params, const bool use_results_names) { + return m_impl->readLayers(params, use_results_names); +} diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp new file mode 100644 index 00000000000000..4e47b34e3d2d35 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp @@ -0,0 +1,337 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "performance_mode.hpp" + +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs +#include // ov::benchmark_mode{} + +#include + +class PerformanceMetrics { +public: + PerformanceMetrics(const uint64_t elapsed, const std::vector latency, const std::vector seq_ids); + friend std::ostream& operator<<(std::ostream& os, const PerformanceMetrics& metrics); + +private: + // TODO: avg, min, max statistics can be encapsulated. + double avg_latency_ms; + double min_latency_ms; + double max_latency_ms; + int64_t total_frames; + double fps; + int64_t dropped; +}; + +PerformanceMetrics::PerformanceMetrics(const uint64_t elapsed_us, const std::vector latency_us, + const std::vector seq_ids) { + avg_latency_ms = utils::avg(latency_us) / 1000.0; + min_latency_ms = utils::min(latency_us) / 1000.0; + max_latency_ms = utils::max(latency_us) / 1000.0; + double elapsed_ms = static_cast(elapsed_us / 1000.0); + fps = latency_us.size() / elapsed_ms * 1000; + + dropped = 0; + int64_t prev_seq_id = seq_ids[0]; + for (size_t i = 1; i < seq_ids.size(); ++i) { + dropped += seq_ids[i] - prev_seq_id - 1; + prev_seq_id = seq_ids[i]; + } + total_frames = seq_ids.back() + 1; +} + +std::ostream& operator<<(std::ostream& os, const PerformanceMetrics& metrics) { + os << "throughput: " << metrics.fps << " FPS, latency: min: " << metrics.min_latency_ms + << " ms, avg: " << metrics.avg_latency_ms << " ms, max: " << metrics.max_latency_ms + << " ms, frames dropped: " << metrics.dropped << "/" << metrics.total_frames; + return os; +} + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const PerformanceSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + const InferDesc& infer; + const PerformanceSimulation::Options& opts; + std::vector providers; +}; + +void InputDataVisitor::operator()(std::monostate) { + LOG_INFO() << "Input data path for model: " << infer.tag << " hasn't been provided. Will be generated randomly" + << std::endl; + auto initializers = opts.initializers_map.at(infer.tag); + auto default_initialzer = + opts.global_initializer ? 
opts.global_initializer : std::make_shared(0.0, 255.0); + auto per_layer_initializers = + unpackWithDefault(initializers, extractLayerNames(infer.input_layers), default_initialzer); + providers = createRandomProviders(infer.input_layers, per_layer_initializers); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + const std::filesystem::path path{path_str}; + if (std::filesystem::exists(path)) { + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + providers = createConstantProviders(std::move(layers_data), extractLayerNames(infer.input_layers)); + } else { + auto initializers = opts.initializers_map.at(infer.tag); + auto default_initialzer = + opts.global_initializer ? opts.global_initializer : std::make_shared(0.0, 255.0); + auto per_layer_initializers = + unpackWithDefault(initializers, extractLayerNames(infer.input_layers), default_initialzer); + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag + << " provided but doesn't exist - will be generated randomly" << std::endl; + providers = createRandomProviders(infer.input_layers, per_layer_initializers); + } +} + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Performance mode supports input data in form of either directory or single file!"); +}; + +} // anonymous namespace + +PerformanceStrategy::PerformanceStrategy(const PerformanceSimulation::Options& _opts): opts(_opts){}; + +IBuildStrategy::InferBuildInfo PerformanceStrategy::build(const InferDesc& infer) { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + // NB: No special I/O meta for this mode + std::vector inputs_meta(infer.input_layers.size(), Meta{}); + std::vector outputs_meta(infer.output_layers.size(), Meta{}); + return {std::move(in_data_visitor.providers), std::move(inputs_meta), std::move(outputs_meta), opts.inference_only}; +} + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + struct Options { + uint32_t after_iter_delay_in_us = 0u; + }; + + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, const size_t num_outputs, + const Options& options); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + void reset(); + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_mats; + int64_t m_ts, m_seq_id; + + std::vector m_per_iter_latency; + std::vector m_per_iter_seq_ids; + + Options m_opts; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + const size_t num_outputs); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + cv::optional m_ts, m_seq_id; + std::vector> m_opt_mats; + + std::vector m_per_iter_latency; + std::vector m_per_iter_seq_ids; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + const size_t num_outputs, const SyncSimulation::Options& options) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_mats(num_outputs), + m_ts(-1), + m_seq_id(-1), + m_opts(options) { + 
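+    // NB (added note): the warm-up run below is presumably there to take the one-time
+    // costs (graph compilation, plugin initialization, first inference) out of the
+    // measured statistics; only iterations executed after this constructor returns
+    // contribute to the reported FPS / latency numbers.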
LOG_DEBUG() << "Run warm-up iteration" << std::endl; + this->run(std::make_shared(1u)); + LOG_DEBUG() << "Warm-up has finished successfully." << std::endl; +} + +void SyncSimulation::reset() { + for (auto src : m_sources) { + src->reset(); + } + m_exec.reset(); +}; + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + auto out = m_exec.runLoop(cb, criterion); + PerformanceMetrics metrics(out.elapsed_us, m_per_iter_latency, m_per_iter_seq_ids); + m_per_iter_latency.clear(); + m_per_iter_seq_ids.clear(); + std::stringstream ss; + ss << metrics; + this->reset(); + return Success{ss.str()}; +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + using ts_t = std::chrono::microseconds; + auto pipeline_outputs = cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. + for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + pipeline_outputs += cv::gout(m_ts); + pipeline_outputs += cv::gout(m_seq_id); + + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + const auto curr_ts = utils::timestamp(); + m_per_iter_latency.push_back(curr_ts - m_ts); + m_per_iter_seq_ids.push_back(m_seq_id); + + // NB: Do extra busy wait to simulate the user's post processing after stream. + if (m_opts.after_iter_delay_in_us != 0) { + utils::busyWait(std::chrono::microseconds{m_opts.after_iter_delay_in_us}); + } + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + const size_t num_outputs) + : m_exec(std::move(compiled)), m_sources(std::move(sources)), m_opt_mats(num_outputs) { + LOG_DEBUG() << "Run warm-up iteration" << std::endl; + this->run(std::make_shared(1u)); + LOG_DEBUG() << "Warm-up has finished successfully." << std::endl; +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + auto out = m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + PerformanceMetrics metrics(out.elapsed_us, m_per_iter_latency, m_per_iter_seq_ids); + m_per_iter_latency.clear(); + m_per_iter_seq_ids.clear(); + + std::stringstream ss; + ss << metrics; + + // NB: Reset sources since they may have their state changed. 
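+    // NB (added note): resetting a source presumably rewinds its frame counter and
+    // drop-frame timestamps, so the warm-up iteration triggered from the constructor
+    // does not leak into the measurements of the next run() call.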
+ for (auto src : m_sources) { + src->reset(); + } + return Success{ss.str()}; +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + using ts_t = std::chrono::microseconds; + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + pipeline_outputs.emplace_back(cv::gout(m_ts)[0]); + pipeline_outputs.emplace_back(cv::gout(m_seq_id)[0]); + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + const auto curr_ts = utils::timestamp(); + ASSERT(m_ts.has_value()); + ASSERT(m_seq_id.has_value()); + m_per_iter_latency.push_back(curr_ts - *m_ts); + m_per_iter_seq_ids.push_back(*m_seq_id); + return has_data; +} + +} // anonymous namespace + +PerformanceSimulation::PerformanceSimulation(Simulation::Config&& cfg, PerformanceSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {true /* add performance meta */})) { +} + +std::shared_ptr PerformanceSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + if (m_opts.inference_only) { + // TODO: Extend also for ONNXRT backend + compile_args += cv::compile_args(cv::gapi::wip::ov::benchmark_mode{}); + } + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + return std::make_shared(std::move(compiled), std::move(sources), m_comp.getOutMeta().size()); +} + +std::shared_ptr PerformanceSimulation::compileSync(const bool drop_frames) { + auto compile_args = cv::compile_args(getNetworksPackage()); + if (m_opts.inference_only) { + // TODO: Extend also for ONNXRT backend + compile_args += cv::compile_args(cv::gapi::wip::ov::benchmark_mode{}); + } + + const uint32_t max_parallel_branches = m_comp.getMaxParallelBranches(); + if (max_parallel_branches > 1u) { + LOG_INFO() << "Found at most " << max_parallel_branches + << " parallel branches in graph," + " so threaded executor will be used" + << std::endl; + ; + compile_args += cv::compile_args(cv::use_threaded_executor{max_parallel_branches}); + } + + auto sources = createSources(drop_frames); + SyncSimulation::Options options{0u}; + if (m_opts.target_latency.has_value()) { + if (!drop_frames) { + THROW_ERROR("Target latency for the stream is only supported when frames drop is enabled!"); + } + // NB: There is no way to specify more than one source currently so assert if it happened. 
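+    // NB (added note): the extra per-iteration delay is derived as
+    //   after_iter_delay_us = (source_latency_ms - target_latency_ms) * 1000
+    // e.g. with frames_interval_in_us = 33333 (a ~30 FPS source, 33 ms period) and a
+    // target latency of 20 ms, roughly 13 ms of busy-wait is appended after every
+    // iteration to emulate the user's post-processing time (illustrative numbers only).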
+ ASSERT(sources.size() == 1u); + const double target_latency_in_ms = m_opts.target_latency.value(); + const uint64_t source_latency_in_ms = m_cfg.frames_interval_in_us / 1000u; + if (target_latency_in_ms > source_latency_in_ms) { + THROW_ERROR("Target latency must be less or equal than source latency!"); + } + options.after_iter_delay_in_us = static_cast(source_latency_in_ms - target_latency_in_ms) * 1000u; + } + + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + return std::make_shared(std::move(compiled), std::move(sources), m_comp.getOutMeta().size(), + options); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp new file mode 100644 index 00000000000000..16eff684c4e2de --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp @@ -0,0 +1,41 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "simulation/computation.hpp" +#include "simulation/computation_builder.hpp" +#include "simulation/simulation.hpp" + +struct PerformanceStrategy; +class PerformanceSimulation : public Simulation { +public: + struct Options { + IRandomGenerator::Ptr global_initializer; + ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + const bool inference_only; + std::optional target_latency; + }; + explicit PerformanceSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compiler_args) override; + std::shared_ptr compileSync(const bool drop_frames) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; + +struct PerformanceStrategy : public IBuildStrategy { + explicit PerformanceStrategy(const PerformanceSimulation::Options& opts); + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override; + + const PerformanceSimulation::Options& opts; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp new file mode 100644 index 00000000000000..6eb55ee11fcc30 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp @@ -0,0 +1,361 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reference_mode.hpp" + +#include + +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const CalcRefSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()), metas(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const CalcRefSimulation::Options& opts; + // NB: Relevant when input reference data already exists and need to + // generate exactly the same amount of output data. + // Note that this value must be the same for all models within stream. 
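+    // NB (added note): e.g. if the input directory for one model already holds 100
+    // reference iterations, every other model in the same stream is expected to
+    // provide (or generate) exactly 100 as well; ReferenceStrategy::build() below
+    // throws otherwise.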
+ cv::util::optional model_required_iterations; + std::vector providers; + std::vector metas; +}; + +void InputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + // NB: Single path provided - either single file or directory. + const auto input_names = extractLayerNames(infer.input_layers); + const auto& initializers = opts.initializers_map.at(infer.tag); + + std::filesystem::path path{path_str}; + if (std::filesystem::exists(path)) { + // NB: Provided path exists - upload input data from there. + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + // NB: The Number of iterations for every layer is ALWAYS the same. + model_required_iterations = cv::util::make_optional(layers_data.begin()->second.size()); + providers = createConstantProviders(std::move(layers_data), input_names); + } else { + // NB: Provided path doesn't exist - generate data and dump. + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag + << " doesn't exist - input data will be generated and dumped" << std::endl; + std::vector dump_path_vec; + if (isDirectory(path)) { + // NB: When the directory is provided, the number of input iterations to be generated aren't + // bounded so the "random" providers will generate input data on every iteration that will + // be dumped on the disk afterwards. + dump_path_vec = createDirectoryLayout(path, input_names); + } else { + // NB: When the single file is provided, the execution must be limited to perform + // only 1 iteration. + model_required_iterations = cv::util::optional(1ul); + if (infer.input_layers.size() > 1) { + THROW_ERROR("Model: " << infer.tag + << " must have exactly one input layer in order to dump input data to file: " + << path); + } + // NB: In case directories in that path don't exist. + std::filesystem::create_directories(path.parent_path()); + dump_path_vec = {path}; + } + auto default_initialzer = + opts.global_initializer ? 
opts.global_initializer : std::make_shared(0.0, 255.0); + auto layer_initializers = unpackWithDefault(initializers, input_names, default_initialzer); + providers = createRandomProviders(infer.input_layers, std::move(layer_initializers)); + for (uint32_t i = 0; i < infer.input_layers.size(); ++i) { + metas[i].set(Dump{dump_path_vec[i]}); + } + } +} + +struct OutputDataVisitor { + OutputDataVisitor(const InferDesc& _infer, const CalcRefSimulation::Options& _opts) + : infer(_infer), opts(_opts), metas(infer.output_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const CalcRefSimulation::Options& opts; + std::vector metas; +}; + +void OutputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const std::string& path_str) { + std::filesystem::path path{path_str}; + // NB: It doesn't matter if path exist or not - regenerate and dump outputs anyway. + std::vector dump_path_vec; + if (isDirectory(path)) { + dump_path_vec = createDirectoryLayout(path, extractLayerNames(infer.output_layers)); + } else { + if (infer.output_layers.size() > 1) { + THROW_ERROR("Model: " << infer.tag + << " must have exactly one output layer in order to dump output data to file: " + << path); + } + dump_path_vec = {path}; + } + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + const auto& layer = infer.output_layers[i]; + metas[i].set(Dump{dump_path_vec[i]}); + } +} + +} // anonymous namespace + +class ReferenceStrategy : public IBuildStrategy { +public: + explicit ReferenceStrategy(const CalcRefSimulation::Options& opts); + + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override; + + // NB: If specified will force execution to perform exactly require_num_iterations + // regardless what user specified. + // Use case is when N input iterations are provided, + // generate exactly the same amount of output iterations. + // Another use case is when there is only single file provided + // so only one input / output iteration must be generated. + cv::optional required_num_iterations; + const CalcRefSimulation::Options& opts; +}; + +ReferenceStrategy::ReferenceStrategy(const CalcRefSimulation::Options& _opts): opts(_opts) { +} + +IBuildStrategy::InferBuildInfo ReferenceStrategy::build(const InferDesc& infer) { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + // NB: Check if there is required number iterations for current model + // and fail if it's different comparing to other models in stream. 
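+    // NB (added note): once required_num_iterations is fixed here, updateCriterion()
+    // below combines it with whatever termination criterion the user requested, so
+    // e.g. asking for 1000 iterations while only 50 reference inputs exist should still
+    // stop the run after 50 (assuming the Combined/Iterations criteria behave as their
+    // names suggest).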
+ if (in_data_visitor.model_required_iterations) { + const uint64_t required_iters_value = in_data_visitor.model_required_iterations.value(); + LOG_INFO() << "Model: " << infer.tag << " will perform at most " << required_iters_value << " iteration(s)" + << std::endl; + if (!required_num_iterations) { + required_num_iterations = in_data_visitor.model_required_iterations; + } else { + if (required_iters_value != required_num_iterations.value()) { + THROW_ERROR("All models in stream are required to have the same number of iterations!"); + } + } + } + + const auto& output_data = opts.output_data_map.at(infer.tag); + OutputDataVisitor out_data_visitor{infer, opts}; + std::visit(out_data_visitor, output_data); + + return {std::move(in_data_visitor.providers), std::move(in_data_visitor.metas), std::move(out_data_visitor.metas)}; +} + +static void updateCriterion(ITermCriterion::Ptr* criterion, cv::util::optional required_num_iterations) { + if (required_num_iterations.has_value()) { + if (*criterion) { + // NB: Limit user's termination criterion to perfom at most m_required_num_iterations + *criterion = std::make_shared( + *criterion, std::make_shared(required_num_iterations.value())); + } else { + *criterion = std::make_shared(required_num_iterations.value()); + } + } +} + +static void dumpIterOutput(const cv::Mat& mat, const Dump& dump, const size_t iter) { + auto dump_path = dump.path; + if (isDirectory(dump.path)) { + std::stringstream ss; + ss << "iter_" << iter << ".bin"; + dump_path = dump_path / ss.str(); + } + utils::writeToBinFile(dump_path.string(), mat); +}; + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, std::vector&& out_meta, + cv::util::optional required_num_iterations); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector m_out_mats; + size_t m_iter_idx; + cv::optional m_required_num_iterations; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, cv::util::optional required_num_iterations); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector> m_opt_mats; + size_t m_iter_idx; + cv::optional m_required_num_iterations; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, cv::util::optional required_num_iterations) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_out_mats(m_out_meta.size()), + m_iter_idx(0u), + m_required_num_iterations(required_num_iterations) { +} + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + for (auto src : m_sources) { + src->reset(); + } + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + updateCriterion(&criterion, m_required_num_iterations); + m_exec.runLoop(cb, criterion); + std::stringstream ss; + ss << "Reference data has been generated for " << m_iter_idx << " iteration(s)"; + return Success{ss.str()}; +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + auto pipeline_outputs 
= cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. + for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + for (size_t i = 0; i < m_out_mats.size(); ++i) { + if (m_out_meta[i].has()) { + const auto& dump = m_out_meta[i].get(); + dumpIterOutput(m_out_mats[i], dump, m_iter_idx); + } + } + ++m_iter_idx; + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, + cv::util::optional required_num_iterations) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_opt_mats(m_out_meta.size()), + m_iter_idx(0u), + m_required_num_iterations(required_num_iterations) { +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + updateCriterion(&criterion, m_required_num_iterations); + m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + std::stringstream ss; + ss << "Reference data has been generated for " << m_iter_idx << " iteration(s)"; + return Success{ss.str()}; +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + for (size_t i = 0; i < m_out_meta.size(); ++i) { + if (m_out_meta[i].has()) { + const auto& dump = m_out_meta[i].get(); + ASSERT(m_opt_mats[i].has_value()); + dumpIterOutput(m_opt_mats[i].value(), dump, m_iter_idx); + } + } + ++m_iter_idx; + return has_data; +} + +} // anonymous namespace + +CalcRefSimulation::CalcRefSimulation(Simulation::Config&& cfg, CalcRefSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {false /* add performance meta */})) { +} + +std::shared_ptr CalcRefSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta), + m_strategy->required_num_iterations); +} + +std::shared_ptr CalcRefSimulation::compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta), + m_strategy->required_num_iterations); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp new file mode 100644 index 00000000000000..22d2fd92cce2c6 --- /dev/null +++ 
b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp @@ -0,0 +1,35 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "simulation/computation.hpp" +#include "simulation/simulation.hpp" + +class ReferenceStrategy; +class CalcRefSimulation : public Simulation { +public: + struct Options { + // FIXME: In fact, there should be only input data initializers + // and the path where to dump outputs + IRandomGenerator::Ptr global_initializer; + ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + }; + + explicit CalcRefSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) override; + std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compiler_args) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp new file mode 100644 index 00000000000000..52f57c2881a3b6 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp @@ -0,0 +1,131 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/simulation.hpp" + +#include "scenario/inference.hpp" +#include "utils/error.hpp" + +#include // onnx::Params +#include // ov::Params + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const OpenVINOParams& params) { + using P = cv::gapi::ov::Params; + std::unique_ptr
<P>
network; + if (std::holds_alternative(params.path)) { + const auto& model_path = std::get(params.path); + network = std::make_unique
<P>
(tag, model_path.model, model_path.bin, params.device); + } else { + GAPI_Assert(std::holds_alternative(params.path)); + const auto& blob_path = std::get(params.path); + network = std::make_unique
<P>
(tag, blob_path.blob, params.device); + } + + network->cfgPluginConfig(params.config); + network->cfgNumRequests(params.nireq); + + // NB: Pre/Post processing can be configured only for Model case. + if (std::holds_alternative(params.path)) { + if (std::holds_alternative(params.output_precision)) { + network->cfgOutputTensorPrecision(std::get(params.output_precision)); + } else if (std::holds_alternative>(params.output_precision)) { + network->cfgOutputTensorPrecision(std::get>(params.output_precision)); + } + + if (std::holds_alternative(params.input_layout)) { + network->cfgInputTensorLayout(std::get(params.input_layout)); + } else if (std::holds_alternative>(params.input_layout)) { + network->cfgInputTensorLayout(std::get>(params.input_layout)); + } + + if (std::holds_alternative(params.output_layout)) { + network->cfgOutputTensorLayout(std::get(params.output_layout)); + } else if (std::holds_alternative>(params.output_layout)) { + network->cfgOutputTensorLayout(std::get>(params.output_layout)); + } + + if (std::holds_alternative(params.input_model_layout)) { + network->cfgInputModelLayout(std::get(params.input_model_layout)); + } else if (std::holds_alternative>(params.input_model_layout)) { + network->cfgInputModelLayout(std::get>(params.input_model_layout)); + } + + if (std::holds_alternative(params.output_model_layout)) { + network->cfgOutputModelLayout(std::get(params.output_model_layout)); + } else if (std::holds_alternative>(params.output_model_layout)) { + network->cfgOutputModelLayout(std::get>(params.output_model_layout)); + } + } + return cv::gapi::networks(*network); +} + +static void cfgExecutionProvider(cv::gapi::onnx::Params& network, + const ONNXRTParams::OpenVINO& ovep) { + network.cfgAddExecutionProvider(cv::gapi::onnx::ep::OpenVINO{ovep.params_map}); +} + +static void cfgExecutionProvider(cv::gapi::onnx::Params& network, const ONNXRTParams::EP& ep) { + // NB: Nothing to configure for default MLAS EP + if (std::holds_alternative(ep)) { + return; + } + // TODO: Extend for any other available execution provider + ASSERT(std::holds_alternative(ep)); + cfgExecutionProvider(network, std::get(ep)); +} + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const ONNXRTParams& params) { + cv::gapi::onnx::Params network{tag, params.model_path}; + network.cfgSessionOptions(params.session_options); + cfgExecutionProvider(network, params.ep); + return cv::gapi::networks(network); +} + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const InferenceParams& params) { + if (std::holds_alternative(params)) { + return getNetPackage(tag, std::get(params)); + } + ASSERT(std::holds_alternative(params)); + return getNetPackage(tag, std::get(params)); +} + +cv::gapi::GNetPackage Simulation::getNetworksPackage() const { + cv::gapi::GNetPackage networks; + for (const auto& [tag, params] : m_cfg.params) { + networks += getNetPackage(tag, params); + } + return networks; +} + +Simulation::Simulation(Config&& cfg): m_cfg(std::move(cfg)){}; + +std::vector Simulation::createSources(const bool drop_frames) { + auto src = std::make_shared(m_cfg.frames_interval_in_us, drop_frames, + m_cfg.disable_high_resolution_timer); + return {src}; +}; + +std::shared_ptr Simulation::compilePipelined(const bool drop_frames) { + if (drop_frames) { + THROW_ERROR("Pipelined simulation doesn't support frames drop!"); + } + // NB: Hardcoded for pipelining mode as the best option + auto compile_args = cv::compile_args(getNetworksPackage()); + compile_args += 
cv::compile_args(cv::gapi::streaming::queue_capacity{1u}); + return compilePipelined(createSources(drop_frames), std::move(compile_args)); +} + +std::shared_ptr Simulation::compileSync(const bool drop_frames) { + auto compile_args = cv::compile_args(getNetworksPackage()); + return compileSync(createSources(drop_frames), std::move(compile_args)); +} + +std::shared_ptr Simulation::compilePipelined(DummySources&&, cv::GCompileArgs&&) { + THROW_ERROR("Not implemented!"); +}; + +std::shared_ptr Simulation::compileSync(DummySources&&, cv::GCompileArgs&&) { + THROW_ERROR("Not implemented!"); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp new file mode 100644 index 00000000000000..b60eaf6b5a3148 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "result.hpp" +#include "scenario/criterion.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/dummy_source.hpp" + +#include // cv::gapi::GNetPackage + +struct ICompiled { + using Ptr = std::shared_ptr; + virtual Result run(ITermCriterion::Ptr) = 0; +}; + +struct PipelinedCompiled : public ICompiled {}; +struct SyncCompiled : public ICompiled {}; + +using DummySources = std::vector; + +class Simulation { +public: + using Ptr = std::shared_ptr; + + struct Config { + std::string stream_name; + uint64_t frames_interval_in_us; + bool disable_high_resolution_timer; + ScenarioGraph graph; + InferenceParamsMap params; + }; + + explicit Simulation(Config&& cfg); + + virtual std::shared_ptr compilePipelined(const bool drop_frames); + virtual std::shared_ptr compileSync(const bool drop_frames); + + virtual ~Simulation() = default; + +protected: + virtual std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args); + virtual std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args); + + std::vector createSources(const bool drop_frames); + cv::gapi::GNetPackage getNetworksPackage() const; + +protected: + Config m_cfg; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp new file mode 100644 index 00000000000000..c6544522287048 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp @@ -0,0 +1,363 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/validation_mode.hpp" + +#include "scenario/accuracy_metrics.hpp" +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "simulation/validation_mode.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs + +class LayerValidator { +public: + LayerValidator(const std::string& tag, const std::string& layer_name, IAccuracyMetric::Ptr metric); + Result operator()(const cv::Mat& lhs, const cv::Mat& rhs); + +private: + std::string m_tag; + std::string m_layer_name; + IAccuracyMetric::Ptr m_metric; +}; + +LayerValidator::LayerValidator(const std::string& tag, const std::string& layer_name, IAccuracyMetric::Ptr metric) + : m_tag(tag), m_layer_name(layer_name), m_metric(metric) { +} + +Result 
LayerValidator::operator()(const cv::Mat& lhs, const cv::Mat& rhs) { + auto result = m_metric->compare(lhs, rhs); + if (!result) { + std::stringstream ss; + ss << "Model: " << m_tag << ", Layer: " << m_layer_name << ", Metric: " << m_metric->str() + << ", Reason: " << result.str() << ";"; + return Error{ss.str()}; + } + return Success{"Passed"}; +} + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const ValSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()), metas(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const ValSimulation::Options& opts; + std::vector providers; + std::vector metas; +}; + +void InputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Validation mode requires input data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Validation mode requires input data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + std::filesystem::path path{path_str}; + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + providers = createConstantProviders(std::move(layers_data), extractLayerNames(infer.input_layers)); +}; + +struct OutputDataVisitor { + OutputDataVisitor(const InferDesc& _infer, const ValSimulation::Options& _opts) + : infer(_infer), opts(_opts), metas(infer.output_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const ValSimulation::Options& opts; + std::vector metas; +}; + +void OutputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Validation mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Validation mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const std::string& path_str) { + auto default_metric = opts.global_metric ? 
opts.global_metric : std::make_shared(0.0); + auto per_layer_metrics = + unpackWithDefault(opts.metrics_map.at(infer.tag), extractLayerNames(infer.output_layers), default_metric); + std::filesystem::path path{path_str}; + LOG_INFO() << "Reference output data path: " << path << " for model: " << infer.tag + << " exists - data will be uploaded" << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.output_layers, LayersType::OUTPUT); + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + const auto& layer = infer.output_layers[i]; + LayerValidator validator{infer.tag, layer.name, per_layer_metrics.at(layer.name)}; + metas[i].set(Validate{std::move(validator), layers_data.at(layer.name)}); + } +} + +} // anonymous namespace + +class ValidationStrategy : public IBuildStrategy { +public: + explicit ValidationStrategy(const ValSimulation::Options& _opts): opts(_opts) { + } + + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + + const auto& output_data = opts.output_data_map.at(infer.tag); + OutputDataVisitor out_data_visitor{infer, opts}; + std::visit(out_data_visitor, output_data); + + if (opts.per_iter_outputs_path.has_value()) { + auto model_dir = opts.per_iter_outputs_path.value() / infer.tag; + // NB: Remove the data from the previous run if such exist + LOG_INFO() << "Actual output data for model: " << infer.tag + << " will be dumped and replaced at path: " << model_dir << std::endl; + std::filesystem::remove_all(model_dir); + auto dump_path_vec = createDirectoryLayout(model_dir, extractLayerNames(infer.output_layers)); + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + out_data_visitor.metas[i].set(Dump{dump_path_vec[i]}); + } + } + + // NB: No special input meta for this mode. + std::vector input_meta(infer.input_layers.size(), Meta{}); + return {std::move(in_data_visitor.providers), std::move(input_meta), std::move(out_data_visitor.metas)}; + } + + const ValSimulation::Options& opts; +}; + +struct FailedIter { + size_t iter_idx; + std::vector reasons; +}; + +static Result reportValidationResult(const std::vector& failed_iters, const size_t total_iters) { + std::stringstream ss; + if (!failed_iters.empty()) { + const auto kItersToShow = 10u; + const auto kLimit = failed_iters.size() < kItersToShow ? 
failed_iters.size() : kItersToShow; + ss << "Accuraccy check failed on " << failed_iters.size() << " iteration(s)" + << " (first " << kLimit << "):"; + ss << "\n"; + for (uint32_t i = 0; i < kLimit; ++i) { + ss << "Iteration " << failed_iters[i].iter_idx << ":\n"; + for (const auto& reason : failed_iters[i].reasons) { + ss << " " << reason << "\n"; + } + } + return Error{ss.str()}; + } + ss << "Validation has passed for " << total_iters << " iteration(s)"; + return Success{ss.str()}; +} + +static std::vector validateOutputs(const std::vector& out_mats, const std::vector& out_meta, + const size_t iter_idx) { + std::vector failed_list; + for (size_t i = 0; i < out_mats.size(); ++i) { + if (out_meta[i].has()) { + const auto& val = out_meta[i].get(); + const auto& refvec = val.reference; + ASSERT(!refvec.empty()); + const auto& refmat = refvec[iter_idx % refvec.size()]; + auto result = val.validator(refmat, out_mats[i]); + if (!result) { + failed_list.push_back(std::move(result.str())); + } + } + } + return failed_list; +} + +static void dumpOutputs(const std::vector& out_mats, const std::vector& out_meta, + const size_t iter_idx) { + for (size_t i = 0; i < out_mats.size(); ++i) { + if (out_meta[i].has()) { + std::stringstream ss; + ss << "iter_" << iter_idx << ".bin"; + auto dump_path = out_meta[i].get().path / ss.str(); + utils::writeToBinFile(dump_path.string(), out_mats[i]); + } + } +} + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, std::vector&& out_meta); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector m_out_mats; + size_t m_iter_idx; + std::vector m_failed_iters; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector> m_opt_mats; + size_t m_iter_idx; + std::vector m_failed_iters; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_out_mats(m_out_meta.size()), + m_iter_idx(0u) { +} + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + for (auto src : m_sources) { + src->reset(); + } + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + m_exec.runLoop(cb, criterion); + return reportValidationResult(m_failed_iters, m_iter_idx); +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + auto pipeline_outputs = cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. 
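+    // NB (added note): cv::gout() captures the address of the passed cv::Mat, which is
+    // presumably why the loop below iterates by reference; binding a copy would make
+    // G-API write the results into a temporary and leave m_out_mats empty for the
+    // validation step that follows.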
+ for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + + dumpOutputs(m_out_mats, m_out_meta, m_iter_idx); + auto failed_list = validateOutputs(m_out_mats, m_out_meta, m_iter_idx); + if (!failed_list.empty()) { + m_failed_iters.push_back(FailedIter{m_iter_idx, std::move(failed_list)}); + } + ++m_iter_idx; + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_opt_mats(m_out_meta.size()), + m_iter_idx(0u) { +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + return reportValidationResult(m_failed_iters, m_iter_idx); +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + std::vector out_mats; + out_mats.reserve(m_opt_mats.size()); + for (auto opt_mat : m_opt_mats) { + ASSERT(opt_mat.has_value()); + out_mats.push_back(opt_mat.value()); + } + + dumpOutputs(out_mats, m_out_meta, m_iter_idx); + auto failed_list = validateOutputs(out_mats, m_out_meta, m_iter_idx); + if (!failed_list.empty()) { + m_failed_iters.push_back(FailedIter{m_iter_idx, std::move(failed_list)}); + } + ++m_iter_idx; + return has_data; +} + +} // anonymous namespace + +ValSimulation::ValSimulation(Simulation::Config&& cfg, ValSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {false /* add performance meta */})) { +} + +std::shared_ptr ValSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta)); +} + +std::shared_ptr ValSimulation::compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args) { + const uint32_t max_parallel_branches = m_comp.getMaxParallelBranches(); + if (max_parallel_branches > 1u) { + LOG_INFO() << "Found at most " << max_parallel_branches + << " parallel branches in graph," + " so threaded executor will be used" + << std::endl; + ; + compile_args += cv::compile_args(cv::use_threaded_executor{max_parallel_branches}); + } + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta)); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp 
b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp new file mode 100644 index 00000000000000..180c802803a68c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "simulation/computation.hpp" +#include "simulation/simulation.hpp" + +class ValidationStrategy; +class ValSimulation : public Simulation { +public: + struct Options { + IAccuracyMetric::Ptr global_metric; + ModelsAttrMap metrics_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + std::optional per_iter_outputs_path; + }; + explicit ValSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) override; + std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compiler_args) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp new file mode 100644 index 00000000000000..f3eaf7756e1793 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp @@ -0,0 +1,64 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "data_providers.hpp" + +#include + +#include "utils.hpp" +#include "utils/error.hpp" + +UniformGenerator::UniformGenerator(double low, double high): m_low(low), m_high(high) { + ASSERT(low <= high); +} + +void UniformGenerator::generate(cv::Mat& mat) { + cv::randu(mat, m_low, m_high); +} + +std::string UniformGenerator::str() const { + std::stringstream ss; + ss << "{dist: uniform, range: [" << m_low << ", " << m_high << "]}"; + return ss.str(); +} + +RandomProvider::RandomProvider(IRandomGenerator::Ptr impl, const std::vector& dims, const int depth) + : m_impl(impl), m_dims(dims), m_depth(depth) { +} + +void RandomProvider::pull(cv::Mat& mat) { + utils::createNDMat(mat, m_dims, m_depth); + m_impl->generate(mat); +} + +cv::GMatDesc RandomProvider::desc() { + if (m_dims.size() == 2u) { + return cv::GMatDesc{m_depth, 1, cv::Size(m_dims[1], m_dims[0])}; + } + return cv::GMatDesc{m_depth, m_dims}; +} + +CircleBuffer::CircleBuffer(const std::vector& buffer): m_buffer(buffer), m_pos(0u) { + ASSERT(!m_buffer.empty()); +} + +CircleBuffer::CircleBuffer(std::vector&& buffer): m_buffer(std::move(buffer)), m_pos(0u) { + ASSERT(!m_buffer.empty()); +} + +CircleBuffer::CircleBuffer(cv::Mat mat): CircleBuffer(std::vector{mat}) { +} + +void CircleBuffer::pull(cv::Mat& mat) { + m_buffer[m_pos++].copyTo(mat); + if (m_pos == m_buffer.size()) { + m_pos = 0; + } +} + +cv::GMatDesc CircleBuffer::desc() { + return cv::descr_of(m_buffer[0]); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp new file mode 100644 index 00000000000000..2bd45b7f19cc25 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp @@ -0,0 +1,70 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +struct IDataProvider { + using Ptr = std::shared_ptr; + virtual void pull(cv::Mat& mat) = 0; + virtual cv::GMatDesc desc() = 0; + virtual void reset() 
diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp
new file mode 100644
index 00000000000000..2bd45b7f19cc25
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp
@@ -0,0 +1,70 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+
+#include
+
+struct IDataProvider {
+    using Ptr = std::shared_ptr<IDataProvider>;
+    virtual void pull(cv::Mat& mat) = 0;
+    virtual cv::GMatDesc desc() = 0;
+    virtual void reset() = 0;
+    virtual ~IDataProvider() = default;
+};
+
+class IRandomGenerator {
+public:
+    using Ptr = std::shared_ptr<IRandomGenerator>;
+    virtual void generate(cv::Mat& mat) = 0;
+    virtual ~IRandomGenerator() = default;
+    virtual std::string str() const = 0;
+};
+
+class UniformGenerator : public IRandomGenerator {
+public:
+    using Ptr = std::shared_ptr<UniformGenerator>;
+    UniformGenerator(double low, double high);
+    void generate(cv::Mat& mat) override;
+    virtual std::string str() const override;
+
+private:
+    double m_low, m_high;
+};
+
+class RandomProvider : public IDataProvider {
+public:
+    RandomProvider(IRandomGenerator::Ptr impl, const std::vector<int>& dims, const int depth);
+
+    void pull(cv::Mat& mat) override;
+    cv::GMatDesc desc() override;
+    void reset() override { /* do nothing */
+    }
+
+private:
+    IRandomGenerator::Ptr m_impl;
+    std::vector<int> m_dims;
+    int m_depth;
+};
+
+class CircleBuffer : public IDataProvider {
+public:
+    CircleBuffer(const std::vector<cv::Mat>& buffer);
+    CircleBuffer(std::vector<cv::Mat>&& buffer);
+    CircleBuffer(cv::Mat mat);
+
+    void pull(cv::Mat& mat) override;
+    cv::GMatDesc desc() override;
+    void reset() override {
+        m_pos = 0;
+    }
+
+private:
+    std::vector<cv::Mat> m_buffer;
+    uint64_t m_pos;
+};
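Illustrative sketch (not part of the patch): one way the providers declared above might be driven. The shape, depth, and include path are assumptions made only for this example.

```cpp
#include <memory>
#include <vector>
#include <opencv2/core.hpp>
#include "data_providers.hpp"  // assumed include path

int main() {
    // 224x224 single-channel U8 tensor filled from a uniform distribution.
    auto generator = std::make_shared<UniformGenerator>(0.0, 255.0);
    IDataProvider::Ptr random =
            std::make_shared<RandomProvider>(generator, std::vector<int>{224, 224}, CV_8U);

    cv::Mat frame;
    random->pull(frame);  // creates the ND mat and fills it with random data

    // Replays a fixed set of mats in a loop.
    IDataProvider::Ptr circle = std::make_shared<CircleBuffer>(frame);
    circle->pull(frame);
    circle->reset();
    return 0;
}
```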
" << str << std::flush; + throw std::logic_error(ss.str()); +} + +} // namespace details + +#define ASSERT(expr) \ + { \ + if (!(expr)) \ + ::details::assert_abort(#expr, __LINE__, __FILE__, __func__); \ + } + +#define THROW_ERROR(msg) \ + { \ + std::ostringstream os; \ + os << msg; \ + ::details::throw_error(os.str().c_str()); \ + } diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp new file mode 100644 index 00000000000000..ccba64e701975c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp @@ -0,0 +1,32 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils/logger.hpp" + +#include + +LogLevel Logger::global_lvl = LogLevel::None; + +Logger::Logger(LogLevel lvl): m_lvl(lvl) { +} + +std::stringstream& Logger::stream() { + return m_ss; +} + +Logger::~Logger() { + if (m_lvl <= Logger::global_lvl) { + switch (m_lvl) { + case LogLevel::Info: + std::cout << "[ INFO ] " << m_ss.str(); + break; + case LogLevel::Debug: + std::cout << "[ DEBUG ] " << m_ss.str(); + break; + default: + /* do nothing */; + } + } +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp new file mode 100644 index 00000000000000..e8b1f5df7f8fa3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp @@ -0,0 +1,29 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +enum class LogLevel { + None = 0, + Info = 1, + Debug = 2, +}; + +class Logger { +public: + static LogLevel global_lvl; + explicit Logger(LogLevel lvl); + std::stringstream& stream(); + ~Logger(); + +private: + LogLevel m_lvl; + std::stringstream m_ss; +}; + +#define LOG_INFO() Logger{LogLevel::Info}.stream() +#define LOG_DEBUG() Logger{LogLevel::Debug}.stream() diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp new file mode 100644 index 00000000000000..a1fc0f4c2643c4 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp @@ -0,0 +1,73 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "timer.hpp" +#include "utils.hpp" + +#include + +#if defined(_WIN32) +#include + +class WinTimer : public SleepTimer { +public: + WinTimer(bool disable_high_resolution_timer); + void wait(std::chrono::microseconds time) override; + ~WinTimer(); + +private: + HANDLE m_handle = nullptr; +}; + +WinTimer::WinTimer(bool disable_high_resolution_timer) { + // FIXME: It should be called once. + timeBeginPeriod(1); + m_handle = CreateWaitableTimerEx( + NULL, NULL, disable_high_resolution_timer ? 
diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp
new file mode 100644
index 00000000000000..a1fc0f4c2643c4
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp
@@ -0,0 +1,73 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "timer.hpp"
+#include "utils.hpp"
+
+#include
+
+#if defined(_WIN32)
+#include
+
+class WinTimer : public SleepTimer {
+public:
+    WinTimer(bool disable_high_resolution_timer);
+    void wait(std::chrono::microseconds time) override;
+    ~WinTimer();
+
+private:
+    HANDLE m_handle = nullptr;
+};
+
+WinTimer::WinTimer(bool disable_high_resolution_timer) {
+    // FIXME: It should be called once.
+    timeBeginPeriod(1);
+    m_handle = CreateWaitableTimerEx(
+            NULL, NULL, disable_high_resolution_timer ? 0 : CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS);
+}
+
+void WinTimer::wait(std::chrono::microseconds time) {
+    LARGE_INTEGER li;
+    using ns_t = std::chrono::nanoseconds;
+    using ns_100_t = std::chrono::duration<ns_t::rep, std::ratio_multiply<std::ratio<100>, ns_t::period>>;
+
+    li.QuadPart = -std::chrono::duration_cast<ns_100_t>(time).count();
+    if (!SetWaitableTimer(m_handle, &li, 0, NULL, NULL, false)) {
+        CloseHandle(m_handle);
+        throw std::logic_error("WinTimer failed to setup");
+    }
+
+    if (WaitForSingleObject(m_handle, INFINITE) != WAIT_OBJECT_0) {
+        CloseHandle(m_handle);
+        throw std::logic_error("WinTimer failed to sleep");
+    }
+}
+
+WinTimer::~WinTimer() {
+    CancelWaitableTimer(m_handle);
+    CloseHandle(m_handle);
+}
+
+#endif  // defined(_WIN32)
+
+class ChronoTimer : public SleepTimer {
+    void wait(std::chrono::microseconds time) override;
+};
+
+void ChronoTimer::wait(std::chrono::microseconds time) {
+    std::this_thread::sleep_for(time);
+}
+
+SleepTimer::Ptr SleepTimer::create(bool disable_high_resolution_timer) {
+#if defined(_WIN32)
+    return std::make_shared<WinTimer>(disable_high_resolution_timer);
+#else
+    return std::make_shared<ChronoTimer>();
+#endif
+}
+
+void BusyTimer::wait(std::chrono::microseconds time) {
+    utils::busyWait(time);
+}
diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp
new file mode 100644
index 00000000000000..423966ad2300a9
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+
+struct IWaitable {
+    using Ptr = std::shared_ptr<IWaitable>;
+    virtual void wait(std::chrono::microseconds time) = 0;
+    virtual ~IWaitable() = default;
+};
+
+struct SleepTimer : public IWaitable {
+    using Ptr = std::shared_ptr<SleepTimer>;
+    static Ptr create(bool disable_high_resolution_timer = false);
+};
+
+struct BusyTimer : public IWaitable {
+    void wait(std::chrono::microseconds time) override;
+};
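Illustrative sketch (not part of the patch): pacing work with the waitable timers declared above. The chosen delays and the include path are arbitrary assumptions for the example.

```cpp
#include <chrono>
#include "timer.hpp"  // assumed include path

int main() {
    using namespace std::chrono_literals;

    // OS-level sleep: a waitable timer on Windows, std::this_thread::sleep_for elsewhere.
    IWaitable::Ptr sleeper = SleepTimer::create(/*disable_high_resolution_timer=*/false);
    sleeper->wait(2ms);

    // Spin-wait for latency-sensitive pacing, at the cost of a busy CPU core.
    BusyTimer busy;
    busy.wait(500us);
    return 0;
}
```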
throw std::logic_error("Failed to open: " + filepath); + } + + const auto file_byte_size = ifs.tellg(); + ifs.seekg(0, std::ios::beg); + + const auto mat_byte_size = mat.total() * mat.elemSize(); + if (file_byte_size != mat_byte_size) { + throw std::logic_error("Failed to read cv::Mat from binary file: " + filepath + ". Mat size: " + + std::to_string(mat_byte_size) + ", File size: " + std::to_string(file_byte_size)); + } + + ifs.read(mat.ptr(), mat_byte_size); +} + +void writeToBinFile(const std::string& filepath, const cv::Mat& mat) { + std::ofstream fout(filepath, std::ios::out | std::ios::binary); + if (!fout.is_open()) { + throw std::logic_error("Failed to open/create: " + filepath); + } + fout.write(mat.ptr(), mat.total() * mat.elemSize()); +} + +} // namespace utils diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp new file mode 100644 index 00000000000000..a2ee4bdcf742d5 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp @@ -0,0 +1,65 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace utils { + +void createNDMat(cv::Mat& mat, const std::vector& dims, int depth); +void generateRandom(cv::Mat& out); +cv::Mat createRandom(const std::vector& dims, int depth); + +template +typename duration_t::rep measure(std::function f) { + using namespace std::chrono; + auto start = high_resolution_clock::now(); + f(); + return duration_cast(high_resolution_clock::now() - start).count(); +} + +template +typename duration_t::rep timestamp() { + using namespace std::chrono; + auto now = high_resolution_clock::now(); + return duration_cast(now.time_since_epoch()).count(); +} + +inline void busyWait(std::chrono::microseconds delay) { + auto start_ts = timestamp(); + auto end_ts = start_ts; + auto time_to_wait = delay.count(); + + while (end_ts - start_ts < time_to_wait) { + end_ts = timestamp(); + } +} + +template +double avg(const std::vector& vec) { + return std::accumulate(vec.begin(), vec.end(), 0.0) / vec.size(); +} + +template +T max(const std::vector& vec) { + return *std::max_element(vec.begin(), vec.end()); +} + +template +T min(const std::vector& vec) { + return *std::min_element(vec.begin(), vec.end()); +} + +void readFromBinFile(const std::string& filepath, cv::Mat& mat); +void writeToBinFile(const std::string& filepath, const cv::Mat& mat); + +} // namespace utils diff --git a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt index 09ed0db315785c..e6c24566777d4b 100644 --- a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt +++ b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt @@ -26,7 +26,7 @@ foreach(LIB opencv_core opencv_imgproc opencv_imgcodecs) endforeach() if(NOT MISSING_DEPENDENCIES STREQUAL "") - message(WARNING "${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}") + message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}") return() endif() diff --git a/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit b/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit new file mode 100644 index 00000000000000..b796dd2bf13b5a --- /dev/null +++ 
diff --git a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt
index 09ed0db315785c..e6c24566777d4b 100644
--- a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt
+++ b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt
@@ -26,7 +26,7 @@ foreach(LIB opencv_core opencv_imgproc opencv_imgcodecs)
 endforeach()
 
 if(NOT MISSING_DEPENDENCIES STREQUAL "")
-    message(WARNING "${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}")
+    message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}")
     return()
 endif()
 
diff --git a/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit b/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit
new file mode 100644
index 00000000000000..b796dd2bf13b5a
--- /dev/null
+++ b/tests/model_hub_tests/transformation_tests/models/gptq-torchfx-models-precommit
@@ -0,0 +1 @@
+atorsvn/TinyLlama-1.1B-Chat-v0.3-gptq-4bit,https://huggingface.co/atorsvn/TinyLlama-1.1B-Chat-v0.3-gptq-4bit
diff --git a/tests/model_hub_tests/transformation_tests/test_gptq_torchfx_transformations.py b/tests/model_hub_tests/transformation_tests/test_gptq_torchfx_transformations.py
new file mode 100644
index 00000000000000..dc57c02285e448
--- /dev/null
+++ b/tests/model_hub_tests/transformation_tests/test_gptq_torchfx_transformations.py
@@ -0,0 +1,102 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+import hashlib
+from openvino.frontend.pytorch.torchdynamo.execute import compiled_cache
+import models_hub_common.utils as utils
+import pytest
+import os
+
+def patch_gptq(config):
+    do_gptq_patching = False
+    config_dict = config.to_dict()
+    quantization_config = config_dict.get("quantization_config", None)
+    do_gptq_patching = quantization_config and quantization_config["quant_method"] == "gptq"
+    orig_cuda_check = torch.cuda.is_available
+    orig_post_init_model = None
+    if do_gptq_patching:
+        torch.set_default_dtype(torch.float32)
+        torch.cuda.is_available = lambda: False
+
+        from optimum.gptq import GPTQQuantizer
+
+        orig_post_init_model = GPTQQuantizer.post_init_model
+
+        def post_init_model(self, model):
+            from auto_gptq import exllama_set_max_input_length
+
+            class StoreAttr(object):
+                pass
+
+            model.quantize_config = StoreAttr()
+            model.quantize_config.desc_act = self.desc_act
+            if self.desc_act and not self.disable_exllama and self.max_input_length is not None:
+                model = exllama_set_max_input_length(model, self.max_input_length)
+            return model
+
+        GPTQQuantizer.post_init_model = post_init_model
+    return orig_cuda_check, orig_post_init_model
+
+def run_gptq_torchfx(tmp_path, model_id, model_link, prompt_result_pair):
+    config = AutoConfig.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float32)
+    cuda, post_init = patch_gptq(config)
+    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch.float32)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        trust_remote_code=True,
+        config=config,
+        device_map='cpu',
+        torch_dtype=torch.float32
+    )
+
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=4,
+        do_sample=True,
+        temperature=0.01,
+        top_p=0.01,
+        top_k=1,
+        repetition_penalty=1.1,
+        num_beams=1,
+    )
+
+    prompt = prompt_result_pair["prompt"]
+    expected_md5 = prompt_result_pair["result_md5"]
+
+    model.model.forward = torch.compile(model.model.forward, backend="openvino", dynamic=True, fullgraph=True, options={'aot_autograd': True})
+
+    result_ov = pipe(prompt)
+    md5_ov = hashlib.new("md5", result_ov[0]['generated_text'].encode(), usedforsecurity=False).hexdigest()
+
+    u4_ops = ["FullyConnected",]
+    num_u4_ops = 0
+    num_u4_ops_supported = 0
+    for pid in compiled_cache:
+        for op in compiled_cache[pid].get_runtime_model().get_ordered_ops():
+            if (str(op.get_rt_info()["layerType"].get()) in u4_ops):
+                u4_exec = (str(op.get_rt_info()["runtimePrecision"].get()) == "u4")
+                if u4_exec:
+                    num_u4_ops_supported += 1
+                num_u4_ops += 1
+
+    assert(expected_md5 == md5_ov), "Output does not match with the expected output"
+    assert((num_u4_ops > 0) and (num_u4_ops == num_u4_ops_supported)), "Runtime precision is not u4"
+
+@pytest.mark.precommit
+@pytest.mark.parametrize("model_name, model_link, mark, reason", utils.get_models_list(os.path.join(os.path.dirname(__file__), "models", "gptq-torchfx-models-precommit")))
+@pytest.mark.parametrize('prompt_result_pair', ([
+    {"prompt" : "Tell me about AI", "result_md5" : "4385ccbce14627ae91f846b4c8a3f145"},
+]))
+def test_gptq_torchfx_precommit(tmp_path, model_name, model_link, mark, reason, prompt_result_pair, ie_device):
+    assert mark is None or mark == 'skip' or mark == 'xfail', \
+        "Incorrect test case: {}, {}".format(model_name, model_link)
+    if mark == 'skip':
+        pytest.skip(reason)
+    elif mark == 'xfail':
+        pytest.xfail(reason)
+    run_gptq_torchfx(tmp_path, model_name, model_link, prompt_result_pair)
+
diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch
index 0bda286eb83252..40e1f6f66f52e8 100644
--- a/tests/requirements_pytorch
+++ b/tests/requirements_pytorch
@@ -38,6 +38,9 @@ torchaudio==2.2.2; platform_system == "Darwin" and platform_machine == "x86_64"
 wheel==0.44.0
 PyYAML==6.0.2
 kornia==0.7.3
+super-image==0.1.7
+# huggingface-hub required for super-image
+huggingface-hub==0.25.2
 # use latest released version once it's available
 git+https://github.com/huggingface/optimum-intel.git@main; python_version < "3.12"