
Commit 36c3192

Authored by EikanWang, ashahba, chunyuan-w, leslie-fang-intel, JianpingChen066

Merge external github code (#249)
* Add AVX512 macro in CMake to enable AVX512
* Cannot use the input dil tensor to check is_public_format because it is out of scope
* Fix build issue of PR #20
* Increase precision tolerance for UT
* Update for new 'oneDNN' GitHub URL (#146)
* Update default IPEX version to 1.2.0
* Fall back to CPU for LSTM training with dropout
* Parse the PyTorch 1.8 RegistrationDeclarations.h to generate dense and sparse operator code
* git commit -m
* 1. Replace TensorList with c10::List; 2. replace tensor size and stride with SizesAndStrides. TODO: work around the RegXXX.h function signatures that conflict with NativeFunctions.h
* Remove autocast from master
* Pass build for PyTorch 1.8. TODO: add comments for gen-dense-cpu-ops.py; there might be potential issues with grad copying
* Enhance embedding bag last-offset memory copy by using the parallelized move_ker
* Add UT for int8 LSTM
* Add asymmetric quantization
* Enable int8 for LSTM
* Port utils for UT from PyTorch 1.8
* Fix the issue that a tensor list wrapped by c10::List cannot be fallen back
* Enable upsample_bilinear2d to support vector scale factors
* Update README to clarify the IPEX and PyTorch versions; update the IPEX version in setup.py to 1.2.0
* Enable bf16 layernorm
* Enable native layer norm signature matching
* Pass all test cases of the committed test file except layer_norm, because IPEX cannot capture layer_norm
* Capture layernorm on the Python side
* Replace ATen/Tensor.h with ATen/ATen.h to avoid undefined symbols (conflicts: torch_ipex/csrc/utils.h)
* Generate sparse operators
* Reorder to public for slice in case an exception is thrown
* 1. Support NHWC; 2. remove recorder tensors to reduce PyTorch profiler overhead
* 1. Dependency installation; 2. torch wheel file query and packaging; 3. git is no longer required when compiling
* Added tutorial Performance Tuning.md in the tutorials directory
* Update README.md (x10)
* Update test_torch.py and align with common_utils.py
* Bug fix in Dockerfile (#164)
* Update Dockerfile to include pybind11-dev (#157): as a fix for issue #155; as suggested by @jingxu10, adding pybind11-dev allows a successful build of the Docker container
* Fix PyTorch 1.8 UT
* Installation for IPEX 1.8: remove the recompilation of PyTorch and add the installation of the dependency package; add the supported customized ops and fusion patterns
* tmp commit
* Pass most UT
* Modified _C.cpython.xxxx.so's rpath
* Fix unexpected keyword argument 'prec' in test_torch.py
* Keep intel_pytorch_extension to ensure backward compatibility
* Fix test_int8.py regression
* Update the version to 1.8.0
* Fix runtime undefined-reference error caused by the libstdc++ dual ABI
* Updated README.md for v1.8.0
* Updated torch-ccl to fix the "libfabric.so not found" issue
* setup.py: 1. fix include_paths and library_paths missing when torch is installed via setup.py; 2. solved the libstdc++ dual ABI issue; 3. removed duplicated package imports. torch-ccl: fixed oneCCL library path patching not taking effect
* Update README.md
* Clean ipex installation folder structure (x3)
* Add a warning message about the deprecation of intel_pytorch_extension
* Fix rpath to libtorch_ccl.so after the hierarchy adjustment
* 1. Removed the execute bit from libtorch_ipex.so permissions; 2. upgraded torch-ccl so that libtorch_ccl.so installs to the torch_ccl folder
* Pass build for PyTorch 1.9.0
* Enable batch_norm operator
* Update ipex Dockerfile to use the no-patch version (#170)
* Explicit PyTorch version
* Exclude the operators that do not run into autograd
* Pass all test cases except test_torch
* Fix issues: 1. LSTM indent errors; 2. check batch_normalization
* Fix the issue that the grad of the nll_loss input is None
* Update build version from 1.8.0.1 to 1.9.0 (along with the PyTorch version)
* Fix dil_cat bug when concatenating empty tensors with customized shapes
* 1. Moved Python code out of libtorch_ipex.so into _C.so; 2. removed pybind11 as a dependency library from the third_party folder; 3. changed "import intel_pytorch_extension" to "import torch_ipex" in the tests folder, README.md, torch_ipex/ops/embeddingbag.py and torch_ipex/launch.py; 4. commented out "core.enable_torch_ccl()" in torch_ipex/__init__.py, to avoid the following error on "import torch_ipex":
      Traceback (most recent call last):
        File "<string>", line 1, in <module>
        File "/home/jingxu1/dl/pytorch/srcs/venv_test_py38/lib/python3.8/site-packages/torch_ipex/__init__.py", line 14, in <module>
          core.enable_torch_ccl()
      RuntimeError: arg(): could not convert default argument into a Python object (type not registered yet?). Compile in debug mode for more information.
* 1. Removed torch-ccl; 2. added debug info to version.py; 3. removed the PyTorch wheel file binding in debug mode
* Updated Dockerfile to 1.9.0
* Removed core.enable_torch_ccl()
* Updated README.md for 1.9.0 (x2)
* Updated .gitignore to delete torch_ipex/version.py when performing clean
* v1.8.0 whl release (#171)
* Added wheel file release info to README.md (x2)
* Exclude flatten.using_ints and cross_entropy_loss because the two operators do not generate backward functions
* Do not capture batch_norm and _batch_norm_impl_index
* Exclude reshape and where
* Exclude nll_loss2d
* Added a denormal-numbers section to performance_tuning.md
* Add installation guide for 1.9.0 (x2)
* Update README.md: the default IPEX and PyTorch versions are v1.9.0
* Added AVX512 note
* Updated launch.py
* Added launcher doc (x2)
* Add Python interface C++ source file
* Update README.md (x5)
* Update LICENSE.txt
* Update README.md
* Remove useless files
* Fix format issue

Co-authored-by: Abolfazl Shahbazi <abolfazl.shahbazi@intel.com>
Co-authored-by: chunyuan-w <chunyuan.wu@intel.com>
Co-authored-by: leslie-fang-intel <leslie.fang@intel.com>
Co-authored-by: Chen, Jian Ping <jian.ping.chen@intel.com>
Co-authored-by: jiayisun <jiayi.sun@intel.com>
Co-authored-by: Jing Xu <jing.xu@intel.com>
Co-authored-by: Zhu, Jewel <jewel.zhu@intel.com>
Co-authored-by: tangleintel <lei1.tang@intel.com>
Co-authored-by: Chaitanya Hazarey <C24IO@users.noreply.github.com>
Co-authored-by: Ashok Emani <ashok.emani@intel.com>
Co-authored-by: Wang, Eikan <root@JF5300-B11A316T.jf.intel.com>
Co-authored-by: jianangu <jianan.gu@intel.com>
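Among the items above, "add asymmetric quantization" for the int8 LSTM path refers to affine (scale plus zero-point) u8 quantization. A minimal pure-Python sketch of that mapping, with illustrative helper names that are not part of the IPEX API:

```python
def quantize_u8(x, scale, zero_point):
    """f32 -> u8: q = clamp(round(v * scale) + zero_point, 0, 255)."""
    return [min(255, max(0, round(v * scale) + zero_point)) for v in x]

def dequantize_u8(q, scale, zero_point):
    """u8 -> f32 inverse: v = (q - zero_point) / scale."""
    return [(v - zero_point) / scale for v in q]

vals = [-1.0, 0.0, 0.5, 1.0]
q = quantize_u8(vals, scale=100.0, zero_point=100)
print(q)  # [0, 100, 150, 200]
print(dequantize_u8(q, 100.0, 100))  # recovers the original values
```

The nonzero zero point is what lets an unsigned 8-bit range represent negative activations, which symmetric s8 quantization handles with a signed range instead.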
1 parent: 7fb53ba · commit 36c3192

27 files changed: +626 −2305 lines

LICENSE.txt

Lines changed: 1 addition & 1 deletion

@@ -187,7 +187,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.

-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2020-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

README.md

Lines changed: 258 additions & 164 deletions
Large diffs are not rendered by default.

cmake/Modules/FindTorchCCL.cmake

Lines changed: 3 additions & 0 deletions
@@ -17,7 +17,10 @@ SET(TORCHCCL_INCLUDE_DIR)

 SET(TORCHCCL_ROOT "${PROJECT_SOURCE_DIR}/third_party/torch_ccl")

+SET(CMAKE_INSTALL_PREFIX_SAVED "${CMAKE_INSTALL_PREFIX}")
+SET(CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX_SAVED}/../torch_ccl")
 ADD_SUBDIRECTORY(${TORCHCCL_ROOT})
+SET(CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX_SAVED}")
 IF(NOT TARGET torch_ccl)
   MESSAGE(FATAL_ERROR "Failed to include torch_ccl target")
 ENDIF()
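The hunk above temporarily overrides CMAKE_INSTALL_PREFIX so the torch_ccl subproject installs into its own folder, then restores the saved value. The same save/override/restore pattern, sketched as a Python context manager (an analogy only; nothing here is CMake or IPEX API):

```python
from contextlib import contextmanager

@contextmanager
def override(mapping, key, value):
    """Temporarily set mapping[key] = value, restoring the old value on exit."""
    saved = mapping[key]          # like SET(CMAKE_INSTALL_PREFIX_SAVED ...)
    mapping[key] = value          # override for the nested scope
    try:
        yield
    finally:
        mapping[key] = saved      # restore, even if the body raised

cfg = {"install_prefix": "/opt/ipex"}
with override(cfg, "install_prefix", "/opt/ipex/../torch_ccl"):
    assert cfg["install_prefix"].endswith("torch_ccl")
assert cfg["install_prefix"] == "/opt/ipex"
```

Restoring in a `finally` (or, in CMake, immediately after `ADD_SUBDIRECTORY`) keeps the override from leaking into the rest of the build.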

docker/Dockerfile

Lines changed: 20 additions & 18 deletions
@@ -1,12 +1,12 @@
 # syntax = docker/dockerfile:experimental
 # based on https://github.com/pytorch/pytorch/blob/master/Dockerfile
-#
+#
 # NOTE: To build this you will need a docker version > 18.06 with
 # experimental enabled and DOCKER_BUILDKIT=1
 #
 # If you do not use buildkit you are not going to have a good time
 #
-# For reference:
+# For reference:
 # https://docs.docker.com/develop/develop-images/build_enhancements/

 ARG BASE_IMAGE=ubuntu:20.04
@@ -20,11 +20,13 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
     vim \
     build-essential \
     ccache \
-    libjemalloc-dev \
+    libgoogle-perftools-dev \
     numactl \
     cmake \
     libjpeg-dev \
+    pybind11-dev \
     libpng-dev \
+    pybind11-dev \
     && rm -rf /var/lib/apt/lists/*
 RUN /usr/sbin/update-ccache-symlinks
 RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
@@ -40,30 +42,30 @@ RUN curl -fsSL -v -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Mini
     /opt/conda/bin/conda clean -ya

 FROM dev-base AS build
+ARG IPEX_VERSION=v1.9.0
+ARG PYTORCH_VERSION=v1.9.0
+ARG TORCHVISION_VERSION=0.10.0+cpu
+ARG TORCHAUDIO_VERSION=0.9.0
 COPY --from=conda /opt/conda /opt/conda
-ARG TORCHVISION_VERSION=0.6
 RUN --mount=type=cache,target=/opt/ccache \
-    pip install torchvision==${TORCHVISION_VERSION}+cpu --no-deps \
-    -f https://download.pytorch.org/whl/torch_stable.html && \
-    pip install lark-parser hypothesis && \
+    pip install torch==${PYTORCH_VERSION}+cpu torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html && \
     git clone https://github.com/intel/intel-extension-for-pytorch && \
-    cd intel-extension-for-pytorch && git submodule sync && \
-    git submodule update --init --recursive && \
-    git clone https://github.com/pytorch/pytorch && \
-    cd pytorch && git checkout v1.7.0 && git submodule sync && \
+    cd intel-extension-for-pytorch && \
+    git checkout ${IPEX_VERSION} && \
+    git submodule sync && \
     git submodule update --init --recursive && \
-    git apply ../torch_patches/xpu-1.7.patch && \
-    USE_MKLDNN=1 USE_CUDA=0 USE_NNPACK=0 USE_CUDNN=0 \
-    CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" pip install -v . && \
-    cd .. && pip install -v . && rm -rf *
+    pip3 install -r requirements.txt && \
+    python setup.py bdist_wheel && \
+    pip3 install dist/*.whl && \
+    cd .. && rm -rf intel-extension-for-pytorch

 FROM dev-base as dev
 COPY --from=build /opt/conda /opt/conda
 ARG OMP_NUM_THREADS=1
 ENV OMP_NUM_THREADS ${OMP_NUM_THREADS}
 ARG KMP_BLOCKTIME=1
-ENV KMP_BLOCKTIME ${KMP_BLOCKTIME}
+ENV KMP_BLOCKTIME ${KMP_BLOCKTIME}
 ARG KMP_HW_SUBSET=1T
 ENV KMP_HW_SUBSET ${KMP_HW_SUBSET}
-ENV MALLOC_CONF "oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"
-ENV LD_PRELOAD "/opt/conda/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libjemalloc.so"
+ENV LD_PRELOAD "/opt/conda/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libtcmalloc.so"
+ENV LD_LIBRARY_PATH "/opt/conda/lib/python3.8/site-packages/lib/"
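This revision swaps jemalloc for tcmalloc (libgoogle-perftools-dev plus the LD_PRELOAD change) and drops the jemalloc-specific MALLOC_CONF. A hedged way to confirm which allocator actually got preloaded into a running process is to scan /proc/self/maps; this Linux-only helper is ours, not part of the image:

```python
def preloaded_allocators(maps_path="/proc/self/maps"):
    """Return the set of known allocator/runtime libraries mapped into this
    process, by grepping its memory map (Linux-only)."""
    found = set()
    try:
        with open(maps_path) as f:
            for line in f:
                for name in ("tcmalloc", "jemalloc", "libiomp5"):
                    if name in line:
                        found.add(name)
    except FileNotFoundError:
        # /proc is unavailable (e.g. non-Linux); report nothing rather than fail
        pass
    return found

print(preloaded_allocators())
```

Run inside the container with the Dockerfile's LD_PRELOAD in effect; an empty set means the preload did not take, which usually points at a wrong library path for the distro.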

docker/README.md

Lines changed: 1 addition & 0 deletions
@@ -11,4 +11,5 @@
 ```console
 $ cd $DOCKERFILE_DIR
 $ DOCKER_BUILDKIT=1 docker build -t intel-extension-for-pytorch:test .
+$ docker run intel-extension-for-pytorch:test python -c "import torch;import intel_pytorch_extension as ipex;print('torch:', torch.__version__,' ipex:',ipex.__version__)"
 ```
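The `docker run` smoke test added above raises an ImportError if either package is missing from the image. A slightly more forgiving variant of the same check, where the `report_versions` wrapper is hypothetical and not part of the repo:

```python
def report_versions():
    """Report torch and ipex versions, degrading gracefully if absent."""
    lines = []
    try:
        import torch
        lines.append(f"torch: {torch.__version__}")
    except ImportError:
        lines.append("torch: not installed")
    try:
        import intel_pytorch_extension as ipex
        lines.append(f"ipex: {ipex.__version__}")
    except ImportError:
        lines.append("ipex: not installed")
    return lines

print("\n".join(report_versions()))
```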

ideep/ideep/abstract_types.hpp

Lines changed: 1 addition & 0 deletions
@@ -52,6 +52,7 @@ using key_t = std::string;
 #endif

 const scale_t IDEEP_DEF_SCALE {1.0f};
+const std::vector<int32_t> DIL_DEF_ZERO_POINT{0};

 enum lowp_kind {
   u8s8 = 0,
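DIL_DEF_ZERO_POINT above defaults the zero point to 0, i.e. plain symmetric scaling, under which real 0.0 always maps exactly to quantized 0. A one-line sketch of why (illustrative, not the ideep API):

```python
def affine_quant(x, scale, zero_point=0):
    """Affine quantization q = round(x * scale) + zero_point; with the default
    zero_point of 0 this degenerates to symmetric quantization."""
    return round(x * scale) + zero_point

assert affine_quant(0.0, 127.0) == 0         # zero is exactly representable
assert affine_quant(0.0, 127.0, 128) == 128  # a nonzero zero point shifts it
```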

ideep/ideep/attributes.hpp

Lines changed: 29 additions & 0 deletions
@@ -14,6 +14,35 @@ struct attr_t : public dnnl::primitive_attr {

   attr_t(int mask, const scale_t& scales) { set_output_scales(mask, scales); }

+  /* TODO: for rnn input quantization with scale + shift from f32 to u8.
+     Failed to use it in IPEX since:
+       x_aten is in ntc and is an aten tensor
+       x_dil = x_aten.transpose(0, 1)
+       x_dil will become a dil tensor
+       x_dil_storage = try_gen_dil_storage(x_dil)
+       x_dil_storage will have the stride that corresponds to an ntc format.
+     When we use set_rnn_data_qparams on x_dil_storage, it cannot pass the
+     format check. */
+  attr_t(float scale, float shift) {
+    set_rnn_data_qparams(scale, shift);
+  }
+
+  attr_t(
+      const scale_t& scales,
+      const std::vector<int32_t>& shift,
+      bool rnn_data_quantize) {
+    set_output_scales(0, scales);
+    if (rnn_data_quantize) {
+      // Workaround: for rnn input quantization with scale + shift from f32
+      // to u8
+      set_zero_points(DNNL_ARG_DST, 0, shift);
+    } else {
+      // for rnn input dequantization with scale + shift from u8 to f32
+      set_zero_points(DNNL_ARG_SRC, 0, shift);
+    }
+  }
+
   std::pair<scale_t, int> get_output_scales() const {
     dnnl_dim_t count;
     int c_mask;
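The second attr_t constructor above attaches the zero point to DNNL_ARG_DST when quantizing (the u8 output carries the shift) and to DNNL_ARG_SRC when dequantizing (the u8 input carries it). A pure-Python round trip through the two branches, as a sketch of the intent rather than oneDNN semantics verbatim:

```python
def rnn_reorder(values, scale, shift, quantize):
    """Mimic the two branches of the attr_t constructor above."""
    if quantize:
        # f32 -> u8: shift applied on the destination side (DNNL_ARG_DST)
        return [min(255, max(0, round(v * scale) + shift)) for v in values]
    # u8 -> f32: shift removed from the source side (DNNL_ARG_SRC)
    return [(v - shift) / scale for v in values]

q = rnn_reorder([0.5, -0.25], scale=64.0, shift=128, quantize=True)
print(q)  # [160, 112]
x = rnn_reorder(q, scale=64.0, shift=128, quantize=False)
print(x)  # [0.5, -0.25]
```

Keeping the shift on the u8 side in both directions is what makes the pair an exact inverse (up to rounding and clamping).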

scripts/cpu/common/__init__.py

Whitespace-only changes.

scripts/cpu/common/aten_sig_parser.py

Lines changed: 0 additions & 187 deletions
This file was deleted.

scripts/cpu/common/codegen.py

Lines changed: 0 additions & 15 deletions
This file was deleted.
