
Commit

Merge branch 'main' into 750-save-csv-v2
ClaudiaComito authored Mar 28, 2022
2 parents 6fe203c + 2c9ec1a commit 4e7a23c
Showing 26 changed files with 264 additions and 68 deletions.
14 changes: 14 additions & 0 deletions .github/ISSUE_TEMPLATE/support_latest_pytorch.md
@@ -0,0 +1,14 @@
---
name: Support new PyTorch release
title: 'Support new PyTorch release'
labels: 'enhancement'
assignees: 'ClaudiaComito'
---

A new PyTorch release is out. Action needed:

- run CPU tests
- run GPU tests
- fix bugs
- expand tests
- release as minor version
81 changes: 81 additions & 0 deletions .github/workflows/codesee-arch-diagram.yml
@@ -0,0 +1,81 @@
on:
push:
branches:
- main
pull_request_target:
types: [opened, synchronize, reopened]

name: CodeSee Map

jobs:
test_map_action:
runs-on: ubuntu-latest
continue-on-error: true
name: Run CodeSee Map Analysis
steps:
- name: checkout
id: checkout
uses: actions/checkout@v2
with:
repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.pull_request.head.ref }}
fetch-depth: 0

# codesee-detect-languages has an output with id languages.
- name: Detect Languages
id: detect-languages
uses: Codesee-io/codesee-detect-languages-action@latest

- name: Configure JDK 16
uses: actions/setup-java@v2
if: ${{ fromJSON(steps.detect-languages.outputs.languages).java }}
with:
java-version: '16'
distribution: 'zulu'

# CodeSee Maps Go support uses a static binary so there's no setup step required.

- name: Configure Node.js 14
uses: actions/setup-node@v2
if: ${{ fromJSON(steps.detect-languages.outputs.languages).javascript }}
with:
node-version: '14'

- name: Configure Python 3.x
uses: actions/setup-python@v2
if: ${{ fromJSON(steps.detect-languages.outputs.languages).python }}
with:
python-version: '3.10'
architecture: 'x64'

- name: Configure Ruby '3.x'
uses: ruby/setup-ruby@v1
if: ${{ fromJSON(steps.detect-languages.outputs.languages).ruby }}
with:
ruby-version: '3.0'

# CodeSee Maps Rust support uses a static binary so there's no setup step required.

- name: Generate Map
id: generate-map
uses: Codesee-io/codesee-map-action@latest
with:
step: map
github_ref: ${{ github.ref }}
languages: ${{ steps.detect-languages.outputs.languages }}

- name: Upload Map
id: upload-map
uses: Codesee-io/codesee-map-action@latest
with:
step: mapUpload
api_token: ${{ secrets.CODESEE_ARCH_DIAG_API_TOKEN }}
github_ref: ${{ github.ref }}

- name: Insights
id: insights
uses: Codesee-io/codesee-map-action@latest
with:
step: insights
api_token: ${{ secrets.CODESEE_ARCH_DIAG_API_TOKEN }}
github_ref: ${{ github.ref }}
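The workflow above gates each language's setup step on the JSON output of `codesee-detect-languages`. A minimal Python sketch of that kind of extension-based detection (the real action's logic is not shown in this diff; the extension map below is an assumption for illustration):

```python
# Hypothetical sketch of language detection by file extension,
# approximating the {language: true} map a detect-languages step emits.
from pathlib import Path

EXT_TO_LANG = {".py": "python", ".java": "java", ".js": "javascript", ".rb": "ruby"}

def detect_languages(root="."):
    """Return {language: True} for every known extension found under root."""
    found = {}
    for path in Path(root).rglob("*"):
        lang = EXT_TO_LANG.get(path.suffix)
        if lang:
            found[lang] = True
    return found
```

Each `if: ${{ fromJSON(...).python }}` condition then reads one key of that map, so only the toolchains actually present in the repository get installed.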
56 changes: 56 additions & 0 deletions .github/workflows/latest-pytorch-support.yml
@@ -0,0 +1,56 @@
name: Support latest PyTorch

on:
push:
paths:
- '.github/workflows/pytorch-release-versions/*'
env:
previous_pytorch: $(grep 'torch>=' setup.py | awk -F '<=' '{print $2}' | tr -d '",')
new_pytorch: $(<.github/workflows/pytorch-release-versions/pytorch-latest.txt)
permissions:
contents: write
issues: write
pull-requests: write
jobs:
latest-torch-support:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
id: create-issue
with:
filename: .github/ISSUE_TEMPLATE/support_latest_pytorch.md
milestone: 1
update_existing: true
search_existing: open
- name: Check out new branch
uses: actions/checkout@v2
with:
token: ${{ secrets.GITHUB_TOKEN }}
ref: 'main'
- name: Update setup.py
run: |
echo ${{ env.previous_pytorch }}
echo ${{ env.new_pytorch }}
sed -i '/torch>=/ s/'"${{ env.previous_pytorch }}"'/'"${{ env.new_pytorch }}"'/g' setup.py
sed -i 's/'"${{ env.previous_pytorch }}"'/'"${{ env.new_pytorch }}"'/g' .github/workflows/pytorch-release-versions/pytorch-latest.txt
- name: Define env variable
run: |
echo "new=$(<.github/workflows/pytorch-release-versions/pytorch-latest.txt)" >> $GITHUB_ENV
- name: Create PR from branch
uses: peter-evans/create-pull-request@v3
with:
base: main
delete-branch: true
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: Support latest PyTorch release
title: Support PyTorch ${{ env.new }}
body: |
Run tests on latest PyTorch release
Issue/s resolved: #${{ steps.create-issue.outputs.number }}
Auto-generated by [create-pull-request][1]
[1]: https://github.com/peter-evans/create-pull-request
reviewers: ClaudiaComito, mtar, coquelin77
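The `env` block at the top of this workflow extracts the current upper PyTorch bound with `grep 'torch>=' setup.py | awk -F '<=' '{print $2}' | tr -d '",'`. A rough Python equivalent of that pipeline, assuming a requirement line shaped like `"torch>=1.8,<=1.10.2",` (the version numbers here are hypothetical):

```python
def upper_torch_bound(setup_py_text):
    """Rough Python equivalent of:
    grep 'torch>=' setup.py | awk -F '<=' '{print $2}' | tr -d '",'
    i.e. the text after the first '<=' on the torch line, minus quotes and commas."""
    for line in setup_py_text.splitlines():
        if "torch>=" in line:  # grep 'torch>='
            after = line.split("<=", 1)[1]  # awk -F '<=' '{print $2}'
            return after.replace('"', "").replace(",", "").strip()  # tr -d '",'
    return None  # no torch requirement found
```

The `sed -i '/torch>=/ s/old/new/g'` step then swaps that extracted version for the one read from `pytorch-latest.txt`.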
27 changes: 27 additions & 0 deletions .github/workflows/pytorch-latest-release.yml
@@ -0,0 +1,27 @@
name: Get latest PyTorch release version
on:
schedule:
- cron: '0 5 * * 1-5'
jobs:
get-version:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
token: ${{ secrets.GITHUB_TOKEN }}
ref: 'main'
- name: Fetch PyTorch release version
run: |
curl -sL https://api.github.com/repos/pytorch/pytorch/releases/latest | \
jq -r ".tag_name" | tr -d 'v' > .github/workflows/pytorch-release-versions/pytorch-latest.txt
- name: Check for modified files
id: git-check
run: echo ::set-output name=modified::$([ -z "`git status --porcelain`" ] && echo "false" || echo "true")
- name: Commit latest PyTorch release version
if: steps.git-check.outputs.modified == 'true'
run: |
echo "new=$(<.github/workflows/pytorch-release-versions/pytorch-latest.txt)" >> $GITHUB_ENV
git config --global user.name 'ClaudiaComito'
git config --global user.email 'c.comito@fz-juelich.de@users.noreply.github.com'
git commit -am "New PyTorch release ${{ env.new }}"
git push
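The fetch step pipes the GitHub API response through `jq -r ".tag_name"` and `tr -d 'v'`. A Python sketch of the same parsing; note that `tr -d 'v'` deletes every `v` in the tag, not just the leading one, so `lstrip` below is the stricter equivalent for tags like `v1.11.0`:

```python
import json

def latest_release_version(api_response_text):
    """Extract the version from a GitHub 'releases/latest' JSON payload,
    mimicking: jq -r ".tag_name" | tr -d 'v'."""
    tag = json.loads(api_response_text)["tag_name"]
    # lstrip only removes the leading 'v'; tr -d 'v' would also mangle
    # any 'v' appearing later in the tag name.
    return tag.lstrip("v")
```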
1 change: 1 addition & 0 deletions .github/workflows/pytorch-release-versions/pytorch-latest.txt
@@ -0,0 +1 @@
1.11.0
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -10,11 +10,11 @@ repos:
- id: check-added-large-files
- id: flake8
- repo: https://github.com/psf/black
rev: 19.3b0
rev: 22.1.0
hooks:
- id: black
- repo: https://github.com/pycqa/pydocstyle
rev: 5.0.1 # pick a git hash / tag to point to
rev: 6.1.1 # pick a git hash / tag to point to
hooks:
- id: pydocstyle
exclude: 'tests|benchmarks|examples|scripts|setup.py' #|heat/utils/data/mnist.py|heat/utils/data/_utils.py ?
2 changes: 1 addition & 1 deletion examples/lasso/demo.py
@@ -24,7 +24,7 @@
y = ht.load_hdf5(diabetes_path, dataset="y", split=0)

# normalize dataset #DoTO this goes into the lasso fit routine soon as issue #106 is solved
X = X / ht.sqrt((ht.mean(X ** 2, axis=0)))
X = X / ht.sqrt((ht.mean(X**2, axis=0)))

# HeAT lasso instance
estimator = lasso.Lasso(max_iter=100)
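The reformatted line in this demo normalizes each feature column to unit root-mean-square before fitting. A plain-Python sketch of that per-column scaling (Heat's `ht.sqrt`/`ht.mean` operate on whole distributed arrays; this shows one column):

```python
def rms_normalize_column(col):
    """Scale a column so its root-mean-square becomes 1, matching
    X / ht.sqrt(ht.mean(X**2, axis=0)) applied column by column."""
    rms = (sum(x * x for x in col) / len(col)) ** 0.5
    return [x / rms for x in col]
```

After this scaling, `sqrt(mean(x**2))` of any nonzero column is exactly 1, which puts all features on a comparable scale for the lasso solver.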
4 changes: 1 addition & 3 deletions examples/nn/imagenet-DASO.py
@@ -400,9 +400,7 @@ def main():
# model = tDDP(model) -> done in the ht model initialization
# Scale learning rate based on global batch size
# todo: change the learning rate adjustments to be reduce on plateau
args.lr = (
0.0125
) # (1. / args.world_size * (5 * (args.world_size - 1) / 6.)) * 0.0125 * args.world_size
args.lr = 0.0125 # (1. / args.world_size * (5 * (args.world_size - 1) / 6.)) * 0.0125 * args.world_size
# args.lr = (1. / args.world_size * (5 * (args.world_size - 1) / 6.)) * 0.0125 * args.world_size
optimizer = torch.optim.SGD(
model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay
8 changes: 6 additions & 2 deletions heat/classification/tests/test_knn.py
@@ -69,15 +69,19 @@ def test_exception(self):
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(c, a)

def test_utility(self,):
def test_utility(
self,
):
a = ht.array([1, 2, 3, 4])
b = ht.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]])

one_hot = KNeighborsClassifier.one_hot_encoding(a)
self.assertTrue((one_hot == b).all())

@unittest.skipUnless(ht.supports_hdf5(), "Requires HDF5")
def test_fit_one_hot(self,):
def test_fit_one_hot(
self,
):
x = ht.load_hdf5("heat/datasets/iris.h5", dataset="data")

# keys as label array
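`test_utility` checks `KNeighborsClassifier.one_hot_encoding` against a hand-written target. A plain-Python sketch of the mapping being tested — row `i` carries a 1 at column `labels[i]`, with `max(labels) + 1` columns — inferred from the test's expected output, not from Heat's implementation:

```python
def one_hot_encoding(labels):
    """One-hot encode integer labels: row i has a 1 in column labels[i],
    zeros elsewhere; width is max(labels) + 1."""
    width = max(labels) + 1
    return [[1 if col == lab else 0 for col in range(width)] for lab in labels]
```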
8 changes: 4 additions & 4 deletions heat/core/communication.py
@@ -752,7 +752,7 @@ def __reduce_like(
sendbuf: Union[DNDarray, torch.Tensor, Any],
recvbuf: Union[DNDarray, torch.Tensor, Any],
*args,
**kwargs
**kwargs,
) -> Tuple[Optional[DNDarray, torch.Tensor]]:
"""
Generic function for reduction operations.
@@ -1005,7 +1005,7 @@ def __allgather_like(
sendbuf: Union[DNDarray, torch.Tensor, Any],
recvbuf: Union[DNDarray, torch.Tensor, Any],
axis: int,
**kwargs
**kwargs,
):
"""
Generic function for allgather operations.
@@ -1203,7 +1203,7 @@ def __alltoall_like(
recvbuf: Union[DNDarray, torch.Tensor, Any],
send_axis: int,
recv_axis: int,
**kwargs
**kwargs,
):
"""
Generic function for alltoall operations.
@@ -1482,7 +1482,7 @@ def __scatter_like(
recv_axis: int,
send_factor: int = 1,
recv_factor: int = 1,
**kwargs
**kwargs,
):
"""
Generic function for scatter and gather operations.
28 changes: 14 additions & 14 deletions heat/core/dndarray.py
@@ -687,10 +687,10 @@ def __getitem__(self, key: Union[int, Tuple[int, ...], List[int, ...]]) -> DNDar
l_dtype = self.dtype.torch_type()
advanced_ind = False
if isinstance(key, DNDarray) and key.ndim == self.ndim:
""" if the key is a DNDarray and it has as many dimensions as self, then each of the
entries in the 0th dim refer to a single element. To handle this, the key is split
into the torch tensors for each dimension. This signals that advanced indexing is
to be used. """
"""if the key is a DNDarray and it has as many dimensions as self, then each of the
entries in the 0th dim refer to a single element. To handle this, the key is split
into the torch tensors for each dimension. This signals that advanced indexing is
to be used."""
# NOTE: this gathers the entire key on every process!!
# TODO: remove this resplit!!
key = manipulations.resplit(key)
@@ -706,9 +706,9 @@ def __getitem__(self, key: Union[int, Tuple[int, ...], List[int, ...]]) -> DNDar
key = [key]
advanced_ind = True
elif not isinstance(key, tuple):
""" this loop handles all other cases. DNDarrays which make it to here refer to
advanced indexing slices, as do the torch tensors. Both DNDaarrys and torch.Tensors
are cast into lists here by PyTorch. lists mean advanced indexing will be used"""
"""this loop handles all other cases. DNDarrays which make it to here refer to
advanced indexing slices, as do the torch tensors. Both DNDaarrys and torch.Tensors
are cast into lists here by PyTorch. lists mean advanced indexing will be used"""
h = [slice(None, None, None)] * max(self.ndim, 1)
if isinstance(key, DNDarray):
key = manipulations.resplit(key)
@@ -1413,10 +1413,10 @@ def __setitem__(
# motived to do this they are welcome to, but i have no time right now
# print(key)
if isinstance(key, DNDarray) and key.ndim == self.ndim:
""" if the key is a DNDarray and it has as many dimensions as self, then each of the
entries in the 0th dim refer to a single element. To handle this, the key is split
into the torch tensors for each dimension. This signals that advanced indexing is
to be used. """
"""if the key is a DNDarray and it has as many dimensions as self, then each of the
entries in the 0th dim refer to a single element. To handle this, the key is split
into the torch tensors for each dimension. This signals that advanced indexing is
to be used."""
key = manipulations.resplit(key)
if key.larray.dtype in [torch.bool, torch.uint8]:
key = indexing.nonzero(key)
@@ -1429,9 +1429,9 @@ def __setitem__(
else:
key = [key]
elif not isinstance(key, tuple):
""" this loop handles all other cases. DNDarrays which make it to here refer to
advanced indexing slices, as do the torch tensors. Both DNDaarrys and torch.Tensors
are cast into lists here by PyTorch. lists mean advanced indexing will be used"""
"""this loop handles all other cases. DNDarrays which make it to here refer to
advanced indexing slices, as do the torch tensors. Both DNDaarrys and torch.Tensors
are cast into lists here by PyTorch. lists mean advanced indexing will be used"""
h = [slice(None, None, None)] * self.ndim
if isinstance(key, DNDarray):
key = manipulations.resplit(key)
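The reflowed docstrings describe how a boolean DNDarray key with as many dimensions as `self` is decomposed (via `nonzero`) into one index tensor per dimension before advanced indexing. A small pure-Python illustration of that decomposition for the 2-D case — an illustration of the idea, not Heat's actual code path:

```python
def nonzero_per_dim(mask):
    """Split a 2-D boolean mask into one index list per dimension,
    the shape advanced indexing expects: (row_indices, col_indices)."""
    rows, cols = [], []
    for i, row in enumerate(mask):
        for j, hit in enumerate(row):
            if hit:
                rows.append(i)
                cols.append(j)
    return rows, cols
```

Indexing with `(rows, cols)` then selects exactly the elements where the mask was `True`, one per entry of the paired index lists.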
4 changes: 1 addition & 3 deletions heat/core/io.py
@@ -38,7 +38,6 @@ def supports_hdf5() -> bool:
"""
return False


else:
# add functions to exports
__all__.extend(["load_hdf5", "save_hdf5"])
@@ -242,7 +241,6 @@ def supports_netcdf() -> bool:
"""
return False


else:
# add functions to visible exports
__all__.extend(["load_netcdf", "save_netcdf"])
@@ -358,7 +356,7 @@ def save_netcdf(
dimension_names: Union[list, tuple, str] = None,
is_unlimited: bool = False,
file_slices: Union[Iterable[int], slice, bool] = slice(None),
**kwargs: Dict[str, object]
**kwargs: Dict[str, object],
):
"""
Saves data to a netCDF4 file. Attempts to utilize parallel I/O if possible.
10 changes: 10 additions & 0 deletions heat/core/manipulations.py
@@ -3872,6 +3872,16 @@ def topk(
DNDarray([[2, 1],
[2, 1]], dtype=ht.int64, device=cpu:0, split=1))
"""
if out is not None:
if out[0].dtype != a.dtype:
raise RuntimeError(
"dtypes of 'out[0]' and 'a' do not match, found {} != {}".format(
out[0].dtype, a.dtype
)
)
if out[1].dtype != types.int64:
raise RuntimeError("dtype of 'out[1]' is not ht.int64, found {}".format(out[1].dtype))

dim = stride_tricks.sanitize_axis(a.gshape, dim)

neutral_value = sanitation.sanitize_infinity(a)
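The new guard in `topk` rejects mismatched `out` buffers up front instead of failing later in the distributed computation. The same validation pattern as a standalone sketch, with dtypes shown as plain strings for illustration:

```python
def check_topk_out_dtypes(values_dtype, indices_dtype, input_dtype):
    """Mirror the checks added to ht.topk: out[0] must share the input's
    dtype, and out[1] (the index array) must be int64."""
    if values_dtype != input_dtype:
        raise RuntimeError(
            f"dtypes of 'out[0]' and 'a' do not match, found {values_dtype} != {input_dtype}"
        )
    if indices_dtype != "int64":
        raise RuntimeError(f"dtype of 'out[1]' is not ht.int64, found {indices_dtype}")
```

Failing fast here gives the caller a clear message naming the offending buffer, rather than a dtype error deep inside the sort.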