Abra merge test #2870

Merged
merged 73 commits on Jan 17, 2024
Changes from all commits
73 commits
1dccb14
Bump torch to 2.1.1 version (#2717)
j316chuck Nov 30, 2023
b11f7b6
Add more info when run doesnt complete (#2751)
aspfohl Dec 1, 2023
5d20db1
Lower sequence generation length on code gen to be dependent on max c…
bmosaicml Dec 4, 2023
8957550
Remove flatten params (#2761)
mvpatel2000 Dec 7, 2023
1a8a664
fix lint (#2767)
mvpatel2000 Dec 7, 2023
e87c06d
lint (#2768)
mvpatel2000 Dec 7, 2023
7f55b7a
Use time.tokens for speedmonitor instead of dataset length (#2762)
mvpatel2000 Dec 7, 2023
cb8f937
remove exception (#2759)
mvpatel2000 Dec 8, 2023
f097fd7
time to clean up time parsing 😉 (#2770)
aspfohl Dec 9, 2023
236b738
Upgrade RunConfig compute specification (#2772)
aspfohl Dec 11, 2023
39d6df4
Use async logging in MLflowLogger (#2693)
chenmoneygithub Dec 11, 2023
c04405e
Fix FSDP _param_init_fn to not reinit parameters multiple times (#2765)
dakinggg Dec 11, 2023
bc50049
Gate FSDP param init test on torch 2.1 (#2774)
dakinggg Dec 11, 2023
aad8901
Parallelize OCI multipart download (#2750)
coryMosaicML Dec 12, 2023
f497e60
[UCVolumes] Add support for list API (#2769)
panchalhp-db Dec 12, 2023
a7cad7c
Add the memory timeline profiling support through the PyTorch profile…
cli99 Dec 12, 2023
db3d187
Improve torch memory profiling arguments processing (#2777)
cli99 Dec 13, 2023
0d61164
Add platform AWS and bump aws ofi nccl version (#2776)
willgleich Dec 13, 2023
776d172
Extend checkpoint loading to accept a validation function (#2726)
irenedea Dec 14, 2023
09f4580
Fix checkpoint validation tests for torch 1.13 (#2779)
irenedea Dec 14, 2023
7e0e40a
Bump version to 0.17.2 (#2780)
mvpatel2000 Dec 14, 2023
45bb135
bump transformers version (#2781)
dakinggg Dec 15, 2023
84059b6
Bump sphinxext-opengraph from 0.9.0 to 0.9.1 (#2784)
dependabot[bot] Dec 18, 2023
15324c7
Bump coverage[toml] from 7.3.0 to 7.3.3 (#2783)
dependabot[bot] Dec 18, 2023
b8363bb
Update torch requirement from <2.1.2,>=1.13.1 to >=1.13.1,<2.1.3 (#2785)
dependabot[bot] Dec 18, 2023
af8797d
[UCVolumes] Rely on databricks-sdk auth for the right requirements (#…
panchalhp-db Dec 19, 2023
420cb07
Enable system metrics in mosaic mlflow logger (#2775)
chenmoneygithub Dec 19, 2023
f24f43c
Update parse_uri (#2787)
irenedea Dec 20, 2023
96df92d
default-no-memory-timeline (#2790)
cli99 Dec 20, 2023
a8a261b
Add eot token to ICL generate kwargs (#2782)
bmosaicml Dec 20, 2023
ff145d3
Add nightly image for torch 2.2.0 12-20-23 (#2791)
j316chuck Dec 21, 2023
a3ea7a4
Add torch nightly 12-13 (#2792)
j316chuck Dec 21, 2023
2aa50e7
Add process group as arg to FSDP (#2794)
mvpatel2000 Dec 26, 2023
910223e
Bump coverage[toml] from 7.3.3 to 7.3.4 (#2798)
dependabot[bot] Dec 28, 2023
db424e5
Fix load_ignore_keys with rng (#2803)
mvpatel2000 Jan 2, 2024
070095e
Bump ipykernel from 6.26.0 to 6.28.0 (#2806)
dependabot[bot] Jan 2, 2024
e274ca0
Bump junitparser from 3.1.0 to 3.1.1 (#2805)
dependabot[bot] Jan 2, 2024
9110c57
Bump pytest from 7.4.3 to 7.4.4 (#2807)
dependabot[bot] Jan 2, 2024
ed4e07c
Avoid futures on close for MosaicML logger (#2804)
mvpatel2000 Jan 2, 2024
ee7cb69
check (#2812)
mvpatel2000 Jan 2, 2024
52ac18c
Better communication computation overlap (#2811)
snarayan21 Jan 2, 2024
80b35a7
Improve error message for speed monitor (#2801)
mvpatel2000 Jan 4, 2024
f6ca956
bump torch version (#2814)
mvpatel2000 Jan 4, 2024
4af5076
bump vision (#2815)
mvpatel2000 Jan 4, 2024
206a9ea
fix rng load (#2816)
mvpatel2000 Jan 4, 2024
e5240d2
Correct multi-unshard stream patching for torch 2.2.0dev, and stream …
snarayan21 Jan 4, 2024
2deccf2
fix profiler (#2818)
mvpatel2000 Jan 4, 2024
5592e41
Bump traitlets from 5.13.0 to 5.14.1 (#2822)
dependabot[bot] Jan 8, 2024
c22c61a
All unshard streams wait on computation every step (#2823)
snarayan21 Jan 8, 2024
23bc6fb
Add encoding=utf-8 (#2824)
dakinggg Jan 8, 2024
a36fb74
Fix import for daily test (#2826)
snarayan21 Jan 8, 2024
f50dcaf
[MLFlowObjectStore] [1/2] Base implementation for MLFlowObjectStore (…
jerrychen109 Jan 8, 2024
0aa95e0
Remove fused layernorm (already deprecated for 2 versions) (#2827)
mvpatel2000 Jan 9, 2024
737b462
checkpoint saver tracks all checkpoints/intervals in state (#2819)
aspfohl Jan 9, 2024
c82dcc4
code-quality timeout update (#2830)
aspfohl Jan 9, 2024
7b70dde
[S] Fix how single value tensors are logged (#2831)
aspfohl Jan 9, 2024
94e0386
Adds DTensor Support (#2821)
mvpatel2000 Jan 9, 2024
eb4fbd0
Remove duplicate checkpoint verifications (#2828)
eracah Jan 10, 2024
c48e6fe
Fix seed for FSDP wrap (#2833)
mvpatel2000 Jan 10, 2024
6c63f2e
Remove fsdp patch for comm overlap (#2836)
mvpatel2000 Jan 11, 2024
83fb295
allow hsdp (#2838)
mvpatel2000 Jan 11, 2024
55341aa
Bump torch 2.1.2 (#2840)
mvpatel2000 Jan 12, 2024
2ff7c27
Upgrade pyright to 1.1.310 (#2841)
b-chu Jan 12, 2024
56fa4bd
[MLFlowObjectStore] [2/2] Support checkpointing with MLFlow (#2810)
jerrychen109 Jan 12, 2024
c9f0c21
update nightly to torch 2.3 (#2842)
j316chuck Jan 13, 2024
c19fd36
Pin sphinxcontrib applehelp (#2854)
mvpatel2000 Jan 13, 2024
027c3d0
Update setup.py (#2855)
j316chuck Jan 13, 2024
a2ae299
Torch 2.3 patch (#2849)
dakinggg Jan 14, 2024
abff6de
Update mosaicml-cli requirement from <0.6,>=0.5.25 to >=0.5.25,<0.7 (…
dependabot[bot] Jan 15, 2024
d497d8f
Rewrite to use individual state functions (#2860)
mvpatel2000 Jan 15, 2024
1bc8d0a
Add custom stopping criteria to ICL generate tasks (#2800)
bmosaicml Jan 15, 2024
31ea664
Add save_ignore_keys (#2868)
mvpatel2000 Jan 16, 2024
f5978a8
fix conflicts
cli99 Jan 16, 2024
28 changes: 19 additions & 9 deletions .github/mcli/mcli_pytest.py
@@ -6,7 +6,7 @@
import argparse
import time

from mcli import RunConfig, RunStatus, create_run, follow_run_logs, stop_run, wait_for_run_status
from mcli import RunConfig, RunStatus, create_run, follow_run_logs, wait_for_run_status

if __name__ == '__main__':

@@ -67,8 +67,6 @@

export COMMON_ARGS="-v --durations=20 -m '{args.pytest_markers}' {s3_bucket_flag} {clear_tmp_path_flag}"

export PYTHONUNBUFFERED=1

make test PYTEST='{args.pytest_command}' EXTRA_ARGS="$COMMON_ARGS --codeblocks"

make test-dist PYTEST='{args.pytest_command}' EXTRA_ARGS="$COMMON_ARGS" WORLD_SIZE=2
@@ -79,13 +77,25 @@
'''
config = RunConfig(
name=name,
cluster=args.cluster,
gpu_type=args.gpu_type,
gpu_num=args.gpu_num,
compute={
'cluster': args.cluster,
'gpu_type': args.gpu_type,
'gpus': args.gpu_num
},
image=args.image,
integrations=[git_integration],
command=command,
scheduling={'max_duration': args.timeout / 60 / 60},
env_variables=[
{
'key': 'MOSAICML_PLATFORM',
'value': 'False',
},
{
'key': 'PYTHONUNBUFFERED',
'value': '1',
},
],
)

# Create run
@@ -102,7 +112,7 @@
print(line, end='')

print('[GHA] Run completed. Waiting for run to finish...')
run = wait_for_run_status(run, status='completed')
run = wait_for_run_status(run, status=RunStatus.COMPLETED)

# Fail if command exited with non-zero exit code or timed out
assert run.status == RunStatus.COMPLETED
# Fail if command exited with non-zero exit code or timed out (didn't reach COMPLETED)
assert run.status == RunStatus.COMPLETED, f'Run {run.name} did not complete: {run.status} ({run.reason})'
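The `mcli_pytest.py` hunk above moves the cluster and GPU settings into a `compute` dict and the environment variables into `env_variables` (the RunConfig upgrade from #2772). A rough stand-alone sketch of the new shape — plain dicts stand in for the real `mcli.RunConfig`, and the cluster/image values are made up for illustration:

```python
# Sketch of the post-#2772 RunConfig layout: cluster/gpu_type/gpu_num become a
# `compute` dict, and PYTHONUNBUFFERED moves from the shell script into
# `env_variables`. Values below are illustrative, not real cluster names.

def build_run_config_kwargs(cluster, gpu_type, gpu_num, image, command, timeout_secs):
    """Assemble kwargs in the new nested shape shown in the diff above."""
    return {
        'name': 'mcli-pytest',
        'compute': {'cluster': cluster, 'gpu_type': gpu_type, 'gpus': gpu_num},
        'image': image,
        'command': command,
        'scheduling': {'max_duration': timeout_secs / 60 / 60},  # seconds -> hours
        'env_variables': [
            {'key': 'MOSAICML_PLATFORM', 'value': 'False'},
            {'key': 'PYTHONUNBUFFERED', 'value': '1'},
        ],
    }

kwargs = build_run_config_kwargs('r1z1', 'a100_40gb', 8, 'mosaicml/pytorch', 'make test', 7200)
print(kwargs['compute']['gpus'])             # 8
print(kwargs['scheduling']['max_duration'])  # 2.0
```

The same hunk also replaces the string status `'completed'` with the `RunStatus.COMPLETED` enum member in `wait_for_run_status`, and attaches a reason to the final assertion so a failed run reports why.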
2 changes: 1 addition & 1 deletion .github/workflows/code-quality.yaml
@@ -18,7 +18,7 @@ defaults:
jobs:
code-quality:
runs-on: ubuntu-20.04
timeout-minutes: 10
timeout-minutes: 15
strategy:
matrix:
python_version:
5 changes: 5 additions & 0 deletions .github/workflows/pr-cpu.yaml
@@ -27,6 +27,11 @@ jobs:
markers: 'not daily and not remote and not gpu and not vision and not doctest'
pytest_command: 'coverage run -m pytest'
composer_package_name: 'mosaicml'
# - name: 'cpu-3.10-2.2'
# container: mosaicml/pytorch:2.2.0_cu121-nightly20231213-python3.10-ubuntu20.04
# markers: 'not daily and not remote and not gpu and not vision and not doctest'
# pytest_command: 'coverage run -m pytest'
# composer_package_name: 'mosaicml'
- name: 'cpu-vision'
container: mosaicml/pytorch_vision:1.13.1_cpu-python3.10-ubuntu20.04
markers: 'not daily and not remote and not gpu and vision and not doctest'
5 changes: 5 additions & 0 deletions .github/workflows/pr-gpu.yaml
@@ -17,6 +17,11 @@ jobs:
markers: 'not daily and not remote and gpu and (doctest or not doctest)'
pytest_command: 'coverage run -m pytest'
composer_package_name: 'mosaicml'
# - name: 'gpu-3.10-2.2'
# container: mosaicml/pytorch:2.2.0_cu121-nightly20231213-python3.10-ubuntu20.04
# markers: 'not daily and not remote and gpu and (doctest or not doctest)'
# pytest_command: 'coverage run -m pytest'
# composer_package_name: 'mosaicml'
name: ${{ matrix.name }}
if: github.repository_owner == 'mosaicml'
with:
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -110,7 +110,7 @@ repos:
types: [python]
pass_filenames: false
args: [--warnings]
additional_dependencies: ["pyright@1.1.256"]
additional_dependencies: ["pyright@1.1.310"]
- repo: https://github.com/trufflesecurity/trufflehog.git
rev: v3.40.0
hooks:
6 changes: 3 additions & 3 deletions CODEOWNERS
@@ -17,11 +17,11 @@
# as an owner for all sections, so anyone on Composer Eng can approve any Composer PR
# According to the CODEOWNER docs, the last match takes precedence, so @mosaicml/composer-team-eng
# must be mentioned for each rule below.
/composer/algorithms/ @dskhudia @mvpatel2000 @nik-mosaic
/composer/algorithms/ @mosaicml/composer-team-eng
/composer/cli/ @mosaicml/composer-team-eng
/composer/datasets/ @mosaicml/composer-team-eng
/composer/functional/ @dblalock @mvpatel2000
/composer/loggers/ @eracah @dakinggg
/composer/functional/ @mosaicml/composer-team-eng @dblalock
/composer/loggers/ @mosaicml/composer-team-eng @eracah @dakinggg
/composer/loss/ @mosaicml/composer-team-eng
/composer/metrics/ @mosaicml/composer-team-eng
/composer/models/ @mosaicml/composer-team-eng
2 changes: 0 additions & 2 deletions composer/algorithms/__init__.py
@@ -46,7 +46,6 @@ def apply(self, state: State, event: Event, logger: Logger):
from composer.algorithms.cutout import CutOut
from composer.algorithms.ema import EMA
from composer.algorithms.factorize import Factorize
from composer.algorithms.fused_layernorm import FusedLayerNorm
from composer.algorithms.gated_linear_units import GatedLinearUnits
from composer.algorithms.ghost_batchnorm import GhostBatchNorm
from composer.algorithms.gradient_clipping import GradientClipping
@@ -79,7 +78,6 @@ def apply(self, state: State, event: Event, logger: Logger):
'CutOut',
'EMA',
'Factorize',
'FusedLayerNorm',
'GatedLinearUnits',
'GhostBatchNorm',
'GradientClipping',
@@ -6,7 +6,8 @@
from composer.utils import MissingConditionalImportError

try:
from composer.algorithms.alibi.attention_surgery_functions import _bert, _gpt2 # pyright: reportUnusedImport=none
from composer.algorithms.alibi.attention_surgery_functions import _bert # pyright: ignore[reportUnusedImport]
from composer.algorithms.alibi.attention_surgery_functions import _gpt2 # pyright: ignore[reportUnusedImport]
from composer.algorithms.alibi.attention_surgery_functions.utils import policy_registry
except ImportError as e:
raise MissingConditionalImportError(extra_deps_group='nlp', conda_package='transformers') from e
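The alibi `__init__.py` hunk above swaps a file-wide pyright directive for per-import ignore comments, which keeps the suppression scoped to exactly the imports that are intentionally "unused". A minimal sketch of the two styles — the stdlib modules here are stand-ins for the `_bert`/`_gpt2` surgery modules:

```python
# Old style (removed in this PR): a file-level directive disables the check
# for the entire file:
#     # pyright: reportUnusedImport=none
# New style: scope the suppression to each individual import.
import json  # pyright: ignore[reportUnusedImport]
import math  # pyright: ignore[reportUnusedImport]

# In the real code these imports run for their side effects (registering
# attention-surgery functions), so pyright would otherwise flag them unused.
print('imports registered')
```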
10 changes: 6 additions & 4 deletions composer/algorithms/alibi/attention_surgery_functions/_bert.py
@@ -1,6 +1,7 @@
# Copyright 2022 MosaicML Composer authors
# SPDX-License-Identifier: Apache-2.0

import copy
import math
from types import MethodType
from typing import Optional, Tuple
@@ -20,13 +21,14 @@ def bert_embedding_converter(module: torch.nn.Module, module_index: int, max_seq
"""
assert isinstance(module, (BertEmbeddings, RobertaEmbeddings))
del module_index # unused
zero_and_freeze_expand_position_embeddings(module,
new_module = copy.deepcopy(module)
zero_and_freeze_expand_position_embeddings(new_module,
max_sequence_length,
position_embedding_attribute='position_embeddings')

module_device = next(module.parameters()).device
module.register_buffer('position_ids', torch.arange(max_sequence_length).expand((1, -1)).to(module_device))
return module
module_device = next(new_module.parameters()).device
new_module.register_buffer('position_ids', torch.arange(max_sequence_length).expand((1, -1)).to(module_device))
return new_module


@policy_registry.register(BertSelfAttention, RobertaSelfAttention)
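The `_bert.py` hunk above changes `bert_embedding_converter` to deep-copy the module, mutate the copy, and return it, instead of editing the caller's module in place. A minimal torch-free sketch of that copy-then-mutate pattern, with a plain class standing in for a `BertEmbeddings` module (names here are illustrative, not Composer's API):

```python
# Copy-then-mutate module surgery: the converter returns a modified clone and
# leaves the original object untouched, mirroring the _bert.py fix above.
import copy

class FakeEmbeddings:
    """Stand-in for a transformers embedding module."""
    def __init__(self, max_len):
        self.position_ids = list(range(max_len))

def embedding_converter(module, max_sequence_length):
    new_module = copy.deepcopy(module)  # never mutate the caller's module
    new_module.position_ids = list(range(max_sequence_length))
    return new_module

orig = FakeEmbeddings(4)
converted = embedding_converter(orig, 8)
print(len(orig.position_ids), len(converted.position_ids))  # 4 8
```

The design point is that surgery functions applied through a registry may be called on shared modules; returning a fresh copy prevents an in-place edit from leaking into other users of the original module.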
15 changes: 10 additions & 5 deletions composer/algorithms/colout/colout.py
@@ -29,10 +29,12 @@
__all__ = ['ColOut', 'ColOutTransform', 'colout_batch']


def colout_batch(sample: Union[ImgT, Tuple[ImgT, ImgT]],
p_row: float = 0.15,
p_col: float = 0.15,
resize_target: Union[bool, str] = 'auto') -> Union[ImgT, Tuple[ImgT, ImgT]]:
def colout_batch(
sample: Union[ImgT, Tuple[ImgT, ImgT]],
p_row: float = 0.15,
p_col: float = 0.15,
resize_target: Union[bool,
str] = 'auto') -> Union[torch.Tensor, ImgT, Tuple[Tensor, Tensor], Tuple[ImgT, ImgT]]:
"""Applies ColOut augmentation to a batch of images and (optionally) targets,
dropping the same random rows and columns from all images and targets in a batch.

@@ -136,7 +138,10 @@ def __init__(self, p_row: float = 0.15, p_col: float = 0.15, resize_target: Unio
self.p_col = p_col
self.resize_target = resize_target

def __call__(self, sample: Union[ImgT, Tuple[ImgT, ImgT]]) -> Union[ImgT, Tuple[ImgT, ImgT]]:
def __call__(
self, sample: Union[ImgT,
Tuple[ImgT,
ImgT]]) -> Union[torch.Tensor, ImgT, Tuple[Tensor, Tensor], Tuple[ImgT, ImgT]]:
"""Drops random rows and columns from up to two images.

Args:
17 changes: 9 additions & 8 deletions composer/algorithms/factorize/factorize_modules.py
@@ -327,8 +327,8 @@ def solution_for_rank(self, input: torch.Tensor, rank: int) -> LowRankSolution:

def apply_solution(self, solution: LowRankSolution):
self.latent_size = solution.rank
self.module0.out_channels = solution.rank
self.module1.in_channels = solution.rank
self.module0.out_channels = solution.rank # pyright: ignore[reportGeneralTypeIssues]
self.module1.in_channels = solution.rank # pyright: ignore[reportGeneralTypeIssues]
_apply_solution_to_module_parameters(solution, self.module0, self.module1, transpose=False)

@staticmethod
@@ -452,8 +452,8 @@ def solution_for_rank(self, input: torch.Tensor, rank: int) -> LowRankSolution:

def apply_solution(self, solution: LowRankSolution) -> None:
self.latent_size = solution.rank
self.module0.out_features = solution.rank
self.module1.in_features = solution.rank
self.module0.out_features = solution.rank # pyright: ignore[reportGeneralTypeIssues]
self.module1.in_features = solution.rank # pyright: ignore[reportGeneralTypeIssues]
_apply_solution_to_module_parameters(solution, self.module0, self.module1, transpose=True)

@staticmethod
@@ -471,9 +471,10 @@ def max_allowed_latent_channels(in_features: int, out_features: int) -> int:

@staticmethod
def from_linear(module: torch.nn.Linear, module_ix: int = -1, **kwargs) -> FactorizedLinear:
ret = FactorizedLinear(in_features=module.in_features,
out_features=module.out_features,
bias=((module.bias is not None) and (module.bias is not False)),
**kwargs)
ret = FactorizedLinear(
in_features=module.in_features,
out_features=module.out_features,
bias=(module.bias is not None and module.bias is not False), # pyright: ignore[reportUnnecessaryComparison]
**kwargs)
ret.reset_parameters()
return ret
92 changes: 0 additions & 92 deletions composer/algorithms/fused_layernorm/README.md

This file was deleted.

14 changes: 0 additions & 14 deletions composer/algorithms/fused_layernorm/__init__.py

This file was deleted.
