2 changes: 1 addition & 1 deletion colossalai/_analyzer/fx/tracer/tracer.py

@@ -237,7 +237,7 @@ def _tracer_override(self):
         # override the tracer to support custom modules and checkpointing
         if self.trace_act_ckpt:
             orig_ckpt_func_apply = torch.utils.checkpoint.CheckpointFunction.apply
-            orig_ckpt_func_without_reentrant = torch.utils.checkpoint._checkpoint_without_reentrant
+            orig_ckpt_func_without_reentrant = torch.utils.checkpoint._checkpoint_without_reentrant_generator

             def checkpoint(run_function, preserve_rng_state=False, *args):
                 self.ckpt_regions.append(self.ckpt_idx)
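Background for this one-line change: newer PyTorch releases renamed the private non-reentrant checkpoint helper from `_checkpoint_without_reentrant` to `_checkpoint_without_reentrant_generator`, so the tracer's saved reference must follow. A minimal, illustrative sketch (not the tracer's actual code) of resolving the private symbol under either name:

import torch.utils.checkpoint as ckpt

# Illustrative only: the helper is private, so its name may change again in a
# future torch release. Prefer the newer name and fall back to the older one.
_orig_ckpt_without_reentrant = getattr(
    ckpt,
    "_checkpoint_without_reentrant_generator",             # newer torch
    getattr(ckpt, "_checkpoint_without_reentrant", None),  # older torch
)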
8 changes: 8 additions & 0 deletions colossalai/nn/layer/scaled_softmax.py

@@ -8,6 +8,14 @@

 from colossalai.kernel.kernel_loader import ScaledMaskedSoftmaxLoader, ScaledUpperTriangleMaskedSoftmaxLoader

+# NOTE: These kernels are compiled on specific GPU arch and not widely applicable.
+# try:
+#     from colossalai._C import scaled_masked_softmax as scaled_masked_softmax, scaled_upper_triangle_masked_softmax_cuda as scaled_upper_triang_masked_softmax
+# except ImportError:
+
+scaled_masked_softmax = None
+scaled_upper_triang_masked_softmax = None
+

 class AttnMaskType(enum.Enum):
     padding = 1
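Because the arch-specific extensions are no longer imported here, both module-level handles start out as None. A hedged sketch of how calling code could fall back to plain PyTorch when the fused kernel handle is unavailable; this is illustrative only (the module's real dispatch goes through the kernel loaders imported above) and assumes a mask whose nonzero entries mark positions to suppress:

import torch

def masked_softmax_fallback(scores: torch.Tensor, mask: torch.Tensor, scale: float) -> torch.Tensor:
    # Pure-PyTorch fallback for when the fused CUDA kernel is unavailable:
    # scale the logits, mask out disallowed positions, softmax over the last dim.
    scores = scores * scale
    scores = scores.masked_fill(mask.bool(), torch.finfo(scores.dtype).min)
    return torch.softmax(scores, dim=-1)

# Usage sketch:
# if scaled_masked_softmax is None:
#     probs = masked_softmax_fallback(attn_scores, attn_mask, scale)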
(changed file path not captured in this view)

@@ -17,7 +17,7 @@ def synthesize_data():


 def main():
-    colossalai.launch_from_torch(config="./config.py")
+    colossalai.legacy.launch_from_torch(config="./config.py")

     logger = get_dist_logger()
4 changes: 2 additions & 2 deletions examples/tutorial/hybrid_parallel/train.py

@@ -41,9 +41,9 @@ def __len__(self):

 def main():
     # launch from torch
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     args = parser.parse_args()
-    colossalai.launch_from_torch(config=args.config)
+    colossalai.legacy.launch_from_torch(config=args.config)

     # get logger
     logger = get_dist_logger()
6 changes: 3 additions & 3 deletions examples/tutorial/large_batch_optimizer/train.py

@@ -37,14 +37,14 @@ def __len__(self):

 def main():
     # initialize distributed setting
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     parser.add_argument(
         "--optimizer", choices=["lars", "lamb"], help="Choose your large-batch optimizer", required=True
     )
     args = parser.parse_args()

     # launch from torch
-    colossalai.launch_from_torch(config=args.config)
+    colossalai.legacy.launch_from_torch(config=args.config)

     # get logger
     logger = get_dist_logger()

@@ -73,7 +73,7 @@ def main():
     )

     # initialize
-    engine, train_dataloader, test_dataloader, _ = colossalai.initialize(
+    engine, train_dataloader, test_dataloader, _ = colossalai.legacy.initialize(
         model=model,
         optimizer=optimizer,
         criterion=criterion,
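The tutorial scripts above all follow the same migration: the argument parser, the torch launcher, and engine construction move under the `colossalai.legacy` namespace. A condensed sketch of the resulting setup flow, built only from the calls visible in this diff (the `train_dataloader`/`test_dataloader` keyword names are assumed from the legacy `initialize` signature; model, optimizer, criterion, and dataloaders are placeholders supplied by the caller):

import colossalai

def setup_legacy_training(model, optimizer, criterion, train_dataloader, test_dataloader):
    # Parse CLI arguments (including --config) with the legacy default parser.
    parser = colossalai.legacy.get_default_parser()
    args = parser.parse_args()

    # Initialize the distributed environment from torchrun-provided env vars.
    colossalai.legacy.launch_from_torch(config=args.config)

    # Wrap model/optimizer/criterion into a legacy engine; the dataloaders may
    # come back wrapped for distributed sampling as well.
    engine, train_dataloader, test_dataloader, _ = colossalai.legacy.initialize(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_dataloader=train_dataloader,
        test_dataloader=test_dataloader,
    )
    return engine, train_dataloader, test_dataloader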
4 changes: 2 additions & 2 deletions examples/tutorial/opt/opt/run_clm.py (file mode 100755 → 100644)

@@ -72,7 +72,7 @@ def get_time_stamp():


 def parse_args():
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     parser.add_argument("-s", "--synthetic", action="store_true")
     parser.add_argument(
         "--dataset_name",

@@ -289,7 +289,7 @@ def __len__(self):
 def main():
     args = parse_args()
     disable_existing_loggers()
-    colossalai.launch_from_torch(config=dict())
+    colossalai.legacy.launch_from_torch(config=dict())
     logger = get_dist_logger()
     is_main_process = dist.get_rank() == 0
(changed file path not captured in this view)

@@ -1,9 +1,9 @@
 import torch
 import torch.nn as nn

-from colossalai.kernel.cuda_native import LayerNorm
 from colossalai.kernel.jit import bias_dropout_add_fused_inference, bias_dropout_add_fused_train
 from colossalai.legacy.nn.layer.parallel_sequence import TransformerSelfAttentionRing
+from colossalai.nn.layer.layernorm import MixedFusedLayerNorm as LayerNorm

 from .dropout import get_bias_dropout_add
 from .mlp import TransformerMLP
4 changes: 2 additions & 2 deletions examples/tutorial/sequence_parallel/train.py

@@ -48,7 +48,7 @@ def pipeline_data_process_func(stage_output, micro_batch_data):
 def main():
     # initialize
     parse_args()
-    colossalai.launch_from_torch(config="./config.py", seed=1234, backend="nccl")
+    colossalai.legacy.launch_from_torch(config="./config.py", seed=1234, backend="nccl")

     logger = get_dist_logger()

@@ -136,7 +136,7 @@ def main():
     logger.info(f"LR Scheduler is built with {warmup_steps} warmup steps and {gpc.config.DECAY_ITERS} decay steps")

     # # init
-    engine, *dummy = colossalai.initialize(model, optimizer, criterion, verbose=True)
+    engine, *dummy = colossalai.legacy.initialize(model, optimizer, criterion, verbose=True)

     # build timer
     timer = MultiTimer()
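For context, the engine returned by `colossalai.legacy.initialize` is then driven explicitly inside the training loop. A rough sketch of a single step under that API; the `train`/`zero_grad`/`criterion`/`backward`/`step` methods are assumed from the legacy engine interface and are not part of this diff:

def train_step(engine, data, labels):
    # One optimization step with the legacy ColossalAI engine (sketch only).
    engine.train()
    engine.zero_grad()
    outputs = engine(data)                    # forward through the wrapped model
    loss = engine.criterion(outputs, labels)  # loss via the engine-held criterion
    engine.backward(loss)                     # backward with any configured AMP/ZeRO hooks
    engine.step()                             # optimizer update
    return loss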