Skip to content

Commit

Permalink
local
Browse files Browse the repository at this point in the history
  • Loading branch information
Dashiell Stander authored and Dashiell Stander committed Sep 19, 2022
1 parent cdc7c47 commit 20bc05c
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
7 changes: 3 additions & 4 deletions configs/13B_deduped.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@
"output_layer_parallelism": "column",

# these should provide some speedup but take a while to build; set to true if desired
"scaled-upper-triang-masked-softmax-fusion": true,
"bias-gelu-fusion": true,

"scaled-upper-triang-masked-softmax-fusion": false,
"bias-gelu-fusion": false,
# optimizer settings
"optimizer": {
"type": "Adam",
Expand Down Expand Up @@ -105,5 +104,5 @@
"wandb_project": "pythia",
"wandb_group": "13B dedupe",
"launcher": "srun",
"deepspeed_mpi": true
"deepspeed_mpi": false
}
9 changes: 6 additions & 3 deletions debug_srun.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH --job-name="dash-neox"
#SBATCH --partition=compute-od-gpu
#SBATCH --partition=gpu
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1 # Crucial - only 1 task per dist per node!
#SBATCH --cpus-per-task=32 # Number of cores per task
Expand All @@ -10,6 +10,7 @@
#SBATCH --output=%x_%j.out # Set this to the path where you want Slurm stdout to go
#SBATCH --error=%x_%j.out # Set this to the path where you want Slurm stderr to go (same file as --output here)
#SBATCH --exclusive
#SBATCH --comment neox

module load intelmpi

Expand Down Expand Up @@ -53,10 +54,12 @@ TRAIN_PATH=/fsx/dashiell/gpt-neox
export TORCHELASTIC_ERROR_FILE=$TRAIN_PATH/tmp/torch-elastic-error.json

# Env setup
source /fsx/dashiell/miniconda3/conda/bin/activate neox
source /fsx/dashiell/miniconda3/bin/activate neox
cd $TRAIN_PATH

srun python $TRAIN_PATH/deepy.py $TRAIN_PATH/train.py \
which python
which nvcc
srun --comment neox python $TRAIN_PATH/deepy.py $TRAIN_PATH/train.py \
--conf_dir configs 13B_deduped.yml

set +x
2 changes: 0 additions & 2 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
git+https://github.com/EleutherAI/DeeperSpeed.git@ef9002f62c85efd727aa465d41987deb8e2e54dd#egg=deepspeed
einops==0.3.0
ftfy==6.0.1
git+https://github.com/EleutherAI/lm_dataformat.git@4eec05349977071bf67fc072290b95e31c8dd836
lm_eval==0.2.0
mpi4py==3.0.3
numpy==1.22.0
pybind11==2.6.2
regex
Expand Down

0 comments on commit 20bc05c

Please sign in to comment.