Skip to content

Commit 6985e58

Browse files
committed
more updates on examples
1 parent c1e0689 commit 6985e58

9 files changed

+20
-20
lines changed

examples/sc21/run_figure_11.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ MBS=1
2525
HS=20480
2626
NAH=128
2727
DDP=local
28-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
28+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2929

3030

3131
# Name of the job.

examples/sc21/run_figure_12.sh

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,9 @@ GBS=12
1616

1717
# Set interleaved schedule options.
1818
if [ ${INTERLEAVED} == "YES" ]; then
19-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 "
19+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
2020
elif [ ${INTERLEAVED} == "NO" ]; then
21-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
21+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2222
else
2323
echo "Invalid configuration"
2424
exit 1

examples/sc21/run_figure_13.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ NLS=32
2424
HS=20480
2525
NAH=128
2626
DDP=local
27-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
27+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2828
NNODES=8
2929

3030

examples/sc21/run_figure_14.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ NLS=32
2525
HS=3840
2626
NAH=32
2727
DDP=local
28-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
28+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2929
NNODES=8
3030

3131

examples/sc21/run_figure_15.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ NLS=32
2525
HS=3840
2626
NAH=32
2727
DDP=local
28-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
28+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2929
NNODES=8
3030

3131

examples/sc21/run_figure_16.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ NLS=32
2121
HS=15360
2222
NAH=128
2323
DDP=local
24-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
24+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2525
NNODES=8
2626

2727

examples/sc21/run_figure_17.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ GBS=1
1616

1717
# Set activation recomputation.
1818
if [ ${ACTIVATION_RECOMPUTATION} == "YES" ]; then
19-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
19+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2020
elif [ ${ACTIVATION_RECOMPUTATION} == "NO" ]; then
2121
MEGATRON_EXTRA_PARAMS=""
2222
else

examples/sc21/run_figure_18.sh

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,9 @@ GBS=12
1616

1717
# Set scatter-gather communication optimization options.
1818
if [ ${SCATTER_GATHER} == "YES" ]; then
19-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 "
19+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
2020
elif [ ${SCATTER_GATHER} == "NO" ]; then
21-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
21+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
2222
else
2323
echo "Invalid configuration"
2424
exit 1

examples/sc21/run_table_1.sh

+10-10
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ if [ ${MODEL_SIZE} == "1.7B" ]; then
2121
NAH=24
2222
DDP=torch
2323
NNODES=4
24-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
24+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
2525
elif [ ${MODEL_SIZE} == "3.6B" ]; then
2626
TP=2
2727
PP=1
@@ -32,7 +32,7 @@ elif [ ${MODEL_SIZE} == "3.6B" ]; then
3232
NAH=32
3333
DDP=torch
3434
NNODES=8
35-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
35+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
3636
elif [ ${MODEL_SIZE} == "7.5B" ]; then
3737
TP=4
3838
PP=1
@@ -43,7 +43,7 @@ elif [ ${MODEL_SIZE} == "7.5B" ]; then
4343
NAH=32
4444
DDP=torch
4545
NNODES=16
46-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
46+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
4747
elif [ ${MODEL_SIZE} == "18B" ]; then
4848
TP=8
4949
PP=1
@@ -54,7 +54,7 @@ elif [ ${MODEL_SIZE} == "18B" ]; then
5454
NAH=48
5555
DDP=torch
5656
NNODES=32
57-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
57+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
5858
elif [ ${MODEL_SIZE} == "39B" ]; then
5959
TP=8
6060
PP=2
@@ -65,7 +65,7 @@ elif [ ${MODEL_SIZE} == "39B" ]; then
6565
NAH=64
6666
DDP=local
6767
NNODES=64
68-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
68+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
6969
elif [ ${MODEL_SIZE} == "76B" ]; then
7070
TP=8
7171
PP=4
@@ -76,7 +76,7 @@ elif [ ${MODEL_SIZE} == "76B" ]; then
7676
NAH=80
7777
DDP=local
7878
NNODES=128
79-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 5"
79+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5"
8080
elif [ ${MODEL_SIZE} == "145B" ]; then
8181
TP=8
8282
PP=8
@@ -87,7 +87,7 @@ elif [ ${MODEL_SIZE} == "145B" ]; then
8787
NAH=96
8888
DDP=local
8989
NNODES=192
90-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 5 "
90+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5 "
9191
elif [ ${MODEL_SIZE} == "310B" ]; then
9292
TP=8
9393
PP=16
@@ -98,7 +98,7 @@ elif [ ${MODEL_SIZE} == "310B" ]; then
9898
NAH=128
9999
DDP=local
100100
NNODES=240
101-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 3 "
101+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 3 "
102102
elif [ ${MODEL_SIZE} == "530B" ]; then
103103
TP=8
104104
PP=35
@@ -109,7 +109,7 @@ elif [ ${MODEL_SIZE} == "530B" ]; then
109109
NAH=128
110110
DDP=local
111111
NNODES=315
112-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 1 "
112+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 1 "
113113
elif [ ${MODEL_SIZE} == "1T" ]; then
114114
TP=8
115115
PP=64
@@ -120,7 +120,7 @@ elif [ ${MODEL_SIZE} == "1T" ]; then
120120
NAH=160
121121
DDP=local
122122
NNODES=384
123-
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
123+
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
124124
else
125125
echo "Invalid configuration"
126126
exit 1

0 commit comments

Comments
 (0)