@@ -21,7 +21,7 @@ if [ ${MODEL_SIZE} == "1.7B" ]; then
21
21
NAH=24
22
22
DDP=torch
23
23
NNODES=4
24
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations "
24
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform "
25
25
elif [ ${MODEL_SIZE} == "3.6B" ]; then
26
26
TP=2
27
27
PP=1
@@ -32,7 +32,7 @@ elif [ ${MODEL_SIZE} == "3.6B" ]; then
32
32
NAH=32
33
33
DDP=torch
34
34
NNODES=8
35
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations "
35
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform "
36
36
elif [ ${MODEL_SIZE} == "7.5B" ]; then
37
37
TP=4
38
38
PP=1
@@ -43,7 +43,7 @@ elif [ ${MODEL_SIZE} == "7.5B" ]; then
43
43
NAH=32
44
44
DDP=torch
45
45
NNODES=16
46
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations "
46
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform "
47
47
elif [ ${MODEL_SIZE} == "18B" ]; then
48
48
TP=8
49
49
PP=1
@@ -54,7 +54,7 @@ elif [ ${MODEL_SIZE} == "18B" ]; then
54
54
NAH=48
55
55
DDP=torch
56
56
NNODES=32
57
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations "
57
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform "
58
58
elif [ ${MODEL_SIZE} == "39B" ]; then
59
59
TP=8
60
60
PP=2
@@ -65,7 +65,7 @@ elif [ ${MODEL_SIZE} == "39B" ]; then
65
65
NAH=64
66
66
DDP=local
67
67
NNODES=64
68
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations "
68
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform "
69
69
elif [ ${MODEL_SIZE} == "76B" ]; then
70
70
TP=8
71
71
PP=4
@@ -76,7 +76,7 @@ elif [ ${MODEL_SIZE} == "76B" ]; then
76
76
NAH=80
77
77
DDP=local
78
78
NNODES=128
79
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations --num-layers-per-virtual-pipeline-stage 5"
79
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5"
80
80
elif [ ${MODEL_SIZE} == "145B" ]; then
81
81
TP=8
82
82
PP=8
@@ -87,7 +87,7 @@ elif [ ${MODEL_SIZE} == "145B" ]; then
87
87
NAH=96
88
88
DDP=local
89
89
NNODES=192
90
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations --num-layers-per-virtual-pipeline-stage 5 "
90
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5 "
91
91
elif [ ${MODEL_SIZE} == "310B" ]; then
92
92
TP=8
93
93
PP=16
@@ -98,7 +98,7 @@ elif [ ${MODEL_SIZE} == "310B" ]; then
98
98
NAH=128
99
99
DDP=local
100
100
NNODES=240
101
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations --num-layers-per-virtual-pipeline-stage 3 "
101
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 3 "
102
102
elif [ ${MODEL_SIZE} == "530B" ]; then
103
103
TP=8
104
104
PP=35
@@ -109,7 +109,7 @@ elif [ ${MODEL_SIZE} == "530B" ]; then
109
109
NAH=128
110
110
DDP=local
111
111
NNODES=315
112
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations --num-layers-per-virtual-pipeline-stage 1 "
112
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 1 "
113
113
elif [ ${MODEL_SIZE} == "1T" ]; then
114
114
TP=8
115
115
PP=64
@@ -120,7 +120,7 @@ elif [ ${MODEL_SIZE} == "1T" ]; then
120
120
NAH=160
121
121
DDP=local
122
122
NNODES=384
123
- MEGATRON_EXTRA_PARAMS=" --checkpoint-activations "
123
+ MEGATRON_EXTRA_PARAMS=" --activations-checkpoint-method uniform "
124
124
else
125
125
echo " Invalid configuration"
126
126
exit 1
0 commit comments