File tree 4 files changed +0
-8
lines changed
4 files changed +0
-8
lines changed Original file line number Diff line number Diff line change @@ -6,15 +6,13 @@ export XLA_FLAGS="\
6
6
--xla_gpu_enable_latency_hiding_scheduler=true \
7
7
--xla_gpu_enable_triton_gemm=false \
8
8
--xla_gpu_graph_level=0 \
9
- --xla_gpu_enable_highest_priority_async_stream=true \
10
9
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
11
10
--xla_gpu_all_gather_combine_threshold_bytes=$((THRESHOLD_BYTES/(NUM_NODES*NUM_GPUS))) \
12
11
--xla_gpu_reduce_scatter_combine_threshold_bytes=$((THRESHOLD_BYTES/(NUM_NODES*NUM_GPUS*2))) \
13
12
--xla_gpu_enable_pipelined_all_gather=true \
14
13
--xla_gpu_enable_pipelined_reduce_scatter=true \
15
14
--xla_gpu_enable_pipelined_all_reduce=true \
16
15
--xla_gpu_enable_while_loop_double_buffering=true \
17
- --xla_gpu_enable_triton_softmax_fusion=false \
18
16
--xla_gpu_enable_all_gather_combine_by_dim=false \
19
17
--xla_gpu_enable_reduce_scatter_combine_by_dim=false \
20
18
--xla_disable_hlo_passes=rematerialization \
Original file line number Diff line number Diff line change @@ -3,8 +3,6 @@ THRESHOLD_BYTES=51200
3
3
export XLA_FLAGS="\
4
4
--xla_gpu_enable_latency_hiding_scheduler=true \
5
5
--xla_allow_excess_precision \
6
- --xla_gpu_enable_highest_priority_async_stream=true \
7
- --xla_gpu_enable_triton_softmax_fusion=false \
8
6
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
9
7
--xla_gpu_graph_level=0 \
10
8
"
Original file line number Diff line number Diff line change @@ -3,8 +3,6 @@ THRESHOLD_BYTES=33554432
3
3
export XLA_FLAGS="\
4
4
--xla_gpu_enable_latency_hiding_scheduler=true \
5
5
--xla_allow_excess_precision \
6
- --xla_gpu_enable_highest_priority_async_stream=true \
7
- --xla_gpu_enable_triton_softmax_fusion=false \
8
6
--xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
9
7
--xla_gpu_graph_level=0 \
10
8
--xla_gpu_enable_cudnn_fmha=false \
Original file line number Diff line number Diff line change @@ -5,8 +5,6 @@ REDUCE_SCATTER_THRESHOLD_BYTES=402653184
5
5
export XLA_FLAGS="\
6
6
--xla_gpu_enable_latency_hiding_scheduler=true \
7
7
--xla_allow_excess_precision \
8
- --xla_gpu_enable_highest_priority_async_stream=true \
9
- --xla_gpu_enable_triton_softmax_fusion=false \
10
8
--xla_gpu_all_reduce_combine_threshold_bytes=${ALL_REDUCE_THRESHOLD_BYTES} \
11
9
--xla_gpu_graph_level=0 \
12
10
--xla_gpu_all_gather_combine_threshold_bytes=${ALL_GATHER_THRESHOLD_BYTES} \
You can’t perform that action at this time.
0 commit comments