Skip to content
This repository was archived by the owner on Jun 4, 2024. It is now read-only.
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit d5bfb7b

Browse files
authored Jan 25, 2022
[RLlib] Preparatory PR for multi-agent multi-GPU learner (alpha-star style) #3 (#21652)
1 parent b2cd123 commit d5bfb7b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+375
-281
lines changed
 

‎rllib/BUILD

+26 -19
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ py_test(
8181
)
8282

8383
py_test(
84-
name = "learning_cartpole_a2c_fake_gpus",
84+
name = "learning_tests_cartpole_a2c_fake_gpus",
8585
main = "tests/run_regression_tests.py",
8686
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "fake_gpus"],
8787
size = "large",
@@ -126,15 +126,22 @@ py_test(
126126

127127
# APPO
128128
py_test(
129-
name = "learning_tests_cartpole_appo",
129+
name = "learning_tests_cartpole_appo_no_vtrace",
130130
main = "tests/run_regression_tests.py",
131131
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
132132
size = "large",
133133
srcs = ["tests/run_regression_tests.py"],
134-
data = [
135-
"tuned_examples/ppo/cartpole-appo.yaml",
136-
"tuned_examples/ppo/cartpole-appo-vtrace.yaml"
137-
],
134+
data = ["tuned_examples/ppo/cartpole-appo.yaml"],
135+
args = ["--yaml-dir=tuned_examples/ppo"]
136+
)
137+
138+
py_test(
139+
name = "learning_tests_cartpole_appo_vtrace",
140+
main = "tests/run_regression_tests.py",
141+
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete"],
142+
size = "large",
143+
srcs = ["tests/run_regression_tests.py"],
144+
data = ["tuned_examples/ppo/cartpole-appo-vtrace.yaml"],
138145
args = ["--yaml-dir=tuned_examples/ppo"]
139146
)
140147

@@ -151,7 +158,7 @@ py_test(
151158
)
152159

153160
py_test(
154-
name = "learning_frozenlake_appo",
161+
name = "learning_tests_frozenlake_appo",
155162
main = "tests/run_regression_tests.py",
156163
tags = ["team:ml", "learning_tests", "learning_tests_discrete"],
157164
size = "large",
@@ -161,7 +168,7 @@ py_test(
161168
)
162169

163170
py_test(
164-
name = "learning_cartpole_appo_fake_gpus",
171+
name = "learning_tests_cartpole_appo_fake_gpus",
165172
main = "tests/run_regression_tests.py",
166173
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "fake_gpus"],
167174
size = "large",
@@ -208,7 +215,7 @@ py_test(
208215
)
209216

210217
py_test(
211-
name = "learning_pendulum_ddpg_fake_gpus",
218+
name = "learning_tests_pendulum_ddpg_fake_gpus",
212219
main = "tests/run_regression_tests.py",
213220
tags = ["team:ml", "learning_tests", "learning_tests_pendulum", "learning_tests_continuous", "fake_gpus"],
214221
size = "large",
@@ -263,7 +270,7 @@ py_test(
263270
)
264271

265272
py_test(
266-
name = "learning_cartpole_dqn_fake_gpus",
273+
name = "learning_tests_cartpole_dqn_fake_gpus",
267274
main = "tests/run_regression_tests.py",
268275
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "fake_gpus"],
269276
size = "large",
@@ -286,7 +293,7 @@ py_test(
286293
)
287294

288295
py_test(
289-
name = "learning_cartpole_simpleq_fake_gpus",
296+
name = "learning_tests_cartpole_simpleq_fake_gpus",
290297
main = "tests/run_regression_tests.py",
291298
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "fake_gpus"],
292299
size = "medium",
@@ -318,7 +325,7 @@ py_test(
318325
)
319326

320327
py_test(
321-
name = "learning_cartpole_impala_fake_gpus",
328+
name = "learning_tests_cartpole_impala_fake_gpus",
322329
main = "tests/run_regression_tests.py",
323330
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "fake_gpus"],
324331
size = "large",
@@ -352,7 +359,7 @@ py_test(
352359
)
353360

354361
py_test(
355-
name = "learning_cartpole_pg_fake_gpus",
362+
name = "learning_tests_cartpole_pg_fake_gpus",
356363
main = "tests/run_regression_tests.py",
357364
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "fake_gpus"],
358365
size = "large",
@@ -403,7 +410,7 @@ py_test(
403410
)
404411

405412
py_test(
406-
name = "learning_cartpole_ppo_fake_gpus",
413+
name = "learning_tests_cartpole_ppo_fake_gpus",
407414
main = "tests/run_regression_tests.py",
408415
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "learning_tests_discrete", "fake_gpus"],
409416
size = "large",
@@ -455,7 +462,7 @@ py_test(
455462
)
456463

457464
py_test(
458-
name = "learning_stateless_cartpole_r2d2_fake_gpus",
465+
name = "learning_tests_stateless_cartpole_r2d2_fake_gpus",
459466
main = "tests/run_regression_tests.py",
460467
tags = ["team:ml", "learning_tests", "learning_tests_cartpole", "fake_gpus"],
461468
size = "large",
@@ -506,7 +513,7 @@ py_test(
506513
)
507514

508515
py_test(
509-
name = "learning_pendulum_sac_fake_gpus",
516+
name = "learning_tests_pendulum_sac_fake_gpus",
510517
main = "tests/run_regression_tests.py",
511518
tags = ["team:ml", "learning_tests", "learning_tests_pendulum", "learning_tests_continuous", "fake_gpus"],
512519
size = "large",
@@ -845,7 +852,7 @@ py_test(
845852
"--env", "Pendulum-v1",
846853
"--run", "APEX_DDPG",
847854
"--stop", "'{\"training_iteration\": 1}'",
848-
"--config", "'{\"framework\": \"tf\", \"num_workers\": 2, \"optimizer\": {\"num_replay_buffer_shards\": 1}, \"learning_starts\": 100, \"min_iter_time_s\": 1, \"batch_mode\": \"complete_episodes\"}'",
855+
"--config", "'{\"framework\": \"tf\", \"num_workers\": 2, \"optimizer\": {\"num_replay_buffer_shards\": 1}, \"learning_starts\": 100, \"min_time_s_per_reporting\": 1, \"batch_mode\": \"complete_episodes\"}'",
849856
"--ray-num-cpus", "4",
850857
]
851858
)
@@ -928,7 +935,7 @@ py_test(
928935
"--env", "CartPole-v0",
929936
"--run", "IMPALA",
930937
"--stop", "'{\"training_iteration\": 1}'",
931-
"--config", "'{\"framework\": \"tf\", \"num_gpus\": 0, \"num_workers\": 2, \"min_iter_time_s\": 1, \"num_multi_gpu_tower_stacks\": 2, \"replay_buffer_num_slots\": 100, \"replay_proportion\": 1.0}'",
938+
"--config", "'{\"framework\": \"tf\", \"num_gpus\": 0, \"num_workers\": 2, \"min_time_s_per_reporting\": 1, \"num_multi_gpu_tower_stacks\": 2, \"replay_buffer_num_slots\": 100, \"replay_proportion\": 1.0}'",
932939
"--ray-num-cpus", "4",
933940
]
934941
)
@@ -942,7 +949,7 @@ py_test(
942949
"--env", "CartPole-v0",
943950
"--run", "IMPALA",
944951
"--stop", "'{\"training_iteration\": 1}'",
945-
"--config", "'{\"framework\": \"tf\", \"num_gpus\": 0, \"num_workers\": 2, \"min_iter_time_s\": 1, \"num_multi_gpu_tower_stacks\": 2, \"replay_buffer_num_slots\": 100, \"replay_proportion\": 1.0, \"model\": {\"use_lstm\": true}}'",
952+
"--config", "'{\"framework\": \"tf\", \"num_gpus\": 0, \"num_workers\": 2, \"min_time_s_per_reporting\": 1, \"num_multi_gpu_tower_stacks\": 2, \"replay_buffer_num_slots\": 100, \"replay_proportion\": 1.0, \"model\": {\"use_lstm\": true}}'",
946953
"--ray-num-cpus", "4",
947954
]
948955
)

‎rllib/agents/a3c/a2c.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
A3C_CONFIG,
1818
{
1919
"rollout_fragment_length": 20,
20-
"min_iter_time_s": 10,
20+
"min_time_s_per_reporting": 10,
2121
"sample_async": False,
2222

2323
# A2C supports microbatching, in which we accumulate gradients over

0 commit comments

Comments
 (0)
This repository has been archived.