Skip to content

Commit f9d1faf

Browse files
author
Ervin T
authored
Fix issue with different decision intervals for different brains (#3181)
* Move action check into agent_processor
* Better loop for iterating over step_info
* Add warning for agentmanager not found
1 parent bf01574 commit f9d1faf

File tree

3 files changed

+21
-18
lines changed

3 files changed

+21
-18
lines changed

ml-agents/mlagents/trainers/agent_processor.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,7 @@ def add_experiences(
153153
del self.episode_rewards[agent_id]
154154
elif not next_info.local_done[next_idx]:
155155
self.episode_steps[agent_id] += 1
156-
self.policy.save_previous_action(
157-
curr_info.agents, take_action_outputs["action"]
158-
)
156+
if "action" in take_action_outputs:
157+
self.policy.save_previous_action(
158+
curr_info.agents, take_action_outputs["action"]
159+
)

ml-agents/mlagents/trainers/env_manager.py

+4-5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from abc import ABC, abstractmethod
2-
from typing import List, Dict, NamedTuple
2+
from typing import List, Dict, NamedTuple, Iterable
33
from mlagents.trainers.brain import AllBrainInfo, BrainParameters
44
from mlagents.trainers.policy import Policy
55
from mlagents.trainers.action_info import ActionInfo
@@ -10,10 +10,9 @@ class EnvironmentStep(NamedTuple):
1010
current_all_brain_info: AllBrainInfo
1111
brain_name_to_action_info: Dict[str, ActionInfo]
1212

13-
def has_actions_for_brain(self, brain_name: str) -> bool:
14-
return brain_name in self.brain_name_to_action_info and bool(
15-
self.brain_name_to_action_info[brain_name].outputs
16-
)
13+
@property
14+
def name_behavior_ids(self) -> Iterable[str]:
15+
return self.brain_name_to_action_info.keys()
1716

1817

1918
class EnvManager(ABC):

ml-agents/mlagents/trainers/trainer_controller.py

+13-10
Original file line number | Diff line number | Diff line change
@@ -294,17 +294,20 @@ def advance(self, env: EnvManager) -> int:
294294
with hierarchical_timer("env_step"):
295295
new_step_infos = env.step()
296296
for step_info in new_step_infos:
297-
for brain_name in self.trainers.keys():
298-
for name_behavior_id in self.brain_name_to_identifier[brain_name]:
299-
if step_info.has_actions_for_brain(name_behavior_id):
300-
_processor = self.managers[name_behavior_id].processor
301-
_processor.add_experiences(
302-
step_info.previous_all_brain_info[name_behavior_id],
303-
step_info.current_all_brain_info[name_behavior_id],
304-
step_info.brain_name_to_action_info[
305-
name_behavior_id
306-
].outputs,
297+
for name_behavior_id in step_info.name_behavior_ids:
298+
if name_behavior_id not in self.managers:
299+
self.logger.warning(
300+
"Agent manager was not created for behavior id {}.".format(
301+
name_behavior_id
307302
)
303+
)
304+
continue
305+
_processor = self.managers[name_behavior_id].processor
306+
_processor.add_experiences(
307+
step_info.previous_all_brain_info[name_behavior_id],
308+
step_info.current_all_brain_info[name_behavior_id],
309+
step_info.brain_name_to_action_info[name_behavior_id].outputs,
310+
)
308311

309312
for brain_name, trainer in self.trainers.items():
310313
if self.train_model and trainer.get_step <= trainer.get_max_steps:

0 commit comments

Comments
 (0)