From d7f1f44fb5c3278318b9a5d26a5decf08b41ad83 Mon Sep 17 00:00:00 2001 From: Deathvenom <65107979+DeathVenom54@users.noreply.github.com> Date: Tue, 5 Oct 2021 09:54:03 +0530 Subject: [PATCH 1/4] Updated OnActionReceived to use new ActionBuffers.ContinuousActions --- docs/Learning-Environment-Create-New.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md index 867e889080..20b34b5346 100644 --- a/docs/Learning-Environment-Create-New.md +++ b/docs/Learning-Environment-Create-New.md @@ -260,14 +260,18 @@ the first determines the force applied along the x-axis; and the second determines the force applied along the z-axis. (If we allowed the Agent to move in three dimensions, then we would need a third action.) -The RollerAgent applies the values from the `action[]` array to its Rigidbody +The RollerAgent applies the values from the `actions.ContinuousActions[]` array to its Rigidbody component `rBody`, using `Rigidbody.AddForce()`: ```csharp -Vector3 controlSignal = Vector3.zero; -controlSignal.x = action[0]; -controlSignal.z = action[1]; -rBody.AddForce(controlSignal * forceMultiplier); +public override void OnActionReceived(ActionBuffers actions) +{ + var continuousActions = actions.ContinuousActions; + Vector3 controlSignal = Vector3.zero; + controlSignal.x = continuousActions[0]; + controlSignal.z = continuousActions[1]; + rBody.AddForce(controlSignal * forceMultiplier); +} ``` #### Rewards From 3ae3475bcaceac81c950c121d71bca042f0f5f6a Mon Sep 17 00:00:00 2001 From: Deathvenom <65107979+DeathVenom54@users.noreply.github.com> Date: Tue, 5 Oct 2021 09:56:28 +0530 Subject: [PATCH 2/4] Updated added code to match code style of docs --- docs/Learning-Environment-Create-New.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md index 20b34b5346..007663e09a 
100644 --- a/docs/Learning-Environment-Create-New.md +++ b/docs/Learning-Environment-Create-New.md @@ -264,7 +264,7 @@ The RollerAgent applies the values from the `actions.ContinuousActions[]` array component `rBody`, using `Rigidbody.AddForce()`: ```csharp -public override void OnActionReceived(ActionBuffers actions) +public override void OnActionReceived(ActionBuffers actionBuffers) { var continuousActions = actions.ContinuousActions; Vector3 controlSignal = Vector3.zero; @@ -319,9 +319,10 @@ public float forceMultiplier = 10; public override void OnActionReceived(ActionBuffers actionBuffers) { // Actions, size = 2 + var continuousActions = actions.ContinuousActions; Vector3 controlSignal = Vector3.zero; - controlSignal.x = actionBuffers.ContinuousActions[0]; - controlSignal.z = actionBuffers.ContinuousActions[1]; + controlSignal.x = continuousActions[0]; + controlSignal.z = continuousActions[1]; rBody.AddForce(controlSignal * forceMultiplier); // Rewards From 8f513bb9c70c07dfbbb7933752ec29651b89b255 Mon Sep 17 00:00:00 2001 From: Deathvenom <65107979+DeathVenom54@users.noreply.github.com> Date: Tue, 5 Oct 2021 10:47:58 +0530 Subject: [PATCH 3/4] Added little tips --- docs/Learning-Environment-Create-New.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md index 007663e09a..17a7ecfab8 100644 --- a/docs/Learning-Environment-Create-New.md +++ b/docs/Learning-Environment-Create-New.md @@ -231,7 +231,8 @@ calculate an analytical solution to the problem. In our case, the information our Agent collects includes the position of the target, the position of the agent itself, and the velocity of the agent. This helps the Agent learn to control its speed so it doesn't overshoot the target -and roll off the platform. In total, the agent observation contains 8 values as +and roll off the platform. 
In total, the agent observation contains 8 values +(mind that each Vector3 can be represented as 3 floats) as implemented below: ```csharp @@ -260,7 +261,7 @@ the first determines the force applied along the x-axis; and the second determines the force applied along the z-axis. (If we allowed the Agent to move in three dimensions, then we would need a third action.) -The RollerAgent applies the values from the `actions.ContinuousActions[]` array to its Rigidbody +The RollerAgent applies the values from the `continuousActions` array to its Rigidbody component `rBody`, using `Rigidbody.AddForce()`: ```csharp @@ -439,6 +440,8 @@ behaviors: Hyperparameters are explained in [the training configuration file documentation](Training-Configuration-File.md) +Make sure the Behaviour name in the `Bahaviour Parameters` component matches the one in the config file + Since this example creates a very simple training environment with only a few inputs and outputs, using small batch and buffer sizes speeds up the training considerably. However, if you add more complexity to the environment or change From 076da243bc1dcbb1c77415041526e83066002dc6 Mon Sep 17 00:00:00 2001 From: Deathvenom <65107979+DeathVenom54@users.noreply.github.com> Date: Thu, 20 Jan 2022 08:08:43 +0530 Subject: [PATCH 4/4] Update code examples to use actionBuffers #5566 --- docs/Learning-Environment-Create-New.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md index 17a7ecfab8..4316984fdb 100644 --- a/docs/Learning-Environment-Create-New.md +++ b/docs/Learning-Environment-Create-New.md @@ -232,8 +232,7 @@ In our case, the information our Agent collects includes the position of the target, the position of the agent itself, and the velocity of the agent. This helps the Agent learn to control its speed so it doesn't overshoot the target and roll off the platform. 
In total, the agent observation contains 8 values -(mind that each Vector3 can be represented as 3 floats) as -implemented below: +as implemented below: ```csharp public override void CollectObservations(VectorSensor sensor) @@ -261,13 +260,13 @@ the first determines the force applied along the x-axis; and the second determines the force applied along the z-axis. (If we allowed the Agent to move in three dimensions, then we would need a third action.) -The RollerAgent applies the values from the `continuousActions` array to its Rigidbody +The RollerAgent applies the values from the `actionBuffers.ContinuousActions[]` array to its Rigidbody component `rBody`, using `Rigidbody.AddForce()`: ```csharp public override void OnActionReceived(ActionBuffers actionBuffers) { - var continuousActions = actions.ContinuousActions; + var continuousActions = actionBuffers.ContinuousActions; Vector3 controlSignal = Vector3.zero; controlSignal.x = continuousActions[0]; controlSignal.z = continuousActions[1]; @@ -320,7 +319,7 @@ public float forceMultiplier = 10; public override void OnActionReceived(ActionBuffers actionBuffers) { // Actions, size = 2 - var continuousActions = actions.ContinuousActions; + var continuousActions = actionBuffers.ContinuousActions; Vector3 controlSignal = Vector3.zero; controlSignal.x = continuousActions[0]; controlSignal.z = continuousActions[1]; @@ -440,7 +439,7 @@ behaviors: Hyperparameters are explained in [the training configuration file documentation](Training-Configuration-File.md) -Make sure the Behaviour name in the `Bahaviour Parameters` component matches the one in the config file +Make sure the Behaviour name in the `Behaviour Parameters` component matches the one in the config file. Since this example creates a very simple training environment with only a few inputs and outputs, using small batch and buffer sizes speeds up the training