Commit a232d04

Authored by: engintoklu, pre-commit-ci[bot], Higgcz
docs: Update docs and examples related to GymNE (#31)
* docs: Update docs and examples related to GymNE
* Update docs and examples related to GymNE
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
  Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
* Update rl_clipup script
* Use keyword arguments for VecGymNE in Brax example notebook
* Clarify what PicklingLogger does in example codes

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Vojtech Micka <[email protected]>
1 parent: cd7f84b

File tree: 8 files changed, +41 -22 lines


README.md

Lines changed: 7 additions & 2 deletions
@@ -119,12 +119,12 @@ The following example demonstrates how to solve reinforcement learning tasks tha
 
 ```python
 from evotorch.algorithms import PGPE
-from evotorch.logging import StdOutLogger
+from evotorch.logging import StdOutLogger, PicklingLogger
 from evotorch.neuroevolution import GymNE
 
 # Declare the problem to solve
 problem = GymNE(
-    env_name="Humanoid-v4",  # Solve the Humanoid-v4 task
+    env="Humanoid-v4",  # Solve the Humanoid-v4 task
     network="Linear(obs_length, act_length)",  # Linear policy
     observation_normalization=True,  # Normalize the policy inputs
     decrease_rewards_by=5.0,  # Decrease each reward by 5.0
@@ -156,6 +156,11 @@ searcher = PGPE(
 # Instantiate a standard output logger
 _ = StdOutLogger(searcher)
 
+# Optional: Instantiate a logger to pickle and save the results periodically.
+# In this example, among the saved results will be the center of the search
+# distribution, since we are using PGPE which is distribution-based.
+_ = PicklingLogger(searcher, interval=10)
+
 # Run the algorithm for the specified amount of generations
 searcher.run(500)
 
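Read together, the two README.md hunks above amount to a script along the following lines. This is a minimal sketch rather than a verbatim copy of the README: the `searcher = PGPE(...)` block sits between the two hunks and is not shown in this diff, so the PGPE hyperparameters and the `num_actors` setting below are illustrative assumptions.

```python
from evotorch.algorithms import PGPE
from evotorch.logging import StdOutLogger, PicklingLogger
from evotorch.neuroevolution import GymNE

# Declare the problem to solve; note the renamed keyword `env` (formerly `env_name`).
problem = GymNE(
    env="Humanoid-v4",                         # Solve the Humanoid-v4 task
    network="Linear(obs_length, act_length)",  # Linear policy
    observation_normalization=True,            # Normalize the policy inputs
    decrease_rewards_by=5.0,                   # Decrease each reward by 5.0
    num_actors="max",                          # Assumption: not shown in the diff
)

# Illustrative PGPE configuration; the README's actual values are outside this diff.
searcher = PGPE(
    problem,
    popsize=200,
    center_learning_rate=0.01,
    stdev_learning_rate=0.1,
    radius_init=0.27,
)

# Print progress to the standard output
_ = StdOutLogger(searcher)

# Optional: pickle and save the results every 10 generations. With PGPE, the saved
# results include the center of the search distribution.
_ = PicklingLogger(searcher, interval=10)

# Run the algorithm for 500 generations
searcher.run(500)
```
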
docs/advanced_usage/custom_ea.md

Lines changed: 1 addition & 1 deletion
@@ -196,7 +196,7 @@ With our custom EA defined, we're ready to run a simple experiment. For this we
 from evotorch.neuroevolution import GymNE
 
 prob = GymNE(
-    env_name="CartPole-v1",
+    env="CartPole-v1",
     network="""
     Linear(obs_length, 16)
     >> Tanh()

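The custom_ea.md hunk above only shows the first lines of the string-defined policy. A hedged sketch of the updated call, with an assumed final layer to close the policy string (not taken from the documentation), could look like this:

```python
from evotorch.neuroevolution import GymNE

prob = GymNE(
    env="CartPole-v1",  # renamed keyword: `env` replaces `env_name`
    network="""
    Linear(obs_length, 16)
    >> Tanh()
    >> Linear(16, act_length)
    """,  # the final layer here is an assumed completion, not shown in the hunk
)
```
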
docs/advanced_usage/dist_based.md

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ from evotorch.logging import StdOutLogger
 
 # Declare the problem to solve
 problem = GymNE(
-    env_name="Humanoid-v4",  # Solve the Humanoid-v4 task
+    env="Humanoid-v4",  # Solve the Humanoid-v4 task
     network="Linear(obs_length, act_length)",  # Linear policy
     observation_normalization=True,  # Normalize the policy inputs
     decrease_rewards_by=5.0,  # Decrease each reward by 5.0

docs/user_guide/gym.md

Lines changed: 6 additions & 6 deletions
@@ -36,7 +36,7 @@ from evotorch.neuroevolution import GymNE
 
 problem = GymNE(
     # Name of the environment
-    env_name="LunarLanderContinuous-v2",
+    env="LunarLanderContinuous-v2",
     # Linear policy mapping observations to actions
     network="Linear(obs_length, act_length)",
     # Use 4 available CPUs. Note that you can modify this value,
@@ -68,7 +68,7 @@ class CustomPolicy(torch.nn.Module):
 
 
 problem = GymNE(
-    env_name="LunarLanderContinuous-v2",
+    env="LunarLanderContinuous-v2",
     network=CustomPolicy,
     num_actors=4,
 )
@@ -89,7 +89,7 @@ You can specify additional arguments to pass to the instantiation of the environ
 
 ```python
 problem = GymNE(
-    env_name="LunarLanderContinuous-v2",
+    env="LunarLanderContinuous-v2",
     env_config={
         "gravity": -1e-5,
     },
@@ -110,7 +110,7 @@ The `num_episodes` argument allows you to evaluate individual networks repeatedl
 
 ```python
 problem = GymNE(
-    env_name="LunarLanderContinuous-v2",
+    env="LunarLanderContinuous-v2",
     network=CustomPolicy,
     num_actors=4,
     num_episodes=5,
@@ -129,7 +129,7 @@ While in practice this means that the problem is non-stationary, as the expectio
 
 ```python
 problem = GymNE(
-    env_name="LunarLanderContinuous-v2",
+    env="LunarLanderContinuous-v2",
     network=CustomPolicy,
     num_actors=4,
     observation_normalization=True,
@@ -167,7 +167,7 @@ For example, the `"Humanoid-v4"` environment [has an `alive_bonus` value of 5](h
 
 ```python
 problem = GymNE(
-    env_name="Humanoid-v4",
+    env="Humanoid-v4",
     network=CustomPolicy,
     decrease_rewards_by=5.0,
 )

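All six docs/user_guide/gym.md hunks above apply the same rename, `env_name` to `env`. The sketch below is illustrative, not a copy of the guide: it combines in one `GymNE` call keyword arguments that gym.md introduces in separate examples, and it substitutes a minimal `@pass_info`-decorated module for the guide's `CustomPolicy`, whose body is outside this diff.

```python
import torch
from torch import nn
from evotorch.decorators import pass_info
from evotorch.neuroevolution import GymNE


# A minimal stand-in for the `CustomPolicy` referenced in gym.md (its actual body
# is not part of this diff). `@pass_info` lets GymNE supply `obs_length` and
# `act_length` as keyword arguments when instantiating the policy.
@pass_info
class CustomPolicy(nn.Module):
    def __init__(self, obs_length: int, act_length: int, **kwargs):
        super().__init__()
        self.linear = nn.Linear(obs_length, act_length)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)


# Illustrative: combining, in one call, keyword arguments that the hunks above touch.
problem = GymNE(
    env="LunarLanderContinuous-v2",   # renamed keyword: `env` replaces `env_name`
    network=CustomPolicy,
    env_config={"gravity": -1e-5},    # extra arguments for the environment's instantiation
    num_actors=4,
    num_episodes=5,                   # evaluate each policy over 5 episodes
    observation_normalization=True,
)
```
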
examples/notebooks/Brax_Experiments_with_PGPE.ipynb

Lines changed: 2 additions & 2 deletions
@@ -176,8 +176,8 @@
 "outputs": [],
 "source": [
 "problem = VecGymNE(\n",
-"    \"brax::humanoid\",  # solve the brax task named \"humanoid\"\n",
-"    policy,\n",
+"    env=\"brax::humanoid\",  # solve the brax task named \"humanoid\"\n",
+"    network=policy,\n",
 "    #\n",
 "    # Collect observation stats, and use those stats to normalize incoming observations\n",
 "    observation_normalization=True,\n",

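Translated out of the notebook JSON, the updated Brax cell constructs `VecGymNE` with explicit keyword arguments. The sketch below is an assumption-laden illustration: it presumes Brax is installed, and it substitutes a simple policy string for the notebook's `policy` object, which is defined in cells not shown in this diff.

```python
from evotorch.neuroevolution import VecGymNE

# Assumes the `brax` package is available; the policy string below is a
# placeholder for the notebook's `policy`, which this diff does not show.
problem = VecGymNE(
    env="brax::humanoid",                      # solve the brax task named "humanoid"
    network="Linear(obs_length, act_length)",  # placeholder policy
    # Collect observation stats, and use those stats to normalize incoming observations
    observation_normalization=True,
)
```
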
examples/notebooks/Gym_Experiments_with_PGPE_and_CoSyNE.ipynb

Lines changed: 11 additions & 6 deletions
@@ -35,16 +35,21 @@
 "source": [
 "import torch\n",
 "from torch import nn\n",
+"from evotorch.decorators import pass_info\n",
 "\n",
+"\n",
+"# The decorator `@pass_info` used below tells the problem class `GymNE`\n",
+"# to pass information regarding the gym environment via keyword arguments\n",
+"# such as `obs_length` and `act_length`.\n",
+"@pass_info\n",
 "class LinearPolicy(nn.Module):\n",
-"    \n",
 "    def __init__(\n",
 "        self, \n",
 "        obs_length: int,  # Number of observations from the environment\n",
 "        act_length: int,  # Number of actions of the environment\n",
 "        bias: bool = True,  # Whether the policy should use biases\n",
 "        **kwargs  # Anything else that is passed\n",
-"    ):\n",
+"    ):\n",
 "        super().__init__()  # Always call super init for nn Modules\n",
 "        self.linear = nn.Linear(obs_length, act_length, bias = bias)\n",
 "    \n",
@@ -71,7 +76,7 @@
 "from evotorch.neuroevolution import GymNE\n",
 "\n",
 "problem = GymNE(\n",
-"    env_name=\"LunarLanderContinuous-v2\",  # Name of the environment\n",
+"    env=\"LunarLanderContinuous-v2\",  # Name of the environment\n",
 "    network=LinearPolicy,  # Linear policy that we defined earlier\n",
 "    network_args = {'bias': False},  # Linear policy should not use biases\n",
 "    num_actors= 4,  # Use 4 available CPUs. Note that you can modify this value, or use 'max' to exploit all available CPUs\n",
@@ -189,7 +194,7 @@
 "outputs": [],
 "source": [
 "problem = GymNE(\n",
-"    env_name=\"LunarLanderContinuous-v2\",\n",
+"    env=\"LunarLanderContinuous-v2\",\n",
 "    network=LinearPolicy,\n",
 "    network_args = {'bias': False},\n",
 "    num_actors= 4, \n",
@@ -250,7 +255,7 @@
 "id": "3dcb5243",
 "metadata": {},
 "source": [
-"And once again we can visualize the learned policy. As `CoSyNE` is population based, it does not maintain a 'best estimate' of a good policy. Instead, we simply take the best performing solution from the current population. "
+"And once again we can visualize the learned policy. As `Cosyne` is population based, it does not maintain a 'best estimate' of a good policy. Instead, we simply take the best performing solution from the current population. "
 ]
 },
 {
@@ -296,7 +301,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.13"
+"version": "3.7.13"
 }
 },
 "nbformat": 4,

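For convenience, here is the `@pass_info` pattern from the notebook hunks above, rewritten as plain Python instead of notebook JSON. The `forward` method is not part of the diff, so the plain linear mapping below is an assumption.

```python
import torch
from torch import nn
from evotorch.decorators import pass_info
from evotorch.neuroevolution import GymNE


# The decorator `@pass_info` tells the problem class `GymNE` to pass information
# regarding the gym environment via keyword arguments such as `obs_length` and
# `act_length`.
@pass_info
class LinearPolicy(nn.Module):
    def __init__(
        self,
        obs_length: int,     # Number of observations from the environment
        act_length: int,     # Number of actions of the environment
        bias: bool = True,   # Whether the policy should use biases
        **kwargs,            # Anything else that is passed
    ):
        super().__init__()  # Always call super init for nn Modules
        self.linear = nn.Linear(obs_length, act_length, bias=bias)

    # Assumption: the forward pass is not shown in the hunk above; a plain
    # linear mapping from observations to actions is used here.
    def forward(self, obs: torch.Tensor) -> torch.Tensor:
        return self.linear(obs)


problem = GymNE(
    env="LunarLanderContinuous-v2",   # Name of the environment
    network=LinearPolicy,             # Linear policy defined above
    network_args={"bias": False},     # Linear policy should not use biases
    num_actors=4,                     # Use 4 available CPUs
)
```
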
examples/scripts/rl_clipup.py

Lines changed: 2 additions & 2 deletions
@@ -225,7 +225,7 @@ def main(_config: dict):
 
     # Instantiate the problem class
     problem = GymNE(
-        env_name=env_name,
+        env=env_name,
         network=_config["policy"],
         observation_normalization=_config["observation_normalization"],
         decrease_rewards_by=none_if_nan(_config["decrease_rewards_by"]),
@@ -268,7 +268,7 @@ def main(_config: dict):
     # Create a test problem instance -- note the difference in configuration
 
     test_problem = GymNE(
-        env_name=actual_env_name,  # Using the actual environment name, rather than a modified version
+        env=actual_env_name,  # Using the actual environment name, rather than a modified version
         network=_config["policy"],
         observation_normalization=_config["observation_normalization"],
         decrease_rewards_by=0.0,  # Not changing the rewards

examples/scripts/rl_gym.py

Lines changed: 11 additions & 2 deletions
@@ -13,12 +13,12 @@
 # limitations under the License.
 
 from evotorch.algorithms import PGPE
-from evotorch.logging import StdOutLogger
+from evotorch.logging import PicklingLogger, StdOutLogger
 from evotorch.neuroevolution import GymNE
 
 # Specialized Problem class for Gym environments
 problem = GymNE(
-    env_name="Humanoid-v4",
+    env="Humanoid-v4",
     # Linear policy defined using special string syntax supported by EvoTorch
     network="Linear(obs_length, act_length)",
     observation_normalization=True,
@@ -40,7 +40,16 @@
     num_interactions=150000,
     popsize_max=3200,
 )
+
+# Instantiate a logger that will print the progress to the standard output
 logger = StdOutLogger(searcher)
+
+# Optional:
+# Instantiate a logger that will, at every 10 generations, pickle and save the results (where the results will include
+# the center of the search distribution since we are using PGPE which is a distribution-based search algorithm).
+pickler = PicklingLogger(searcher, interval=10)
+
+# Run the search algorithm
 searcher.run(500)
 
 # Create a policy to test using the final center of the optimized distribution and visualize its behavior
