File tree Expand file tree Collapse file tree 8 files changed +773
-769
lines changed Expand file tree Collapse file tree 8 files changed +773
-769
lines changed Original file line number Diff line number Diff line change 1313### Other
1414- Updated docker image
1515- Constrained gym version: gym>=0.17,<0.20
16+ - Better hyperparameters for A2C/PPO on Pendulum
1617
1718## Release 1.2.0 (2021-09-08)
1819
Original file line number Diff line number Diff line change @@ -77,7 +77,7 @@ and also allow users to have access to pretrained agents.*
7777| ppo | LunarLanderContinuous-v2 | 270.863| 32.072| 1M | 149956| 526|
7878| ppo | MountainCar-v0 | -110.423| 19.473| 1M | 149954| 1358|
7979| ppo | MountainCarContinuous-v0 | 88.343| 2.572| 20k | 149983| 633|
80- | ppo | Pendulum-v0 | -169.887 | 104.904 | 2M | 150000| 750|
80+ | ppo | Pendulum-v0 | -172.225 | 104.159 | 100k | 150000| 750|
8181| ppo | PongNoFrameskip-v4 | 20.989| 0.105| 10M | 599902| 90|
8282| ppo | QbertNoFrameskip-v4 | 15627.108| 3313.538| 10M | 600248| 83|
8383| ppo | ReacherBulletEnv-v0 | 17.091| 11.048| 1M | 150000| 1000|
Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ Acrobot-v1:
3838 policy : 'MlpPolicy'
3939 ent_coef : .0
4040
41- # Almost tuned
41+ # Tuned
4242Pendulum-v0 :
4343 normalize : True
4444 n_envs : 8
@@ -49,7 +49,7 @@ Pendulum-v0:
4949 n_steps : 8
5050 gae_lambda : 0.9
5151 vf_coef : 0.4
52- gamma : 0.99
52+ gamma : 0.9
5353 use_rms_prop : True
5454 normalize_advantage : False
5555 learning_rate : lin_7e-4
Original file line number Diff line number Diff line change @@ -13,18 +13,20 @@ atari:
1313 vf_coef : 0.5
1414 ent_coef : 0.01
1515
16+ # Tuned
1617Pendulum-v0 :
17- n_envs : 8
18- n_timesteps : !!float 2e6
18+ n_envs : 4
19+ n_timesteps : !!float 1e5
1920 policy : 'MlpPolicy'
20- n_steps : 2048
21- batch_size : 64
21+ n_steps : 1024
2222 gae_lambda : 0.95
23- gamma : 0.99
23+ gamma : 0.9
2424 n_epochs : 10
2525 ent_coef : 0.0
26- learning_rate : !!float 3e-4
26+ learning_rate : !!float 1e-3
2727 clip_range : 0.2
28+ use_sde : True
29+ sde_sample_freq : 4
2830
2931# Tuned
3032CartPole-v1 :
@@ -524,4 +526,4 @@ Walker2d-v2:
524526 n_epochs : 20
525527 gae_lambda : 0.95
526528 max_grad_norm : 1
527- vf_coef : 0.871923
529+ vf_coef : 0.871923
Original file line number Diff line number Diff line change @@ -77,7 +77,7 @@ and also allow users to have access to pretrained agents.*
7777| ppo | LunarLanderContinuous-v2 | 270.863| 32.072| 1M | 149956| 526|
7878| ppo | MountainCar-v0 | -110.423| 19.473| 1M | 149954| 1358|
7979| ppo | MountainCarContinuous-v0 | 88.343| 2.572| 20k | 149983| 633|
80- | ppo | Pendulum-v0 | -169.887 | 104.904 | 2M | 150000| 750|
80+ | ppo | Pendulum-v0 | -172.225 | 104.159 | 100k | 150000| 750|
8181| ppo | PongNoFrameskip-v4 | 20.989| 0.105| 10M | 599902| 90|
8282| ppo | QbertNoFrameskip-v4 | 15627.108| 3313.538| 10M | 600248| 83|
8383| ppo | ReacherBulletEnv-v0 | 17.091| 11.048| 1M | 150000| 1000|
You can’t perform that action at this time.
0 commit comments