9 changes: 0 additions & 9 deletions RandomBird/FlappyAgent.py

This file was deleted.

17 changes: 17 additions & 0 deletions yuanhaoran/FlappyAgent.py
@@ -0,0 +1,17 @@

import numpy as np

# Q-table learned by training.py
Q = np.load("trained_Q.npy")


def FlappyPolicy(state, screen):
    # Discretise the state: vertical offset to the centre of the pipe gap,
    # horizontal distance to the next pipe, and the bird's vertical velocity.
    y = int(256 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y'])
    x = int(state['next_pipe_dist_to_player'])
    v = int(state['player_vel'])

    # greedy action with respect to the learned Q-values
    action = int(np.argmax(Q[y][x][v][:]))
    if action == 1:
        action = 119   # key code for "flap" in PLE

    return action
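For reference, a minimal, hypothetical call of FlappyPolicy outside the game loop. The numeric values below are made up; only the keys the policy reads are included, and trained_Q.npy must already exist next to FlappyAgent.py.

from FlappyAgent import FlappyPolicy

example_state = {
    'player_y': 256,
    'player_vel': 0,
    'next_pipe_dist_to_player': 120,
    'next_pipe_top_y': 150,
    'next_pipe_bottom_y': 250,
}
# The screen argument is ignored by this policy, so None is passed here.
print(FlappyPolicy(example_state, None))   # prints 119 (flap) or 0 (do nothing)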
3 changes: 3 additions & 0 deletions yuanhaoran/README.md
@@ -0,0 +1,3 @@

This is an implementation of the game Flappy Bird trained with Q-learning.
First install the PLE framework, then run training.py to generate the trained Q-table (trained_Q.npy), and finally run run.py to play the game.
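run.py is only partially shown in this diff; the following is a minimal sketch of the evaluation loop it is expected to contain, built only from the PLE calls already used in training.py and the FlappyPolicy signature above (details of the actual run.py may differ).

from ple.games.flappybird import FlappyBird
from ple import PLE
from FlappyAgent import FlappyPolicy

game = FlappyBird()
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)
p.init()

p.reset_game()
while not p.game_over():
    state = game.getGameState()
    screen = p.getScreenRGB()
    action = FlappyPolicy(state, screen)   # 119 to flap, anything else to do nothing
    p.act(action)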
1 change: 1 addition & 0 deletions RandomBird/run.py → yuanhaoran/run.py
@@ -1,3 +1,4 @@

# You're not allowed to change this file
from ple.games.flappybird import FlappyBird
from ple import PLE
81 changes: 81 additions & 0 deletions yuanhaoran/training.py
@@ -0,0 +1,81 @@

from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
import matplotlib.pyplot as plt

game = FlappyBird()
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)

p.init()
reward = 0.0
nb_games = 1000
cumulated = np.zeros((nb_games))

# model parameters
r_1 = 1        # reward for passing a pipe
r_2 = -100     # penalty for crashing
alpha = 0.04   # learning rate

# sliding window of the 40 most recent discretised states and actions
x_wall = np.zeros((40))
y_wall = np.zeros((40))
v_wall = np.zeros((40))
a_wall = np.zeros((40))
# Q(y, x, v, a): a is the action (0 = do nothing, 1 = flap)
Q = np.zeros((512, 300, 21, 2))
# initialisation bias: prefer doing nothing for large y, flapping for small y
Q[255:511, :, :, 0] = 0.1
Q[0:254, :, :, 1] = 0.1
# heuristics for when the bird is close to the pipe
Q[:, 8, :, 1] = 0.2               # in the middle of the pipe: jump
Q[216:256, 120:144, :, 1] = 0.2   # jump if too low
Q[256:306, 120:144, :, 0] = 0.2

for i in range(nb_games):
    p.reset_game()

    while not p.game_over():
        state = game.getGameState()
        screen = p.getScreenRGB()
        # instead of the absolute pipe position, use the position relative to the bird
        y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y'])
        x = int(state['next_pipe_dist_to_player'])
        v = int(state['player_vel'])

        # greedy policy
        action = int(np.argmax(Q[y][x][v][:]))
        if action == 1:
            action_value = 119   # key code for "flap" in PLE
        else:
            action_value = None

        # shift the history window and record the current state/action at index 0
        if i > 1:
            for j in range(37 - 1, 0, -1):
                x_wall[j] = int(x_wall[j - 1])
                y_wall[j] = int(y_wall[j - 1])
                v_wall[j] = int(v_wall[j - 1])
                a_wall[j] = int(a_wall[j - 1])
        x_wall[0] = int(x)
        y_wall[0] = int(y)
        v_wall[0] = int(v)
        a_wall[0] = int(action)

        # PLE returns +1 when the bird passes a pipe
        reward = p.act(action_value)
        my_reward = 0
        if reward == 1:
            my_reward = r_1
            cumulated[i] += 1
            for j in range(1, 40):
                Q[int(y_wall[j]), int(x_wall[j]), int(v_wall[j]), int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]), int(x_wall[j-1]), int(v_wall[j-1]), int(a_wall[j-1])]))

        # crash: apply the large negative reward
        if reward < 0:
            my_reward = r_2
            # propagate the penalty further back when the crash happens near the pipe (x == 20)
            if x == 20:
                for j in range(0, 27):
                    Q[int(y_wall[j]), int(x_wall[j]), int(v_wall[j]), int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]), int(x_wall[j-1]), int(v_wall[j-1]), int(a_wall[j-1])]))
            else:
                for j in range(0, 6):
                    Q[int(y_wall[j]), int(x_wall[j]), int(v_wall[j]), int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]), int(x_wall[j-1]), int(v_wall[j-1]), int(a_wall[j-1])]))

np.save('trained_Q', Q)
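A note on the update rule above: whenever a reward arrives, each stored window entry j is nudged toward my_reward plus the Q-value of the state/action pair recorded one step later (index j-1 is more recent than j, since index 0 always holds the latest step). Because the inner Q is indexed with all four coordinates, np.max wraps a scalar and has no effect, and there is no discount factor and no subtraction of the current value, so this is a simplified, reward-triggered variant of the usual temporal-difference target. In isolation, one such window update looks like the sketch below (illustrative only, reusing the variable names defined in training.py).

j = 5   # any index into the 40-entry history window
s_a_old = (int(y_wall[j]), int(x_wall[j]), int(v_wall[j]), int(a_wall[j]))
s_a_new = (int(y_wall[j - 1]), int(x_wall[j - 1]), int(v_wall[j - 1]), int(a_wall[j - 1]))
# nudge the older state/action value toward the reward plus the value of the
# state/action pair actually visited one step later
Q[s_a_old] += alpha * (my_reward + Q[s_a_new])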