38 commits
27e8b11
Moved to /MEOT/
MtLouis Jan 24, 2018
379f820
FlappyVoid
MtLouis Jan 24, 2018
1241c87
Update FlappyAgent.py
MtLouis Jan 24, 2018
d780009
Update Original
MtLouis Jan 24, 2018
282ad6c
Add files via upload
MtLouis Mar 9, 2018
8021ce0
Add files via upload
MtLouis Mar 9, 2018
c1afb34
Add files via upload
MtLouis Mar 9, 2018
007b569
Add files via upload
MtLouis Mar 9, 2018
9cff4ec
Add files via upload
MtLouis Mar 9, 2018
5352d5d
First Gcloud training
MtLouis Mar 10, 2018
7ad15ee
Add files via upload
MtLouis Mar 11, 2018
7080753
Trial 2 results
MtLouis Mar 11, 2018
613757b
Add files via upload
MtLouis Mar 11, 2018
12a51f6
Add files via upload
MtLouis Mar 11, 2018
c6f22fe
Continued training
MtLouis Mar 11, 2018
ab225eb
Failed continuation
MtLouis Mar 11, 2018
da77eaf
Add files via upload
MtLouis Mar 11, 2018
30c632e
Prolong2
MtLouis Mar 11, 2018
4e2e005
Merge branch 'master' of https://github.com/MtLouis/RLchallenge
MtLouis Mar 11, 2018
b511d5d
prolong
MtLouis Mar 11, 2018
39ded18
Add files via upload
MtLouis Mar 11, 2018
0a1213c
Add files via upload
MtLouis Mar 11, 2018
f8ca3b5
Train 3
MtLouis Mar 11, 2018
57c93ae
Add files via upload
MtLouis Mar 11, 2018
99019ed
Add files via upload
MtLouis Mar 11, 2018
e1f71b5
train 4
MtLouis Mar 11, 2018
c2526a9
commit
MtLouis Mar 11, 2018
00cca56
deletetrash
MtLouis Mar 11, 2018
5f86687
AjoutQl
MtLouis Mar 11, 2018
38d143e
Update README.md
MtLouis Mar 11, 2018
9d7e517
1500iters
MtLouis Mar 11, 2018
ccddc79
700iters
MtLouis Mar 11, 2018
2574136
799ident
MtLouis Mar 11, 2018
93a17d3
1500iters
MtLouis Mar 11, 2018
aabef07
Update README.md
MtLouis Mar 11, 2018
111d5d7
final
MtLouis Mar 11, 2018
b36e7a5
Merge branch 'master' of https://github.com/MtLouis/RLchallenge
MtLouis Mar 11, 2018
332e345
600
MtLouis Mar 11, 2018
241 changes: 241 additions & 0 deletions MEOT/DQN/DQL.py
@@ -0,0 +1,241 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 16 10:50:17 2018

@author: Louis
"""

import os

os.environ['SDL_VIDEODRIVER'] = 'dummy'
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
from FlappyAgent import FlappyPolicy
from testG import test_model_G

import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from skimage.transform import resize
from skimage.exposure import rescale_intensity

from keras.models import Sequential, load_model
from keras.layers import Dense, Conv2D, Flatten
import graphviz


from collections import deque

def process_screen(x):
    return (255 * resize(rgb2gray(x)[50:, :410], (84, 84))).astype("uint8")


#%% Network Definition
dqn = Sequential()
#1st layer
dqn.add(Conv2D(filters=16, kernel_size=(8,8), strides=4, activation="relu", input_shape=(84,84,4)))
#2nd layer
dqn.add(Conv2D(filters=32, kernel_size=(4,4), strides=2, activation="relu"))
dqn.add(Flatten())
#3rd layer
dqn.add(Dense(units=256, activation="relu"))
#output layer
dqn.add(Dense(units=2, activation="linear"))

dqn.compile(optimizer="rmsprop", loss="mean_squared_error")

#%% Training Functions

def epsilon(step):
    ## Linear decay until step 200 000, then constant
    if step < 200000:
        return 1 - step * (0.09 / 200000)
    return .01

def clip_reward(r):
    ## Shaping reward: -1, 0.1, 1
    if (r == 0):
        return 0.1
    if (r < 0):
        return -1
    return r

def greedy_action(network, x):
    Q = network.predict(np.array([x]))
    return np.argmax(Q)


#%% Memory_buffer
# A class for the replay memory


class MemoryBuffer:
    "An experience replay buffer using numpy arrays"

    def __init__(self, length, screen_shape, action_shape):
        self.length = length
        self.screen_shape = screen_shape
        self.action_shape = action_shape
        shape = (length,) + screen_shape
        self.screens_x = np.zeros(shape, dtype=np.uint8)   # starting states
        self.screens_y = np.zeros(shape, dtype=np.uint8)   # resulting states
        shape = (length,) + action_shape
        self.actions = np.zeros(shape, dtype=np.uint8)     # actions
        self.rewards = np.zeros((length, 1), dtype=np.float32)  # rewards (float so the 0.1 shaping reward is not truncated)
        self.terminals = np.zeros((length, 1), dtype=np.bool)   # true if resulting state is terminal
        self.terminals[-1] = True
        self.index = 0  # points one position past the last inserted element
        self.size = 0   # current size of the buffer

    def append(self, screenx, a, r, screeny, d):
        self.screens_x[self.index] = screenx
        self.actions[self.index] = a
        self.rewards[self.index] = r
        self.screens_y[self.index] = screeny
        self.terminals[self.index] = d
        self.index = (self.index + 1) % self.length
        self.size = np.min([self.size + 1, self.length])

    def stacked_frames_x(self, index):
        im_deque = deque(maxlen=4)
        pos = index % self.length
        for i in range(4):
            im = self.screens_x[pos]
            im_deque.appendleft(im)
            test_pos = (pos - 1) % self.length
            if not self.terminals[test_pos]:
                pos = test_pos
        return np.stack(im_deque, axis=-1)

    def stacked_frames_y(self, index):
        im_deque = deque(maxlen=4)
        pos = index % self.length
        for i in range(4):
            im = self.screens_y[pos]
            im_deque.appendleft(im)
            test_pos = (pos - 1) % self.length
            if not self.terminals[test_pos]:
                pos = test_pos
        return np.stack(im_deque, axis=-1)

    def minibatch(self, size):
        indices = np.random.choice(self.size, size=size, replace=False)
        x = np.zeros((size,) + self.screen_shape + (4,))
        y = np.zeros((size,) + self.screen_shape + (4,))

        for i in range(size):
            x[i] = self.stacked_frames_x(indices[i])
            y[i] = self.stacked_frames_y(indices[i])
        return x, self.actions[indices], self.rewards[indices], y, self.terminals[indices]


#%% Training Episode
# initialize state and replay memory
game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color; use "fixed" (default) for black background and constant bird and pipe colors.
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.

p.init()

total_steps = 800000
replay_memory_size = 100000
intermediate_size = 50000
interval_test = 25000
mini_batch_size = 32
gamma = 0.99

average_score = []
max_score= []


p.reset_game()
screen_x = process_screen(p.getScreenRGB())
stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)
x = np.stack(stacked_x, axis=-1)
replay_memory = MemoryBuffer(replay_memory_size, (84,84), (1,))
# initial state for evaluation
evaluation_period = 30
Xtest = np.array([x])
nb_epochs = total_steps // evaluation_period
epoch=-1
scoreQ = np.zeros((nb_epochs))
scoreMC = np.zeros((nb_epochs))
list_actions = [0,119]


# Deep Q-learning with experience replay
for step in range(total_steps):

    # periodically save the network
    if (step % intermediate_size == 0):
        dqn.save('TrainG5_' + str(int(step / intermediate_size)) + '.h5')
        print('Saving model: step = ' + str(step))

    # periodically evaluate the current network
    if (step % interval_test == 0):
        avg_temp = 0
        max_temp = 0
        print('Eval period: ' + str(step))
        avg_temp, max_temp = test_model_G(evaluation_period, dqn)
        average_score.append(avg_temp)
        max_score.append(max_temp)

    # evaluation
    # if (step % 10 == 0):
    #     epoch = epoch + 1
    #     # evaluation of initial state
    #     scoreQ[epoch] = np.mean(dqn.predict(Xtest).max(1))
    #     # roll-out evaluation
    #     scoreMC[epoch] = MCeval(network=dqn, trials=20, length=700, gamma=gamma)

    # action selection (epsilon-greedy)
    if np.random.rand() < epsilon(step):
        # exploration: pick action index 0 with probability 1/5, otherwise index 1
        if np.random.randint(0, 5) == 1:
            a = 0
        else:
            a = 1
    else:
        a = greedy_action(dqn, x)

    # step
    r = p.act(list_actions[a])
    raw_screen_y = p.getScreenRGB()

    r = clip_reward(r)
    d = p.game_over()

    screen_y = process_screen(raw_screen_y)
    replay_memory.append(screen_x, a, r, screen_y, d)

    # train once more than one minibatch of transitions has been collected
    if step > mini_batch_size:
        X, A, R, Y, D = replay_memory.minibatch(mini_batch_size)
        QY = dqn.predict(Y)
        QYmax = QY.max(1).reshape((mini_batch_size, 1))
        update = R + gamma * (1 - D) * QYmax
        QX = dqn.predict(X)
        QX[np.arange(mini_batch_size), A.ravel()] = update.ravel()
        dqn.train_on_batch(x=X, y=QX)

    # prepare next transition
    if d:
        # restart episode
        p.reset_game()
        screen_x = process_screen(p.getScreenRGB())
        stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)
        x = np.stack(stacked_x, axis=-1)
    else:
        # keep going
        screen_x = screen_y
        stacked_x.append(screen_x)
        x = np.stack(stacked_x, axis=-1)


dqn.save('TrainG5_max.h5')

np.savetxt('average.txt',average_score, delimiter=',')
np.savetxt('max.txt',max_score, delimiter=',')
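The core of the training loop above is the Bellman target that overwrites only the Q-value of the action actually taken. For reference, here is the same computation pulled out into a standalone helper; a sketch only, the name `dqn_targets` and its keyword default are mine, not part of the repo:

```python
import numpy as np

def dqn_targets(model, X, A, R, Y, D, gamma=0.99):
    # X, Y: stacked frames (batch, 84, 84, 4); A: action indices; R: clipped rewards; D: terminal flags
    QY = model.predict(Y)                               # Q(s', .) for each next state
    QYmax = QY.max(1).reshape((-1, 1))                  # max over next actions
    update = R + gamma * (1 - D) * QYmax                # future term zeroed at terminal states
    QX = model.predict(X)                               # current estimates Q(s, .)
    QX[np.arange(len(X)), A.ravel()] = update.ravel()   # overwrite only the taken action's value
    return QX                                           # then: model.train_on_batch(X, QX)
```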
26 changes: 26 additions & 0 deletions MEOT/DQN/FlappyAgent.py
@@ -0,0 +1,26 @@
import numpy as np

import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from skimage.transform import resize
from skimage.exposure import rescale_intensity

from keras.models import Sequential, load_model
from keras.layers import Dense, Conv2D, Flatten
import graphviz

from collections import deque

list_actions = [0,119]
dqn = load_model('TrainG4_max.h5')
def process_screen(x):
    return (255 * resize(rgb2gray(x)[50:, :410], (84, 84))).astype("uint8")

def FlappyPolicy(state, screen):
    screen_x = process_screen(screen)
    stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)
    x = np.stack(stacked_x, axis=-1)
    action = list_actions[np.argmax(dqn.predict(np.expand_dims(x, axis=0)))]
    return action
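Note that `FlappyPolicy` rebuilds its 4-frame stack from four copies of the current screen on every call, so at test time the network never sees motion, unlike during training where consecutive frames are stacked. A possible variant, not in the repo; the name `FlappyPolicyStacked` and the module-level deque are assumptions of mine, reusing `process_screen`, `dqn` and `list_actions` from above:

```python
from collections import deque
import numpy as np

_frames = deque(maxlen=4)  # persists across calls; holds the four most recent processed screens

def FlappyPolicyStacked(state, screen):
    screen_x = process_screen(screen)
    if not _frames:                    # first call: seed the stack with copies of this frame
        _frames.extend([screen_x] * 4)
    else:
        _frames.append(screen_x)       # push the newest frame, dropping the oldest
    x = np.stack(_frames, axis=-1)
    return list_actions[np.argmax(dqn.predict(np.expand_dims(x, axis=0)))]
```

A reset hook that clears `_frames` when `p.game_over()` fires would also be needed, so frames from a finished game do not leak into the next one.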


Binary file added MEOT/DQN/TrainG4_max.h5
Binary file not shown.
39 changes: 39 additions & 0 deletions MEOT/DQN/run.py
@@ -0,0 +1,39 @@
# You're not allowed to change this file
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
from FlappyAgent import FlappyPolicy

game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color; use "fixed" (default) for black background and constant bird and pipe colors.
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)
# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.

p.init()
reward = 0.0

nb_games = 100
cumulated = np.zeros((nb_games))


for i in range(nb_games):
    p.reset_game()

    while (not p.game_over()):
        state = game.getGameState()
        screen = p.getScreenRGB()

        action = FlappyPolicy(state, screen) ### Your job is to define this function.

        reward = p.act(action)
        print(reward)
        cumulated[i] = cumulated[i] + reward

average_score = np.mean(cumulated)
max_score = np.max(cumulated)


#####----------
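run.py computes `average_score` and `max_score` but never prints them. Since the file must stay unchanged, one option is a small wrapper that imports it and reads the module-level results; the wrapper itself is only a sketch, not part of the repo:

```python
# Importing run executes it: the 100 games are played at import time.
import run

print("average score over", run.nb_games, "games:", run.average_score)
print("best score:", run.max_score)
```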

60 changes: 60 additions & 0 deletions MEOT/DQN/testG.py
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 16 22:13:01 2018

@author: Louis

"""

# Functions used to test during Gcloud training phase.
import os

os.environ['SDL_VIDEODRIVER'] = 'dummy'
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
from FlappyAgent import FlappyPolicy

import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from skimage.transform import resize
from skimage.exposure import rescale_intensity

from keras.models import Sequential, load_model
from keras.layers import Dense, Conv2D, Flatten
import graphviz

from collections import deque

def process_screen(x):
    return (255 * resize(rgb2gray(x)[50:, :410], (84, 84))).astype("uint8")

def test_model_G(nb_games, model):
    game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color; use "fixed" (default) for black background and constant bird and pipe colors.
    p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
    p.init()
    reward = 0.0

    cumulated = np.zeros((nb_games))
    list_actions = [0, 119]

    for i in range(nb_games):
        p.reset_game()

        while (not p.game_over()):
            state = game.getGameState()

            screen_x = process_screen(p.getScreenRGB())
            stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)
            x = np.stack(stacked_x, axis=-1)
            action = list_actions[np.argmax(model.predict(np.expand_dims(x, axis=0)))]

            reward = p.act(action)

            cumulated[i] = cumulated[i] + reward

    avg_score = np.mean(cumulated)
    print('Average: ' + str(avg_score))
    mx_score = np.max(cumulated)
    print('Max: ' + str(mx_score))
    return avg_score, mx_score
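Example call, mirroring how DQL.py invokes it every `interval_test` steps; the checkpoint name below matches the file saved at the end of DQL.py:

```python
from keras.models import load_model
from testG import test_model_G

# Evaluate a saved checkpoint over 30 games (DQL.py uses evaluation_period = 30).
avg, mx = test_model_G(30, load_model('TrainG5_max.h5'))
```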