diff --git a/RandomBird/FlappyAgent.py b/RandomBird/FlappyAgent.py
deleted file mode 100644
index 9f3ec84..0000000
--- a/RandomBird/FlappyAgent.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import numpy as np
-
-def FlappyPolicy(state, screen):
-    action=None
-    if(np.random.randint(0,2)<1):
-        action=119
-    return action
-
-
diff --git a/yuanhaoran/FlappyAgent.py b/yuanhaoran/FlappyAgent.py
new file mode 100644
index 0000000..bc327e6
--- /dev/null
+++ b/yuanhaoran/FlappyAgent.py
@@ -0,0 +1,18 @@
+
+import numpy as np
+
+# Q-table produced by training.py
+Q = np.load("trained_Q.npy")
+
+def FlappyPolicy(state, screen):
+    # Discretize the state exactly as in training.py (the 288 offset must
+    # match the one used during training, or the lookup is shifted):
+    y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y'])
+    x = int(state['next_pipe_dist_to_player'])
+    v = int(state['player_vel'])
+
+    # greedy action: 0 = do nothing (None), 1 = flap (key code 119)
+    action = int(np.argmax(Q[y][x][v][:]))
+    if action == 1:
+        return 119
+    return None
diff --git a/yuanhaoran/README.md b/yuanhaoran/README.md
new file mode 100644
index 0000000..7d70fd3
--- /dev/null
+++ b/yuanhaoran/README.md
@@ -0,0 +1,3 @@
+
+This is an implementation of the game FlappyBird played with Q-learning.
+First install the PLE framework, then run training.py to produce the trained Q-table (trained_Q.npy), and finally run run.py to play the game.
diff --git a/RandomBird/run.py b/yuanhaoran/run.py
similarity index 99%
rename from RandomBird/run.py
rename to yuanhaoran/run.py
index 39b5801..a08f27c 100644
--- a/RandomBird/run.py
+++ b/yuanhaoran/run.py
@@ -1,3 +1,4 @@
+
 # You're not allowed to change this file
 from ple.games.flappybird import FlappyBird
 from ple import PLE
diff --git a/yuanhaoran/training.py b/yuanhaoran/training.py
new file mode 100644
index 0000000..eeec3f1
--- /dev/null
+++ b/yuanhaoran/training.py
@@ -0,0 +1,87 @@
+
+from ple.games.flappybird import FlappyBird
+from ple import PLE
+import numpy as np
+
+game = FlappyBird()
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)
+
+p.init()
+nb_games = 1000
+cumulated = np.zeros(nb_games)
+
+# model parameters
+r_1 = 1       # reward for passing a pipe
+r_2 = -100    # penalty for dying
+alpha = 0.04  # learning rate
+
+# history of the last 40 visited (state, action) pairs, most recent at index 0
+x_wall = np.zeros(40)
+y_wall = np.zeros(40)
+v_wall = np.zeros(40)
+a_wall = np.zeros(40)
+
+# Q(y, x, v, a): a is the action (0 = do nothing, 1 = flap)
+Q = np.zeros((512, 300, 21, 2))
+# rough heuristic initialisation: flap when well below the gap centre, fall otherwise
+Q[255:511, :, :, 0] = 0.1
+Q[0:254, :, :, 1] = 0.1
+# near the pipe
+Q[:, 8, :, 1] = 0.2              # right at the pipe: flap
+Q[216:256, 120:144, :, 1] = 0.2  # approaching and too low: flap
+Q[256:306, 120:144, :, 0] = 0.2  # approaching and too high: fall
+
+for i in range(nb_games):
+    p.reset_game()
+
+    while not p.game_over():
+        state = game.getGameState()
+        screen = p.getScreenRGB()
+        # instead of the absolute pipe position, use the position relative to the bird
+        y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y'])
+        x = int(state['next_pipe_dist_to_player'])
+        v = int(state['player_vel'])
+
+        # greedy policy
+        action = int(np.argmax(Q[y][x][v][:]))
+        if action == 1:
+            action_value = 119
+        else:
+            action_value = None
+
+        if i > 1:
+            # shift the whole 40-slot history by one and store the current state
+            for j in range(39, 0, -1):
+                x_wall[j] = int(x_wall[j-1])
+                y_wall[j] = int(y_wall[j-1])
+                v_wall[j] = int(v_wall[j-1])
+                a_wall[j] = int(a_wall[j-1])
+            x_wall[0] = int(x)
+            y_wall[0] = int(y)
+            v_wall[0] = int(v)
+            a_wall[0] = int(action)
+
+        # reward is +1 when the bird passes a pipe
+        reward = p.act(action_value)
+        my_reward = 0
+        if reward == 1:
+            my_reward = r_1
+            cumulated[i] += 1
+            # propagate the reward backwards through the stored history
+            for j in range(1, 40):
+                Q[int(y_wall[j]), int(x_wall[j]), int(v_wall[j]), int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]), int(x_wall[j-1]), int(v_wall[j-1]), int(a_wall[j-1])]))
+
+        # bad result: -100
+        if reward < 0:
+            my_reward = r_2
+            # penalise the state-action that led to the crash directly, ...
+            Q[int(y_wall[0]), int(x_wall[0]), int(v_wall[0]), int(a_wall[0])] += alpha * my_reward
+            # ... then propagate further back when the crash happens right at a pipe
+            if x == 20:
+                for j in range(1, 27):
+                    Q[int(y_wall[j]), int(x_wall[j]), int(v_wall[j]), int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]), int(x_wall[j-1]), int(v_wall[j-1]), int(a_wall[j-1])]))
+            else:
+                for j in range(1, 6):
+                    Q[int(y_wall[j]), int(x_wall[j]), int(v_wall[j]), int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]), int(x_wall[j-1]), int(v_wall[j-1]), int(a_wall[j-1])]))
+
+np.save('trained_Q', Q)
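
Note: run.py is only renamed in this patch (similarity index 99%), so its body is not shown. For context, a minimal sketch of the kind of evaluation loop it presumably contains, built only from the PLE calls already used in training.py; the exact loop in run.py is an assumption:

    from ple.games.flappybird import FlappyBird
    from ple import PLE
    from FlappyAgent import FlappyPolicy

    game = FlappyBird()
    p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)
    p.init()

    p.reset_game()
    while not p.game_over():
        state = game.getGameState()
        screen = p.getScreenRGB()
        # FlappyPolicy returns 119 (flap) or None (do nothing)
        reward = p.act(FlappyPolicy(state, screen))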
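Design note: FlappyAgent.py and training.py must discretize the state identically (same 288 offset), or the play-time lookup indexes a different row of the Q-table than the one that was trained. A shared helper would enforce this; `discretize` is a hypothetical name, not part of the repo:

    def discretize(state):
        # relative vertical offset of the bird from the pipe-gap centre, shifted by 288
        y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y'])
        x = int(state['next_pipe_dist_to_player'])  # horizontal distance to the pipe
        v = int(state['player_vel'])                # vertical velocity
        return y, x, v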
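For reference, the update in training.py accumulates alpha * (reward + max future Q) into each stored state of a 40-step window, without the TD error term or discount of textbook Q-learning. A sketch of the standard one-step tabular update for comparison; the function name and gamma are illustrative assumptions:

    import numpy as np

    def q_learning_step(Q, s, a, r, s_next, alpha=0.04, gamma=0.95):
        # s and s_next are (y, x, v) index tuples; a is 0 (do nothing) or 1 (flap)
        y, x, v = s
        y2, x2, v2 = s_next
        Q[y, x, v, a] += alpha * (r + gamma * np.max(Q[y2, x2, v2, :]) - Q[y, x, v, a])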