import gym
import numpy as np
from gym.envs.classic_control import rendering


def getBlocks(obs):
    """
    Returns a list of blocks. Each block is ((row_top, row_bottom), (col_left, col_right)).
    :param obs: raw RGB observation frame from the environment
    :return: [((int, int), (int, int))]
    """
    # Drop the top 30 rows.
    obs = obs[30:]
    # Drop the first 10 columns.
    obs = np.delete(obs, list(range(10)), 1)
    (rows, cols, _) = np.shape(obs)
    # For each column, list the rows where it is non-empty.
    # (Use obs[r, 0] as the background against which to compare.)
    coords0 = [(c, [r for r in range(rows) if np.any(obs[r, c] != obs[r, 0])])
               for c in range(cols)]
    # Drop the columns with no entries. For the non-empty columns, find the min and max row.
    coords1 = [(minAndMax(rs), c) for (c, rs) in coords0 if rs]
    # To eliminate duplication, make a set of the min/max row ranges.
    rMinsAndMaxesSet = {rMinMax for (rMinMax, _) in coords1}
    # For each min/max row range, list the columns where it applies and take their min and max.
    # Sort the result (for easier reading) by column, from left to right.
    blocks = sorted([(rMinMax, minAndMax([c for (rMnMx, c) in coords1 if rMnMx == rMinMax]))
                     for rMinMax in rMinsAndMaxesSet],
                    key=(lambda rc: rc[1]))
    return blocks


def minAndMax(lst):
    return (min(lst), max(lst))


def play(env):
    """ Interactive mode: read an action from the keyboard on every step. """
    done = False
    while not done:
        # inp will be a string -- in our case a single character.
        inp = input('> ')
        # The only valid actions are 0 .. 5.
        # They mean: nop, fire, up, down, up (fire), down (fire).
        action = int(inp) if inp in '012345' and inp != '' else 0
        (obs, reward, done, debug) = env.step(action)
        print(getBlocks(obs))
        # env.viewer = rendering.SimpleImageViewer(maxwidth=500)
        env.render('human')
    env.close()  # https://github.com/openai/gym/issues/893


# Global tracking state for the trajectory-based agent.
puckPosQueue = []
userPaddleXPos = 0
y_max = 0
lastTargetY = 0
lockedTarget = False
lastAction = 0


def calculatePuckTargetYPos(puckPos):  # puckPos is (y, x)
    if len(puckPosQueue) < 3:
        return (None, None)  # insufficient data
    # Check the puck's direction: is it moving towards the user's paddle or not?
    if puckPosQueue[2][1] < puckPosQueue[0][1]:
        return (None, None)
    # Find the target Y using the point-slope form of a line.
    # Note: the sign of every Y is inverted, as Y grows from top to bottom in this environment.
    slope = 0 if puckPosQueue[2][1] == puckPosQueue[0][1] else (
        -puckPosQueue[2][0] + puckPosQueue[0][0]) / (puckPosQueue[2][1] - puckPosQueue[0][1])
    y = slope * (userPaddleXPos - puckPosQueue[2][1]) - puckPosQueue[2][0]
    y = abs(y)
    # Fold the unbounded Y back into the playing-field height to account for wall bounces.
    return (y_max - y % y_max if int(y / y_max) % 2 != 0 else y % y_max, slope)


def updatePuckPosQueue(puckPos):
    global puckPosQueue
    if len(puckPosQueue) > 2:
        puckPosQueue.pop(0)
    puckPosQueue.append(puckPos)


def getPuckTargetYPos(puckPos):
    global lastTargetY, lockedTarget
    updatePuckPosQueue(puckPos)
    if len(puckPosQueue) < 2 or (not lockedTarget and puckPosQueue[1][1] > puckPosQueue[0][1]):  # our turn
        (targetY, slope) = calculatePuckTargetYPos(puckPos)
        if targetY is None:
            targetY = y_max / 2  # keep the paddle at mid-field until a target is found
        if targetY != lastTargetY:  # to eliminate an immediate trajectory change due to a wall impact
            lastTargetY = targetY
            # Do nothing for now; the change in trajectory is probably an immediate slope change at wall impact.
            return 0
        lastTargetY = targetY
        lockedTarget = True  # lock the target to avoid further calculations until the puck is hit
        print('Slope:', slope, '; Target:', lastTargetY)
    # Reset the flag once our paddle has hit the puck, to get ready for the next target.
    elif lockedTarget and puckPosQueue[1][1] < puckPosQueue[0][1]:
        lockedTarget = False
    return lastTargetY
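
# A small sanity check (not part of the original script) for the wall-bounce folding used in
# calculatePuckTargetYPos above: the unbounded Y from the point-slope projection is reflected
# back into the field whenever it crosses a wall an odd number of times. The helper name
# _foldY and the y_max value of 160 below are illustrative assumptions only.
def _foldY(y, y_max):
    # Reflect y back from the wall for an odd number of crossings, otherwise keep the remainder.
    return y_max - y % y_max if int(y / y_max) % 2 != 0 else y % y_max

assert _foldY(240.0, 160) == 80.0   # one bounce: 1.5 * y_max folds back to 0.5 * y_max
assert _foldY(40.0, 160) == 40.0    # no bounce: already inside the field
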
padding = 2


def calculateAction(targetY, userPaddleYPos):
    """ Move the paddle until the target Y sits inside it, with a +-`padding` margin. """
    if targetY - padding > userPaddleYPos[0] and targetY + padding < userPaddleYPos[1]:
        return 0
    elif targetY + padding + 1 > userPaddleYPos[1]:
        # lastAction is used to smooth out the movement and avoid building up momentum.
        return 3 if lastAction != 3 else 0
    else:
        return 2 if lastAction != 2 else 0


def getAction(obs):
    global lastAction
    blocks = getBlocks(obs)
    if len(blocks) < 3:
        return 0  # kickoff
    puckPos = np.mean(np.array(blocks[1]), axis=1).tolist()
    userPaddleYPos = blocks[2][0]
    target = getPuckTargetYPos(puckPos)
    lastAction = calculateAction(target, userPaddleYPos)
    return lastAction


def getAction2(obs):
    """ Naive way: simply follow the puck's Y position. """
    blocks = getBlocks(obs)
    if len(blocks) < 3:
        return 0  # kickoff
    puckPos = np.mean(np.array(blocks[1]), axis=1).tolist()
    userPaddlePos = np.mean(np.array(blocks[2]), axis=1).tolist()
    return 2 if puckPos[0] < userPaddlePos[0] else 3


def autoPlay(env):
    """
    Calculates the puck's trajectory as soon as the opponent's paddle hits it, and positions our
    paddle accordingly: two consecutive puck coordinates are enough to derive a straight line for
    the trajectory, and any possible wall bounces are taken into account as well. Once the target
    is calculated it is locked, and no further calculations are performed until the opponent's
    next successful hit produces a new trajectory.
    """
    global userPaddleXPos, puckPosQueue, y_max, lockedTarget, lastAction, lastTargetY
    (obs, reward, done, debug) = env.step(0)  # kickoff
    blocks = getBlocks(obs)  # [((row_top, row_bottom), (col_left, col_right))]; we need col_left
    userPaddleXPos = blocks[0][1][0]
    y_max = env.observation_space.shape[1]  # observation shape is (210, 160, 3); 160 serves as the field height
    done = False
    while not done:
        action = getAction(obs)
        (obs, reward, done, debug) = env.step(action)
        if reward != 0.0:
            # Round over; reset all tracking state.
            puckPosQueue = []
            lockedTarget = False
            lastAction = 0
            lastTargetY = 0
        env.render('human')
    env.close()  # https://github.com/openai/gym/issues/893


if __name__ == '__main__':
    env = gym.make('Pong-v0')
    env.reset()
    autoPlay(env)
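
# Optional sketch (not in the original script): the same harness can also score the naive
# follower, getAction2, over one episode for comparison with the trajectory-based autoPlay.
# It assumes the same old Gym API (4-tuple env.step, reset() returning the observation) that
# the rest of this script relies on; the function name scoreNaiveAgent is illustrative only.
def scoreNaiveAgent():
    env = gym.make('Pong-v0')
    obs = env.reset()
    total, done = 0.0, False
    while not done:
        (obs, reward, done, debug) = env.step(getAction2(obs))
        total += reward  # +1 when we score, -1 when the opponent scores
    env.close()
    return total  # final score differential for the episode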