def minimax_agent(obs, config):
    """Choose a column to play on a connect-N board with depth-limited minimax.

    The board is read from ``obs.board`` as a flat sequence indexed by
    ``column + row * config.columns``. ``0`` marks an empty cell, and a piece
    dropped into a column lands on the empty cell with the largest row index.

    Args:
        obs: Observation exposing ``board``. If it also exposes ``mark`` equal
            to 1 or 2 (as Kaggle ConnectX observations do), the agent plays
            that side; otherwise it plays as player 1.
        config: Configuration exposing ``columns``, ``rows`` and ``inarow``.

    Returns:
        The index of the chosen column.

    Raises:
        IndexError: If no column has an empty top cell (nothing can be played).
    """
    from math import inf as infinity
    from random import choice

    columns = config.columns
    rows = config.rows
    # Wins are detected one move ahead (before the piece is dropped), so only
    # ``inarow - 1`` pieces already on the board must line up with the
    # landing cell.
    inarow = config.inarow - 1

    # Side this agent plays. Player 1 is the default when the observation
    # carries no usable ``mark``.
    me = getattr(obs, "mark", 1)
    if me not in (1, 2):
        me = 1

    def landing_row(board, column):
        """Return the row where a piece dropped into ``column`` comes to rest."""
        return max(r for r in range(rows) if board[column + (r * columns)] == 0)

    def is_win(board, player, column):
        """Return True if ``player`` dropping a piece into ``column`` wins."""
        row = landing_row(board, column)

        def count(offset_row, offset_column):
            """Count consecutive ``player`` cells from the landing cell in one direction."""
            for i in range(1, inarow + 1):
                r = row + offset_row * i
                c = column + offset_column * i
                # Stop at the board edge or at the first cell not owned by player.
                if (
                    r < 0
                    or r >= rows
                    or c < 0
                    or c >= columns
                    or board[c + (r * columns)] != player
                ):
                    return i - 1
            return inarow

        return (
            count(1, 0) >= inarow  # vertical: only downwards can hold pieces
            or (count(0, 1) + count(0, -1)) >= inarow  # horizontal: both sides
            or (count(-1, -1) + count(1, 1)) >= inarow  # main diagonal
            or (count(-1, 1) + count(1, -1)) >= inarow  # anti-diagonal
        )

    def play(board, column, player):
        """Drop a ``player`` piece into ``column`` of ``board`` in place."""
        board[column + (landing_row(board, column) * columns)] = player

    def minimax(board, player, depth):
        """Return ``[score, column]`` for ``player`` to move.

        Scores are from this agent's point of view: 1 means the agent wins,
        -1 means the opponent wins, 0 means a draw or the depth limit.
        """
        # Termination 1: search depth exhausted.
        if depth == 0:
            return [0, None]

        open_columns = [c for c in range(columns) if board[c] == 0]
        # Termination 2: board is full with no winner. Score it as a draw
        # rather than leaking the +/-infinity sentinel to the parent, which
        # would make a draw look worse (or better) than an actual loss/win.
        if not open_columns:
            return [0, None]

        maximizing = player == me

        # Termination 3: the side to move can win immediately.
        for column in open_columns:
            if is_win(board, player, column):
                return [1 if maximizing else -1, column]

        best_score = -infinity if maximizing else infinity
        best_column = None
        for column in open_columns:
            next_board = board[:]
            play(next_board, column, player)
            # Look ahead: the other side (1 <-> 2) moves next.
            score, _ = minimax(next_board, player % 2 + 1, depth - 1)
            improved = score > best_score if maximizing else score < best_score
            if improved:
                best_score = score
                best_column = column
        return [best_score, best_column]

    max_depth = 4
    _, column = minimax(obs.board[:], me, max_depth)
    # Fallback: if minimax produced no move, pick a random open column.
    if column is None:
        column = choice([c for c in range(columns) if obs.board[c] == 0])
    return column
def negamax_agent(obs, config):
    """Choose a column to play on a connect-N board with depth-limited negamax.

    The board is read from ``obs.board`` as a flat sequence indexed by
    ``column + row * config.columns``. ``0`` marks an empty cell, and a piece
    dropped into a column lands on the empty cell with the largest row index.

    Args:
        obs: Observation exposing ``board``. If it also exposes ``mark`` equal
            to 1 or 2 (as Kaggle ConnectX observations do), the agent plays
            that side; otherwise it plays as player 1.
        config: Configuration exposing ``columns``, ``rows`` and ``inarow``.

    Returns:
        The index of the chosen column.

    Raises:
        IndexError: If no column has an empty top cell (nothing can be played).
    """
    from math import inf as infinity
    from random import choice

    columns = config.columns
    rows = config.rows
    # Wins are detected one move ahead (before the piece is dropped), so only
    # ``inarow - 1`` pieces already on the board must line up with the
    # landing cell.
    inarow = config.inarow - 1

    # Side this agent plays. Player 1 is the default when the observation
    # carries no usable ``mark``.
    me = getattr(obs, "mark", 1)
    if me not in (1, 2):
        me = 1

    def landing_row(board, column):
        """Return the row where a piece dropped into ``column`` comes to rest."""
        return max(r for r in range(rows) if board[column + (r * columns)] == 0)

    def is_win(board, player, column):
        """Return True if ``player`` dropping a piece into ``column`` wins."""
        row = landing_row(board, column)

        def count(offset_row, offset_column):
            """Count consecutive ``player`` cells from the landing cell in one direction."""
            for i in range(1, inarow + 1):
                r = row + offset_row * i
                c = column + offset_column * i
                # Stop at the board edge or at the first cell not owned by player.
                if (
                    r < 0
                    or r >= rows
                    or c < 0
                    or c >= columns
                    or board[c + (r * columns)] != player
                ):
                    return i - 1
            return inarow

        return (
            count(1, 0) >= inarow  # vertical: only downwards can hold pieces
            or (count(0, 1) + count(0, -1)) >= inarow  # horizontal: both sides
            or (count(-1, -1) + count(1, 1)) >= inarow  # main diagonal
            or (count(-1, 1) + count(1, -1)) >= inarow  # anti-diagonal
        )

    def play(board, column, player):
        """Drop a ``player`` piece into ``column`` of ``board`` in place."""
        board[column + (landing_row(board, column) * columns)] = player

    def negamax(board, player, depth):
        """Return ``[score, column]`` from the point of view of ``player`` to move.

        1 means ``player`` wins, -1 means ``player`` loses, 0 means a draw or
        the depth limit. Each level negates the score returned by its child.
        """
        # Termination 1: search depth exhausted.
        if depth == 0:
            return [0, None]

        open_columns = [c for c in range(columns) if board[c] == 0]
        # Termination 2: board is full with no winner. Score it as a draw
        # rather than returning -infinity, which the parent would negate into
        # +infinity and prefer over every real outcome.
        if not open_columns:
            return [0, None]

        # Termination 3: the side to move can win immediately.
        for column in open_columns:
            if is_win(board, player, column):
                return [1, column]

        best_score = -infinity
        best_column = None
        for column in open_columns:
            next_board = board[:]
            play(next_board, column, player)
            # Look ahead: the other side (1 <-> 2) moves next, and its score
            # is the negation of ours.
            child_score, _ = negamax(next_board, player % 2 + 1, depth - 1)
            score = -child_score
            if score > best_score:
                best_score = score
                best_column = column
        return [best_score, best_column]

    max_depth = 4
    _, column = negamax(obs.board[:], me, max_depth)
    # Fallback: if negamax produced no move, pick a random open column.
    if column is None:
        column = choice([c for c in range(columns) if obs.board[c] == 0])
    return column