从桌游到代码:用Python模拟《展翅翱翔》AI对手,手把手教你实现策略算法
桌游与编程的碰撞总能产生奇妙的火花。当《展翅翱翔》这款以鸟类生态为主题的策略桌游遇上Python,我们不仅能体验游戏的乐趣,还能深入探索AI决策的奥秘。本文将带你从零开始,用代码构建一个能与你对战的智能对手,揭开游戏策略背后的算法面纱。

## 1. 游戏规则的数据建模

任何游戏AI的实现都始于对规则的精确建模。《展翅翱翔》的核心机制围绕栖息地、鸟类卡牌和行动选择展开,我们需要用适当的数据结构来呈现这些元素。

### 1.1 基础数据结构设计

游戏中的鸟类卡牌是核心元素,我们可以用一个类来表示:

```python
class BirdCard:
    def __init__(self, name, habitat, food_cost, egg_capacity, wingspan, point_value, special_ability=None):
        self.name = name                        # 鸟类名称
        self.habitat = habitat                  # 栖息地类型:forest, grassland, wetland
        self.food_cost = food_cost              # 食物需求,如 {"seed": 1, "fish": 1}
        self.egg_capacity = egg_capacity        # 最大蛋容量
        self.wingspan = wingspan                # 翼展
        self.point_value = point_value          # 基础分数
        self.special_ability = special_ability  # 特殊能力函数
        self.eggs = 0                           # 当前蛋数量
```

游戏状态的表示则需要更复杂的结构:

```python
class GameState:
    def __init__(self):
        self.habitats = {
            "forest": [],     # 森林区的鸟类
            "grassland": [],  # 草原区的鸟类
            "wetland": []     # 沼泽区的鸟类
        }
        self.player_resources = {
            "food": {"seed": 0, "fish": 0, "rodent": 0, "fruit": 0, "invertebrate": 0},
            "eggs": 0,
            "cards": []  # 手牌
        }
        self.round_tasks = []        # 轮末任务
        self.bonus_cards = []        # 奖励卡
        self.current_round = 1
        self.actions_remaining = 2   # 每回合行动次数
```

### 1.2 游戏动作的编码实现

游戏中的四种基本行动需要被转化为可执行的代码逻辑。

打牌行动:

```python
def play_bird_card(player, bird_card, habitat):
    # 检查资源是否足够
    if not can_afford(player.resources, bird_card.food_cost):
        return False
    # 扣除资源
    deduct_resources(player.resources, bird_card.food_cost)
    # 将卡牌放入指定栖息地
    player.habitats[habitat].append(bird_card)
    player.hand.remove(bird_card)
    # 触发特殊能力
    if bird_card.special_ability:
        bird_card.special_ability(player)
    return True
```

获取食物:

```python
def gain_food(player, food_type, amount=1):
    if food_type in player.resources["food"]:
        player.resources["food"][food_type] += amount
        return True
    return False
```

产蛋:

```python
def lay_eggs(player, habitat, bird_index, amount=1):
    bird = player.habitats[habitat][bird_index]
    if bird.eggs + amount <= bird.egg_capacity:
        bird.eggs += amount
        player.resources["eggs"] -= amount
        return True
    return False
```

抽牌:

```python
def draw_cards(player, amount=1):
    for _ in range(amount):
        if len(game_deck) > 0:
            player.hand.append(game_deck.pop())
    return True
```
## 2. 游戏核心逻辑的实现

有了基础数据结构后,我们需要构建游戏的运行框架,这是AI能够参与的基础环境。

### 2.1 游戏循环与回合管理

游戏的主循环控制着整个流程:

```python
def game_loop(players):
    initialize_game(players)
    for round_num in range(1, 5):  # 共4轮游戏
        print(f"\n=== 第 {round_num} 轮开始 ===")
        # 轮初设置
        setup_round(round_num)
        # 玩家轮流行动
        for player in players:
            while player.actions_remaining > 0:
                if player.is_ai:
                    action = ai_decide_action(player)
                else:
                    action = get_human_action(player)
                execute_action(player, action)
                player.actions_remaining -= 1
        # 轮末计分
        resolve_round_tasks(players)
    # 游戏结束计分
    final_scoring(players)
    declare_winner(players)
```

### 2.2 行动验证与状态更新

每个行动的合法性需要被严格验证:

```python
def validate_action(player, action):
    action_type = action["type"]
    if action_type == "play_card":
        card = action["card"]
        habitat = action["habitat"]
        return (card in player.hand
                and habitat in ["forest", "grassland", "wetland"]
                and can_afford(player.resources, card.food_cost))
    elif action_type == "gain_food":
        return player.actions_remaining > 0
    elif action_type == "lay_eggs":
        habitat = action["habitat"]
        bird_idx = action["bird_index"]
        return (habitat in player.habitats
                and 0 <= bird_idx < len(player.habitats[habitat])
                and player.resources["eggs"] >= action["amount"])
    elif action_type == "draw_cards":
        return len(game_deck) > 0
    return False
```
## 3. AI策略算法的设计

现在来到最有趣的部分——让计算机学会玩《展翅翱翔》。我们将实现三种不同复杂度的AI策略。

### 3.1 随机行动基准AI

作为基准线,我们先实现一个完全随机的AI:

```python
class RandomAI:
    def decide_action(self, game_state):
        possible_actions = []
        # 收集所有可能的打牌行动
        for card in game_state.player.hand:
            for habitat in ["forest", "grassland", "wetland"]:
                if can_afford(game_state.player.resources, card.food_cost):
                    possible_actions.append({
                        "type": "play_card",
                        "card": card,
                        "habitat": habitat
                    })
        # 添加其他基本行动
        possible_actions.append({"type": "gain_food"})
        possible_actions.append({"type": "lay_eggs"})
        possible_actions.append({"type": "draw_cards"})
        # 随机选择一个有效行动
        return random.choice(possible_actions)
```

### 3.2 基于规则的启发式AI

更智能的AI需要评估每个行动的潜在价值:

```python
class RuleBasedAI:
    def evaluate_action(self, action, game_state):
        score = 0
        if action["type"] == "play_card":
            card = action["card"]
            # 基础分数
            score += card.point_value * 2
            # 考虑栖息地平衡
            habitat_count = len(game_state.player.habitats[action["habitat"]])
            score -= habitat_count * 0.5  # 避免过度集中
            # 特殊能力加成
            if card.special_ability:
                score += 3
        elif action["type"] == "gain_food":
            # 根据最需要的食物类型评估
            needed_food = self.identify_most_needed_food(game_state)
            score += 2 if needed_food else 1
        elif action["type"] == "lay_eggs":
            # 根据可产蛋的鸟类数量评估
            available_birds = sum(1 for h in game_state.player.habitats.values()
                                  for b in h if b.eggs < b.egg_capacity)
            score += available_birds * 0.5
        elif action["type"] == "draw_cards":
            # 手牌较少时更倾向于抽牌
            score += max(0, 5 - len(game_state.player.hand)) * 0.8
        return score

    def decide_action(self, game_state):
        possible_actions = self.generate_possible_actions(game_state)
        scored_actions = [(a, self.evaluate_action(a, game_state)) for a in possible_actions]
        return max(scored_actions, key=lambda x: x[1])[0]
```

### 3.3 蒙特卡洛树搜索(MCTS)AI

对于更高级的AI,我们可以实现蒙特卡洛树搜索算法:

```python
class MCTSAI:
    def __init__(self, iterations=100):
        self.iterations = iterations

    def decide_action(self, game_state):
        root = MCTSNode(game_state)
        for _ in range(self.iterations):
            node = root
            # 选择
            while not node.is_terminal():
                if node.is_fully_expanded():
                    node = node.best_child()
                else:
                    node = node.expand()
                    break
            # 模拟
            result = self.simulate(node.game_state)
            # 回溯
            while node is not None:
                node.update(result)
                node = node.parent
        return root.best_action()


class MCTSNode:
    def __init__(self, game_state, parent=None, action=None):
        self.game_state = game_state
        self.parent = parent
        self.action = action
        self.children = []
        self.visits = 0
        self.value = 0
        self.untried_actions = self.get_legal_actions()

    def best_child(self, c_param=1.4):
        choices_weights = [
            (child.value / child.visits)
            + c_param * math.sqrt(2 * math.log(self.visits) / child.visits)
            for child in self.children
        ]
        return self.children[np.argmax(choices_weights)]

    def expand(self):
        action = self.untried_actions.pop()
        new_state = self.game_state.clone()
        new_state.execute_action(action)
        child_node = MCTSNode(new_state, self, action)
        self.children.append(child_node)
        return child_node

    def update(self, result):
        self.visits += 1
        self.value += result
```

## 4. 策略流派的具体实现

《展翅翱翔》玩家社区中形成了几个主流策略,我们可以将这些策略编码到AI中。

### 4.1 轮末任务流派实现

这个策略专注于完成每轮结束时的任务:

```python
class RoundTaskStrategy:
    def __init__(self):
        self.current_task = None

    def evaluate_round_task(self, game_state):
        if not game_state.round_tasks:
            return 0
        self.current_task = game_state.round_tasks[0]
        task_type = self.current_task["type"]
        if task_type == "most_cards_in_habitat":
            habitat = self.current_task["habitat"]
            count = len(game_state.player.habitats[habitat])
            return count * 2
        elif task_type == "most_eggs_on_birds":
            total_eggs = sum(b.eggs for h in game_state.player.habitats.values() for b in h)
            return total_eggs * 1.5
        elif task_type == "most_food_types":
            unique_food = sum(1 for f in game_state.player.resources["food"].values() if f > 0)
            return unique_food * 3

    def adjust_action_scores(self, action_scores, game_state):
        task_bonus = self.evaluate_round_task(game_state)
        for action, score in action_scores.items():
            if action["type"] == "play_card":
                if action["habitat"] == self.current_task.get("habitat", ""):
                    action_scores[action] += task_bonus * 0.5
            elif action["type"] == "lay_eggs":
                action_scores[action] += task_bonus * 0.3
            elif action["type"] == "gain_food":
                action_scores[action] += task_bonus * 0.2
        return action_scores
```

### 4.2 奖励卡流派实现

这个策略围绕特定的奖励卡构建:

```python
class BonusCardStrategy:
    def __init__(self):
        self.active_bonuses = []

    def evaluate_bonus_cards(self, game_state):
        total_bonus = 0
        self.active_bonuses = []
        for card in game_state.player.bonus_cards:
            if card["type"] == "bird_types":
                bird_count = sum(1 for h in game_state.player.habitats.values()
                                 for b in h if b.habitat in card["habitats"])
                total_bonus += bird_count * card["points_per_bird"]
                self.active_bonuses.append(("habitat", card["habitats"]))
            elif card["type"] == "egg_counts":
                egg_total = sum(b.eggs for h in game_state.player.habitats.values() for b in h)
                total_bonus += (egg_total // card["eggs_per_point"]) * card["points"]
                self.active_bonuses.append(("eggs", None))
        return total_bonus

    def adjust_action_scores(self, action_scores, game_state):
        bonus_value = self.evaluate_bonus_cards(game_state)
        for action, score in action_scores.items():
            if action["type"] == "play_card":
                for bonus in self.active_bonuses:
                    if bonus[0] == "habitat" and action["habitat"] in bonus[1]:
                        action_scores[action] += bonus_value * 0.4
            elif action["type"] == "lay_eggs":
                for bonus in self.active_bonuses:
                    if bonus[0] == "eggs":
                        action_scores[action] += bonus_value * 0.6
        return action_scores
```

### 4.3 下蛋流派实现

专注于最大化产蛋能力的策略:

```python
class EggLayingStrategy:
    def evaluate_egg_potential(self, game_state):
        total_capacity = sum(b.egg_capacity for h in game_state.player.habitats.values() for b in h)
        current_eggs = sum(b.eggs for h in game_state.player.habitats.values() for b in h)
        return (total_capacity - current_eggs) * 2

    def adjust_action_scores(self, action_scores, game_state):
        egg_potential = self.evaluate_egg_potential(game_state)
        for action, score in action_scores.items():
            if action["type"] == "play_card":
                if action["card"].egg_capacity >= 4:  # 优先高容量鸟类
                    action_scores[action] += egg_potential * 0.8
            elif action["type"] == "lay_eggs":
                action_scores[action] += egg_potential * 1.2
            elif action["type"] == "gain_food":
                action_scores[action] += egg_potential * 0.1
        return action_scores
```
## 5. AI的评估与优化

构建AI后,我们需要评估其表现并不断改进。

### 5.1 评估指标设计

衡量AI表现的几个关键指标:

| 指标名称 | 计算方法 | 权重 |
| --- | --- | --- |
| 平均得分 | 多局游戏得分的平均值 | 40% |
| 策略一致性 | 遵循选定策略的行动比例 | 30% |
| 资源利用效率 | (总得分)/(总消耗资源) | 20% |
| 应变能力 | 应对突发情况(如卡牌短缺)的成功率 | 10% |

### 5.2 参数调优方法

AI策略中的各种权重参数需要优化:

```python
def optimize_ai_parameters(base_ai, param_ranges, games_per_eval=50):
    best_params = None
    best_score = -float("inf")
    # 使用网格搜索寻找最优参数组合
    for params in generate_param_combinations(param_ranges):
        ai = base_ai.with_params(params)
        total_score = 0
        for _ in range(games_per_eval):
            game = Game([ai, RandomAI()])
            result = game.play()
            total_score += result["scores"][0]  # 我们的AI是玩家0
        avg_score = total_score / games_per_eval
        if avg_score > best_score:
            best_score = avg_score
            best_params = params
    return best_params
```

### 5.3 不同AI的对战分析

让我们比较三种AI的表现:

```python
def compare_ai_performance(ais, num_games=100):
    results = {ai.__class__.__name__: [] for ai in ais}
    for _ in range(num_games):
        # 随机打乱AI顺序以避免先手优势
        shuffled_ais = random.sample(ais, len(ais))
        game = Game(shuffled_ais)
        game_result = game.play()
        for i, ai in enumerate(shuffled_ais):
            name = ai.__class__.__name__
            results[name].append(game_result["scores"][i])
    # 输出统计结果
    print("AI性能比较结果:")
    for name, scores in results.items():
        print(f"{name}:")
        print(f"  平均得分: {np.mean(scores):.1f}")
        print(f"  最高得分: {max(scores)}")
        print(f"  得分标准差: {np.std(scores):.1f}")
        print(f"  获胜次数: {sum(1 for s in scores if s == max(game_result['scores']))}")
```
## 6. 进阶主题与扩展

完成基础AI后,我们可以探索更高级的功能和优化。

### 6.1 机器学习方法的应用

使用强化学习训练AI:

```python
class RLAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95            # 折扣因子
        self.epsilon = 1.0           # 探索率
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
```

### 6.2 并行化与性能优化

对于计算密集型的MCTS算法,我们可以使用多进程加速:

```python
def parallel_mcts(root_state, iterations=1000, num_processes=4):
    with mp.Pool(processes=num_processes) as pool:
        results = []
        for _ in range(iterations // num_processes):
            # 每个进程运行一个完整的MCTS模拟
            async_results = [
                pool.apply_async(run_mcts_simulation, (root_state,))
                for _ in range(num_processes)
            ]
            # 收集结果并合并
            for res in async_results:
                node = res.get()
                results.append(node)
    # 合并所有结果
    best_action = None
    best_value = -float("inf")
    action_counts = defaultdict(int)
    action_values = defaultdict(float)
    for node in results:
        for child in node.children:
            action_counts[child.action] += child.visits
            action_values[child.action] += child.value
    for action in action_counts:
        avg_value = action_values[action] / action_counts[action]
        if avg_value > best_value:
            best_value = avg_value
            best_action = action
    return best_action
```

### 6.3 可视化与调试工具

开发可视化工具帮助理解AI决策:

```python
def visualize_decision_process(ai, game_state):
    if isinstance(ai, RuleBasedAI):
        actions = ai.generate_possible_actions(game_state)
        scored_actions = [(a, ai.evaluate_action(a, game_state)) for a in actions]
        # 创建条形图
        plt.figure(figsize=(10, 6))
        action_descs = [str(a)[:50] for a, _ in scored_actions]
        scores = [s for _, s in scored_actions]
        plt.barh(action_descs, scores)
        plt.xlabel("Action Score")
        plt.title("AI Action Evaluation")
        plt.tight_layout()
        plt.show()
    elif isinstance(ai, MCTSAI):
        root = MCTSNode(game_state)
        for _ in range(100):  # 快速运行少量迭代
            node = root
            while not node.is_terminal():
                if node.is_fully_expanded():
                    node = node.best_child()
                else:
                    node = node.expand()
                    break
            result = ai.simulate(node.game_state)
            while node is not None:
                node.update(result)
                node = node.parent
        # 可视化搜索树
        visualize_tree(root)
```

## 7. 实战:构建完整的AI对手

现在我们将所有部分组合起来,创建一个完整的AI对手系统。

### 7.1 系统架构设计

完整的AI游戏系统包含以下组件:

```
┌─────────────┐    ┌─────────────┐    ┌─────────────┐
│   游戏引擎   │───▶│   AI核心    │───▶│  策略管理器  │
└─────────────┘    └─────────────┘    └─────────────┘
       ▲                  ▲                  ▲
       │                  │                  │
┌─────────────┐    ┌─────────────┐    ┌─────────────┐
│  状态观测器  │    │  动作执行器  │    │  评估反馈器  │
└─────────────┘    └─────────────┘    └─────────────┘
```

### 7.2 代码整合与接口设计

主AI类整合所有组件:

```python
class WingspanAI:
    def __init__(self, strategy="adaptive"):
        self.strategy = strategy
        self.base_ai = RuleBasedAI()
        self.mcts_ai = MCTSAI(iterations=50)
        self.current_plan = []
        # 策略权重
        self.strategy_weights = {
            "round_task": 0.4,
            "bonus_card": 0.3,
            "egg_laying": 0.3
        }

    def decide_action(self, game_state):
        # 每5回合重新评估策略
        if game_state.current_round % 5 == 1 or not self.current_plan:
            self.assess_game_state(game_state)
        # 如果有预定计划,执行计划中的行动
        if self.current_plan:
            action = self.current_plan.pop(0)
            if self.validate_action(action, game_state):
                return action
        # 否则使用MCTS决策关键行动
        if len(game_state.player.hand) > 3 or game_state.current_round >= 3:
            return self.mcts_ai.decide_action(game_state)
        # 默认使用基于规则的决策
        return self.base_ai.decide_action(game_state)

    def assess_game_state(self, game_state):
        # 评估当前最适合的策略
        round_task_potential = RoundTaskStrategy().evaluate_round_task(game_state)
        bonus_card_potential = BonusCardStrategy().evaluate_bonus_cards(game_state)
        egg_laying_potential = EggLayingStrategy().evaluate_egg_potential(game_state)
        # 根据游戏阶段调整权重
        if game_state.current_round >= 3:
            self.strategy_weights["egg_laying"] *= 1.5
        # 制定行动计划
        self.current_plan = self.generate_plan(
            game_state,
            round_task_potential,
            bonus_card_potential,
            egg_laying_potential
        )
```

### 7.3 与人类玩家对战的实现

最后,实现人机对战界面:

```python
def human_vs_ai_game():
    print("欢迎来到《展翅翱翔》人机对战!")
    human_name = input("请输入你的名字: ")
    # 初始化游戏
    players = [
        Player(human_name, is_ai=False),
        Player("AI对手", is_ai=True, ai_class=WingspanAI)
    ]
    game = Game(players)
    # 游戏主循环
    while not game.is_game_over():
        current_player = game.get_current_player()
        print(f"\n=== {current_player.name}的回合 ===")
        if current_player.is_ai:
            print("AI正在思考...")
            action = current_player.ai.decide_action(game.get_state_for_player(current_player))
            game.execute_action(current_player, action)
            print(f"AI执行了动作: {describe_action(action)}")
        else:
            print("当前游戏状态:")
            display_game_state(game.get_state_for_player(current_player))
            print("\n可执行动作:")
            valid_actions = game.get_valid_actions(current_player)
            for i, action in enumerate(valid_actions):
                print(f"{i + 1}. {describe_action(action)}")
            choice = int(input("请选择要执行的动作(输入编号): ")) - 1
            game.execute_action(current_player, valid_actions[choice])
    # 游戏结束
    print("\n=== 游戏结束 ===")
    final_scores = game.get_final_scores()
    for player, score in final_scores.items():
        print(f"{player.name}: {score}分")
    winner = max(final_scores.items(), key=lambda x: x[1])[0]
    print(f"\n获胜者是: {winner.name}!")
```

在实现这个AI系统的过程中,最有趣的部分是观察不同策略在实际对战中的表现。轮末任务策略在早期游戏中往往表现优异,而到了后期,下蛋策略通常会后来居上。一个真正强大的AI需要能够根据游戏进程动态调整策略重心,这正是我们实现的适应性策略系统的价值所在。
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2549921.html
如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!