From 41d9a1254477fd2a7ecd10bf2573dc4b62660465 Mon Sep 17 00:00:00 2001 From: Justin Worthe Date: Sun, 7 Jul 2019 14:46:26 +0200 Subject: Strategy split to allow creating a minimax with pruning strategy --- src/game.rs | 5 +- src/strategy.rs | 244 +----------------------------------------------- src/strategy/mcts.rs | 241 +++++++++++++++++++++++++++++++++++++++++++++++ src/strategy/minimax.rs | 236 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 483 insertions(+), 243 deletions(-) create mode 100644 src/strategy/mcts.rs create mode 100644 src/strategy/minimax.rs diff --git a/src/game.rs b/src/game.rs index 56c583b..4e074ea 100644 --- a/src/game.rs +++ b/src/game.rs @@ -320,6 +320,8 @@ impl GameBoard { // TODO: Destroy health packs + let map_clone = self.map.clone(); + // TODO: iter, filter_map, filter, filter_map... flatten this block down a smidge. for player_index in 0..actions.len() { if let Action::Bomb(p) = actions[player_index] { @@ -333,9 +335,8 @@ impl GameBoard { for &(damage_offset, weapon_damage) in BOMB_DAMAGES.iter() { let target = p + damage_offset; - if self.map.at(target) == Some(true) { + if map_clone.at(target) == Some(true) { self.map.clear(target); - // TODO: Fix this so that simultaneous bomb throwing gives points to both players self.players[player_index].moves_score += DIG_SCORE; } diff --git a/src/strategy.rs b/src/strategy.rs index 0c86eeb..9c79d60 100644 --- a/src/strategy.rs +++ b/src/strategy.rs @@ -1,242 +1,4 @@ -use crate::command::{Action, Command}; -use crate::game::{GameBoard, SimulationOutcome}; +mod mcts; +mod minimax; -use std::cmp; -use std::collections::HashMap; -use std::ops::*; -use time::{Duration, PreciseTime}; - -pub fn choose_move( - state: &GameBoard, - previous_root: Option, - start_time: PreciseTime, - max_time: Duration, -) -> (Command, Node) { - let mut root_node = match previous_root { - None => Node { - state: state.clone(), - score_sum: ScoreSum::new(), - player_score_sums: [HashMap::new(), HashMap::new()], - unexplored: mcts_move_combo(state), - children: HashMap::new(), - }, - Some(mut node) => node - .children - .drain() - .map(|(_k, n)| n) - .find(|n| n.state == *state) - .unwrap_or_else(|| { - eprintln!("Previous round did not appear in the cache"); - Node { - state: state.clone(), - score_sum: ScoreSum::new(), - player_score_sums: [HashMap::new(), HashMap::new()], - unexplored: mcts_move_combo(state), - children: HashMap::new(), - } - }), - }; - - while start_time.to(PreciseTime::now()) < max_time { - let _ = mcts(&mut root_node); - } - - eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); - for (command, score_sum) in &root_node.player_score_sums[0] { - eprintln!( - "{} = {} ({} visits)", - command, - score_sum.avg().val, - score_sum.visit_count - ); - } - - let chosen_command = best_player_move(&root_node); - - root_node - .children - .retain(|[c1, _], _| *c1 == chosen_command); - - (chosen_command, root_node) -} - -pub struct Node { - state: GameBoard, - score_sum: ScoreSum, - player_score_sums: [HashMap; 2], - unexplored: Vec<[Command; 2]>, - children: HashMap<[Command; 2], Node>, -} - -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] -struct Score { - val: f32, -} - -impl AddAssign for Score { - fn add_assign(&mut self, other: Self) { - self.val = self.val + other.val; - } -} - -impl Div for Score { - type Output = Self; - fn div(self, other: u32) -> Self { - Score { - val: self.val / other as f32, - } - } -} - -impl cmp::Eq for Score {} -impl cmp::Ord for Score { - fn cmp(&self, other: &Score) -> cmp::Ordering { - self.val - .partial_cmp(&other.val) - .unwrap_or(cmp::Ordering::Equal) - } -} - -struct ScoreSum { - sum: Score, - visit_count: u32, -} - -impl ScoreSum { - fn new() -> ScoreSum { - ScoreSum { - sum: Score { val: 0. }, - visit_count: 0, - } - } - fn with_initial(score: Score) -> ScoreSum { - ScoreSum { - sum: score, - visit_count: 1, - } - } - fn avg(&self) -> Score { - self.sum / self.visit_count - } -} - -impl AddAssign for ScoreSum { - fn add_assign(&mut self, other: Score) { - self.sum += other; - self.visit_count = self.visit_count.saturating_add(1); - } -} - -// TODO: Look at transforming this into more of a minimax with AB pruning approach? -fn mcts(node: &mut Node) -> Score { - if node.state.outcome != SimulationOutcome::Continue { - score(&node.state) - } else if let Some(commands) = node.unexplored.pop() { - let mut new_state = node.state.clone(); - new_state.simulate(commands); - let score = rollout(&new_state); - let unexplored = if new_state.outcome == SimulationOutcome::Continue { - mcts_move_combo(&new_state) - } else { - Vec::new() - }; - - let new_node = Node { - state: new_state, - score_sum: ScoreSum::with_initial(score), - player_score_sums: [HashMap::new(), HashMap::new()], - unexplored, - children: HashMap::new(), - }; - node.children.insert(commands, new_node); - - update(node, commands, score); - score - } else { - let commands = choose_existing(node); - let score = mcts( - node.children - .get_mut(&commands) - .expect("The existing node hasn't been tried yet"), - ); - update(node, commands, score); - score - } -} - -fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> { - let player_moves = valid_moves(state, 0); - let opponent_moves = valid_moves(state, 1); - debug_assert!(!player_moves.is_empty(), "No player moves"); - debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); - - let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); - for p in &player_moves { - for o in &opponent_moves { - result.push([*p, *o]); - } - } - - result -} - -fn best_player_move(node: &Node) -> Command { - node.player_score_sums[0] - .iter() - .max_by_key(|(_command, score_sum)| score_sum.avg()) - .map(|(command, _score_sum)| *command) - .unwrap_or_else(|| Command::new(Action::DoNothing)) -} - -fn score(state: &GameBoard) -> Score { - Score { - val: match state.outcome { - SimulationOutcome::PlayerWon(0) => 500., - SimulationOutcome::PlayerWon(1) => -500., - _ => (state.players[0].score() - state.players[1].score()) as f32, - }, - } -} - -fn rollout(state: &GameBoard) -> Score { - score(state) -} - -fn choose_existing(node: &Node) -> [Command; 2] { - [choose_one_existing(node, 0), choose_one_existing(node, 1)] -} - -fn choose_one_existing(node: &Node, player_index: usize) -> Command { - let ln_n = (node.score_sum.visit_count as f32).ln(); - let c = 100.; - let multiplier = if player_index == 0 { 1. } else { -1. }; - node.player_score_sums[player_index] - .iter() - .max_by_key(|(_command, score_sum)| { - (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) - as i32 - }) - .map(|(command, _score_sum)| *command) - .unwrap_or_else(|| Command::new(Action::DoNothing)) -} - -fn update(node: &mut Node, commands: [Command; 2], score: Score) { - *node.player_score_sums[0] - .entry(commands[0]) - .or_insert_with(ScoreSum::new) += score; - *node.player_score_sums[1] - .entry(commands[1]) - .or_insert_with(ScoreSum::new) += score; - node.score_sum += score; -} - -fn valid_moves(state: &GameBoard, player_index: usize) -> Vec { - state - .valid_shoot_commands(player_index) - .iter() - .chain(state.valid_move_commands(player_index).iter()) - .chain(state.valid_bomb_commands(player_index).iter()) - .chain([Command::new(Action::DoNothing)].iter()) - .cloned() - .collect() -} +pub use mcts::{choose_move, Node}; diff --git a/src/strategy/mcts.rs b/src/strategy/mcts.rs new file mode 100644 index 0000000..0d1540e --- /dev/null +++ b/src/strategy/mcts.rs @@ -0,0 +1,241 @@ +use crate::command::{Action, Command}; +use crate::game::{GameBoard, SimulationOutcome}; + +use std::cmp; +use std::collections::HashMap; +use std::ops::*; +use time::{Duration, PreciseTime}; + +pub fn choose_move( + state: &GameBoard, + previous_root: Option, + start_time: PreciseTime, + max_time: Duration, +) -> (Command, Node) { + let mut root_node = match previous_root { + None => Node { + state: state.clone(), + score_sum: ScoreSum::new(), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored: mcts_move_combo(state), + children: HashMap::new(), + }, + Some(mut node) => node + .children + .drain() + .map(|(_k, n)| n) + .find(|n| n.state == *state) + .unwrap_or_else(|| { + eprintln!("Previous round did not appear in the cache"); + Node { + state: state.clone(), + score_sum: ScoreSum::new(), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored: mcts_move_combo(state), + children: HashMap::new(), + } + }), + }; + + while start_time.to(PreciseTime::now()) < max_time { + let _ = mcts(&mut root_node); + } + + eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); + for (command, score_sum) in &root_node.player_score_sums[0] { + eprintln!( + "{} = {} ({} visits)", + command, + score_sum.avg().val, + score_sum.visit_count + ); + } + + let chosen_command = best_player_move(&root_node); + + root_node + .children + .retain(|[c1, _], _| *c1 == chosen_command); + + (chosen_command, root_node) +} + +pub struct Node { + state: GameBoard, + score_sum: ScoreSum, + player_score_sums: [HashMap; 2], + unexplored: Vec<[Command; 2]>, + children: HashMap<[Command; 2], Node>, +} + +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +struct Score { + val: f32, +} + +impl AddAssign for Score { + fn add_assign(&mut self, other: Self) { + self.val = self.val + other.val; + } +} + +impl Div for Score { + type Output = Self; + fn div(self, other: u32) -> Self { + Score { + val: self.val / other as f32, + } + } +} + +impl cmp::Eq for Score {} +impl cmp::Ord for Score { + fn cmp(&self, other: &Score) -> cmp::Ordering { + self.val + .partial_cmp(&other.val) + .unwrap_or(cmp::Ordering::Equal) + } +} + +struct ScoreSum { + sum: Score, + visit_count: u32, +} + +impl ScoreSum { + fn new() -> ScoreSum { + ScoreSum { + sum: Score { val: 0. }, + visit_count: 0, + } + } + fn with_initial(score: Score) -> ScoreSum { + ScoreSum { + sum: score, + visit_count: 1, + } + } + fn avg(&self) -> Score { + self.sum / self.visit_count + } +} + +impl AddAssign for ScoreSum { + fn add_assign(&mut self, other: Score) { + self.sum += other; + self.visit_count = self.visit_count.saturating_add(1); + } +} + +fn mcts(node: &mut Node) -> Score { + if node.state.outcome != SimulationOutcome::Continue { + score(&node.state) + } else if let Some(commands) = node.unexplored.pop() { + let mut new_state = node.state.clone(); + new_state.simulate(commands); + let score = rollout(&new_state); + let unexplored = if new_state.outcome == SimulationOutcome::Continue { + mcts_move_combo(&new_state) + } else { + Vec::new() + }; + + let new_node = Node { + state: new_state, + score_sum: ScoreSum::with_initial(score), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored, + children: HashMap::new(), + }; + node.children.insert(commands, new_node); + + update(node, commands, score); + score + } else { + let commands = choose_existing(node); + let score = mcts( + node.children + .get_mut(&commands) + .expect("The existing node hasn't been tried yet"), + ); + update(node, commands, score); + score + } +} + +fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> { + let player_moves = valid_moves(state, 0); + let opponent_moves = valid_moves(state, 1); + debug_assert!(!player_moves.is_empty(), "No player moves"); + debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); + + let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); + for p in &player_moves { + for o in &opponent_moves { + result.push([*p, *o]); + } + } + + result +} + +fn best_player_move(node: &Node) -> Command { + node.player_score_sums[0] + .iter() + .max_by_key(|(_command, score_sum)| score_sum.avg()) + .map(|(command, _score_sum)| *command) + .unwrap_or_else(|| Command::new(Action::DoNothing)) +} + +fn score(state: &GameBoard) -> Score { + Score { + val: match state.outcome { + SimulationOutcome::PlayerWon(0) => 500., + SimulationOutcome::PlayerWon(1) => -500., + _ => (state.players[0].score() - state.players[1].score()) as f32, + }, + } +} + +fn rollout(state: &GameBoard) -> Score { + score(state) +} + +fn choose_existing(node: &Node) -> [Command; 2] { + [choose_one_existing(node, 0), choose_one_existing(node, 1)] +} + +fn choose_one_existing(node: &Node, player_index: usize) -> Command { + let ln_n = (node.score_sum.visit_count as f32).ln(); + let c = 100.; + let multiplier = if player_index == 0 { 1. } else { -1. }; + node.player_score_sums[player_index] + .iter() + .max_by_key(|(_command, score_sum)| { + (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) + as i32 + }) + .map(|(command, _score_sum)| *command) + .unwrap_or_else(|| Command::new(Action::DoNothing)) +} + +fn update(node: &mut Node, commands: [Command; 2], score: Score) { + *node.player_score_sums[0] + .entry(commands[0]) + .or_insert_with(ScoreSum::new) += score; + *node.player_score_sums[1] + .entry(commands[1]) + .or_insert_with(ScoreSum::new) += score; + node.score_sum += score; +} + +fn valid_moves(state: &GameBoard, player_index: usize) -> Vec { + state + .valid_shoot_commands(player_index) + .iter() + .chain(state.valid_move_commands(player_index).iter()) + .chain(state.valid_bomb_commands(player_index).iter()) + .chain([Command::new(Action::DoNothing)].iter()) + .cloned() + .collect() +} diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs new file mode 100644 index 0000000..deba92e --- /dev/null +++ b/src/strategy/minimax.rs @@ -0,0 +1,236 @@ +use crate::command::{Action, Command}; +use crate::game::{GameBoard, SimulationOutcome}; + +use std::cmp; +use std::collections::HashMap; +use std::ops::*; +use time::{Duration, PreciseTime}; + +pub fn choose_move( + state: &GameBoard, + previous_root: Option, + start_time: PreciseTime, + max_time: Duration, +) -> (Command, Node) { + let mut root_node = match previous_root { + None => Node { + score_sum: ScoreSum::new(), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored: mcts_move_combo(state), + children: HashMap::new(), + }, + Some(mut node) => node + .children + .drain() + .map(|(_k, n)| n) + .find(|n| false) // TODO: Identify the last opponent move to use this cache + .unwrap_or_else(|| { + eprintln!("Previous round did not appear in the cache"); + Node { + state: state.clone(), + score_sum: ScoreSum::new(), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored: mcts_move_combo(state), + children: HashMap::new(), + } + }), + }; + + while start_time.to(PreciseTime::now()) < max_time { + let _ = mcts(&mut root_node); + } + + eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); + for (command, score_sum) in &root_node.player_score_sums[0] { + eprintln!( + "{} = {} ({} visits)", + command, + score_sum.avg().val, + score_sum.visit_count + ); + } + + let chosen_command = best_player_move(&root_node); + + root_node + .children + .retain(|[c1, _], _| *c1 == chosen_command); + + (chosen_command, root_node) +} + +pub struct Node { + score_sum: ScoreSum, + player_score_sums: [HashMap; 2], + unexplored: Vec<[Command; 2]>, + children: HashMap<[Command; 2], Node>, +} + +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +struct Score { + val: f32, +} + +impl AddAssign for Score { + fn add_assign(&mut self, other: Self) { + self.val = self.val + other.val; + } +} + +impl Div for Score { + type Output = Self; + fn div(self, other: u32) -> Self { + Score { + val: self.val / other as f32, + } + } +} + +impl cmp::Eq for Score {} +impl cmp::Ord for Score { + fn cmp(&self, other: &Score) -> cmp::Ordering { + self.val + .partial_cmp(&other.val) + .unwrap_or(cmp::Ordering::Equal) + } +} + +struct ScoreSum { + sum: Score, + visit_count: u32, +} + +impl ScoreSum { + fn new() -> ScoreSum { + ScoreSum { + sum: Score { val: 0. }, + visit_count: 0, + } + } + fn with_initial(score: Score) -> ScoreSum { + ScoreSum { + sum: score, + visit_count: 1, + } + } + fn avg(&self) -> Score { + self.sum / self.visit_count + } +} + +impl AddAssign for ScoreSum { + fn add_assign(&mut self, other: Score) { + self.sum += other; + self.visit_count = self.visit_count.saturating_add(1); + } +} + +// TODO: Transform this into more of a minimax with AB pruning approach? +fn mcts(node: &mut Node) -> Score { + if node.state.outcome != SimulationOutcome::Continue { + score(&node.state) + } else if let Some(commands) = node.unexplored.pop() { + let mut new_state = node.state.clone(); + new_state.simulate(commands); + let score = score(&new_state); + let unexplored = if new_state.outcome == SimulationOutcome::Continue { + mcts_move_combo(&new_state) + } else { + Vec::new() + }; + + let new_node = Node { + state: new_state, + score_sum: ScoreSum::with_initial(score), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored, + children: HashMap::new(), + }; + node.children.insert(commands, new_node); + + update(node, commands, score); + score + } else { + let commands = choose_existing(node); + let score = mcts( + node.children + .get_mut(&commands) + .expect("The existing node hasn't been tried yet"), + ); + update(node, commands, score); + score + } +} + +fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> { + let player_moves = valid_moves(state, 0); + let opponent_moves = valid_moves(state, 1); + debug_assert!(!player_moves.is_empty(), "No player moves"); + debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); + + let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); + for p in &player_moves { + for o in &opponent_moves { + result.push([*p, *o]); + } + } + + result +} + +fn best_player_move(node: &Node) -> Command { + node.player_score_sums[0] + .iter() + .max_by_key(|(_command, score_sum)| score_sum.avg()) + .map(|(command, _score_sum)| *command) + .unwrap_or_else(|| Command::new(Action::DoNothing)) +} + +fn score(state: &GameBoard) -> Score { + Score { + val: match state.outcome { + SimulationOutcome::PlayerWon(0) => 500., + SimulationOutcome::PlayerWon(1) => -500., + _ => (state.players[0].score() - state.players[1].score()) as f32, + }, + } +} + +fn choose_existing(node: &Node) -> [Command; 2] { + [choose_one_existing(node, 0), choose_one_existing(node, 1)] +} + +fn choose_one_existing(node: &Node, player_index: usize) -> Command { + let ln_n = (node.score_sum.visit_count as f32).ln(); + let c = 100.; + let multiplier = if player_index == 0 { 1. } else { -1. }; + node.player_score_sums[player_index] + .iter() + .max_by_key(|(_command, score_sum)| { + (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) + as i32 + }) + .map(|(command, _score_sum)| *command) + .unwrap_or_else(|| Command::new(Action::DoNothing)) +} + +fn update(node: &mut Node, commands: [Command; 2], score: Score) { + *node.player_score_sums[0] + .entry(commands[0]) + .or_insert_with(ScoreSum::new) += score; + *node.player_score_sums[1] + .entry(commands[1]) + .or_insert_with(ScoreSum::new) += score; + node.score_sum += score; +} + +fn valid_moves(state: &GameBoard, player_index: usize) -> Vec { + state + .valid_shoot_commands(player_index) + .iter() + .chain(state.valid_move_commands(player_index).iter()) + .chain(state.valid_bomb_commands(player_index).iter()) + .chain([Command::new(Action::DoNothing)].iter()) + .cloned() + .collect() +} -- cgit v1.2.3