Strategy split to allow creating a minimax with pruning strategy

author: Justin Worthe <justin@worthe-it.co.za> 2019-07-07 14:46:26 +0200
committer: Justin Worthe <justin@worthe-it.co.za> 2019-07-07 14:46:26 +0200
commit: 41d9a1254477fd2a7ecd10bf2573dc4b62660465 (patch)
tree: cc7dcad811e6b1d2fdd4d58fecba40769c1772a1
parent: 352ad4a297511dfe843bad5206879983ad34f3e2 (diff)
4 files changed, 483 insertions, 243 deletions
diff --git a/src/game.rs b/src/game.rs
index 56c583b..4e074ea 100644
--- a/src/game.rs
+++ b/src/game.rs
@@ -320,6 +320,8 @@ impl GameBoard {
 
         // TODO: Destroy health packs
 
+        let map_clone = self.map.clone();
+
         // TODO: iter, filter_map, filter, filter_map... flatten this block down a smidge.
         for player_index in 0..actions.len() {
             if let Action::Bomb(p) = actions[player_index] {
@@ -333,9 +335,8 @@ impl GameBoard {
                         for &(damage_offset, weapon_damage) in BOMB_DAMAGES.iter() {
                             let target = p + damage_offset;
 
-                            if self.map.at(target) == Some(true) {
+                            if map_clone.at(target) == Some(true) {
                                 self.map.clear(target);
-                                // TODO: Fix this so that simultaneous bomb throwing gives points to both players
                                 self.players[player_index].moves_score += DIG_SCORE;
                             }
 
diff --git a/src/strategy.rs b/src/strategy.rs
index 0c86eeb..9c79d60 100644
--- a/src/strategy.rs
+++ b/src/strategy.rs
@@ -1,242 +1,4 @@
-use crate::command::{Action, Command};
-use crate::game::{GameBoard, SimulationOutcome};
+mod mcts;
+mod minimax;
 
-use std::cmp;
-use std::collections::HashMap;
-use std::ops::*;
-use time::{Duration, PreciseTime};
-
-pub fn choose_move(
-    state: &GameBoard,
-    previous_root: Option<Node>,
-    start_time: PreciseTime,
-    max_time: Duration,
-) -> (Command, Node) {
-    let mut root_node = match previous_root {
-        None => Node {
-            state: state.clone(),
-            score_sum: ScoreSum::new(),
-            player_score_sums: [HashMap::new(), HashMap::new()],
-            unexplored: mcts_move_combo(state),
-            children: HashMap::new(),
-        },
-        Some(mut node) => node
-            .children
-            .drain()
-            .map(|(_k, n)| n)
-            .find(|n| n.state == *state)
-            .unwrap_or_else(|| {
-                eprintln!("Previous round did not appear in the cache");
-                Node {
-                    state: state.clone(),
-                    score_sum: ScoreSum::new(),
-                    player_score_sums: [HashMap::new(), HashMap::new()],
-                    unexplored: mcts_move_combo(state),
-                    children: HashMap::new(),
-                }
-            }),
-    };
-
-    while start_time.to(PreciseTime::now()) < max_time {
-        let _ = mcts(&mut root_node);
-    }
-
-    eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
-    for (command, score_sum) in &root_node.player_score_sums[0] {
-        eprintln!(
-            "{} = {} ({} visits)",
-            command,
-            score_sum.avg().val,
-            score_sum.visit_count
-        );
-    }
-
-    let chosen_command = best_player_move(&root_node);
-
-    root_node
-        .children
-        .retain(|[c1, _], _| *c1 == chosen_command);
-
-    (chosen_command, root_node)
-}
-
-pub struct Node {
-    state: GameBoard,
-    score_sum: ScoreSum,
-    player_score_sums: [HashMap<Command, ScoreSum>; 2],
-    unexplored: Vec<[Command; 2]>,
-    children: HashMap<[Command; 2], Node>,
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
-struct Score {
-    val: f32,
-}
-
-impl AddAssign for Score {
-    fn add_assign(&mut self, other: Self) {
-        self.val = self.val + other.val;
-    }
-}
-
-impl Div<u32> for Score {
-    type Output = Self;
-    fn div(self, other: u32) -> Self {
-        Score {
-            val: self.val / other as f32,
-        }
-    }
-}
-
-impl cmp::Eq for Score {}
-impl cmp::Ord for Score {
-    fn cmp(&self, other: &Score) -> cmp::Ordering {
-        self.val
-            .partial_cmp(&other.val)
-            .unwrap_or(cmp::Ordering::Equal)
-    }
-}
-
-struct ScoreSum {
-    sum: Score,
-    visit_count: u32,
-}
-
-impl ScoreSum {
-    fn new() -> ScoreSum {
-        ScoreSum {
-            sum: Score { val: 0. },
-            visit_count: 0,
-        }
-    }
-    fn with_initial(score: Score) -> ScoreSum {
-        ScoreSum {
-            sum: score,
-            visit_count: 1,
-        }
-    }
-    fn avg(&self) -> Score {
-        self.sum / self.visit_count
-    }
-}
-
-impl AddAssign<Score> for ScoreSum {
-    fn add_assign(&mut self, other: Score) {
-        self.sum += other;
-        self.visit_count = self.visit_count.saturating_add(1);
-    }
-}
-
-// TODO: Look at transforming this into more of a minimax with AB pruning approach?
-fn mcts(node: &mut Node) -> Score {
-    if node.state.outcome != SimulationOutcome::Continue {
-        score(&node.state)
-    } else if let Some(commands) = node.unexplored.pop() {
-        let mut new_state = node.state.clone();
-        new_state.simulate(commands);
-        let score = rollout(&new_state);
-        let unexplored = if new_state.outcome == SimulationOutcome::Continue {
-            mcts_move_combo(&new_state)
-        } else {
-            Vec::new()
-        };
-
-        let new_node = Node {
-            state: new_state,
-            score_sum: ScoreSum::with_initial(score),
-            player_score_sums: [HashMap::new(), HashMap::new()],
-            unexplored,
-            children: HashMap::new(),
-        };
-        node.children.insert(commands, new_node);
-
-        update(node, commands, score);
-        score
-    } else {
-        let commands = choose_existing(node);
-        let score = mcts(
-            node.children
-                .get_mut(&commands)
-                .expect("The existing node hasn't been tried yet"),
-        );
-        update(node, commands, score);
-        score
-    }
-}
-
-fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
-    let player_moves = valid_moves(state, 0);
-    let opponent_moves = valid_moves(state, 1);
-    debug_assert!(!player_moves.is_empty(), "No player moves");
-    debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
-
-    let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
-    for p in &player_moves {
-        for o in &opponent_moves {
-            result.push([*p, *o]);
-        }
-    }
-
-    result
-}
-
-fn best_player_move(node: &Node) -> Command {
-    node.player_score_sums[0]
-        .iter()
-        .max_by_key(|(_command, score_sum)| score_sum.avg())
-        .map(|(command, _score_sum)| *command)
-        .unwrap_or_else(|| Command::new(Action::DoNothing))
-}
-
-fn score(state: &GameBoard) -> Score {
-    Score {
-        val: match state.outcome {
-            SimulationOutcome::PlayerWon(0) => 500.,
-            SimulationOutcome::PlayerWon(1) => -500.,
-            _ => (state.players[0].score() - state.players[1].score()) as f32,
-        },
-    }
-}
-
-fn rollout(state: &GameBoard) -> Score {
-    score(state)
-}
-
-fn choose_existing(node: &Node) -> [Command; 2] {
-    [choose_one_existing(node, 0), choose_one_existing(node, 1)]
-}
-
-fn choose_one_existing(node: &Node, player_index: usize) -> Command {
-    let ln_n = (node.score_sum.visit_count as f32).ln();
-    let c = 100.;
-    let multiplier = if player_index == 0 { 1. } else { -1. };
-    node.player_score_sums[player_index]
-        .iter()
-        .max_by_key(|(_command, score_sum)| {
-            (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
-                as i32
-        })
-        .map(|(command, _score_sum)| *command)
-        .unwrap_or_else(|| Command::new(Action::DoNothing))
-}
-
-fn update(node: &mut Node, commands: [Command; 2], score: Score) {
-    *node.player_score_sums[0]
-        .entry(commands[0])
-        .or_insert_with(ScoreSum::new) += score;
-    *node.player_score_sums[1]
-        .entry(commands[1])
-        .or_insert_with(ScoreSum::new) += score;
-    node.score_sum += score;
-}
-
-fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
-    state
-        .valid_shoot_commands(player_index)
-        .iter()
-        .chain(state.valid_move_commands(player_index).iter())
-        .chain(state.valid_bomb_commands(player_index).iter())
-        .chain([Command::new(Action::DoNothing)].iter())
-        .cloned()
-        .collect()
-}
+pub use mcts::{choose_move, Node};
diff --git a/src/strategy/mcts.rs b/src/strategy/mcts.rs
new file mode 100644
index 0000000..0d1540e
--- /dev/null
+++ b/src/strategy/mcts.rs
@@ -0,0 +1,241 @@
+use crate::command::{Action, Command};
+use crate::game::{GameBoard, SimulationOutcome};
+
+use std::cmp;
+use std::collections::HashMap;
+use std::ops::*;
+use time::{Duration, PreciseTime};
+
+pub fn choose_move(
+    state: &GameBoard,
+    previous_root: Option<Node>,
+    start_time: PreciseTime,
+    max_time: Duration,
+) -> (Command, Node) {
+    let mut root_node = match previous_root {
+        None => Node {
+            state: state.clone(),
+            score_sum: ScoreSum::new(),
+            player_score_sums: [HashMap::new(), HashMap::new()],
+            unexplored: mcts_move_combo(state),
+            children: HashMap::new(),
+        },
+        Some(mut node) => node
+            .children
+            .drain()
+            .map(|(_k, n)| n)
+            .find(|n| n.state == *state)
+            .unwrap_or_else(|| {
+                eprintln!("Previous round did not appear in the cache");
+                Node {
+                    state: state.clone(),
+                    score_sum: ScoreSum::new(),
+                    player_score_sums: [HashMap::new(), HashMap::new()],
+                    unexplored: mcts_move_combo(state),
+                    children: HashMap::new(),
+                }
+            }),
+    };
+
+    while start_time.to(PreciseTime::now()) < max_time {
+        let _ = mcts(&mut root_node);
+    }
+
+    eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
+    for (command, score_sum) in &root_node.player_score_sums[0] {
+        eprintln!(
+            "{} = {} ({} visits)",
+            command,
+            score_sum.avg().val,
+            score_sum.visit_count
+        );
+    }
+
+    let chosen_command = best_player_move(&root_node);
+
+    root_node
+        .children
+        .retain(|[c1, _], _| *c1 == chosen_command);
+
+    (chosen_command, root_node)
+}
+
+pub struct Node {
+    state: GameBoard,
+    score_sum: ScoreSum,
+    player_score_sums: [HashMap<Command, ScoreSum>; 2],
+    unexplored: Vec<[Command; 2]>,
+    children: HashMap<[Command; 2], Node>,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+struct Score {
+    val: f32,
+}
+
+impl AddAssign for Score {
+    fn add_assign(&mut self, other: Self) {
+        self.val = self.val + other.val;
+    }
+}
+
+impl Div<u32> for Score {
+    type Output = Self;
+    fn div(self, other: u32) -> Self {
+        Score {
+            val: self.val / other as f32,
+        }
+    }
+}
+
+impl cmp::Eq for Score {}
+impl cmp::Ord for Score {
+    fn cmp(&self, other: &Score) -> cmp::Ordering {
+        self.val
+            .partial_cmp(&other.val)
+            .unwrap_or(cmp::Ordering::Equal)
+    }
+}
+
+struct ScoreSum {
+    sum: Score,
+    visit_count: u32,
+}
+
+impl ScoreSum {
+    fn new() -> ScoreSum {
+        ScoreSum {
+            sum: Score { val: 0. },
+            visit_count: 0,
+        }
+    }
+    fn with_initial(score: Score) -> ScoreSum {
+        ScoreSum {
+            sum: score,
+            visit_count: 1,
+        }
+    }
+    fn avg(&self) -> Score {
+        self.sum / self.visit_count
+    }
+}
+
+impl AddAssign<Score> for ScoreSum {
+    fn add_assign(&mut self, other: Score) {
+        self.sum += other;
+        self.visit_count = self.visit_count.saturating_add(1);
+    }
+}
+
+fn mcts(node: &mut Node) -> Score {
+    if node.state.outcome != SimulationOutcome::Continue {
+        score(&node.state)
+    } else if let Some(commands) = node.unexplored.pop() {
+        let mut new_state = node.state.clone();
+        new_state.simulate(commands);
+        let score = rollout(&new_state);
+        let unexplored = if new_state.outcome == SimulationOutcome::Continue {
+            mcts_move_combo(&new_state)
+        } else {
+            Vec::new()
+        };
+
+        let new_node = Node {
+            state: new_state,
+            score_sum: ScoreSum::with_initial(score),
+            player_score_sums: [HashMap::new(), HashMap::new()],
+            unexplored,
+            children: HashMap::new(),
+        };
+        node.children.insert(commands, new_node);
+
+        update(node, commands, score);
+        score
+    } else {
+        let commands = choose_existing(node);
+        let score = mcts(
+            node.children
+                .get_mut(&commands)
+                .expect("The existing node hasn't been tried yet"),
+        );
+        update(node, commands, score);
+        score
+    }
+}
+
+fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
+    let player_moves = valid_moves(state, 0);
+    let opponent_moves = valid_moves(state, 1);
+    debug_assert!(!player_moves.is_empty(), "No player moves");
+    debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
+
+    let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
+    for p in &player_moves {
+        for o in &opponent_moves {
+            result.push([*p, *o]);
+        }
+    }
+
+    result
+}
+
+fn best_player_move(node: &Node) -> Command {
+    node.player_score_sums[0]
+        .iter()
+        .max_by_key(|(_command, score_sum)| score_sum.avg())
+        .map(|(command, _score_sum)| *command)
+        .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn score(state: &GameBoard) -> Score {
+    Score {
+        val: match state.outcome {
+            SimulationOutcome::PlayerWon(0) => 500.,
+            SimulationOutcome::PlayerWon(1) => -500.,
+            _ => (state.players[0].score() - state.players[1].score()) as f32,
+        },
+    }
+}
+
+fn rollout(state: &GameBoard) -> Score {
+    score(state)
+}
+
+fn choose_existing(node: &Node) -> [Command; 2] {
+    [choose_one_existing(node, 0), choose_one_existing(node, 1)]
+}
+
+fn choose_one_existing(node: &Node, player_index: usize) -> Command {
+    let ln_n = (node.score_sum.visit_count as f32).ln();
+    let c = 100.;
+    let multiplier = if player_index == 0 { 1. } else { -1. };
+    node.player_score_sums[player_index]
+        .iter()
+        .max_by_key(|(_command, score_sum)| {
+            (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
+                as i32
+        })
+        .map(|(command, _score_sum)| *command)
+        .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn update(node: &mut Node, commands: [Command; 2], score: Score) {
+    *node.player_score_sums[0]
+        .entry(commands[0])
+        .or_insert_with(ScoreSum::new) += score;
+    *node.player_score_sums[1]
+        .entry(commands[1])
+        .or_insert_with(ScoreSum::new) += score;
+    node.score_sum += score;
+}
+
+fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
+    state
+        .valid_shoot_commands(player_index)
+        .iter()
+        .chain(state.valid_move_commands(player_index).iter())
+        .chain(state.valid_bomb_commands(player_index).iter())
+        .chain([Command::new(Action::DoNothing)].iter())
+        .cloned()
+        .collect()
+}
diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs
new file mode 100644
index 0000000..deba92e
--- /dev/null
+++ b/src/strategy/minimax.rs
@@ -0,0 +1,236 @@
+use crate::command::{Action, Command};
+use crate::game::{GameBoard, SimulationOutcome};
+
+use std::cmp;
+use std::collections::HashMap;
+use std::ops::*;
+use time::{Duration, PreciseTime};
+
+pub fn choose_move(
+    state: &GameBoard,
+    previous_root: Option<Node>,
+    start_time: PreciseTime,
+    max_time: Duration,
+) -> (Command, Node) {
+    let mut root_node = match previous_root {
+        None => Node {
+            score_sum: ScoreSum::new(),
+            player_score_sums: [HashMap::new(), HashMap::new()],
+            unexplored: mcts_move_combo(state),
+            children: HashMap::new(),
+        },
+        Some(mut node) => node
+            .children
+            .drain()
+            .map(|(_k, n)| n)
+            .find(|n| false) // TODO: Identify the last opponent move to use this cache
+            .unwrap_or_else(|| {
+                eprintln!("Previous round did not appear in the cache");
+                Node {
+                    state: state.clone(),
+                    score_sum: ScoreSum::new(),
+                    player_score_sums: [HashMap::new(), HashMap::new()],
+                    unexplored: mcts_move_combo(state),
+                    children: HashMap::new(),
+                }
+            }),
+    };
+
+    while start_time.to(PreciseTime::now()) < max_time {
+        let _ = mcts(&mut root_node);
+    }
+
+    eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
+    for (command, score_sum) in &root_node.player_score_sums[0] {
+        eprintln!(
+            "{} = {} ({} visits)",
+            command,
+            score_sum.avg().val,
+            score_sum.visit_count
+        );
+    }
+
+    let chosen_command = best_player_move(&root_node);
+
+    root_node
+        .children
+        .retain(|[c1, _], _| *c1 == chosen_command);
+
+    (chosen_command, root_node)
+}
+
+pub struct Node {
+    score_sum: ScoreSum,
+    player_score_sums: [HashMap<Command, ScoreSum>; 2],
+    unexplored: Vec<[Command; 2]>,
+    children: HashMap<[Command; 2], Node>,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+struct Score {
+    val: f32,
+}
+
+impl AddAssign for Score {
+    fn add_assign(&mut self, other: Self) {
+        self.val = self.val + other.val;
+    }
+}
+
+impl Div<u32> for Score {
+    type Output = Self;
+    fn div(self, other: u32) -> Self {
+        Score {
+            val: self.val / other as f32,
+        }
+    }
+}
+
+impl cmp::Eq for Score {}
+impl cmp::Ord for Score {
+    fn cmp(&self, other: &Score) -> cmp::Ordering {
+        self.val
+            .partial_cmp(&other.val)
+            .unwrap_or(cmp::Ordering::Equal)
+    }
+}
+
+struct ScoreSum {
+    sum: Score,
+    visit_count: u32,
+}
+
+impl ScoreSum {
+    fn new() -> ScoreSum {
+        ScoreSum {
+            sum: Score { val: 0. },
+            visit_count: 0,
+        }
+    }
+    fn with_initial(score: Score) -> ScoreSum {
+        ScoreSum {
+            sum: score,
+            visit_count: 1,
+        }
+    }
+    fn avg(&self) -> Score {
+        self.sum / self.visit_count
+    }
+}
+
+impl AddAssign<Score> for ScoreSum {
+    fn add_assign(&mut self, other: Score) {
+        self.sum += other;
+        self.visit_count = self.visit_count.saturating_add(1);
+    }
+}
+
+// TODO: Transform this into more of a minimax with AB pruning approach?
+fn mcts(node: &mut Node) -> Score {
+    if node.state.outcome != SimulationOutcome::Continue {
+        score(&node.state)
+    } else if let Some(commands) = node.unexplored.pop() {
+        let mut new_state = node.state.clone();
+        new_state.simulate(commands);
+        let score = score(&new_state);
+        let unexplored = if new_state.outcome == SimulationOutcome::Continue {
+            mcts_move_combo(&new_state)
+        } else {
+            Vec::new()
+        };
+
+        let new_node = Node {
+            state: new_state,
+            score_sum: ScoreSum::with_initial(score),
+            player_score_sums: [HashMap::new(), HashMap::new()],
+            unexplored,
+            children: HashMap::new(),
+        };
+        node.children.insert(commands, new_node);
+
+        update(node, commands, score);
+        score
+    } else {
+        let commands = choose_existing(node);
+        let score = mcts(
+            node.children
+                .get_mut(&commands)
+                .expect("The existing node hasn't been tried yet"),
+        );
+        update(node, commands, score);
+        score
+    }
+}
+
+fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
+    let player_moves = valid_moves(state, 0);
+    let opponent_moves = valid_moves(state, 1);
+    debug_assert!(!player_moves.is_empty(), "No player moves");
+    debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
+
+    let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
+    for p in &player_moves {
+        for o in &opponent_moves {
+            result.push([*p, *o]);
+        }
+    }
+
+    result
+}
+
+fn best_player_move(node: &Node) -> Command {
+    node.player_score_sums[0]
+        .iter()
+        .max_by_key(|(_command, score_sum)| score_sum.avg())
+        .map(|(command, _score_sum)| *command)
+        .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn score(state: &GameBoard) -> Score {
+    Score {
+        val: match state.outcome {
+            SimulationOutcome::PlayerWon(0) => 500.,
+            SimulationOutcome::PlayerWon(1) => -500.,
+            _ => (state.players[0].score() - state.players[1].score()) as f32,
+        },
+    }
+}
+
+fn choose_existing(node: &Node) -> [Command; 2] {
+    [choose_one_existing(node, 0), choose_one_existing(node, 1)]
+}
+
+fn choose_one_existing(node: &Node, player_index: usize) -> Command {
+    let ln_n = (node.score_sum.visit_count as f32).ln();
+    let c = 100.;
+    let multiplier = if player_index == 0 { 1. } else { -1. };
+    node.player_score_sums[player_index]
+        .iter()
+        .max_by_key(|(_command, score_sum)| {
+            (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
+                as i32
+        })
+        .map(|(command, _score_sum)| *command)
+        .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn update(node: &mut Node, commands: [Command; 2], score: Score) {
+    *node.player_score_sums[0]
+        .entry(commands[0])
+        .or_insert_with(ScoreSum::new) += score;
+    *node.player_score_sums[1]
+        .entry(commands[1])
+        .or_insert_with(ScoreSum::new) += score;
+    node.score_sum += score;
+}
+
+fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
+    state
+        .valid_shoot_commands(player_index)
+        .iter()
+        .chain(state.valid_move_commands(player_index).iter())
+        .chain(state.valid_bomb_commands(player_index).iter())
+        .chain([Command::new(Action::DoNothing)].iter())
+        .cloned()
+        .collect()
+}
author	Justin Worthe <justin@worthe-it.co.za>	2019-07-07 14:46:26 +0200
committer	Justin Worthe <justin@worthe-it.co.za>	2019-07-07 14:46:26 +0200
commit	41d9a1254477fd2a7ecd10bf2573dc4b62660465 (patch)
tree	cc7dcad811e6b1d2fdd4d58fecba40769c1772a1
parent	352ad4a297511dfe843bad5206879983ad34f3e2 (diff)