summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Worthe <justin@worthe-it.co.za>2019-07-07 14:46:26 +0200
committerJustin Worthe <justin@worthe-it.co.za>2019-07-07 14:46:26 +0200
commit41d9a1254477fd2a7ecd10bf2573dc4b62660465 (patch)
treecc7dcad811e6b1d2fdd4d58fecba40769c1772a1
parent352ad4a297511dfe843bad5206879983ad34f3e2 (diff)
Strategy split to allow creating a minimax with pruning strategy
-rw-r--r--src/game.rs5
-rw-r--r--src/strategy.rs244
-rw-r--r--src/strategy/mcts.rs241
-rw-r--r--src/strategy/minimax.rs236
4 files changed, 483 insertions, 243 deletions
diff --git a/src/game.rs b/src/game.rs
index 56c583b..4e074ea 100644
--- a/src/game.rs
+++ b/src/game.rs
@@ -320,6 +320,8 @@ impl GameBoard {
// TODO: Destroy health packs
+ let map_clone = self.map.clone();
+
// TODO: iter, filter_map, filter, filter_map... flatten this block down a smidge.
for player_index in 0..actions.len() {
if let Action::Bomb(p) = actions[player_index] {
@@ -333,9 +335,8 @@ impl GameBoard {
for &(damage_offset, weapon_damage) in BOMB_DAMAGES.iter() {
let target = p + damage_offset;
- if self.map.at(target) == Some(true) {
+ if map_clone.at(target) == Some(true) {
self.map.clear(target);
- // TODO: Fix this so that simultaneous bomb throwing gives points to both players
self.players[player_index].moves_score += DIG_SCORE;
}
diff --git a/src/strategy.rs b/src/strategy.rs
index 0c86eeb..9c79d60 100644
--- a/src/strategy.rs
+++ b/src/strategy.rs
@@ -1,242 +1,4 @@
-use crate::command::{Action, Command};
-use crate::game::{GameBoard, SimulationOutcome};
+mod mcts;
+mod minimax;
-use std::cmp;
-use std::collections::HashMap;
-use std::ops::*;
-use time::{Duration, PreciseTime};
-
-pub fn choose_move(
- state: &GameBoard,
- previous_root: Option<Node>,
- start_time: PreciseTime,
- max_time: Duration,
-) -> (Command, Node) {
- let mut root_node = match previous_root {
- None => Node {
- state: state.clone(),
- score_sum: ScoreSum::new(),
- player_score_sums: [HashMap::new(), HashMap::new()],
- unexplored: mcts_move_combo(state),
- children: HashMap::new(),
- },
- Some(mut node) => node
- .children
- .drain()
- .map(|(_k, n)| n)
- .find(|n| n.state == *state)
- .unwrap_or_else(|| {
- eprintln!("Previous round did not appear in the cache");
- Node {
- state: state.clone(),
- score_sum: ScoreSum::new(),
- player_score_sums: [HashMap::new(), HashMap::new()],
- unexplored: mcts_move_combo(state),
- children: HashMap::new(),
- }
- }),
- };
-
- while start_time.to(PreciseTime::now()) < max_time {
- let _ = mcts(&mut root_node);
- }
-
- eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
- for (command, score_sum) in &root_node.player_score_sums[0] {
- eprintln!(
- "{} = {} ({} visits)",
- command,
- score_sum.avg().val,
- score_sum.visit_count
- );
- }
-
- let chosen_command = best_player_move(&root_node);
-
- root_node
- .children
- .retain(|[c1, _], _| *c1 == chosen_command);
-
- (chosen_command, root_node)
-}
-
-pub struct Node {
- state: GameBoard,
- score_sum: ScoreSum,
- player_score_sums: [HashMap<Command, ScoreSum>; 2],
- unexplored: Vec<[Command; 2]>,
- children: HashMap<[Command; 2], Node>,
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
-struct Score {
- val: f32,
-}
-
-impl AddAssign for Score {
- fn add_assign(&mut self, other: Self) {
- self.val = self.val + other.val;
- }
-}
-
-impl Div<u32> for Score {
- type Output = Self;
- fn div(self, other: u32) -> Self {
- Score {
- val: self.val / other as f32,
- }
- }
-}
-
-impl cmp::Eq for Score {}
-impl cmp::Ord for Score {
- fn cmp(&self, other: &Score) -> cmp::Ordering {
- self.val
- .partial_cmp(&other.val)
- .unwrap_or(cmp::Ordering::Equal)
- }
-}
-
-struct ScoreSum {
- sum: Score,
- visit_count: u32,
-}
-
-impl ScoreSum {
- fn new() -> ScoreSum {
- ScoreSum {
- sum: Score { val: 0. },
- visit_count: 0,
- }
- }
- fn with_initial(score: Score) -> ScoreSum {
- ScoreSum {
- sum: score,
- visit_count: 1,
- }
- }
- fn avg(&self) -> Score {
- self.sum / self.visit_count
- }
-}
-
-impl AddAssign<Score> for ScoreSum {
- fn add_assign(&mut self, other: Score) {
- self.sum += other;
- self.visit_count = self.visit_count.saturating_add(1);
- }
-}
-
-// TODO: Look at transforming this into more of a minimax with AB pruning approach?
-fn mcts(node: &mut Node) -> Score {
- if node.state.outcome != SimulationOutcome::Continue {
- score(&node.state)
- } else if let Some(commands) = node.unexplored.pop() {
- let mut new_state = node.state.clone();
- new_state.simulate(commands);
- let score = rollout(&new_state);
- let unexplored = if new_state.outcome == SimulationOutcome::Continue {
- mcts_move_combo(&new_state)
- } else {
- Vec::new()
- };
-
- let new_node = Node {
- state: new_state,
- score_sum: ScoreSum::with_initial(score),
- player_score_sums: [HashMap::new(), HashMap::new()],
- unexplored,
- children: HashMap::new(),
- };
- node.children.insert(commands, new_node);
-
- update(node, commands, score);
- score
- } else {
- let commands = choose_existing(node);
- let score = mcts(
- node.children
- .get_mut(&commands)
- .expect("The existing node hasn't been tried yet"),
- );
- update(node, commands, score);
- score
- }
-}
-
-fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
- let player_moves = valid_moves(state, 0);
- let opponent_moves = valid_moves(state, 1);
- debug_assert!(!player_moves.is_empty(), "No player moves");
- debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
-
- let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
- for p in &player_moves {
- for o in &opponent_moves {
- result.push([*p, *o]);
- }
- }
-
- result
-}
-
-fn best_player_move(node: &Node) -> Command {
- node.player_score_sums[0]
- .iter()
- .max_by_key(|(_command, score_sum)| score_sum.avg())
- .map(|(command, _score_sum)| *command)
- .unwrap_or_else(|| Command::new(Action::DoNothing))
-}
-
-fn score(state: &GameBoard) -> Score {
- Score {
- val: match state.outcome {
- SimulationOutcome::PlayerWon(0) => 500.,
- SimulationOutcome::PlayerWon(1) => -500.,
- _ => (state.players[0].score() - state.players[1].score()) as f32,
- },
- }
-}
-
-fn rollout(state: &GameBoard) -> Score {
- score(state)
-}
-
-fn choose_existing(node: &Node) -> [Command; 2] {
- [choose_one_existing(node, 0), choose_one_existing(node, 1)]
-}
-
-fn choose_one_existing(node: &Node, player_index: usize) -> Command {
- let ln_n = (node.score_sum.visit_count as f32).ln();
- let c = 100.;
- let multiplier = if player_index == 0 { 1. } else { -1. };
- node.player_score_sums[player_index]
- .iter()
- .max_by_key(|(_command, score_sum)| {
- (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
- as i32
- })
- .map(|(command, _score_sum)| *command)
- .unwrap_or_else(|| Command::new(Action::DoNothing))
-}
-
-fn update(node: &mut Node, commands: [Command; 2], score: Score) {
- *node.player_score_sums[0]
- .entry(commands[0])
- .or_insert_with(ScoreSum::new) += score;
- *node.player_score_sums[1]
- .entry(commands[1])
- .or_insert_with(ScoreSum::new) += score;
- node.score_sum += score;
-}
-
-fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
- state
- .valid_shoot_commands(player_index)
- .iter()
- .chain(state.valid_move_commands(player_index).iter())
- .chain(state.valid_bomb_commands(player_index).iter())
- .chain([Command::new(Action::DoNothing)].iter())
- .cloned()
- .collect()
-}
+pub use mcts::{choose_move, Node};
diff --git a/src/strategy/mcts.rs b/src/strategy/mcts.rs
new file mode 100644
index 0000000..0d1540e
--- /dev/null
+++ b/src/strategy/mcts.rs
@@ -0,0 +1,241 @@
+use crate::command::{Action, Command};
+use crate::game::{GameBoard, SimulationOutcome};
+
+use std::cmp;
+use std::collections::HashMap;
+use std::ops::*;
+use time::{Duration, PreciseTime};
+
+pub fn choose_move(
+ state: &GameBoard,
+ previous_root: Option<Node>,
+ start_time: PreciseTime,
+ max_time: Duration,
+) -> (Command, Node) {
+ let mut root_node = match previous_root {
+ None => Node {
+ state: state.clone(),
+ score_sum: ScoreSum::new(),
+ player_score_sums: [HashMap::new(), HashMap::new()],
+ unexplored: mcts_move_combo(state),
+ children: HashMap::new(),
+ },
+ Some(mut node) => node
+ .children
+ .drain()
+ .map(|(_k, n)| n)
+ .find(|n| n.state == *state)
+ .unwrap_or_else(|| {
+ eprintln!("Previous round did not appear in the cache");
+ Node {
+ state: state.clone(),
+ score_sum: ScoreSum::new(),
+ player_score_sums: [HashMap::new(), HashMap::new()],
+ unexplored: mcts_move_combo(state),
+ children: HashMap::new(),
+ }
+ }),
+ };
+
+ while start_time.to(PreciseTime::now()) < max_time {
+ let _ = mcts(&mut root_node);
+ }
+
+ eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
+ for (command, score_sum) in &root_node.player_score_sums[0] {
+ eprintln!(
+ "{} = {} ({} visits)",
+ command,
+ score_sum.avg().val,
+ score_sum.visit_count
+ );
+ }
+
+ let chosen_command = best_player_move(&root_node);
+
+ root_node
+ .children
+ .retain(|[c1, _], _| *c1 == chosen_command);
+
+ (chosen_command, root_node)
+}
+
+pub struct Node {
+ state: GameBoard,
+ score_sum: ScoreSum,
+ player_score_sums: [HashMap<Command, ScoreSum>; 2],
+ unexplored: Vec<[Command; 2]>,
+ children: HashMap<[Command; 2], Node>,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+struct Score {
+ val: f32,
+}
+
+impl AddAssign for Score {
+ fn add_assign(&mut self, other: Self) {
+ self.val = self.val + other.val;
+ }
+}
+
+impl Div<u32> for Score {
+ type Output = Self;
+ fn div(self, other: u32) -> Self {
+ Score {
+ val: self.val / other as f32,
+ }
+ }
+}
+
+impl cmp::Eq for Score {}
+impl cmp::Ord for Score {
+ fn cmp(&self, other: &Score) -> cmp::Ordering {
+ self.val
+ .partial_cmp(&other.val)
+ .unwrap_or(cmp::Ordering::Equal)
+ }
+}
+
+struct ScoreSum {
+ sum: Score,
+ visit_count: u32,
+}
+
+impl ScoreSum {
+ fn new() -> ScoreSum {
+ ScoreSum {
+ sum: Score { val: 0. },
+ visit_count: 0,
+ }
+ }
+ fn with_initial(score: Score) -> ScoreSum {
+ ScoreSum {
+ sum: score,
+ visit_count: 1,
+ }
+ }
+ fn avg(&self) -> Score {
+ self.sum / self.visit_count
+ }
+}
+
+impl AddAssign<Score> for ScoreSum {
+ fn add_assign(&mut self, other: Score) {
+ self.sum += other;
+ self.visit_count = self.visit_count.saturating_add(1);
+ }
+}
+
+fn mcts(node: &mut Node) -> Score {
+ if node.state.outcome != SimulationOutcome::Continue {
+ score(&node.state)
+ } else if let Some(commands) = node.unexplored.pop() {
+ let mut new_state = node.state.clone();
+ new_state.simulate(commands);
+ let score = rollout(&new_state);
+ let unexplored = if new_state.outcome == SimulationOutcome::Continue {
+ mcts_move_combo(&new_state)
+ } else {
+ Vec::new()
+ };
+
+ let new_node = Node {
+ state: new_state,
+ score_sum: ScoreSum::with_initial(score),
+ player_score_sums: [HashMap::new(), HashMap::new()],
+ unexplored,
+ children: HashMap::new(),
+ };
+ node.children.insert(commands, new_node);
+
+ update(node, commands, score);
+ score
+ } else {
+ let commands = choose_existing(node);
+ let score = mcts(
+ node.children
+ .get_mut(&commands)
+ .expect("The existing node hasn't been tried yet"),
+ );
+ update(node, commands, score);
+ score
+ }
+}
+
+fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
+ let player_moves = valid_moves(state, 0);
+ let opponent_moves = valid_moves(state, 1);
+ debug_assert!(!player_moves.is_empty(), "No player moves");
+ debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
+
+ let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
+ for p in &player_moves {
+ for o in &opponent_moves {
+ result.push([*p, *o]);
+ }
+ }
+
+ result
+}
+
+fn best_player_move(node: &Node) -> Command {
+ node.player_score_sums[0]
+ .iter()
+ .max_by_key(|(_command, score_sum)| score_sum.avg())
+ .map(|(command, _score_sum)| *command)
+ .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn score(state: &GameBoard) -> Score {
+ Score {
+ val: match state.outcome {
+ SimulationOutcome::PlayerWon(0) => 500.,
+ SimulationOutcome::PlayerWon(1) => -500.,
+ _ => (state.players[0].score() - state.players[1].score()) as f32,
+ },
+ }
+}
+
+fn rollout(state: &GameBoard) -> Score {
+ score(state)
+}
+
+fn choose_existing(node: &Node) -> [Command; 2] {
+ [choose_one_existing(node, 0), choose_one_existing(node, 1)]
+}
+
+fn choose_one_existing(node: &Node, player_index: usize) -> Command {
+ let ln_n = (node.score_sum.visit_count as f32).ln();
+ let c = 100.;
+ let multiplier = if player_index == 0 { 1. } else { -1. };
+ node.player_score_sums[player_index]
+ .iter()
+ .max_by_key(|(_command, score_sum)| {
+ (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
+ as i32
+ })
+ .map(|(command, _score_sum)| *command)
+ .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn update(node: &mut Node, commands: [Command; 2], score: Score) {
+ *node.player_score_sums[0]
+ .entry(commands[0])
+ .or_insert_with(ScoreSum::new) += score;
+ *node.player_score_sums[1]
+ .entry(commands[1])
+ .or_insert_with(ScoreSum::new) += score;
+ node.score_sum += score;
+}
+
+fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
+ state
+ .valid_shoot_commands(player_index)
+ .iter()
+ .chain(state.valid_move_commands(player_index).iter())
+ .chain(state.valid_bomb_commands(player_index).iter())
+ .chain([Command::new(Action::DoNothing)].iter())
+ .cloned()
+ .collect()
+}
diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs
new file mode 100644
index 0000000..deba92e
--- /dev/null
+++ b/src/strategy/minimax.rs
@@ -0,0 +1,236 @@
+use crate::command::{Action, Command};
+use crate::game::{GameBoard, SimulationOutcome};
+
+use std::cmp;
+use std::collections::HashMap;
+use std::ops::*;
+use time::{Duration, PreciseTime};
+
+pub fn choose_move(
+ state: &GameBoard,
+ previous_root: Option<Node>,
+ start_time: PreciseTime,
+ max_time: Duration,
+) -> (Command, Node) {
+ let mut root_node = match previous_root {
+ None => Node {
+ score_sum: ScoreSum::new(),
+ player_score_sums: [HashMap::new(), HashMap::new()],
+ unexplored: mcts_move_combo(state),
+ children: HashMap::new(),
+ },
+ Some(mut node) => node
+ .children
+ .drain()
+ .map(|(_k, n)| n)
+ .find(|n| false) // TODO: Identify the last opponent move to use this cache
+ .unwrap_or_else(|| {
+ eprintln!("Previous round did not appear in the cache");
+ Node {
+ state: state.clone(),
+ score_sum: ScoreSum::new(),
+ player_score_sums: [HashMap::new(), HashMap::new()],
+ unexplored: mcts_move_combo(state),
+ children: HashMap::new(),
+ }
+ }),
+ };
+
+ while start_time.to(PreciseTime::now()) < max_time {
+ let _ = mcts(&mut root_node);
+ }
+
+ eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
+ for (command, score_sum) in &root_node.player_score_sums[0] {
+ eprintln!(
+ "{} = {} ({} visits)",
+ command,
+ score_sum.avg().val,
+ score_sum.visit_count
+ );
+ }
+
+ let chosen_command = best_player_move(&root_node);
+
+ root_node
+ .children
+ .retain(|[c1, _], _| *c1 == chosen_command);
+
+ (chosen_command, root_node)
+}
+
+pub struct Node {
+ score_sum: ScoreSum,
+ player_score_sums: [HashMap<Command, ScoreSum>; 2],
+ unexplored: Vec<[Command; 2]>,
+ children: HashMap<[Command; 2], Node>,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+struct Score {
+ val: f32,
+}
+
+impl AddAssign for Score {
+ fn add_assign(&mut self, other: Self) {
+ self.val = self.val + other.val;
+ }
+}
+
+impl Div<u32> for Score {
+ type Output = Self;
+ fn div(self, other: u32) -> Self {
+ Score {
+ val: self.val / other as f32,
+ }
+ }
+}
+
+impl cmp::Eq for Score {}
+impl cmp::Ord for Score {
+ fn cmp(&self, other: &Score) -> cmp::Ordering {
+ self.val
+ .partial_cmp(&other.val)
+ .unwrap_or(cmp::Ordering::Equal)
+ }
+}
+
+struct ScoreSum {
+ sum: Score,
+ visit_count: u32,
+}
+
+impl ScoreSum {
+ fn new() -> ScoreSum {
+ ScoreSum {
+ sum: Score { val: 0. },
+ visit_count: 0,
+ }
+ }
+ fn with_initial(score: Score) -> ScoreSum {
+ ScoreSum {
+ sum: score,
+ visit_count: 1,
+ }
+ }
+ fn avg(&self) -> Score {
+ self.sum / self.visit_count
+ }
+}
+
+impl AddAssign<Score> for ScoreSum {
+ fn add_assign(&mut self, other: Score) {
+ self.sum += other;
+ self.visit_count = self.visit_count.saturating_add(1);
+ }
+}
+
+// TODO: Transform this into more of a minimax with AB pruning approach?
+fn mcts(node: &mut Node) -> Score {
+ if node.state.outcome != SimulationOutcome::Continue {
+ score(&node.state)
+ } else if let Some(commands) = node.unexplored.pop() {
+ let mut new_state = node.state.clone();
+ new_state.simulate(commands);
+ let score = score(&new_state);
+ let unexplored = if new_state.outcome == SimulationOutcome::Continue {
+ mcts_move_combo(&new_state)
+ } else {
+ Vec::new()
+ };
+
+ let new_node = Node {
+ state: new_state,
+ score_sum: ScoreSum::with_initial(score),
+ player_score_sums: [HashMap::new(), HashMap::new()],
+ unexplored,
+ children: HashMap::new(),
+ };
+ node.children.insert(commands, new_node);
+
+ update(node, commands, score);
+ score
+ } else {
+ let commands = choose_existing(node);
+ let score = mcts(
+ node.children
+ .get_mut(&commands)
+ .expect("The existing node hasn't been tried yet"),
+ );
+ update(node, commands, score);
+ score
+ }
+}
+
+fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
+ let player_moves = valid_moves(state, 0);
+ let opponent_moves = valid_moves(state, 1);
+ debug_assert!(!player_moves.is_empty(), "No player moves");
+ debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
+
+ let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
+ for p in &player_moves {
+ for o in &opponent_moves {
+ result.push([*p, *o]);
+ }
+ }
+
+ result
+}
+
+fn best_player_move(node: &Node) -> Command {
+ node.player_score_sums[0]
+ .iter()
+ .max_by_key(|(_command, score_sum)| score_sum.avg())
+ .map(|(command, _score_sum)| *command)
+ .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn score(state: &GameBoard) -> Score {
+ Score {
+ val: match state.outcome {
+ SimulationOutcome::PlayerWon(0) => 500.,
+ SimulationOutcome::PlayerWon(1) => -500.,
+ _ => (state.players[0].score() - state.players[1].score()) as f32,
+ },
+ }
+}
+
+fn choose_existing(node: &Node) -> [Command; 2] {
+ [choose_one_existing(node, 0), choose_one_existing(node, 1)]
+}
+
+fn choose_one_existing(node: &Node, player_index: usize) -> Command {
+ let ln_n = (node.score_sum.visit_count as f32).ln();
+ let c = 100.;
+ let multiplier = if player_index == 0 { 1. } else { -1. };
+ node.player_score_sums[player_index]
+ .iter()
+ .max_by_key(|(_command, score_sum)| {
+ (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
+ as i32
+ })
+ .map(|(command, _score_sum)| *command)
+ .unwrap_or_else(|| Command::new(Action::DoNothing))
+}
+
+fn update(node: &mut Node, commands: [Command; 2], score: Score) {
+ *node.player_score_sums[0]
+ .entry(commands[0])
+ .or_insert_with(ScoreSum::new) += score;
+ *node.player_score_sums[1]
+ .entry(commands[1])
+ .or_insert_with(ScoreSum::new) += score;
+ node.score_sum += score;
+}
+
+fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
+ state
+ .valid_shoot_commands(player_index)
+ .iter()
+ .chain(state.valid_move_commands(player_index).iter())
+ .chain(state.valid_bomb_commands(player_index).iter())
+ .chain([Command::new(Action::DoNothing)].iter())
+ .cloned()
+ .collect()
+}