diff options
author | Justin Worthe <justin@worthe-it.co.za> | 2019-08-13 22:00:10 +0200 |
---|---|---|
committer | Justin Worthe <justin@worthe-it.co.za> | 2019-08-13 22:00:10 +0200 |
commit | dabedf442d65fcaec36d0a30ed3ed2327b39a44d (patch) | |
tree | a9bbcb4e97dcb98050fd872997ace284f2f764e0 /src/strategy | |
parent | 33b9c9e05a3693d944342753288fda824f0da13c (diff) |
Trimming down on redundant code and tweaking perf
Diffstat (limited to 'src/strategy')
-rw-r--r-- | src/strategy/mcts.rs | 230 | ||||
-rw-r--r-- | src/strategy/minimax.rs | 104 |
2 files changed, 18 insertions, 316 deletions
diff --git a/src/strategy/mcts.rs b/src/strategy/mcts.rs deleted file mode 100644 index c122fa1..0000000 --- a/src/strategy/mcts.rs +++ /dev/null @@ -1,230 +0,0 @@ -use crate::command::{Action, Command}; -use crate::game::{GameBoard, SimulationOutcome}; - -use std::cmp; -use std::collections::HashMap; -use std::ops::*; -use time::{Duration, PreciseTime}; - -pub fn choose_move( - state: &GameBoard, - previous_root: Option<Node>, - start_time: PreciseTime, - max_time: Duration, -) -> (Command, Node) { - let mut root_node = match previous_root { - None => Node { - state: state.clone(), - score_sum: ScoreSum::new(), - player_score_sums: [HashMap::new(), HashMap::new()], - unexplored: mcts_move_combo(state), - children: HashMap::new(), - }, - Some(mut node) => node - .children - .drain() - .map(|(_k, n)| n) - .find(|n| n.state == *state) - .unwrap_or_else(|| { - eprintln!("Previous round did not appear in the cache"); - Node { - state: state.clone(), - score_sum: ScoreSum::new(), - player_score_sums: [HashMap::new(), HashMap::new()], - unexplored: mcts_move_combo(state), - children: HashMap::new(), - } - }), - }; - - while start_time.to(PreciseTime::now()) < max_time { - let _ = mcts(&mut root_node); - } - - eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); - for (command, score_sum) in &root_node.player_score_sums[0] { - eprintln!( - "{} = {} ({} visits)", - command, - score_sum.avg().val, - score_sum.visit_count - ); - } - - let chosen_command = best_player_move(&root_node); - - root_node - .children - .retain(|[c1, _], _| *c1 == chosen_command); - - (chosen_command, root_node) -} - -pub struct Node { - state: GameBoard, - score_sum: ScoreSum, - player_score_sums: [HashMap<Command, ScoreSum>; 2], - unexplored: Vec<[Command; 2]>, - children: HashMap<[Command; 2], Node>, -} - -#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] -struct Score { - val: f32, -} - -impl AddAssign for Score { - fn add_assign(&mut self, other: Self) { - self.val = self.val + other.val; - } -} - -impl Div<u32> for Score { - type Output = Self; - fn div(self, other: u32) -> Self { - Score { - val: self.val / other as f32, - } - } -} - -impl cmp::Eq for Score {} -impl cmp::Ord for Score { - fn cmp(&self, other: &Score) -> cmp::Ordering { - self.val - .partial_cmp(&other.val) - .unwrap_or(cmp::Ordering::Equal) - } -} - -struct ScoreSum { - sum: Score, - visit_count: u32, -} - -impl ScoreSum { - fn new() -> ScoreSum { - ScoreSum { - sum: Score { val: 0. }, - visit_count: 0, - } - } - fn with_initial(score: Score) -> ScoreSum { - ScoreSum { - sum: score, - visit_count: 1, - } - } - fn avg(&self) -> Score { - self.sum / self.visit_count - } -} - -impl AddAssign<Score> for ScoreSum { - fn add_assign(&mut self, other: Score) { - self.sum += other; - self.visit_count = self.visit_count.saturating_add(1); - } -} - -fn mcts(node: &mut Node) -> Score { - if node.state.outcome != SimulationOutcome::Continue { - score(&node.state) - } else if let Some(commands) = node.unexplored.pop() { - let mut new_state = node.state.clone(); - new_state.simulate(commands); - let score = rollout(&new_state); - let unexplored = if new_state.outcome == SimulationOutcome::Continue { - mcts_move_combo(&new_state) - } else { - Vec::new() - }; - - let new_node = Node { - state: new_state, - score_sum: ScoreSum::with_initial(score), - player_score_sums: [HashMap::new(), HashMap::new()], - unexplored, - children: HashMap::new(), - }; - node.children.insert(commands, new_node); - - update(node, commands, score); - score - } else { - let commands = choose_existing(node); - let score = mcts( - node.children - .get_mut(&commands) - .expect("The existing node hasn't been tried yet"), - ); - update(node, commands, score); - score - } -} - -fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> { - let player_moves = state.valid_moves(0); - let opponent_moves = state.valid_moves(1); - debug_assert!(!player_moves.is_empty(), "No player moves"); - debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); - - let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); - for p in &player_moves { - for o in &opponent_moves { - result.push([*p, *o]); - } - } - - result -} - -fn best_player_move(node: &Node) -> Command { - node.player_score_sums[0] - .iter() - .max_by_key(|(_command, score_sum)| score_sum.avg()) - .map(|(command, _score_sum)| *command) - .unwrap_or_else(|| Command::new(Action::DoNothing)) -} - -fn score(state: &GameBoard) -> Score { - Score { - val: match state.outcome { - SimulationOutcome::PlayerWon(0) => 3000., - SimulationOutcome::PlayerWon(1) => -3000., - _ => (state.players[0].score() - state.players[1].score()) as f32, - }, - } -} - -fn rollout(state: &GameBoard) -> Score { - score(state) -} - -fn choose_existing(node: &Node) -> [Command; 2] { - [choose_one_existing(node, 0), choose_one_existing(node, 1)] -} - -fn choose_one_existing(node: &Node, player_index: usize) -> Command { - let ln_n = (node.score_sum.visit_count as f32).ln(); - let c = 100.; - let multiplier = if player_index == 0 { 1. } else { -1. }; - node.player_score_sums[player_index] - .iter() - .max_by_key(|(_command, score_sum)| { - (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) - as i32 - }) - .map(|(command, _score_sum)| *command) - .unwrap_or_else(|| Command::new(Action::DoNothing)) -} - -fn update(node: &mut Node, commands: [Command; 2], score: Score) { - *node.player_score_sums[0] - .entry(commands[0]) - .or_insert_with(ScoreSum::new) += score; - *node.player_score_sums[1] - .entry(commands[1]) - .or_insert_with(ScoreSum::new) += score; - node.score_sum += score; -} diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs index 2c52127..5a7bbcd 100644 --- a/src/strategy/minimax.rs +++ b/src/strategy/minimax.rs @@ -7,21 +7,15 @@ use std::cmp; use std::ops::*; use time::{Duration, PreciseTime}; -// TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other? -// What about: -// - Creating a list (mins and maxes) -// - Keep adding a new guess, run against all, and sort the list by fitness. -// - Repeat until list has many values -// - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half? #[derive(Debug, Clone)] pub struct ScoreConfig { - max_health_weight: f32, - total_health_weight: f32, - points_weight: f32, - victory_weight: f32, - snowball_weight: f32, - bomb_weight: f32, - explore_exploit_weight: f32, + pub max_health_weight: f32, + pub total_health_weight: f32, + pub points_weight: f32, + pub victory_weight: f32, + pub snowball_weight: f32, + pub bomb_weight: f32, + pub explore_exploit_weight: f32, } impl Default for ScoreConfig { @@ -38,7 +32,6 @@ impl Default for ScoreConfig { } } -// TODO: Cache results from last round based on player / opponent move and worm positions pub fn choose_move( state: &GameBoard, config: &ScoreConfig, @@ -86,15 +79,15 @@ pub fn choose_move_with_normalized_perf( let _ = expand_tree(&mut root_node, state.clone(), config); } - eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); - for (command, score_sum) in &root_node.player_score_sums[player_index] { - eprintln!( - "{} = {} ({} visits)", - command, - score_sum.avg().val, - score_sum.visit_count - ); - } + // eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); + // for (command, score_sum) in &root_node.player_score_sums[player_index] { + // eprintln!( + // "{} = {} ({} visits)", + // command, + // score_sum.avg().val, + // score_sum.visit_count + // ); + // } best_player_move(&root_node, player_index) } @@ -178,7 +171,6 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S if state.outcome != SimulationOutcome::Continue { score(&state, config) } else if let Some(commands) = node.unexplored.pop() { - // TODO: Explore preemptively doing the rollout? state.simulate(commands); let score = score(&state, config); let unexplored = if state.outcome == SimulationOutcome::Continue { @@ -196,8 +188,6 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S node.children.insert(commands, new_node); update(node, commands, score); - // TODO: Prune dominated moves - score } else { let commands = choose_existing(node, config); @@ -215,8 +205,8 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S } fn move_combos(state: &GameBoard) -> Vec<[Command; 2]> { - let player_moves = pruned_moves(state, 0); - let opponent_moves = pruned_moves(state, 1); + let player_moves = state.pruned_valid_moves(0); + let opponent_moves = state.pruned_valid_moves(1); debug_assert!(!player_moves.is_empty(), "No player moves"); debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); @@ -255,8 +245,6 @@ fn score(state: &GameBoard, config: &ScoreConfig) -> Score { let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32; let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32; - // TODO: None of these attributes give any signal early on. - // TODO: Try adding new features here. Something about board position? Score { val: max_health * config.max_health_weight + total_health * config.total_health_weight @@ -318,59 +306,3 @@ fn update(node: &mut Node, commands: [Command; 2], score: Score) { .or_insert_with(ScoreSum::new) += score; node.score_sum += score; } - -fn pruned_moves(state: &GameBoard, player_index: usize) -> Vec<Command> { - let sim_with_idle_opponent = |cmd| { - let mut idle_commands = [ - Command::new(Action::DoNothing), - Command::new(Action::DoNothing), - ]; - idle_commands[player_index] = cmd; - let mut state_cpy = state.clone(); - state_cpy.simulate(idle_commands); - state_cpy - }; - - let mut do_nothing_state = state.clone(); - do_nothing_state.simulate([ - Command::new(Action::DoNothing), - Command::new(Action::DoNothing), - ]); - - let opponent_index = GameBoard::opponent(player_index); - let my_starting_health = do_nothing_state.players[player_index].health(); - let opponent_starting_health = do_nothing_state.players[opponent_index].health(); - - state - .valid_moves(player_index) - .into_iter() - .filter(|command| { - // TODO: Some of these filters could be done with better - // performance by running them while generating the list - // of valid moves. - - // NB: These rules should pass for doing nothing, otherwise - // we need some other mechanism for sticking in a do - // nothing option. - - let idle_opponent_state = sim_with_idle_opponent(*command); - let hurt_self = idle_opponent_state.players[player_index].health() < my_starting_health; - let hurt_opponent = - idle_opponent_state.players[opponent_index].health() < opponent_starting_health; - let frozen_opponent = idle_opponent_state.players[opponent_index] - .worms - .iter() - .any(|w| w.rounds_until_unfrozen == FREEZE_DURATION); - - let is_select = command.worm.is_some(); - - let is_attack = command.action.is_attack(); - let is_snowball = command.action.is_snowball(); - - !hurt_self - && (!is_select || hurt_opponent) - && (!is_attack || hurt_opponent) - && (!is_snowball || frozen_opponent) - }) - .collect() -} |