From 3f5492b2bb67326be43cd7c5ba02ccf0ba1ae0e3 Mon Sep 17 00:00:00 2001 From: Justin Wernick Date: Tue, 19 Apr 2022 21:27:56 +0200 Subject: Refile for merging repos --- 2019-worms/src/strategy/minimax.rs | 330 +++++++++++++++++++++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100644 2019-worms/src/strategy/minimax.rs (limited to '2019-worms/src/strategy/minimax.rs') diff --git a/2019-worms/src/strategy/minimax.rs b/2019-worms/src/strategy/minimax.rs new file mode 100644 index 0000000..656ee36 --- /dev/null +++ b/2019-worms/src/strategy/minimax.rs @@ -0,0 +1,330 @@ +use crate::command::{Action, Command}; +use crate::constants::*; +use crate::game::{GameBoard, SimulationOutcome}; + +use fnv::FnvHashMap; +use std::cmp; +use std::ops::*; +use time::{Duration, PreciseTime}; + +#[derive(Debug, Clone)] +pub struct ScoreConfig { + pub max_health_weight: f32, + pub total_health_weight: f32, + pub points_weight: f32, + pub victory_weight: f32, + pub snowball_weight: f32, + pub bomb_weight: f32, + pub explore_exploit_weight: f32, +} + +impl Default for ScoreConfig { + fn default() -> ScoreConfig { + ScoreConfig { + max_health_weight: 100., + total_health_weight: 10., + points_weight: 1., + victory_weight: 4500., + snowball_weight: 10., + bomb_weight: 10., + explore_exploit_weight: 100., + } + } +} + +pub fn choose_move( + state: &GameBoard, + config: &ScoreConfig, + start_time: PreciseTime, + max_time: Duration, +) -> Command { + let mut root_node = Node { + score_sum: ScoreSum::new(), + player_score_sums: [FnvHashMap::default(), FnvHashMap::default()], + unexplored: move_combos(state), + children: FnvHashMap::default(), + }; + + #[cfg(feature = "logging")] + { + let mut max_expand_time = Duration::milliseconds(0); + while start_time.to(PreciseTime::now()) < max_time { + let expand_start_time = PreciseTime::now(); + let _ = expand_tree(&mut root_node, state.clone(), config); + max_expand_time = max_expand_time.max(expand_start_time.to(PreciseTime::now())); + } + eprintln!( + "Max expand time: {:?} ns", + max_expand_time.num_nanoseconds() + ); + } + #[cfg(not(feature = "logging"))] + { + while start_time.to(PreciseTime::now()) < max_time { + let _ = expand_tree(&mut root_node, state.clone(), config); + } + } + + #[cfg(feature = "logging")] + { + eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); + for (command, score_sum) in &root_node.player_score_sums[0] { + eprintln!( + "{} = {} ({} visits)", + command, + score_sum.avg().val, + score_sum.visit_count + ); + } + } + + best_player_move(&root_node, 0) +} + +pub fn choose_move_with_normalized_perf( + state: &GameBoard, + config: &ScoreConfig, + player_index: usize, + iterations: usize, +) -> Command { + let mut root_node = Node { + score_sum: ScoreSum::new(), + player_score_sums: [FnvHashMap::default(), FnvHashMap::default()], + unexplored: move_combos(state), + children: FnvHashMap::default(), + }; + + for _ in 0..iterations { + let _ = expand_tree(&mut root_node, state.clone(), config); + } + + #[cfg(feature = "logging")] + { + eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); + for (command, score_sum) in &root_node.player_score_sums[player_index] { + eprintln!( + "{} = {} ({} visits)", + command, + score_sum.avg().val, + score_sum.visit_count + ); + } + } + + best_player_move(&root_node, player_index) +} + +pub struct Node { + score_sum: ScoreSum, + player_score_sums: [FnvHashMap; 2], + unexplored: Vec<[Command; 2]>, + children: FnvHashMap<[Command; 2], Node>, +} + +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +struct Score { + val: f32, +} + +impl AddAssign for Score { + fn add_assign(&mut self, other: Self) { + self.val = self.val + other.val; + } +} + +impl Div for Score { + type Output = Self; + fn div(self, other: u32) -> Self { + Score { + val: self.val / other as f32, + } + } +} + +impl Mul for Score { + type Output = Self; + fn mul(self, other: f32) -> Self { + Score { + val: self.val * other, + } + } +} + +impl cmp::Eq for Score {} +impl cmp::Ord for Score { + fn cmp(&self, other: &Score) -> cmp::Ordering { + self.val + .partial_cmp(&other.val) + .unwrap_or(cmp::Ordering::Equal) + } +} + +struct ScoreSum { + sum: Score, + visit_count: u32, +} + +impl ScoreSum { + fn new() -> ScoreSum { + ScoreSum { + sum: Score { val: 0. }, + visit_count: 0, + } + } + fn with_initial(score: Score) -> ScoreSum { + ScoreSum { + sum: score, + visit_count: 1, + } + } + fn avg(&self) -> Score { + self.sum / self.visit_count + } +} + +impl AddAssign for ScoreSum { + fn add_assign(&mut self, other: Score) { + self.sum += other; + self.visit_count = self.visit_count.saturating_add(1); + } +} + +fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> Score { + if state.outcome != SimulationOutcome::Continue { + score(&state, config) + } else if let Some(commands) = node.unexplored.pop() { + state.simulate(commands); + let score = score(&state, config); + let unexplored = if state.outcome == SimulationOutcome::Continue { + move_combos(&state) + } else { + Vec::new() + }; + + let new_node = Node { + score_sum: ScoreSum::with_initial(score), + player_score_sums: [FnvHashMap::default(), FnvHashMap::default()], + unexplored, + children: FnvHashMap::default(), + }; + node.children.insert(commands, new_node); + update(node, commands, score); + + score + } else { + let commands = choose_existing(node, config); + state.simulate(commands); + let score = expand_tree( + node.children + .get_mut(&commands) + .expect("The existing node hasn't been tried yet"), + state, + config, + ); + update(node, commands, score); + score + } +} + +fn move_combos(state: &GameBoard) -> Vec<[Command; 2]> { + let player_moves = state.pruned_valid_moves(0); + let opponent_moves = state.pruned_valid_moves(1); + debug_assert!(!player_moves.is_empty(), "No player moves"); + debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); + + let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); + for p in &player_moves { + for o in &opponent_moves { + result.push([*p, *o]); + } + } + + result +} + +fn best_player_move(node: &Node, player_index: usize) -> Command { + let multiplier = if player_index == 0 { 1. } else { -1. }; + node.player_score_sums[player_index] + .iter() + .max_by_key(|(_command, score_sum)| score_sum.avg() * multiplier) + .map(|(command, _score_sum)| *command) + .unwrap_or_else(|| Command::new(Action::DoNothing)) +} + +fn score(state: &GameBoard, config: &ScoreConfig) -> Score { + let max_health = + (state.players[0].max_worm_health() - state.players[1].max_worm_health()) as f32; + let total_health = (state.players[0].health() - state.players[1].health()) as f32; + let points = (state.players[0].score() - state.players[1].score()) as f32; + let victory = match state.outcome { + SimulationOutcome::PlayerWon(0) => 1., + SimulationOutcome::PlayerWon(1) => -1., + _ => 0., + }; + + let time_to_end = MAX_ROUNDS as f32 - state.round as f32 + 1.; + + let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32; + let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32; + + Score { + val: max_health * config.max_health_weight + + total_health * config.total_health_weight + + points * config.points_weight + + victory * config.victory_weight + + snowballs * config.snowball_weight / time_to_end + + bombs * config.bomb_weight / time_to_end, + } +} + +fn choose_existing(node: &Node, config: &ScoreConfig) -> [Command; 2] { + [ + choose_one_existing(node, 0, config), + choose_one_existing(node, 1, config), + ] +} + +fn choose_one_existing(node: &Node, player_index: usize, config: &ScoreConfig) -> Command { + let ln_n = (node.score_sum.visit_count as f32).ln(); + let multiplier = if player_index == 0 { 1. } else { -1. }; + let mut command_confidences = + node.player_score_sums[player_index] + .iter() + .map(|(command, score_sum)| { + ( + command, + (score_sum.avg() * multiplier).val + + config.explore_exploit_weight + * (ln_n / score_sum.visit_count as f32).sqrt(), + ) + }); + + command_confidences + .next() + .map(|first| { + command_confidences + .fold( + first, + |(acc_command, acc_confidence), (next_command, next_confidence)| { + if acc_confidence > next_confidence { + (acc_command, acc_confidence) + } else { + (next_command, next_confidence) + } + }, + ) + .0 + .clone() + }) + .unwrap_or_else(|| Command::new(Action::DoNothing)) +} + +fn update(node: &mut Node, commands: [Command; 2], score: Score) { + *node.player_score_sums[0] + .entry(commands[0]) + .or_insert_with(ScoreSum::new) += score; + *node.player_score_sums[1] + .entry(commands[1]) + .or_insert_with(ScoreSum::new) += score; + node.score_sum += score; +} -- cgit v1.2.3