use crate::command::{Action, Command}; use crate::constants::*; use crate::game::{GameBoard, SimulationOutcome}; use std::cmp; use std::collections::HashMap; use std::ops::*; use time::{Duration, PreciseTime}; // TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other? // What about: // - Creating a list (mins and maxes) // - Keep adding a new guess, run against all, and sort the list by fitness. // - Repeat until list has many values // - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half? #[derive(Debug, Clone)] pub struct ScoreConfig { max_health_weight: f32, total_health_weight: f32, points_weight: f32, victory_weight: f32, snowball_weight: f32, bomb_weight: f32, } impl Default for ScoreConfig { fn default() -> ScoreConfig { ScoreConfig { max_health_weight: 1., total_health_weight: 1., points_weight: 0., victory_weight: 3000., snowball_weight: 100., bomb_weight: 100., } } } // TODO: Cache results from last round based on player / opponent move and worm positions pub fn choose_move( state: &GameBoard, config: &ScoreConfig, start_time: PreciseTime, max_time: Duration, ) -> Command { let mut root_node = Node { score_sum: ScoreSum::new(), player_score_sums: [HashMap::new(), HashMap::new()], unexplored: move_combos(state), children: HashMap::new(), }; while start_time.to(PreciseTime::now()) < max_time { let _ = expand_tree(&mut root_node, state.clone(), config); } eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); for (command, score_sum) in &root_node.player_score_sums[0] { eprintln!( "{} = {} ({} visits)", command, score_sum.avg().val, score_sum.visit_count ); } best_player_move(&root_node, 0) } pub fn choose_move_with_normalized_perf( state: &GameBoard, config: &ScoreConfig, player_index: usize, iterations: usize, ) -> Command { let mut root_node = Node { score_sum: ScoreSum::new(), player_score_sums: [HashMap::new(), HashMap::new()], unexplored: move_combos(state), children: HashMap::new(), }; for _ in 0..iterations { let _ = expand_tree(&mut root_node, state.clone(), config); } best_player_move(&root_node, player_index) } pub struct Node { score_sum: ScoreSum, player_score_sums: [HashMap; 2], unexplored: Vec<[Command; 2]>, children: HashMap<[Command; 2], Node>, } #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] struct Score { val: f32, } impl AddAssign for Score { fn add_assign(&mut self, other: Self) { self.val = self.val + other.val; } } impl Div for Score { type Output = Self; fn div(self, other: u32) -> Self { Score { val: self.val / other as f32, } } } impl Mul for Score { type Output = Self; fn mul(self, other: f32) -> Self { Score { val: self.val * other, } } } impl cmp::Eq for Score {} impl cmp::Ord for Score { fn cmp(&self, other: &Score) -> cmp::Ordering { self.val .partial_cmp(&other.val) .unwrap_or(cmp::Ordering::Equal) } } struct ScoreSum { sum: Score, visit_count: u32, } impl ScoreSum { fn new() -> ScoreSum { ScoreSum { sum: Score { val: 0. }, visit_count: 0, } } fn with_initial(score: Score) -> ScoreSum { ScoreSum { sum: score, visit_count: 1, } } fn avg(&self) -> Score { self.sum / self.visit_count } } impl AddAssign for ScoreSum { fn add_assign(&mut self, other: Score) { self.sum += other; self.visit_count = self.visit_count.saturating_add(1); } } fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> Score { if state.outcome != SimulationOutcome::Continue { score(&state, config) } else if let Some(commands) = node.unexplored.pop() { // TODO: Explore preemptively doing the rollout? state.simulate(commands); let score = score(&state, config); let unexplored = if state.outcome == SimulationOutcome::Continue { move_combos(&state) } else { Vec::new() }; let new_node = Node { score_sum: ScoreSum::with_initial(score), player_score_sums: [HashMap::new(), HashMap::new()], unexplored, children: HashMap::new(), }; node.children.insert(commands, new_node); update(node, commands, score); // TODO: Prune dominated moves score } else { let commands = choose_existing(node); state.simulate(commands); let score = expand_tree( node.children .get_mut(&commands) .expect("The existing node hasn't been tried yet"), state, config, ); update(node, commands, score); score } } fn move_combos(state: &GameBoard) -> Vec<[Command; 2]> { let player_moves = pruned_moves(state, 0); let opponent_moves = pruned_moves(state, 1); debug_assert!(!player_moves.is_empty(), "No player moves"); debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); for p in &player_moves { for o in &opponent_moves { result.push([*p, *o]); } } result } fn best_player_move(node: &Node, player_index: usize) -> Command { let multiplier = if player_index == 0 { 1. } else { -1. }; node.player_score_sums[player_index] .iter() .max_by_key(|(_command, score_sum)| score_sum.avg() * multiplier) .map(|(command, _score_sum)| *command) .unwrap_or_else(|| Command::new(Action::DoNothing)) } fn score(state: &GameBoard, config: &ScoreConfig) -> Score { let max_health = (state.players[0].max_worm_health() - state.players[1].max_worm_health()) as f32; let total_health = (state.players[0].health() - state.players[1].health()) as f32; let points = (state.players[0].score() - state.players[1].score()) as f32; let victory = match state.outcome { SimulationOutcome::PlayerWon(0) => 1., SimulationOutcome::PlayerWon(1) => -1., _ => 0., }; let time_to_end = MAX_ROUNDS as f32 - state.round as f32 + 1.; let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32; let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32; // TODO: Try adding new features here. Something about board position? Score { val: max_health * config.max_health_weight + total_health * config.total_health_weight + points * config.points_weight + victory * config.victory_weight + snowballs * config.snowball_weight / time_to_end + bombs * config.bomb_weight / time_to_end, } } fn choose_existing(node: &Node) -> [Command; 2] { [choose_one_existing(node, 0), choose_one_existing(node, 1)] } fn choose_one_existing(node: &Node, player_index: usize) -> Command { let ln_n = (node.score_sum.visit_count as f32).ln(); let c = 100.; let multiplier = if player_index == 0 { 1. } else { -1. }; node.player_score_sums[player_index] .iter() .max_by_key(|(_command, score_sum)| { (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) as i32 }) .map(|(command, _score_sum)| *command) .unwrap_or_else(|| Command::new(Action::DoNothing)) } fn update(node: &mut Node, commands: [Command; 2], score: Score) { *node.player_score_sums[0] .entry(commands[0]) .or_insert_with(ScoreSum::new) += score; *node.player_score_sums[1] .entry(commands[1]) .or_insert_with(ScoreSum::new) += score; node.score_sum += score; } fn pruned_moves(state: &GameBoard, player_index: usize) -> Vec { let sim_with_idle_opponent = |cmd| { let mut idle_commands = [ Command::new(Action::DoNothing), Command::new(Action::DoNothing), ]; idle_commands[player_index] = cmd; let mut state_cpy = state.clone(); state_cpy.simulate(idle_commands); state_cpy }; let mut do_nothing_state = state.clone(); do_nothing_state.simulate([ Command::new(Action::DoNothing), Command::new(Action::DoNothing), ]); let opponent_index = GameBoard::opponent(player_index); let my_starting_health = do_nothing_state.players[player_index].health(); let opponent_starting_health = do_nothing_state.players[opponent_index].health(); state .valid_moves(player_index) .into_iter() .filter(|command| { // TODO: Some of these filters could be done with better // performance by running them while generating the list // of valid moves. // NB: These rules should pass for doing nothing, otherwise // we need some other mechanism for sticking in a do // nothing option. let idle_opponent_state = sim_with_idle_opponent(*command); let hurt_self = idle_opponent_state.players[player_index].health() < my_starting_health; let hurt_opponent = idle_opponent_state.players[opponent_index].health() < opponent_starting_health; let frozen_opponent = idle_opponent_state.players[opponent_index] .worms .iter() .any(|w| w.rounds_until_unfrozen == FREEZE_DURATION); let is_select = command.worm.is_some(); let is_attack = command.action.is_attack(); let is_snowball = command.action.is_snowball(); !hurt_self && (!is_select || hurt_opponent) && (!is_attack || hurt_opponent) && (!is_snowball || frozen_opponent) }) .collect() }