use crate::command::{Action, Command}; use crate::constants::*; use crate::game::{GameBoard, SimulationOutcome}; use fnv::FnvHashMap; use std::cmp; use std::ops::*; use time::{Duration, PreciseTime}; #[derive(Debug, Clone)] pub struct ScoreConfig { pub max_health_weight: f32, pub total_health_weight: f32, pub points_weight: f32, pub victory_weight: f32, pub snowball_weight: f32, pub bomb_weight: f32, pub explore_exploit_weight: f32, } impl Default for ScoreConfig { fn default() -> ScoreConfig { ScoreConfig { max_health_weight: 100., total_health_weight: 10., points_weight: 1., victory_weight: 4500., snowball_weight: 10., bomb_weight: 10., explore_exploit_weight: 100., } } } pub fn choose_move( state: &GameBoard, config: &ScoreConfig, start_time: PreciseTime, max_time: Duration, ) -> Command { let mut root_node = Node { score_sum: ScoreSum::new(), player_score_sums: [FnvHashMap::default(), FnvHashMap::default()], unexplored: move_combos(state), children: FnvHashMap::default(), }; #[cfg(feature = "logging")] { let mut max_expand_time = Duration::milliseconds(0); while start_time.to(PreciseTime::now()) < max_time { let expand_start_time = PreciseTime::now(); let _ = expand_tree(&mut root_node, state.clone(), config); max_expand_time = max_expand_time.max(expand_start_time.to(PreciseTime::now())); } eprintln!( "Max expand time: {:?} ns", max_expand_time.num_nanoseconds() ); } #[cfg(not(feature = "logging"))] { while start_time.to(PreciseTime::now()) < max_time { let _ = expand_tree(&mut root_node, state.clone(), config); } } #[cfg(feature = "logging")] { eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); for (command, score_sum) in &root_node.player_score_sums[0] { eprintln!( "{} = {} ({} visits)", command, score_sum.avg().val, score_sum.visit_count ); } } best_player_move(&root_node, 0) } pub fn choose_move_with_normalized_perf( state: &GameBoard, config: &ScoreConfig, player_index: usize, iterations: usize, ) -> Command { let mut root_node = Node { score_sum: ScoreSum::new(), player_score_sums: [FnvHashMap::default(), FnvHashMap::default()], unexplored: move_combos(state), children: FnvHashMap::default(), }; for _ in 0..iterations { let _ = expand_tree(&mut root_node, state.clone(), config); } #[cfg(feature = "logging")] { eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); for (command, score_sum) in &root_node.player_score_sums[player_index] { eprintln!( "{} = {} ({} visits)", command, score_sum.avg().val, score_sum.visit_count ); } } best_player_move(&root_node, player_index) } pub struct Node { score_sum: ScoreSum, player_score_sums: [FnvHashMap; 2], unexplored: Vec<[Command; 2]>, children: FnvHashMap<[Command; 2], Node>, } #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] struct Score { val: f32, } impl AddAssign for Score { fn add_assign(&mut self, other: Self) { self.val = self.val + other.val; } } impl Div for Score { type Output = Self; fn div(self, other: u32) -> Self { Score { val: self.val / other as f32, } } } impl Mul for Score { type Output = Self; fn mul(self, other: f32) -> Self { Score { val: self.val * other, } } } impl cmp::Eq for Score {} impl cmp::Ord for Score { fn cmp(&self, other: &Score) -> cmp::Ordering { self.val .partial_cmp(&other.val) .unwrap_or(cmp::Ordering::Equal) } } struct ScoreSum { sum: Score, visit_count: u32, } impl ScoreSum { fn new() -> ScoreSum { ScoreSum { sum: Score { val: 0. }, visit_count: 0, } } fn with_initial(score: Score) -> ScoreSum { ScoreSum { sum: score, visit_count: 1, } } fn avg(&self) -> Score { self.sum / self.visit_count } } impl AddAssign for ScoreSum { fn add_assign(&mut self, other: Score) { self.sum += other; self.visit_count = self.visit_count.saturating_add(1); } } fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> Score { if state.outcome != SimulationOutcome::Continue { score(&state, config) } else if let Some(commands) = node.unexplored.pop() { state.simulate(commands); let score = score(&state, config); let unexplored = if state.outcome == SimulationOutcome::Continue { move_combos(&state) } else { Vec::new() }; let new_node = Node { score_sum: ScoreSum::with_initial(score), player_score_sums: [FnvHashMap::default(), FnvHashMap::default()], unexplored, children: FnvHashMap::default(), }; node.children.insert(commands, new_node); update(node, commands, score); score } else { let commands = choose_existing(node, config); state.simulate(commands); let score = expand_tree( node.children .get_mut(&commands) .expect("The existing node hasn't been tried yet"), state, config, ); update(node, commands, score); score } } fn move_combos(state: &GameBoard) -> Vec<[Command; 2]> { let player_moves = state.pruned_valid_moves(0); let opponent_moves = state.pruned_valid_moves(1); debug_assert!(!player_moves.is_empty(), "No player moves"); debug_assert!(!opponent_moves.is_empty(), "No opponent moves"); let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); for p in &player_moves { for o in &opponent_moves { result.push([*p, *o]); } } result } fn best_player_move(node: &Node, player_index: usize) -> Command { let multiplier = if player_index == 0 { 1. } else { -1. }; node.player_score_sums[player_index] .iter() .max_by_key(|(_command, score_sum)| score_sum.avg() * multiplier) .map(|(command, _score_sum)| *command) .unwrap_or_else(|| Command::new(Action::DoNothing)) } fn score(state: &GameBoard, config: &ScoreConfig) -> Score { let max_health = (state.players[0].max_worm_health() - state.players[1].max_worm_health()) as f32; let total_health = (state.players[0].health() - state.players[1].health()) as f32; let points = (state.players[0].score() - state.players[1].score()) as f32; let victory = match state.outcome { SimulationOutcome::PlayerWon(0) => 1., SimulationOutcome::PlayerWon(1) => -1., _ => 0., }; let time_to_end = MAX_ROUNDS as f32 - state.round as f32 + 1.; let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32; let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32; Score { val: max_health * config.max_health_weight + total_health * config.total_health_weight + points * config.points_weight + victory * config.victory_weight + snowballs * config.snowball_weight / time_to_end + bombs * config.bomb_weight / time_to_end, } } fn choose_existing(node: &Node, config: &ScoreConfig) -> [Command; 2] { [ choose_one_existing(node, 0, config), choose_one_existing(node, 1, config), ] } fn choose_one_existing(node: &Node, player_index: usize, config: &ScoreConfig) -> Command { let ln_n = (node.score_sum.visit_count as f32).ln(); let multiplier = if player_index == 0 { 1. } else { -1. }; let mut command_confidences = node.player_score_sums[player_index] .iter() .map(|(command, score_sum)| { ( command, (score_sum.avg() * multiplier).val + config.explore_exploit_weight * (ln_n / score_sum.visit_count as f32).sqrt(), ) }); command_confidences .next() .map(|first| { command_confidences .fold( first, |(acc_command, acc_confidence), (next_command, next_confidence)| { if acc_confidence > next_confidence { (acc_command, acc_confidence) } else { (next_command, next_confidence) } }, ) .0 .clone() }) .unwrap_or_else(|| Command::new(Action::DoNothing)) } fn update(node: &mut Node, commands: [Command; 2], score: Score) { *node.player_score_sums[0] .entry(commands[0]) .or_insert_with(ScoreSum::new) += score; *node.player_score_sums[1] .entry(commands[1]) .or_insert_with(ScoreSum::new) += score; node.score_sum += score; }