From dad50b87af3ecd23387bcf78dd16399a33074540 Mon Sep 17 00:00:00 2001 From: Justin Worthe Date: Fri, 17 May 2019 22:57:19 +0200 Subject: Strategy to focus mcts --- src/game.rs | 47 ++++++----- src/geometry/vec.rs | 8 +- src/strategy.rs | 219 +++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 193 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/game.rs b/src/game.rs index 9903250..1ac627d 100644 --- a/src/game.rs +++ b/src/game.rs @@ -31,8 +31,9 @@ pub struct Worm { } #[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum Powerup { - Health(Point2d, i32) +pub struct Powerup { + pub position: Point2d, + pub value: i32 } #[derive(Debug, PartialEq, Eq, Clone)] @@ -88,7 +89,10 @@ impl GameBoard { max_rounds: json.max_rounds, players: [player, opponent], powerups: json.map.iter().flatten().filter_map(|c| { - c.powerup.clone().map(|p| Powerup::Health(Point2d::new(c.x, c.y), p.value)) + c.powerup.clone().map(|p| Powerup { + position: Point2d::new(c.x, c.y), + value: p.value + }) }).collect(), map, outcome: SimulationOutcome::Continue @@ -110,7 +114,10 @@ impl GameBoard { } self.powerups = json.map.iter().flatten().filter_map(|c| { - c.powerup.clone().map(|p| Powerup::Health(Point2d::new(c.x, c.y), p.value)) + c.powerup.clone().map(|p| Powerup { + position: Point2d::new(c.x, c.y), + value: p.value + }) }).collect(); for cell in json.map.iter().flatten() { @@ -174,12 +181,13 @@ impl GameBoard { worm.position.x = x; worm.position.y = y; - self.powerups.retain(|p| match p { - Powerup::Health(point, size) if *point == worm.position => { - worm.health += *size; + self.powerups.retain(|p| { + if p.position == worm.position { + worm.health += p.value; false - }, - _ => true + } else { + true + } }); } } @@ -241,11 +249,13 @@ impl GameBoard { if player.worms[worm_index].health <= 0 { player.worms.remove(worm_index); if player.active_worm >= worm_index { - if player.active_worm > 0 { - player.active_worm -= 1; + player.active_worm = if player.active_worm > 0 { + player.active_worm - 1 + } else if player.worms.len() > 0 { + player.worms.len() - 1 } else { - player.active_worm = player.worms.len()-1; - } + 0 + }; } } } @@ -268,13 +278,12 @@ impl GameBoard { self.outcome } - pub fn find_targets(&self, center: Point2d, weapon_range: u8) -> Vec { + pub fn find_targets(&self, player_index: usize, center: Point2d, weapon_range: u8) -> Vec { let range = weapon_range as i8; let dir_range = ((weapon_range as f32 + 1.) / 2f32.sqrt()).floor() as i8; - let mut directions: Vec = self.players.iter() - .flat_map(|p| p.worms.iter()) - .filter(|w| w.position != center) + let mut directions: Vec = self.players[(player_index + 1)%2].worms + .iter() .filter_map(|w| { let diff = w.position - center; if diff.x == 0 && diff.y.abs() <= range { @@ -302,7 +311,9 @@ impl GameBoard { }) .filter(|(dir, range)| { let diff = dir.as_vec(); - (1..*range).any(|distance| self.map.at(center + diff * distance) != Some(false)) + !(1..*range).any(|distance| + self.map.at(center + diff * distance) != Some(false) && + !self.players[player_index].worms.iter().any(|w| w.position == center + diff * distance)) }) .map(|(dir, _range)| dir) .collect(); diff --git a/src/geometry/vec.rs b/src/geometry/vec.rs index f6bd29f..ab2210d 100644 --- a/src/geometry/vec.rs +++ b/src/geometry/vec.rs @@ -1,5 +1,5 @@ use std::ops::*; -use num_traits::{NumOps, NumAssignOps}; +use num_traits::{NumOps, NumAssignOps, Signed}; use num_traits::pow::Pow; use num_traits::real::Real; @@ -21,6 +21,12 @@ macro_rules! impl_vector { $VecN { $($field),+ } } } + + impl $VecN { + pub fn walking_distance(&self) -> T { + fold_array!(max, { $(self.$field.abs()),+ }) + } + } impl + Copy> $VecN { pub fn magnitude_squared(&self) -> T { diff --git a/src/strategy.rs b/src/strategy.rs index 3506e75..16d639b 100644 --- a/src/strategy.rs +++ b/src/strategy.rs @@ -3,8 +3,8 @@ use crate::game::{GameBoard, SimulationOutcome}; use crate::geometry::*; use std::cmp; +use std::collections::HashMap; use std::ops::*; -use std::collections::{HashMap}; use time::{Duration, PreciseTime}; use rand; @@ -15,8 +15,8 @@ pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Durati state: state.clone(), score_sum: ScoreSum::new(), player_score_sums: [HashMap::new(), HashMap::new()], - unexplored: valid_move_combo(state), - children: HashMap::new() + unexplored: mcts_move_combo(state), + children: HashMap::new(), }; while start_time.to(PreciseTime::now()) < max_time { @@ -25,7 +25,12 @@ pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Durati eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); for (command, score_sum) in &root_node.player_score_sums[0] { - eprintln!("{} = {} ({} visits)", command, score_sum.avg().val, score_sum.visit_count); + eprintln!( + "{} = {} ({} visits)", + command, + score_sum.avg().val, + score_sum.visit_count + ); } best_player_move(&root_node) @@ -41,7 +46,7 @@ struct Node { #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] struct Score { - val: f32 + val: f32, } impl AddAssign for Score { @@ -54,7 +59,7 @@ impl Div for Score { type Output = Self; fn div(self, other: u32) -> Self { Score { - val: self.val / other as f32 + val: self.val / other as f32, } } } @@ -62,26 +67,28 @@ impl Div for Score { impl cmp::Eq for Score {} impl cmp::Ord for Score { fn cmp(&self, other: &Score) -> cmp::Ordering { - self.val.partial_cmp(&other.val).unwrap_or(cmp::Ordering::Equal) + self.val + .partial_cmp(&other.val) + .unwrap_or(cmp::Ordering::Equal) } } struct ScoreSum { sum: Score, - visit_count: u32 + visit_count: u32, } impl ScoreSum { fn new() -> ScoreSum { ScoreSum { sum: Score { val: 0. }, - visit_count: 0 + visit_count: 0, } } fn with_initial(score: Score) -> ScoreSum { ScoreSum { sum: score, - visit_count: 1 + visit_count: 1, } } fn avg(&self) -> Score { @@ -103,45 +110,62 @@ fn mcts(node: &mut Node) -> Score { let mut new_state = node.state.clone(); new_state.simulate(commands); let score = rollout(&new_state); - let unexplored = valid_move_combo(&new_state); - + let unexplored = mcts_move_combo(&new_state); + let new_node = Node { state: new_state, score_sum: ScoreSum::with_initial(score), player_score_sums: [HashMap::new(), HashMap::new()], unexplored, - children: HashMap::new() + children: HashMap::new(), }; node.children.insert(commands, new_node); - + update(node, commands, score); score } else { let commands = choose_existing(node); - let score = mcts(node.children.get_mut(&commands).expect("The existing node hasn't been tried yet")); + let score = mcts( + node.children + .get_mut(&commands) + .expect("The existing node hasn't been tried yet"), + ); update(node, commands, score); score } } +fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> { + let player_moves = heuristic_moves(state, 0); + let opponent_moves = heuristic_moves(state, 1); + debug_assert!(player_moves.len() > 0, "No player moves"); + debug_assert!(player_moves.len() > 0, "No opponent moves"); + + let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); + for p in &player_moves { + for o in &opponent_moves { + result.push([p.clone(), o.clone()]); + } + } + + result +} + fn best_player_move(node: &Node) -> Command { - node - .player_score_sums[0] + node.player_score_sums[0] .iter() - .max_by_key(|(_command, score_sum)| { - score_sum.avg() - }) + .max_by_key(|(_command, score_sum)| score_sum.avg()) .map(|(command, _score_sum)| *command) .unwrap_or(Command::DoNothing) } fn score(state: &GameBoard) -> Score { let mutiplier = match state.outcome { - SimulationOutcome::PlayerWon(_) => 100., - _ => 1. + SimulationOutcome::PlayerWon(_) => 1000., + _ => 1., }; Score { - val: mutiplier * (state.players[0].health() - state.players[1].health()) as f32 + val: mutiplier * (state.players[0].health() - state.players[1].health()) as f32, } } @@ -149,80 +173,151 @@ fn rollout(state: &GameBoard) -> Score { let mut s = state.clone(); let mut rng = rand::thread_rng(); while s.outcome == SimulationOutcome::Continue { - let player_moves = valid_moves(&s, 0); - let opponent_moves = valid_moves(&s, 1); + let player_moves = rollout_moves(&s, 0); + let opponent_moves = rollout_moves(&s, 1); s.simulate([ - player_moves.choose(&mut rng).cloned().unwrap_or(Command::DoNothing), - opponent_moves.choose(&mut rng).cloned().unwrap_or(Command::DoNothing) + player_moves + .choose(&mut rng) + .cloned() + .unwrap_or(Command::DoNothing), + opponent_moves + .choose(&mut rng) + .cloned() + .unwrap_or(Command::DoNothing), ]); } - + score(&s) } - + fn choose_existing(node: &Node) -> [Command; 2] { - [ - choose_one_existing(node, 0), - choose_one_existing(node, 1) - ] + [choose_one_existing(node, 0), choose_one_existing(node, 1)] } fn choose_one_existing(node: &Node, player_index: usize) -> Command { let ln_n = (node.score_sum.visit_count as f32).ln(); let c = 100.; - let multiplier = if player_index == 0 { - 1. - } else { - -1. - }; + let multiplier = if player_index == 0 { 1. } else { -1. }; node.player_score_sums[player_index] .iter() .max_by_key(|(_command, score_sum)| { - (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) as i32 + (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) + as i32 }) .map(|(command, _score_sum)| *command) .unwrap_or(Command::DoNothing) } - fn update(node: &mut Node, commands: [Command; 2], score: Score) { - *node.player_score_sums[0].entry(commands[0]).or_insert(ScoreSum::new()) += score; - *node.player_score_sums[1].entry(commands[1]).or_insert(ScoreSum::new()) += score; + *node.player_score_sums[0] + .entry(commands[0]) + .or_insert(ScoreSum::new()) += score; + *node.player_score_sums[1] + .entry(commands[1]) + .or_insert(ScoreSum::new()) += score; node.score_sum += score; } -fn valid_move_combo(state: &GameBoard) -> Vec<[Command; 2]> { - let player_moves = valid_moves(state, 0); - let opponent_moves = valid_moves(state, 1); - let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len()); - for p in &player_moves { - for o in &opponent_moves { - result.push([p.clone(), o.clone()]); - } - } - - result -} +// TODO: Remove all invalid moves onto other worms -fn valid_moves(state: &GameBoard, player_index: usize) -> Vec { +fn heuristic_moves(state: &GameBoard, player_index: usize) -> Vec { let worm = state.players[player_index].active_worm(); + + let mut shoots = state + .find_targets(player_index, worm.position, worm.weapon_range) + .iter() + .map(|d| Command::Shoot(*d)) + .collect::>(); + + let closest_powerup = state.powerups + .iter() + .min_by_key(|p| (p.position - worm.position).walking_distance()); + + let average_player_position = Point2d { + x: state.players[player_index].worms + .iter() + .map(|w| w.position.x) + .sum::() / state.players[player_index].worms.len() as i8, + y: state.players[player_index].worms + .iter() + .map(|w| w.position.y) + .sum::() / state.players[player_index].worms.len() as i8 + }; + + let closest_opponent = state.players[(player_index + 1) % 2].worms + .iter() + .min_by_key(|w| (w.position - average_player_position).walking_distance()); - let mut moves = Direction::all().iter() + let mut commands = if !shoots.is_empty() { + // we're in combat now. Feel free to move anywhere. + let mut moves = Direction::all() + .iter() + .map(Direction::as_vec) + .map(|d| worm.position + d) + .filter_map(|p| match state.map.at(p) { + Some(false) => Some(Command::Move(p.x, p.y)), + Some(true) => Some(Command::Dig(p.x, p.y)), + _ => None, + }) + .collect::>(); + moves.append(&mut shoots); + moves + } else if let Some(powerup) = closest_powerup { + // there are powerups! Let's go grab the closest one. + moves_towards(state, worm.position, powerup.position) + } else if let Some(opponent) = closest_opponent { + // we're not currently in combat. Let's go find the closest worm. + moves_towards(state, worm.position, opponent.position) + } else { + // this shouldn't happen + debug_assert!(false, "No valid heuristic moves"); + vec!() + }; + commands.push(Command::DoNothing); + commands +} + +fn moves_towards(state: &GameBoard, from: Point2d, to: Point2d) -> Vec { + let distance = (to - from).walking_distance(); + Direction::all() + .iter() .map(Direction::as_vec) - .map(|d| worm.position + d) + .map(|d| from + d) + .filter(|p| (to - *p).walking_distance() < distance) .filter_map(|p| match state.map.at(p) { Some(false) => Some(Command::Move(p.x, p.y)), Some(true) => Some(Command::Dig(p.x, p.y)), - _ => None + _ => None, }) - .collect::>(); - let mut shoots = state.find_targets(worm.position, worm.weapon_range) + .collect() +} + +fn rollout_moves(state: &GameBoard, player_index: usize) -> Vec { + let worm = state.players[player_index].active_worm(); + + let shoots = state + .find_targets(player_index, worm.position, worm.weapon_range) .iter() .map(|d| Command::Shoot(*d)) .collect::>(); - moves.append(&mut shoots); - moves.retain(|m| *m != Command::DoNothing); + if !shoots.is_empty() { + return shoots; + } + + // TODO: More directed destruction movements? + let mut moves = Direction::all() + .iter() + .map(Direction::as_vec) + .map(|d| worm.position + d) + .filter_map(|p| match state.map.at(p) { + Some(false) => Some(Command::Move(p.x, p.y)), + Some(true) => Some(Command::Dig(p.x, p.y)), + _ => None, + }) + .collect::>(); + + moves.push(Command::DoNothing); moves } -- cgit v1.2.3