author     Justin Worthe <justin@worthe-it.co.za>   2019-05-17 22:57:19 +0200
committer  Justin Worthe <justin@worthe-it.co.za>   2019-05-17 22:57:19 +0200
commit     dad50b87af3ecd23387bcf78dd16399a33074540
tree       ff55d9143171e70e951a11ffe135840e781d5efb
parent     56627fa6c913919acef6799c489ba9c5cf25cd0a
Add heuristic move generation to focus MCTS
-rw-r--r--   Cargo.toml            |   4
-rw-r--r--   src/game.rs           |  47
-rw-r--r--   src/geometry/vec.rs   |   8
-rw-r--r--   src/strategy.rs       | 219
4 files changed, 196 insertions, 82 deletions
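
In short: valid_moves and valid_move_combo are replaced by heuristic_moves, rollout_moves and mcts_move_combo. The MCTS tree is now expanded with pairs of heuristic moves (shoot whenever an opponent worm is in range, otherwise walk toward the nearest powerup, otherwise walk toward the opponent worm closest to the player's average position), while rollouts shoot when a target is in range and otherwise pick any move or dig at random. Supporting changes: Powerup becomes a position/value struct, find_targets becomes player-aware, the vector types gain a walking_distance (Chebyshev) helper, the win multiplier in score goes from 100 to 1000, and release builds keep debug assertions and overflow checks. A rough sketch of the heuristic priority order, using hypothetical names (Plan and pick_plan are not identifiers from this commit):

    // Illustrative sketch of the priority order heuristic_moves follows; the
    // real function returns a Vec<Command> and always appends DoNothing.
    enum Plan {
        Fight,        // a target is in weapon range: shoot, or reposition freely
        GrabPowerup,  // no target: walk toward the nearest powerup
        HuntOpponent, // no powerups left: walk toward the closest opponent worm
        Wait,         // nothing sensible left to do
    }

    fn pick_plan(can_shoot: bool, powerup_available: bool, opponent_alive: bool) -> Plan {
        if can_shoot {
            Plan::Fight
        } else if powerup_available {
            Plan::GrabPowerup
        } else if opponent_alive {
            Plan::HuntOpponent
        } else {
            Plan::Wait
        }
    }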
diff --git a/Cargo.toml b/Cargo.toml
index af0623b..58d1b0c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,4 +12,6 @@ num-traits = "0.2.6"
arrayvec = "0.4.10"
[profile.release]
-debug = true
\ No newline at end of file
+debug = true
+debug-assertions = true
+overflow-checks = true
\ No newline at end of file
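
For context, debug-assertions and overflow-checks are off by default in release builds; enabling them here means the new debug_assert! calls in strategy.rs still fire, and integer overflow panics instead of wrapping silently. A minimal illustration (not code from this repo) of what overflow-checks = true changes for i8 arithmetic like the Point2d<i8> coordinates the bot uses:

    // Illustrative only: with overflow-checks = true a release build panics here
    // with "attempt to add with overflow"; with the default setting the addition
    // would silently wrap 120 + 10 around to -126.
    fn step(x: i8, dx: i8) -> i8 {
        x + dx
    }

    fn main() {
        println!("{}", step(120, 10));
    }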
diff --git a/src/game.rs b/src/game.rs
index 9903250..1ac627d 100644
--- a/src/game.rs
+++ b/src/game.rs
@@ -31,8 +31,9 @@ pub struct Worm {
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-pub enum Powerup {
- Health(Point2d<i8>, i32)
+pub struct Powerup {
+ pub position: Point2d<i8>,
+ pub value: i32
}
#[derive(Debug, PartialEq, Eq, Clone)]
@@ -88,7 +89,10 @@ impl GameBoard {
max_rounds: json.max_rounds,
players: [player, opponent],
powerups: json.map.iter().flatten().filter_map(|c| {
- c.powerup.clone().map(|p| Powerup::Health(Point2d::new(c.x, c.y), p.value))
+ c.powerup.clone().map(|p| Powerup {
+ position: Point2d::new(c.x, c.y),
+ value: p.value
+ })
}).collect(),
map,
outcome: SimulationOutcome::Continue
@@ -110,7 +114,10 @@ impl GameBoard {
}
self.powerups = json.map.iter().flatten().filter_map(|c| {
- c.powerup.clone().map(|p| Powerup::Health(Point2d::new(c.x, c.y), p.value))
+ c.powerup.clone().map(|p| Powerup {
+ position: Point2d::new(c.x, c.y),
+ value: p.value
+ })
}).collect();
for cell in json.map.iter().flatten() {
@@ -174,12 +181,13 @@ impl GameBoard {
worm.position.x = x;
worm.position.y = y;
- self.powerups.retain(|p| match p {
- Powerup::Health(point, size) if *point == worm.position => {
- worm.health += *size;
+ self.powerups.retain(|p| {
+ if p.position == worm.position {
+ worm.health += p.value;
false
- },
- _ => true
+ } else {
+ true
+ }
});
}
}
@@ -241,11 +249,13 @@ impl GameBoard {
if player.worms[worm_index].health <= 0 {
player.worms.remove(worm_index);
if player.active_worm >= worm_index {
- if player.active_worm > 0 {
- player.active_worm -= 1;
+ player.active_worm = if player.active_worm > 0 {
+ player.active_worm - 1
+ } else if player.worms.len() > 0 {
+ player.worms.len() - 1
} else {
- player.active_worm = player.worms.len()-1;
- }
+ 0
+ };
}
}
}
@@ -268,13 +278,12 @@ impl GameBoard {
self.outcome
}
- pub fn find_targets(&self, center: Point2d<i8>, weapon_range: u8) -> Vec<Direction> {
+ pub fn find_targets(&self, player_index: usize, center: Point2d<i8>, weapon_range: u8) -> Vec<Direction> {
let range = weapon_range as i8;
let dir_range = ((weapon_range as f32 + 1.) / 2f32.sqrt()).floor() as i8;
- let mut directions: Vec<Direction> = self.players.iter()
- .flat_map(|p| p.worms.iter())
- .filter(|w| w.position != center)
+ let mut directions: Vec<Direction> = self.players[(player_index + 1)%2].worms
+ .iter()
.filter_map(|w| {
let diff = w.position - center;
if diff.x == 0 && diff.y.abs() <= range {
@@ -302,7 +311,9 @@ impl GameBoard {
})
.filter(|(dir, range)| {
let diff = dir.as_vec();
- (1..*range).any(|distance| self.map.at(center + diff * distance) != Some(false))
+ !(1..*range).any(|distance|
+ self.map.at(center + diff * distance) != Some(false) ||
+ self.players[player_index].worms.iter().any(|w| w.position == center + diff * distance))
})
.map(|(dir, _range)| dir)
.collect();
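
The game.rs changes turn Powerup into a plain position/value struct and make find_targets player-aware: only the opponent's worms are considered as targets, and the line-of-sight filter is meant to drop a direction when a cell between the shooter and the target is dirt, out of bounds, or occupied by a friendly worm. A stand-alone sketch of that rule under simplified assumptions (clear_shot, open_at and the tuple coordinates are illustrative, not identifiers from game.rs):

    // A shot along `step` is allowed only if every cell strictly between the
    // shooter and the target (distances 1..range) is open terrain and is not
    // occupied by one of our own worms.
    fn clear_shot(
        open_at: impl Fn((i8, i8)) -> bool,
        friendly: &[(i8, i8)],
        shooter: (i8, i8),
        step: (i8, i8),
        range: i8,
    ) -> bool {
        (1..range).all(|d| {
            let cell = (shooter.0 + step.0 * d, shooter.1 + step.1 * d);
            open_at(cell) && !friendly.contains(&cell)
        })
    }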
diff --git a/src/geometry/vec.rs b/src/geometry/vec.rs
index f6bd29f..ab2210d 100644
--- a/src/geometry/vec.rs
+++ b/src/geometry/vec.rs
@@ -1,5 +1,5 @@
use std::ops::*;
-use num_traits::{NumOps, NumAssignOps};
+use num_traits::{NumOps, NumAssignOps, Signed};
use num_traits::pow::Pow;
use num_traits::real::Real;
@@ -21,6 +21,12 @@ macro_rules! impl_vector {
$VecN { $($field),+ }
}
}
+
+ impl<T: NumOps + Ord + Signed + Copy> $VecN<T> {
+ pub fn walking_distance(&self) -> T {
+ fold_array!(max, { $(self.$field.abs()),+ })
+ }
+ }
impl<T: NumOps + Pow<u8, Output=T> + Copy> $VecN<T> {
pub fn magnitude_squared(&self) -> T {
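
walking_distance is the Chebyshev distance: the maximum of the absolute component values, which on this map is the number of steps a worm needs when diagonal moves are allowed. A small illustrative version for the 2D case (the real implementation is generated by the impl_vector! macro for any signed component type):

    // Chebyshev distance for a 2D offset: king-style steps, diagonals allowed.
    fn walking_distance_2d(dx: i8, dy: i8) -> i8 {
        dx.abs().max(dy.abs())
    }

    fn main() {
        // An offset of (3, -2) takes three steps: two diagonal moves cover the
        // y component while also closing in on x, then one straight move.
        assert_eq!(walking_distance_2d(3, -2), 3);
    }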
diff --git a/src/strategy.rs b/src/strategy.rs
index 3506e75..16d639b 100644
--- a/src/strategy.rs
+++ b/src/strategy.rs
@@ -3,8 +3,8 @@ use crate::game::{GameBoard, SimulationOutcome};
use crate::geometry::*;
use std::cmp;
+use std::collections::HashMap;
use std::ops::*;
-use std::collections::{HashMap};
use time::{Duration, PreciseTime};
use rand;
@@ -15,8 +15,8 @@ pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Durati
state: state.clone(),
score_sum: ScoreSum::new(),
player_score_sums: [HashMap::new(), HashMap::new()],
- unexplored: valid_move_combo(state),
- children: HashMap::new()
+ unexplored: mcts_move_combo(state),
+ children: HashMap::new(),
};
while start_time.to(PreciseTime::now()) < max_time {
@@ -25,7 +25,12 @@ pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Durati
eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
for (command, score_sum) in &root_node.player_score_sums[0] {
- eprintln!("{} = {} ({} visits)", command, score_sum.avg().val, score_sum.visit_count);
+ eprintln!(
+ "{} = {} ({} visits)",
+ command,
+ score_sum.avg().val,
+ score_sum.visit_count
+ );
}
best_player_move(&root_node)
@@ -41,7 +46,7 @@ struct Node {
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
struct Score {
- val: f32
+ val: f32,
}
impl AddAssign for Score {
@@ -54,7 +59,7 @@ impl Div<u32> for Score {
type Output = Self;
fn div(self, other: u32) -> Self {
Score {
- val: self.val / other as f32
+ val: self.val / other as f32,
}
}
}
@@ -62,26 +67,28 @@ impl Div<u32> for Score {
impl cmp::Eq for Score {}
impl cmp::Ord for Score {
fn cmp(&self, other: &Score) -> cmp::Ordering {
- self.val.partial_cmp(&other.val).unwrap_or(cmp::Ordering::Equal)
+ self.val
+ .partial_cmp(&other.val)
+ .unwrap_or(cmp::Ordering::Equal)
}
}
struct ScoreSum {
sum: Score,
- visit_count: u32
+ visit_count: u32,
}
impl ScoreSum {
fn new() -> ScoreSum {
ScoreSum {
sum: Score { val: 0. },
- visit_count: 0
+ visit_count: 0,
}
}
fn with_initial(score: Score) -> ScoreSum {
ScoreSum {
sum: score,
- visit_count: 1
+ visit_count: 1,
}
}
fn avg(&self) -> Score {
@@ -103,45 +110,62 @@ fn mcts(node: &mut Node) -> Score {
let mut new_state = node.state.clone();
new_state.simulate(commands);
let score = rollout(&new_state);
- let unexplored = valid_move_combo(&new_state);
-
+ let unexplored = mcts_move_combo(&new_state);
+
let new_node = Node {
state: new_state,
score_sum: ScoreSum::with_initial(score),
player_score_sums: [HashMap::new(), HashMap::new()],
unexplored,
- children: HashMap::new()
+ children: HashMap::new(),
};
node.children.insert(commands, new_node);
-
+
update(node, commands, score);
score
} else {
let commands = choose_existing(node);
- let score = mcts(node.children.get_mut(&commands).expect("The existing node hasn't been tried yet"));
+ let score = mcts(
+ node.children
+ .get_mut(&commands)
+ .expect("The existing node hasn't been tried yet"),
+ );
update(node, commands, score);
score
}
}
+fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
+ let player_moves = heuristic_moves(state, 0);
+ let opponent_moves = heuristic_moves(state, 1);
+ debug_assert!(player_moves.len() > 0, "No player moves");
+ debug_assert!(opponent_moves.len() > 0, "No opponent moves");
+
+ let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
+ for p in &player_moves {
+ for o in &opponent_moves {
+ result.push([p.clone(), o.clone()]);
+ }
+ }
+
+ result
+}
+
fn best_player_move(node: &Node) -> Command {
- node
- .player_score_sums[0]
+ node.player_score_sums[0]
.iter()
- .max_by_key(|(_command, score_sum)| {
- score_sum.avg()
- })
+ .max_by_key(|(_command, score_sum)| score_sum.avg())
.map(|(command, _score_sum)| *command)
.unwrap_or(Command::DoNothing)
}
fn score(state: &GameBoard) -> Score {
let mutiplier = match state.outcome {
- SimulationOutcome::PlayerWon(_) => 100.,
- _ => 1.
+ SimulationOutcome::PlayerWon(_) => 1000.,
+ _ => 1.,
};
Score {
- val: mutiplier * (state.players[0].health() - state.players[1].health()) as f32
+ val: mutiplier * (state.players[0].health() - state.players[1].health()) as f32,
}
}
@@ -149,80 +173,151 @@ fn rollout(state: &GameBoard) -> Score {
let mut s = state.clone();
let mut rng = rand::thread_rng();
while s.outcome == SimulationOutcome::Continue {
- let player_moves = valid_moves(&s, 0);
- let opponent_moves = valid_moves(&s, 1);
+ let player_moves = rollout_moves(&s, 0);
+ let opponent_moves = rollout_moves(&s, 1);
s.simulate([
- player_moves.choose(&mut rng).cloned().unwrap_or(Command::DoNothing),
- opponent_moves.choose(&mut rng).cloned().unwrap_or(Command::DoNothing)
+ player_moves
+ .choose(&mut rng)
+ .cloned()
+ .unwrap_or(Command::DoNothing),
+ opponent_moves
+ .choose(&mut rng)
+ .cloned()
+ .unwrap_or(Command::DoNothing),
]);
}
-
+
score(&s)
}
-
+
fn choose_existing(node: &Node) -> [Command; 2] {
- [
- choose_one_existing(node, 0),
- choose_one_existing(node, 1)
- ]
+ [choose_one_existing(node, 0), choose_one_existing(node, 1)]
}
fn choose_one_existing(node: &Node, player_index: usize) -> Command {
let ln_n = (node.score_sum.visit_count as f32).ln();
let c = 100.;
- let multiplier = if player_index == 0 {
- 1.
- } else {
- -1.
- };
+ let multiplier = if player_index == 0 { 1. } else { -1. };
node.player_score_sums[player_index]
.iter()
.max_by_key(|(_command, score_sum)| {
- (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) as i32
+ (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
+ as i32
})
.map(|(command, _score_sum)| *command)
.unwrap_or(Command::DoNothing)
}
-
fn update(node: &mut Node, commands: [Command; 2], score: Score) {
- *node.player_score_sums[0].entry(commands[0]).or_insert(ScoreSum::new()) += score;
- *node.player_score_sums[1].entry(commands[1]).or_insert(ScoreSum::new()) += score;
+ *node.player_score_sums[0]
+ .entry(commands[0])
+ .or_insert(ScoreSum::new()) += score;
+ *node.player_score_sums[1]
+ .entry(commands[1])
+ .or_insert(ScoreSum::new()) += score;
node.score_sum += score;
}
-fn valid_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
- let player_moves = valid_moves(state, 0);
- let opponent_moves = valid_moves(state, 1);
- let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
- for p in &player_moves {
- for o in &opponent_moves {
- result.push([p.clone(), o.clone()]);
- }
- }
-
- result
-}
+// TODO: Remove all invalid moves onto other worms
-fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
+fn heuristic_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
let worm = state.players[player_index].active_worm();
+
+ let mut shoots = state
+ .find_targets(player_index, worm.position, worm.weapon_range)
+ .iter()
+ .map(|d| Command::Shoot(*d))
+ .collect::<Vec<_>>();
+
+ let closest_powerup = state.powerups
+ .iter()
+ .min_by_key(|p| (p.position - worm.position).walking_distance());
+
+ let average_player_position = Point2d {
+ x: state.players[player_index].worms
+ .iter()
+ .map(|w| w.position.x)
+ .sum::<i8>() / state.players[player_index].worms.len() as i8,
+ y: state.players[player_index].worms
+ .iter()
+ .map(|w| w.position.y)
+ .sum::<i8>() / state.players[player_index].worms.len() as i8
+ };
+
+ let closest_opponent = state.players[(player_index + 1) % 2].worms
+ .iter()
+ .min_by_key(|w| (w.position - average_player_position).walking_distance());
- let mut moves = Direction::all().iter()
+ let mut commands = if !shoots.is_empty() {
+ // we're in combat now. Feel free to move anywhere.
+ let mut moves = Direction::all()
+ .iter()
+ .map(Direction::as_vec)
+ .map(|d| worm.position + d)
+ .filter_map(|p| match state.map.at(p) {
+ Some(false) => Some(Command::Move(p.x, p.y)),
+ Some(true) => Some(Command::Dig(p.x, p.y)),
+ _ => None,
+ })
+ .collect::<Vec<_>>();
+ moves.append(&mut shoots);
+ moves
+ } else if let Some(powerup) = closest_powerup {
+ // there are powerups! Let's go grab the closest one.
+ moves_towards(state, worm.position, powerup.position)
+ } else if let Some(opponent) = closest_opponent {
+ // we're not currently in combat. Let's go find the closest worm.
+ moves_towards(state, worm.position, opponent.position)
+ } else {
+ // this shouldn't happen
+ debug_assert!(false, "No valid heuristic moves");
+ vec!()
+ };
+ commands.push(Command::DoNothing);
+ commands
+}
+
+fn moves_towards(state: &GameBoard, from: Point2d<i8>, to: Point2d<i8>) -> Vec<Command> {
+ let distance = (to - from).walking_distance();
+ Direction::all()
+ .iter()
.map(Direction::as_vec)
- .map(|d| worm.position + d)
+ .map(|d| from + d)
+ .filter(|p| (to - *p).walking_distance() < distance)
.filter_map(|p| match state.map.at(p) {
Some(false) => Some(Command::Move(p.x, p.y)),
Some(true) => Some(Command::Dig(p.x, p.y)),
- _ => None
+ _ => None,
})
- .collect::<Vec<_>>();
- let mut shoots = state.find_targets(worm.position, worm.weapon_range)
+ .collect()
+}
+
+fn rollout_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
+ let worm = state.players[player_index].active_worm();
+
+ let shoots = state
+ .find_targets(player_index, worm.position, worm.weapon_range)
.iter()
.map(|d| Command::Shoot(*d))
.collect::<Vec<_>>();
- moves.append(&mut shoots);
- moves.retain(|m| *m != Command::DoNothing);
+ if !shoots.is_empty() {
+ return shoots;
+ }
+
+ // TODO: More directed destruction movements?
+ let mut moves = Direction::all()
+ .iter()
+ .map(Direction::as_vec)
+ .map(|d| worm.position + d)
+ .filter_map(|p| match state.map.at(p) {
+ Some(false) => Some(Command::Move(p.x, p.y)),
+ Some(true) => Some(Command::Dig(p.x, p.y)),
+ _ => None,
+ })
+ .collect::<Vec<_>>();
+
+ moves.push(Command::DoNothing);
moves
}
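
The tree policy itself only gets a rustfmt pass, but it is worth spelling out: choose_one_existing picks each player's command independently with a UCB1-style bound, using exploration constant c = 100 and negating the value for the opponent, since score is player 0's health minus player 1's (effectively a decoupled UCT for the simultaneous-move game). A sketch of the quantity it maximises, with illustrative names:

    // UCB1-style selection value mirroring choose_one_existing: multiplier is
    // 1.0 for player 0 and -1.0 for player 1, c is the exploration constant.
    fn ucb1(avg_score: f32, visits: u32, parent_visits: u32, multiplier: f32) -> f32 {
        let c = 100.0;
        let ln_n = (parent_visits as f32).ln();
        multiplier * (avg_score + c * (ln_n / visits as f32).sqrt())
    }

In the diff the result is cast to i32 so it can be fed to max_by_key, since f32 is not Ord; best_player_move avoids the cast by comparing Score values, which implement Ord via partial_cmp.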