Building up unsimulated moves list

author: Justin Worthe <justin@worthe-it.co.za> 2019-05-13 17:33:22 +0200
committer: Justin Worthe <justin@worthe-it.co.za> 2019-05-13 17:33:22 +0200
commit: 652242e584ee2b7cfb3021d570a63e57cfa52773 (patch)
tree: cec80eeaafa5cdaa0ece8e395bafcccf70496f92 /src/strategy.rs
parent: 3d1676842e20c90bb5599daa2caefdea2bbf9fe8 (diff)
1 file changed, 75 insertions, 42 deletions
diff --git a/src/strategy.rs b/src/strategy.rs
index ce65e54..db4409e 100644
--- a/src/strategy.rs
+++ b/src/strategy.rs
@@ -3,19 +3,15 @@ use crate::game::{GameBoard, SimulationOutcome};
 use crate::geometry::*;
 
 use std::ops::*;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use time::{Duration, PreciseTime};
 
-struct GameTree {
-    state: GameBoard,
-    next_states: Vec<([Command; 2], GameTree)>
-}
-
 pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Duration) -> Command {
     let mut root_node = Node {
         state: state.clone(),
-        score_sum: Score { val: 0 },
-        visit_count: 0,
+        score_sum: ScoreSum::new(),
+        player_score_sums: [HashMap::new(), HashMap::new()],
+        unexplored: valid_move_combo(state),
         children: HashMap::new()
     };
 
@@ -23,24 +19,20 @@ pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Durati
         let _ = mcts(&mut root_node);
     }
 
-    root_node
-        .children
-        .iter()
-        .max_by_key(|(_k, v)| v.score())
-        .map(|(k, _v)| k[0])
-        .unwrap_or(Command::DoNothing)
+    best_player_move(&root_node)
 }
 
 struct Node {
     state: GameBoard,
-    score_sum: Score,
-    visit_count: u32,
-    children: HashMap<[Command; 2], Node>
+    score_sum: ScoreSum,
+    player_score_sums: [HashMap<Command, ScoreSum>; 2],
+    unexplored: Vec<[Command; 2]>,
+    children: HashMap<[Command; 2], Node>,
 }
 
 impl Node {
     fn score(&self) -> Score {
-        self.score_sum / self.visit_count
+        self.score_sum.avg()
     }
 }
 
@@ -55,29 +47,59 @@ impl AddAssign for Score {
     }
 }
 
-impl Div<u32> for Score {
+impl Div<i32> for Score {
     type Output = Self;
-    fn div(self, other: u32) -> Self {
+    fn div(self, other: i32) -> Self {
         Score {
-            val: self.val / other as i32
+            val: self.val / other
+        }
+    }
+}
+
+struct ScoreSum {
+    sum: Score,
+    visit_count: i32
+}
+
+impl ScoreSum {
+    fn new() -> ScoreSum {
+        ScoreSum {
+            sum: Score { val: 0 },
+            visit_count: 0
         }
     }
+    fn with_initial(score: Score) -> ScoreSum {
+        ScoreSum {
+            sum: score,
+            visit_count: 1
+        }
+    }
+    fn avg(&self) -> Score {
+        self.sum / self.visit_count
+    }
+}
+
+impl AddAssign<Score> for ScoreSum {
+    fn add_assign(&mut self, other: Score) {
+        self.sum += other;
+        self.visit_count += 1;
+    }
 }
 
 fn mcts(node: &mut Node) -> Score {
     if node.state.outcome != SimulationOutcome::Continue {
         score(&node.state)
-    } else if has_unsimulated_outcomes(node) {
-        let commands = choose_unsimulated(&node);
-
+    } else if let Some(commands) = node.unexplored.pop() {
         let mut new_state = node.state.clone();
         new_state.simulate(commands);
         let score = rollout(&new_state);
+        let unexplored = valid_move_combo(&new_state);
         
         let new_node = Node {
             state: new_state,
-            score_sum: score,
-            visit_count: 1,
+            score_sum: ScoreSum::with_initial(score),
+            player_score_sums: [HashMap::new(), HashMap::new()],
+            unexplored,
             children: HashMap::new()
         };
         node.children.insert(commands, new_node);
@@ -85,29 +107,27 @@ fn mcts(node: &mut Node) -> Score {
         update(node, commands, score);
         score
     } else {
-        let commands = select(node);
+        let commands = choose_existing(node);
         let score = mcts(node.children.get_mut(&commands).unwrap());
         update(node, commands, score);
         score
     }
 }
 
-fn score(state: &GameBoard) -> Score {
-    // TODO
-    Score { val: 0 }
-}
-
-fn has_unsimulated_outcomes(node: &Node) -> bool {
-    // TODO
-    false
+fn best_player_move(node: &Node) -> Command {
+    // TODO, use player_score_sums?
+    node
+        .children
+        .iter()
+        .max_by_key(|(_k, v)| v.score())
+        .map(|(k, _v)| k[0])
+        .unwrap_or(Command::DoNothing)
 }
 
-fn choose_unsimulated(node: &Node) -> [Command; 2] {
-    // TODO
-    [
-        Command::DoNothing,
-        Command::DoNothing
-    ]
+fn score(state: &GameBoard) -> Score {
+    Score {
+        val: state.players[0].health() - state.players[1].health()
+    }
 }
 
 fn rollout(state: &GameBoard) -> Score {
@@ -115,7 +135,7 @@ fn rollout(state: &GameBoard) -> Score {
     Score { val: 0 }
 }
     
-fn select(node: &Node) -> [Command; 2] {
+fn choose_existing(node: &Node) -> [Command; 2] {
     // TODO
     [
         Command::DoNothing,
@@ -127,6 +147,19 @@ fn update(node: &mut Node, commands: [Command; 2], score: Score) {
     // TODO
 }
 
+fn valid_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
+    let player_moves = valid_moves(state, 0);
+    let opponent_moves = valid_moves(state, 1);
+    let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
+    for p in &player_moves {
+        for o in &opponent_moves {
+            result.push([p.clone(), o.clone()]);
+        }
+    }
+    
+    result
+}
+
 fn valid_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
     let worm = state.players[player_index].active_worm();
author	Justin Worthe <justin@worthe-it.co.za>	2019-05-13 17:33:22 +0200
committer	Justin Worthe <justin@worthe-it.co.za>	2019-05-13 17:33:22 +0200
commit	652242e584ee2b7cfb3021d570a63e57cfa52773 (patch)
tree	cec80eeaafa5cdaa0ece8e395bafcccf70496f92 /src/strategy.rs
parent	3d1676842e20c90bb5599daa2caefdea2bbf9fe8 (diff)