From d5272bb0f9d9df63e05c62b30ed43d6387437251 Mon Sep 17 00:00:00 2001
From: Justin Worthe <justin@worthe-it.co.za>
Date: Sat, 10 Aug 2019 16:46:26 +0200
Subject: Passing score weightings in, so they can be more configurable

---
 src/strategy/minimax.rs | 91 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 66 insertions(+), 25 deletions(-)

(limited to 'src/strategy/minimax.rs')
diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs
index 6c07b07..1d833cb 100644
--- a/src/strategy/minimax.rs
+++ b/src/strategy/minimax.rs
@@ -7,8 +7,42 @@ use std::collections::HashMap;
 use std::ops::*;
 use time::{Duration, PreciseTime};
 
+// TODO: Calibrate these weightings somehow? Perhaps generate candidate weightings and rank them by playing them against each other?
+// What about:
+// - Creating a list (mins and maxes)
+// - Keep adding a new guess, run it against all the others, and sort the list by fitness.
+// - Repeat until the list has many values
+// - Somehow prioritize inserting new guesses based on what's going well? Or on being maximally different? Keep dividing all the ranges in half?
+#[derive(Debug, Clone)]
+pub struct ScoreConfig {
+    max_health_weight: f32,
+    total_health_weight: f32,
+    points_weight: f32,
+    victory_weight: f32,
+    snowball_weight: f32,
+    bomb_weight: f32,
+}
+
+impl Default for ScoreConfig {
+    fn default() -> ScoreConfig {
+        ScoreConfig {
+            max_health_weight: 1.,
+            total_health_weight: 1.,
+            points_weight: 0.,
+            victory_weight: 3000.,
+            snowball_weight: 100.,
+            bomb_weight: 100.,
+        }
+    }
+}
+
 // TODO: Cache results from last round based on player / opponent move and worm positions
-pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duration) -> Command {
+pub fn choose_move(
+    state: &GameBoard,
+    config: &ScoreConfig,
+    start_time: PreciseTime,
+    max_time: Duration,
+) -> Command {
     let mut root_node = Node {
         score_sum: ScoreSum::new(),
         player_score_sums: [HashMap::new(), HashMap::new()],
@@ -17,7 +51,7 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio
     };
 
     while start_time.to(PreciseTime::now()) < max_time {
-        let _ = expand_tree(&mut root_node, state.clone());
+        let _ = expand_tree(&mut root_node, state.clone(), config);
     }
 
     eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
@@ -33,6 +67,25 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio
     best_player_move(&root_node)
 }
 
+pub fn choose_move_with_normalized_perf(
+    state: &GameBoard,
+    config: &ScoreConfig,
+    iterations: usize,
+) -> Command {
+    let mut root_node = Node {
+        score_sum: ScoreSum::new(),
+        player_score_sums: [HashMap::new(), HashMap::new()],
+        unexplored: move_combos(state),
+        children: HashMap::new(),
+    };
+
+    for _ in 0..iterations {
+        let _ = expand_tree(&mut root_node, state.clone(), config);
+    }
+
+    best_player_move(&root_node)
+}
+
 pub struct Node {
     score_sum: ScoreSum,
     player_score_sums: [HashMap<Command, ScoreSum>; 2],
@@ -99,13 +152,13 @@ impl AddAssign<Score> for ScoreSum {
     }
 }
 
-fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score {
+fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> Score {
     if state.outcome != SimulationOutcome::Continue {
-        score(&state)
+        score(&state, config)
     } else if let Some(commands) = node.unexplored.pop() {
         // TODO: Explore preemptively doing the rollout?
         state.simulate(commands);
-        let score = score(&state);
+        let score = score(&state, config);
         let unexplored = if state.outcome == SimulationOutcome::Continue {
             move_combos(&state)
         } else {
@@ -171,6 +224,7 @@ fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score {
                 .get_mut(&commands)
                 .expect("The existing node hasn't been tried yet"),
             state,
+            config,
         );
         update(node, commands, score);
         score
@@ -201,7 +255,7 @@ fn best_player_move(node: &Node) -> Command {
         .unwrap_or_else(|| Command::new(Action::DoNothing))
 }
 
-fn score(state: &GameBoard) -> Score {
+fn score(state: &GameBoard, config: &ScoreConfig) -> Score {
     let max_health =
         (state.players[0].max_worm_health() - state.players[1].max_worm_health()) as f32;
     let total_health = (state.players[0].health() - state.players[1].health()) as f32;
@@ -217,27 +271,14 @@ fn score(state: &GameBoard) -> Score {
     let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32;
     let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32;
 
-    // TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other?
-    // What about:
-    // - Creating a list (mins and maxes)
-    // - Keep adding a new guess, run against all, and sort the list by fitness.
-    // - Repeat until list has many values
-    // - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half?
-    const MAX_HEALTH_WEIGHT: f32 = 1.;
-    const TOTAL_HEALTH_WEIGHT: f32 = 1.;
-    const POINTS_WEIGHT: f32 = 0.;
-    const VICTORY_WEIGHT: f32 = 3000.;
-    const SNOWBALL_WEIGHT: f32 = 100.;
-    const BOMB_WEIGHT: f32 = 100.;
-
     // TODO: Try adding new features here. Something about board position?
     Score {
-        val: max_health * MAX_HEALTH_WEIGHT
-            + total_health * TOTAL_HEALTH_WEIGHT
-            + points * POINTS_WEIGHT
-            + victory * VICTORY_WEIGHT
-            + snowballs * SNOWBALL_WEIGHT / time_to_end
-            + bombs * BOMB_WEIGHT / time_to_end,
+        val: max_health * config.max_health_weight
+            + total_health * config.total_health_weight
+            + points * config.points_weight
+            + victory * config.victory_weight
+            + snowballs * config.snowball_weight / time_to_end
+            + bombs * config.bomb_weight / time_to_end,
     }
 }
 
-- 
cgit v1.2.3