Passing score weightings in, so they can be more configurable

author: Justin Worthe <justin@worthe-it.co.za> 2019-08-10 16:46:26 +0200
committer: Justin Worthe <justin@worthe-it.co.za> 2019-08-10 16:46:26 +0200
commit: d5272bb0f9d9df63e05c62b30ed43d6387437251 (patch)
tree: e8723091587221100db7d1936423d4d143f58d25
parent: 1d54d0825505b670ee8cc5f370088ae1a6ed3b5f (diff)
4 files changed, 74 insertions, 31 deletions
diff --git a/src/bin/benchmark.rs b/src/bin/benchmark.rs
index 9a62aed..84e869e 100644
--- a/src/bin/benchmark.rs
+++ b/src/bin/benchmark.rs
@@ -4,7 +4,7 @@ use time::{Duration, PreciseTime};
 
 use steam_powered_wyrm::game;
 use steam_powered_wyrm::json;
-use steam_powered_wyrm::strategy::choose_move;
+use steam_powered_wyrm::strategy::{choose_move, ScoreConfig};
 
 fn main() {
     let max_time = Duration::milliseconds(19950);
@@ -13,7 +13,7 @@ fn main() {
     match json::read_state_from_json_file(&Path::new(&format!("./tests/example-state.json"))) {
         Ok(json_state) => {
             let new_board = game::GameBoard::new(json_state);
-            let _ = choose_move(&new_board, start_time, max_time);
+            let _ = choose_move(&new_board, &ScoreConfig::default(), start_time, max_time);
         }
         Err(e) => {
             eprintln!("WARN: State file could not be parsed: {}", e);
diff --git a/src/main.rs b/src/main.rs
index 6f3fba5..d2ec145 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,10 +7,12 @@ use time::{Duration, PreciseTime};
 use steam_powered_wyrm::command::{Action, Command};
 use steam_powered_wyrm::game;
 use steam_powered_wyrm::json;
-use steam_powered_wyrm::strategy::choose_move;
+use steam_powered_wyrm::strategy::{choose_move, ScoreConfig};
 
 fn main() {
     let max_time = Duration::milliseconds(900);
+    let config = ScoreConfig::default();
+
     let mut game_board = None;
     for line in stdin().lock().lines() {
         let start_time = PreciseTime::now();
@@ -24,13 +26,13 @@ fn main() {
             Ok(json_state) => match &mut game_board {
                 None => {
                     let new_board = game::GameBoard::new(json_state);
-                    let command = choose_move(&new_board, start_time, max_time);
+                    let command = choose_move(&new_board, &config, start_time, max_time);
                     game_board = Some(new_board);
                     command
                 }
                 Some(game_board) => {
                     game_board.update(json_state);
-                    choose_move(&game_board, start_time, max_time)
+                    choose_move(&game_board, &config, start_time, max_time)
                 }
             },
             Err(e) => {
diff --git a/src/strategy.rs b/src/strategy.rs
index b6069a1..9c92ba5 100644
--- a/src/strategy.rs
+++ b/src/strategy.rs
@@ -2,4 +2,4 @@
 //pub use mcts::{choose_move, Node};
 
 mod minimax;
-pub use minimax::{choose_move, Node};
+pub use minimax::{choose_move, Node, ScoreConfig};
diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs
index 6c07b07..1d833cb 100644
--- a/src/strategy/minimax.rs
+++ b/src/strategy/minimax.rs
@@ -7,8 +7,42 @@ use std::collections::HashMap;
 use std::ops::*;
 use time::{Duration, PreciseTime};
 
+// TODO: Calibrate these weightings somehow? Perhaps generate candidate weightings and rank them by playing them against each other?
+// One possible approach:
+// - Create a list of candidate weightings (mins and maxes)
+// - Keep adding a new guess, run it against all the others, and sort the list by fitness.
+// - Repeat until the list has many values
+// - Somehow prioritize inserting new guesses based on what's going well? Or on being maximally different? Or keep dividing all the ranges in half?
+#[derive(Debug, Clone)]
+pub struct ScoreConfig {
+    max_health_weight: f32,
+    total_health_weight: f32,
+    points_weight: f32,
+    victory_weight: f32,
+    snowball_weight: f32,
+    bomb_weight: f32,
+}
+
+impl Default for ScoreConfig {
+    fn default() -> ScoreConfig {
+        ScoreConfig {
+            max_health_weight: 1.,
+            total_health_weight: 1.,
+            points_weight: 0.,
+            victory_weight: 3000.,
+            snowball_weight: 100.,
+            bomb_weight: 100.,
+        }
+    }
+}
+
 // TODO: Cache results from last round based on player / opponent move and worm positions
-pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duration) -> Command {
+pub fn choose_move(
+    state: &GameBoard,
+    config: &ScoreConfig,
+    start_time: PreciseTime,
+    max_time: Duration,
+) -> Command {
     let mut root_node = Node {
         score_sum: ScoreSum::new(),
         player_score_sums: [HashMap::new(), HashMap::new()],
@@ -17,7 +51,7 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio
     };
 
     while start_time.to(PreciseTime::now()) < max_time {
-        let _ = expand_tree(&mut root_node, state.clone());
+        let _ = expand_tree(&mut root_node, state.clone(), config);
     }
 
     eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
@@ -33,6 +67,25 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio
     best_player_move(&root_node)
 }
 
+pub fn choose_move_with_normalized_perf(
+    state: &GameBoard,
+    config: &ScoreConfig,
+    iterations: usize,
+) -> Command {
+    let mut root_node = Node {
+        score_sum: ScoreSum::new(),
+        player_score_sums: [HashMap::new(), HashMap::new()],
+        unexplored: move_combos(state),
+        children: HashMap::new(),
+    };
+
+    for _ in 0..iterations {
+        let _ = expand_tree(&mut root_node, state.clone(), config);
+    }
+
+    best_player_move(&root_node)
+}
+
 pub struct Node {
     score_sum: ScoreSum,
     player_score_sums: [HashMap<Command, ScoreSum>; 2],
@@ -99,13 +152,13 @@ impl AddAssign<Score> for ScoreSum {
     }
 }
 
-fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score {
+fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> Score {
     if state.outcome != SimulationOutcome::Continue {
-        score(&state)
+        score(&state, config)
     } else if let Some(commands) = node.unexplored.pop() {
         // TODO: Explore preemptively doing the rollout?
         state.simulate(commands);
-        let score = score(&state);
+        let score = score(&state, config);
         let unexplored = if state.outcome == SimulationOutcome::Continue {
             move_combos(&state)
         } else {
@@ -171,6 +224,7 @@ fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score {
                 .get_mut(&commands)
                 .expect("The existing node hasn't been tried yet"),
             state,
+            config,
         );
         update(node, commands, score);
         score
@@ -201,7 +255,7 @@ fn best_player_move(node: &Node) -> Command {
         .unwrap_or_else(|| Command::new(Action::DoNothing))
 }
 
-fn score(state: &GameBoard) -> Score {
+fn score(state: &GameBoard, config: &ScoreConfig) -> Score {
     let max_health =
         (state.players[0].max_worm_health() - state.players[1].max_worm_health()) as f32;
     let total_health = (state.players[0].health() - state.players[1].health()) as f32;
@@ -217,27 +271,14 @@ fn score(state: &GameBoard) -> Score {
     let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32;
     let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32;
 
-    // TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other?
-    // What about:
-    // - Creating a list (mins and maxes)
-    // - Keep adding a new guess, run against all, and sort the list by fitness.
-    // - Repeat until list has many values
-    // - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half?
-    const MAX_HEALTH_WEIGHT: f32 = 1.;
-    const TOTAL_HEALTH_WEIGHT: f32 = 1.;
-    const POINTS_WEIGHT: f32 = 0.;
-    const VICTORY_WEIGHT: f32 = 3000.;
-    const SNOWBALL_WEIGHT: f32 = 100.;
-    const BOMB_WEIGHT: f32 = 100.;
-
     // TODO: Try adding new features here. Something about board position?
     Score {
-        val: max_health * MAX_HEALTH_WEIGHT
-            + total_health * TOTAL_HEALTH_WEIGHT
-            + points * POINTS_WEIGHT
-            + victory * VICTORY_WEIGHT
-            + snowballs * SNOWBALL_WEIGHT / time_to_end
-            + bombs * BOMB_WEIGHT / time_to_end,
+        val: max_health * config.max_health_weight
+            + total_health * config.total_health_weight
+            + points * config.points_weight
+            + victory * config.victory_weight
+            + snowballs * config.snowball_weight / time_to_end
+            + bombs * config.bomb_weight / time_to_end,
     }
 }
author	Justin Worthe <justin@worthe-it.co.za>	2019-08-10 16:46:26 +0200
committer	Justin Worthe <justin@worthe-it.co.za>	2019-08-10 16:46:26 +0200
commit	d5272bb0f9d9df63e05c62b30ed43d6387437251 (patch)
tree	e8723091587221100db7d1936423d4d143f58d25
parent	1d54d0825505b670ee8cc5f370088ae1a6ed3b5f (diff)