From d5272bb0f9d9df63e05c62b30ed43d6387437251 Mon Sep 17 00:00:00 2001 From: Justin Worthe Date: Sat, 10 Aug 2019 16:46:26 +0200 Subject: Passing score weightings in, so they can be more configurable --- src/bin/benchmark.rs | 4 +-- src/main.rs | 8 +++-- src/strategy.rs | 2 +- src/strategy/minimax.rs | 91 +++++++++++++++++++++++++++++++++++-------------- 4 files changed, 74 insertions(+), 31 deletions(-) diff --git a/src/bin/benchmark.rs b/src/bin/benchmark.rs index 9a62aed..84e869e 100644 --- a/src/bin/benchmark.rs +++ b/src/bin/benchmark.rs @@ -4,7 +4,7 @@ use time::{Duration, PreciseTime}; use steam_powered_wyrm::game; use steam_powered_wyrm::json; -use steam_powered_wyrm::strategy::choose_move; +use steam_powered_wyrm::strategy::{choose_move, ScoreConfig}; fn main() { let max_time = Duration::milliseconds(19950); @@ -13,7 +13,7 @@ fn main() { match json::read_state_from_json_file(&Path::new(&format!("./tests/example-state.json"))) { Ok(json_state) => { let new_board = game::GameBoard::new(json_state); - let _ = choose_move(&new_board, start_time, max_time); + let _ = choose_move(&new_board, &ScoreConfig::default(), start_time, max_time); } Err(e) => { eprintln!("WARN: State file could not be parsed: {}", e); diff --git a/src/main.rs b/src/main.rs index 6f3fba5..d2ec145 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,10 +7,12 @@ use time::{Duration, PreciseTime}; use steam_powered_wyrm::command::{Action, Command}; use steam_powered_wyrm::game; use steam_powered_wyrm::json; -use steam_powered_wyrm::strategy::choose_move; +use steam_powered_wyrm::strategy::{choose_move, ScoreConfig}; fn main() { let max_time = Duration::milliseconds(900); + let config = ScoreConfig::default(); + let mut game_board = None; for line in stdin().lock().lines() { let start_time = PreciseTime::now(); @@ -24,13 +26,13 @@ fn main() { Ok(json_state) => match &mut game_board { None => { let new_board = game::GameBoard::new(json_state); - let command = choose_move(&new_board, start_time, max_time); + let command = choose_move(&new_board, &config, start_time, max_time); game_board = Some(new_board); command } Some(game_board) => { game_board.update(json_state); - choose_move(&game_board, start_time, max_time) + choose_move(&game_board, &config, start_time, max_time) } }, Err(e) => { diff --git a/src/strategy.rs b/src/strategy.rs index b6069a1..9c92ba5 100644 --- a/src/strategy.rs +++ b/src/strategy.rs @@ -2,4 +2,4 @@ //pub use mcts::{choose_move, Node}; mod minimax; -pub use minimax::{choose_move, Node}; +pub use minimax::{choose_move, Node, ScoreConfig}; diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs index 6c07b07..1d833cb 100644 --- a/src/strategy/minimax.rs +++ b/src/strategy/minimax.rs @@ -7,8 +7,42 @@ use std::collections::HashMap; use std::ops::*; use time::{Duration, PreciseTime}; +// TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other? +// What about: +// - Creating a list (mins and maxes) +// - Keep adding a new guess, run against all, and sort the list by fitness. +// - Repeat until list has many values +// - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half? +#[derive(Debug, Clone)] +pub struct ScoreConfig { + max_health_weight: f32, + total_health_weight: f32, + points_weight: f32, + victory_weight: f32, + snowball_weight: f32, + bomb_weight: f32, +} + +impl Default for ScoreConfig { + fn default() -> ScoreConfig { + ScoreConfig { + max_health_weight: 1., + total_health_weight: 1., + points_weight: 0., + victory_weight: 3000., + snowball_weight: 100., + bomb_weight: 100., + } + } +} + // TODO: Cache results from last round based on player / opponent move and worm positions -pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duration) -> Command { +pub fn choose_move( + state: &GameBoard, + config: &ScoreConfig, + start_time: PreciseTime, + max_time: Duration, +) -> Command { let mut root_node = Node { score_sum: ScoreSum::new(), player_score_sums: [HashMap::new(), HashMap::new()], @@ -17,7 +51,7 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio }; while start_time.to(PreciseTime::now()) < max_time { - let _ = expand_tree(&mut root_node, state.clone()); + let _ = expand_tree(&mut root_node, state.clone(), config); } eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); @@ -33,6 +67,25 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio best_player_move(&root_node) } +pub fn choose_move_with_normalized_perf( + state: &GameBoard, + config: &ScoreConfig, + iterations: usize, +) -> Command { + let mut root_node = Node { + score_sum: ScoreSum::new(), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored: move_combos(state), + children: HashMap::new(), + }; + + for _ in 0..iterations { + let _ = expand_tree(&mut root_node, state.clone(), config); + } + + best_player_move(&root_node) +} + pub struct Node { score_sum: ScoreSum, player_score_sums: [HashMap; 2], @@ -99,13 +152,13 @@ impl AddAssign for ScoreSum { } } -fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score { +fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> Score { if state.outcome != SimulationOutcome::Continue { - score(&state) + score(&state, config) } else if let Some(commands) = node.unexplored.pop() { // TODO: Explore preemptively doing the rollout? state.simulate(commands); - let score = score(&state); + let score = score(&state, config); let unexplored = if state.outcome == SimulationOutcome::Continue { move_combos(&state) } else { @@ -171,6 +224,7 @@ fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score { .get_mut(&commands) .expect("The existing node hasn't been tried yet"), state, + config, ); update(node, commands, score); score @@ -201,7 +255,7 @@ fn best_player_move(node: &Node) -> Command { .unwrap_or_else(|| Command::new(Action::DoNothing)) } -fn score(state: &GameBoard) -> Score { +fn score(state: &GameBoard, config: &ScoreConfig) -> Score { let max_health = (state.players[0].max_worm_health() - state.players[1].max_worm_health()) as f32; let total_health = (state.players[0].health() - state.players[1].health()) as f32; @@ -217,27 +271,14 @@ fn score(state: &GameBoard) -> Score { let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32; let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32; - // TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other? - // What about: - // - Creating a list (mins and maxes) - // - Keep adding a new guess, run against all, and sort the list by fitness. - // - Repeat until list has many values - // - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half? - const MAX_HEALTH_WEIGHT: f32 = 1.; - const TOTAL_HEALTH_WEIGHT: f32 = 1.; - const POINTS_WEIGHT: f32 = 0.; - const VICTORY_WEIGHT: f32 = 3000.; - const SNOWBALL_WEIGHT: f32 = 100.; - const BOMB_WEIGHT: f32 = 100.; - // TODO: Try adding new features here. Something about board position? Score { - val: max_health * MAX_HEALTH_WEIGHT - + total_health * TOTAL_HEALTH_WEIGHT - + points * POINTS_WEIGHT - + victory * VICTORY_WEIGHT - + snowballs * SNOWBALL_WEIGHT / time_to_end - + bombs * BOMB_WEIGHT / time_to_end, + val: max_health * config.max_health_weight + + total_health * config.total_health_weight + + points * config.points_weight + + victory * config.victory_weight + + snowballs * config.snowball_weight / time_to_end + + bombs * config.bomb_weight / time_to_end, } } -- cgit v1.2.3