From d5272bb0f9d9df63e05c62b30ed43d6387437251 Mon Sep 17 00:00:00 2001 From: Justin Worthe Date: Sat, 10 Aug 2019 16:46:26 +0200 Subject: Passing score weightings in, so they can be more configurable --- src/strategy/minimax.rs | 91 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 25 deletions(-) (limited to 'src/strategy/minimax.rs') diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs index 6c07b07..1d833cb 100644 --- a/src/strategy/minimax.rs +++ b/src/strategy/minimax.rs @@ -7,8 +7,42 @@ use std::collections::HashMap; use std::ops::*; use time::{Duration, PreciseTime}; +// TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other? +// What about: +// - Creating a list (mins and maxes) +// - Keep adding a new guess, run against all, and sort the list by fitness. +// - Repeat until list has many values +// - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half? +#[derive(Debug, Clone)] +pub struct ScoreConfig { + max_health_weight: f32, + total_health_weight: f32, + points_weight: f32, + victory_weight: f32, + snowball_weight: f32, + bomb_weight: f32, +} + +impl Default for ScoreConfig { + fn default() -> ScoreConfig { + ScoreConfig { + max_health_weight: 1., + total_health_weight: 1., + points_weight: 0., + victory_weight: 3000., + snowball_weight: 100., + bomb_weight: 100., + } + } +} + // TODO: Cache results from last round based on player / opponent move and worm positions -pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duration) -> Command { +pub fn choose_move( + state: &GameBoard, + config: &ScoreConfig, + start_time: PreciseTime, + max_time: Duration, +) -> Command { let mut root_node = Node { score_sum: ScoreSum::new(), player_score_sums: [HashMap::new(), HashMap::new()], @@ -17,7 +51,7 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio }; while start_time.to(PreciseTime::now()) < max_time { - let _ = expand_tree(&mut root_node, state.clone()); + let _ = expand_tree(&mut root_node, state.clone(), config); } eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); @@ -33,6 +67,25 @@ pub fn choose_move(state: &GameBoard, start_time: PreciseTime, max_time: Duratio best_player_move(&root_node) } +pub fn choose_move_with_normalized_perf( + state: &GameBoard, + config: &ScoreConfig, + iterations: usize, +) -> Command { + let mut root_node = Node { + score_sum: ScoreSum::new(), + player_score_sums: [HashMap::new(), HashMap::new()], + unexplored: move_combos(state), + children: HashMap::new(), + }; + + for _ in 0..iterations { + let _ = expand_tree(&mut root_node, state.clone(), config); + } + + best_player_move(&root_node) +} + pub struct Node { score_sum: ScoreSum, player_score_sums: [HashMap; 2], @@ -99,13 +152,13 @@ impl AddAssign for ScoreSum { } } -fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score { +fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> Score { if state.outcome != SimulationOutcome::Continue { - score(&state) + score(&state, config) } else if let Some(commands) = node.unexplored.pop() { // TODO: Explore preemptively doing the rollout? state.simulate(commands); - let score = score(&state); + let score = score(&state, config); let unexplored = if state.outcome == SimulationOutcome::Continue { move_combos(&state) } else { @@ -171,6 +224,7 @@ fn expand_tree(node: &mut Node, mut state: GameBoard) -> Score { .get_mut(&commands) .expect("The existing node hasn't been tried yet"), state, + config, ); update(node, commands, score); score @@ -201,7 +255,7 @@ fn best_player_move(node: &Node) -> Command { .unwrap_or_else(|| Command::new(Action::DoNothing)) } -fn score(state: &GameBoard) -> Score { +fn score(state: &GameBoard, config: &ScoreConfig) -> Score { let max_health = (state.players[0].max_worm_health() - state.players[1].max_worm_health()) as f32; let total_health = (state.players[0].health() - state.players[1].health()) as f32; @@ -217,27 +271,14 @@ fn score(state: &GameBoard) -> Score { let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32; let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32; - // TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other? - // What about: - // - Creating a list (mins and maxes) - // - Keep adding a new guess, run against all, and sort the list by fitness. - // - Repeat until list has many values - // - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half? - const MAX_HEALTH_WEIGHT: f32 = 1.; - const TOTAL_HEALTH_WEIGHT: f32 = 1.; - const POINTS_WEIGHT: f32 = 0.; - const VICTORY_WEIGHT: f32 = 3000.; - const SNOWBALL_WEIGHT: f32 = 100.; - const BOMB_WEIGHT: f32 = 100.; - // TODO: Try adding new features here. Something about board position? Score { - val: max_health * MAX_HEALTH_WEIGHT - + total_health * TOTAL_HEALTH_WEIGHT - + points * POINTS_WEIGHT - + victory * VICTORY_WEIGHT - + snowballs * SNOWBALL_WEIGHT / time_to_end - + bombs * BOMB_WEIGHT / time_to_end, + val: max_health * config.max_health_weight + + total_health * config.total_health_weight + + points * config.points_weight + + victory * config.victory_weight + + snowballs * config.snowball_weight / time_to_end + + bombs * config.bomb_weight / time_to_end, } } -- cgit v1.2.3