From 33b9c9e05a3693d944342753288fda824f0da13c Mon Sep 17 00:00:00 2001 From: Justin Worthe Date: Sun, 11 Aug 2019 14:18:49 +0200 Subject: Pass explore / exploit tuning param as another scoring config --- src/strategy/minimax.rs | 48 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs index 55abcec..2c52127 100644 --- a/src/strategy/minimax.rs +++ b/src/strategy/minimax.rs @@ -21,6 +21,7 @@ pub struct ScoreConfig { victory_weight: f32, snowball_weight: f32, bomb_weight: f32, + explore_exploit_weight: f32, } impl Default for ScoreConfig { @@ -32,6 +33,7 @@ impl Default for ScoreConfig { victory_weight: 3000., snowball_weight: 100., bomb_weight: 100., + explore_exploit_weight: 10., } } } @@ -198,7 +200,7 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S score } else { - let commands = choose_existing(node); + let commands = choose_existing(node, config); state.simulate(commands); let score = expand_tree( node.children @@ -265,21 +267,45 @@ fn score(state: &GameBoard, config: &ScoreConfig) -> Score { } } -fn choose_existing(node: &Node) -> [Command; 2] { - [choose_one_existing(node, 0), choose_one_existing(node, 1)] +fn choose_existing(node: &Node, config: &ScoreConfig) -> [Command; 2] { + [ + choose_one_existing(node, 0, config), + choose_one_existing(node, 1, config), + ] } -fn choose_one_existing(node: &Node, player_index: usize) -> Command { +fn choose_one_existing(node: &Node, player_index: usize, config: &ScoreConfig) -> Command { let ln_n = (node.score_sum.visit_count as f32).ln(); - let c = 100.; let multiplier = if player_index == 0 { 1. } else { -1. }; - node.player_score_sums[player_index] - .iter() - .max_by_key(|(_command, score_sum)| { - (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) - as i32 + let mut command_confidences = + node.player_score_sums[player_index] + .iter() + .map(|(command, score_sum)| { + ( + command, + (score_sum.avg() * multiplier).val + + config.explore_exploit_weight + * (ln_n / score_sum.visit_count as f32).sqrt(), + ) + }); + + command_confidences + .next() + .map(|first| { + command_confidences + .fold( + first, + |(acc_command, acc_confidence), (next_command, next_confidence)| { + if acc_confidence > next_confidence { + (acc_command, acc_confidence) + } else { + (next_command, next_confidence) + } + }, + ) + .0 + .clone() }) - .map(|(command, _score_sum)| *command) .unwrap_or_else(|| Command::new(Action::DoNothing)) } -- cgit v1.2.3