summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Justin Worthe <justin@worthe-it.co.za> 2019-08-11 14:18:49 +0200
committer Justin Worthe <justin@worthe-it.co.za> 2019-08-11 14:18:49 +0200
commit 33b9c9e05a3693d944342753288fda824f0da13c (patch)
tree d76f63c7849cd79c47ac79dafb20b715d0503c8e
parent b3d48c9924a2502ba7e93bafb0a8afcd096bec76 (diff)
Pass explore / exploit tuning param as another scoring config
-rw-r--r-- src/strategy/minimax.rs 48
1 files changed, 37 insertions, 11 deletions
diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs
index 55abcec..2c52127 100644
--- a/src/strategy/minimax.rs
+++ b/src/strategy/minimax.rs
@@ -21,6 +21,7 @@ pub struct ScoreConfig {
victory_weight: f32,
snowball_weight: f32,
bomb_weight: f32,
+ explore_exploit_weight: f32,
}
impl Default for ScoreConfig {
@@ -32,6 +33,7 @@ impl Default for ScoreConfig {
victory_weight: 3000.,
snowball_weight: 100.,
bomb_weight: 100.,
+ explore_exploit_weight: 10.,
}
}
}
@@ -198,7 +200,7 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S
score
} else {
- let commands = choose_existing(node);
+ let commands = choose_existing(node, config);
state.simulate(commands);
let score = expand_tree(
node.children
@@ -265,21 +267,45 @@ fn score(state: &GameBoard, config: &ScoreConfig) -> Score {
}
}
-fn choose_existing(node: &Node) -> [Command; 2] {
- [choose_one_existing(node, 0), choose_one_existing(node, 1)]
+fn choose_existing(node: &Node, config: &ScoreConfig) -> [Command; 2] {
+ [
+ choose_one_existing(node, 0, config),
+ choose_one_existing(node, 1, config),
+ ]
}
-fn choose_one_existing(node: &Node, player_index: usize) -> Command {
+fn choose_one_existing(node: &Node, player_index: usize, config: &ScoreConfig) -> Command {
let ln_n = (node.score_sum.visit_count as f32).ln();
- let c = 100.;
let multiplier = if player_index == 0 { 1. } else { -1. };
- node.player_score_sums[player_index]
- .iter()
- .max_by_key(|(_command, score_sum)| {
- (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
- as i32
+ let mut command_confidences =
+ node.player_score_sums[player_index]
+ .iter()
+ .map(|(command, score_sum)| {
+ (
+ command,
+ (score_sum.avg() * multiplier).val
+ + config.explore_exploit_weight
+ * (ln_n / score_sum.visit_count as f32).sqrt(),
+ )
+ });
+
+ command_confidences
+ .next()
+ .map(|first| {
+ command_confidences
+ .fold(
+ first,
+ |(acc_command, acc_confidence), (next_command, next_confidence)| {
+ if acc_confidence > next_confidence {
+ (acc_command, acc_confidence)
+ } else {
+ (next_command, next_confidence)
+ }
+ },
+ )
+ .0
+ .clone()
})
- .map(|(command, _score_sum)| *command)
.unwrap_or_else(|| Command::new(Action::DoNothing))
}