summaryrefslogtreecommitdiff
path: root/src/strategy
diff options
context:
space:
mode:
authorJustin Worthe <justin@worthe-it.co.za>2019-08-13 22:00:10 +0200
committerJustin Worthe <justin@worthe-it.co.za>2019-08-13 22:00:10 +0200
commitdabedf442d65fcaec36d0a30ed3ed2327b39a44d (patch)
treea9bbcb4e97dcb98050fd872997ace284f2f764e0 /src/strategy
parent33b9c9e05a3693d944342753288fda824f0da13c (diff)
Trimming down on redundant code and tweaking perf
Diffstat (limited to 'src/strategy')
-rw-r--r--src/strategy/mcts.rs230
-rw-r--r--src/strategy/minimax.rs104
2 files changed, 18 insertions, 316 deletions
diff --git a/src/strategy/mcts.rs b/src/strategy/mcts.rs
deleted file mode 100644
index c122fa1..0000000
--- a/src/strategy/mcts.rs
+++ /dev/null
@@ -1,230 +0,0 @@
-use crate::command::{Action, Command};
-use crate::game::{GameBoard, SimulationOutcome};
-
-use std::cmp;
-use std::collections::HashMap;
-use std::ops::*;
-use time::{Duration, PreciseTime};
-
-pub fn choose_move(
- state: &GameBoard,
- previous_root: Option<Node>,
- start_time: PreciseTime,
- max_time: Duration,
-) -> (Command, Node) {
- let mut root_node = match previous_root {
- None => Node {
- state: state.clone(),
- score_sum: ScoreSum::new(),
- player_score_sums: [HashMap::new(), HashMap::new()],
- unexplored: mcts_move_combo(state),
- children: HashMap::new(),
- },
- Some(mut node) => node
- .children
- .drain()
- .map(|(_k, n)| n)
- .find(|n| n.state == *state)
- .unwrap_or_else(|| {
- eprintln!("Previous round did not appear in the cache");
- Node {
- state: state.clone(),
- score_sum: ScoreSum::new(),
- player_score_sums: [HashMap::new(), HashMap::new()],
- unexplored: mcts_move_combo(state),
- children: HashMap::new(),
- }
- }),
- };
-
- while start_time.to(PreciseTime::now()) < max_time {
- let _ = mcts(&mut root_node);
- }
-
- eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
- for (command, score_sum) in &root_node.player_score_sums[0] {
- eprintln!(
- "{} = {} ({} visits)",
- command,
- score_sum.avg().val,
- score_sum.visit_count
- );
- }
-
- let chosen_command = best_player_move(&root_node);
-
- root_node
- .children
- .retain(|[c1, _], _| *c1 == chosen_command);
-
- (chosen_command, root_node)
-}
-
-pub struct Node {
- state: GameBoard,
- score_sum: ScoreSum,
- player_score_sums: [HashMap<Command, ScoreSum>; 2],
- unexplored: Vec<[Command; 2]>,
- children: HashMap<[Command; 2], Node>,
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
-struct Score {
- val: f32,
-}
-
-impl AddAssign for Score {
- fn add_assign(&mut self, other: Self) {
- self.val = self.val + other.val;
- }
-}
-
-impl Div<u32> for Score {
- type Output = Self;
- fn div(self, other: u32) -> Self {
- Score {
- val: self.val / other as f32,
- }
- }
-}
-
-impl cmp::Eq for Score {}
-impl cmp::Ord for Score {
- fn cmp(&self, other: &Score) -> cmp::Ordering {
- self.val
- .partial_cmp(&other.val)
- .unwrap_or(cmp::Ordering::Equal)
- }
-}
-
-struct ScoreSum {
- sum: Score,
- visit_count: u32,
-}
-
-impl ScoreSum {
- fn new() -> ScoreSum {
- ScoreSum {
- sum: Score { val: 0. },
- visit_count: 0,
- }
- }
- fn with_initial(score: Score) -> ScoreSum {
- ScoreSum {
- sum: score,
- visit_count: 1,
- }
- }
- fn avg(&self) -> Score {
- self.sum / self.visit_count
- }
-}
-
-impl AddAssign<Score> for ScoreSum {
- fn add_assign(&mut self, other: Score) {
- self.sum += other;
- self.visit_count = self.visit_count.saturating_add(1);
- }
-}
-
-fn mcts(node: &mut Node) -> Score {
- if node.state.outcome != SimulationOutcome::Continue {
- score(&node.state)
- } else if let Some(commands) = node.unexplored.pop() {
- let mut new_state = node.state.clone();
- new_state.simulate(commands);
- let score = rollout(&new_state);
- let unexplored = if new_state.outcome == SimulationOutcome::Continue {
- mcts_move_combo(&new_state)
- } else {
- Vec::new()
- };
-
- let new_node = Node {
- state: new_state,
- score_sum: ScoreSum::with_initial(score),
- player_score_sums: [HashMap::new(), HashMap::new()],
- unexplored,
- children: HashMap::new(),
- };
- node.children.insert(commands, new_node);
-
- update(node, commands, score);
- score
- } else {
- let commands = choose_existing(node);
- let score = mcts(
- node.children
- .get_mut(&commands)
- .expect("The existing node hasn't been tried yet"),
- );
- update(node, commands, score);
- score
- }
-}
-
-fn mcts_move_combo(state: &GameBoard) -> Vec<[Command; 2]> {
- let player_moves = state.valid_moves(0);
- let opponent_moves = state.valid_moves(1);
- debug_assert!(!player_moves.is_empty(), "No player moves");
- debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
-
- let mut result = Vec::with_capacity(player_moves.len() * opponent_moves.len());
- for p in &player_moves {
- for o in &opponent_moves {
- result.push([*p, *o]);
- }
- }
-
- result
-}
-
-fn best_player_move(node: &Node) -> Command {
- node.player_score_sums[0]
- .iter()
- .max_by_key(|(_command, score_sum)| score_sum.avg())
- .map(|(command, _score_sum)| *command)
- .unwrap_or_else(|| Command::new(Action::DoNothing))
-}
-
-fn score(state: &GameBoard) -> Score {
- Score {
- val: match state.outcome {
- SimulationOutcome::PlayerWon(0) => 3000.,
- SimulationOutcome::PlayerWon(1) => -3000.,
- _ => (state.players[0].score() - state.players[1].score()) as f32,
- },
- }
-}
-
-fn rollout(state: &GameBoard) -> Score {
- score(state)
-}
-
-fn choose_existing(node: &Node) -> [Command; 2] {
- [choose_one_existing(node, 0), choose_one_existing(node, 1)]
-}
-
-fn choose_one_existing(node: &Node, player_index: usize) -> Command {
- let ln_n = (node.score_sum.visit_count as f32).ln();
- let c = 100.;
- let multiplier = if player_index == 0 { 1. } else { -1. };
- node.player_score_sums[player_index]
- .iter()
- .max_by_key(|(_command, score_sum)| {
- (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt()))
- as i32
- })
- .map(|(command, _score_sum)| *command)
- .unwrap_or_else(|| Command::new(Action::DoNothing))
-}
-
-fn update(node: &mut Node, commands: [Command; 2], score: Score) {
- *node.player_score_sums[0]
- .entry(commands[0])
- .or_insert_with(ScoreSum::new) += score;
- *node.player_score_sums[1]
- .entry(commands[1])
- .or_insert_with(ScoreSum::new) += score;
- node.score_sum += score;
-}
diff --git a/src/strategy/minimax.rs b/src/strategy/minimax.rs
index 2c52127..5a7bbcd 100644
--- a/src/strategy/minimax.rs
+++ b/src/strategy/minimax.rs
@@ -7,21 +7,15 @@ use std::cmp;
use std::ops::*;
use time::{Duration, PreciseTime};
-// TODO: Calibrate these weightings somehow? Some sort of generate and sort based on playing against each other?
-// What about:
-// - Creating a list (mins and maxes)
-// - Keep adding a new guess, run against all, and sort the list by fitness.
-// - Repeat until list has many values
-// - Somehow prioritize sticking new items in based on what's going well? Or maximally different? Keep dividing all the ranges in half?
#[derive(Debug, Clone)]
pub struct ScoreConfig {
- max_health_weight: f32,
- total_health_weight: f32,
- points_weight: f32,
- victory_weight: f32,
- snowball_weight: f32,
- bomb_weight: f32,
- explore_exploit_weight: f32,
+ pub max_health_weight: f32,
+ pub total_health_weight: f32,
+ pub points_weight: f32,
+ pub victory_weight: f32,
+ pub snowball_weight: f32,
+ pub bomb_weight: f32,
+ pub explore_exploit_weight: f32,
}
impl Default for ScoreConfig {
@@ -38,7 +32,6 @@ impl Default for ScoreConfig {
}
}
-// TODO: Cache results from last round based on player / opponent move and worm positions
pub fn choose_move(
state: &GameBoard,
config: &ScoreConfig,
@@ -86,15 +79,15 @@ pub fn choose_move_with_normalized_perf(
let _ = expand_tree(&mut root_node, state.clone(), config);
}
- eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
- for (command, score_sum) in &root_node.player_score_sums[player_index] {
- eprintln!(
- "{} = {} ({} visits)",
- command,
- score_sum.avg().val,
- score_sum.visit_count
- );
- }
+ // eprintln!("Number of simulations: {}", root_node.score_sum.visit_count);
+ // for (command, score_sum) in &root_node.player_score_sums[player_index] {
+ // eprintln!(
+ // "{} = {} ({} visits)",
+ // command,
+ // score_sum.avg().val,
+ // score_sum.visit_count
+ // );
+ // }
best_player_move(&root_node, player_index)
}
@@ -178,7 +171,6 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S
if state.outcome != SimulationOutcome::Continue {
score(&state, config)
} else if let Some(commands) = node.unexplored.pop() {
- // TODO: Explore preemptively doing the rollout?
state.simulate(commands);
let score = score(&state, config);
let unexplored = if state.outcome == SimulationOutcome::Continue {
@@ -196,8 +188,6 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S
node.children.insert(commands, new_node);
update(node, commands, score);
- // TODO: Prune dominated moves
-
score
} else {
let commands = choose_existing(node, config);
@@ -215,8 +205,8 @@ fn expand_tree(node: &mut Node, mut state: GameBoard, config: &ScoreConfig) -> S
}
fn move_combos(state: &GameBoard) -> Vec<[Command; 2]> {
- let player_moves = pruned_moves(state, 0);
- let opponent_moves = pruned_moves(state, 1);
+ let player_moves = state.pruned_valid_moves(0);
+ let opponent_moves = state.pruned_valid_moves(1);
debug_assert!(!player_moves.is_empty(), "No player moves");
debug_assert!(!opponent_moves.is_empty(), "No opponent moves");
@@ -255,8 +245,6 @@ fn score(state: &GameBoard, config: &ScoreConfig) -> Score {
let snowballs = state.players[0].snowballs() as f32 - state.players[1].snowballs() as f32;
let bombs = state.players[0].bombs() as f32 - state.players[1].bombs() as f32;
- // TODO: None of these attributes give any signal early on.
- // TODO: Try adding new features here. Something about board position?
Score {
val: max_health * config.max_health_weight
+ total_health * config.total_health_weight
@@ -318,59 +306,3 @@ fn update(node: &mut Node, commands: [Command; 2], score: Score) {
.or_insert_with(ScoreSum::new) += score;
node.score_sum += score;
}
-
-fn pruned_moves(state: &GameBoard, player_index: usize) -> Vec<Command> {
- let sim_with_idle_opponent = |cmd| {
- let mut idle_commands = [
- Command::new(Action::DoNothing),
- Command::new(Action::DoNothing),
- ];
- idle_commands[player_index] = cmd;
- let mut state_cpy = state.clone();
- state_cpy.simulate(idle_commands);
- state_cpy
- };
-
- let mut do_nothing_state = state.clone();
- do_nothing_state.simulate([
- Command::new(Action::DoNothing),
- Command::new(Action::DoNothing),
- ]);
-
- let opponent_index = GameBoard::opponent(player_index);
- let my_starting_health = do_nothing_state.players[player_index].health();
- let opponent_starting_health = do_nothing_state.players[opponent_index].health();
-
- state
- .valid_moves(player_index)
- .into_iter()
- .filter(|command| {
- // TODO: Some of these filters could be done with better
- // performance by running them while generating the list
- // of valid moves.
-
- // NB: These rules should pass for doing nothing, otherwise
- // we need some other mechanism for sticking in a do
- // nothing option.
-
- let idle_opponent_state = sim_with_idle_opponent(*command);
- let hurt_self = idle_opponent_state.players[player_index].health() < my_starting_health;
- let hurt_opponent =
- idle_opponent_state.players[opponent_index].health() < opponent_starting_health;
- let frozen_opponent = idle_opponent_state.players[opponent_index]
- .worms
- .iter()
- .any(|w| w.rounds_until_unfrozen == FREEZE_DURATION);
-
- let is_select = command.worm.is_some();
-
- let is_attack = command.action.is_attack();
- let is_snowball = command.action.is_snowball();
-
- !hurt_self
- && (!is_select || hurt_opponent)
- && (!is_attack || hurt_opponent)
- && (!is_snowball || frozen_opponent)
- })
- .collect()
-}