From dcbd04dfdc6dd6dac88020d3a51f23fa5905c356 Mon Sep 17 00:00:00 2001 From: Justin Worthe Date: Tue, 14 May 2019 00:45:49 +0200 Subject: Filled in the rest of the MCTS Problem: The current random things isn't actually finding any victorious end states. This game easily meanders if it's played without purpose. --- src/bin/benchmark.rs | 22 +++++++++++ src/game.rs | 91 ++++++++++++++++++++++++++++++++++++++-------- src/geometry.rs | 2 - src/geometry/rect.rs | 19 ---------- src/json.rs | 4 +- src/strategy.rs | 101 +++++++++++++++++++++++++++++++++++++-------------- 6 files changed, 173 insertions(+), 66 deletions(-) create mode 100644 src/bin/benchmark.rs delete mode 100644 src/geometry/rect.rs (limited to 'src') diff --git a/src/bin/benchmark.rs b/src/bin/benchmark.rs new file mode 100644 index 0000000..794ba4e --- /dev/null +++ b/src/bin/benchmark.rs @@ -0,0 +1,22 @@ +use std::path::Path; + +use time::{Duration, PreciseTime}; + +use steam_powered_wyrm::strategy::choose_move; +use steam_powered_wyrm::json; +use steam_powered_wyrm::game; + +fn main() { + let max_time = Duration::milliseconds(950); + let start_time = PreciseTime::now(); + + match json::read_state_from_json_file(&Path::new(&format!("./tests/example-state.json"))) { + Ok(json_state) => { + let new_board = game::GameBoard::new(json_state); + let _ = choose_move(&new_board, &start_time, max_time); + }, + Err(e) => { + eprintln!("WARN: State file could not be parsed: {}", e); + } + }; +} diff --git a/src/game.rs b/src/game.rs index bf014f1..3a7cd92 100644 --- a/src/game.rs +++ b/src/game.rs @@ -5,8 +5,10 @@ use crate::constants::*; use arrayvec::ArrayVec; -#[derive(Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct GameBoard { + pub round: u16, + pub max_rounds: u16, pub players: [Player; 2], pub powerups: ArrayVec<[Powerup; 2]>, pub map: Map, @@ -33,7 +35,7 @@ pub enum Powerup { Health(Point2d, i32) } -#[derive(Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct Map { pub cells: [u64; MAP_U64S] } @@ -82,6 +84,8 @@ impl GameBoard { } GameBoard { + round: json.current_round, + max_rounds: json.max_rounds, players: [player, opponent], powerups: json.map.iter().flatten().filter_map(|c| { c.powerup.clone().map(|p| Powerup::Health(Point2d::new(c.x, c.y), p.value)) @@ -91,14 +95,52 @@ impl GameBoard { } } - pub fn update(&mut self, _json: json::State) { - // TODO - // What can change? - // - Worm health (and dead worms die) - // - Active worms += 1 - // - The worms may move - // - The powerups may be taken - // - The map cells may change from dirt to not dirt + pub fn update(&mut self, json: json::State) { + for w in json.my_player.worms { + if let Some(worm) = self.players[0].find_worm_mut(w.id) { + worm.health = w.health; + worm.position = Point2d::new(w.position.x, w.position.y); + } + } + for w in json.opponents.iter().flat_map(|o| &o.worms) { + if let Some(worm) = self.players[1].find_worm_mut(w.id) { + worm.health = w.health; + worm.position = Point2d::new(w.position.x, w.position.y); + } + } + + self.powerups = json.map.iter().flatten().filter_map(|c| { + c.powerup.clone().map(|p| Powerup::Health(Point2d::new(c.x, c.y), p.value)) + }).collect(); + + for cell in json.map.iter().flatten() { + if cell.cell_type == json::CellType::Air { + self.map.clear(Point2d::new(cell.x, cell.y)) + } + } + + self.round += 1; + debug_assert_eq!(json.current_round, self.round); + + // Remove dead worms and update active worm + for player in &mut self.players { + for worm_index in (0..player.worms.len()).rev() { + if player.worms[worm_index].health <= 0 { + player.worms.remove(worm_index); + if player.active_worm >= worm_index { + if player.active_worm > 0 { + player.active_worm -= 1; + } else { + player.active_worm = player.worms.len()-1; + } + } + } + } + // Update the active worm + if player.worms.len() > 0 { + player.active_worm = (player.active_worm + 1).checked_rem(player.worms.len()).unwrap_or(0); + } + } } pub fn simulate(&mut self, moves: [Command; 2]) -> SimulationOutcome { @@ -131,6 +173,14 @@ impl GameBoard { worm.position.x = x; worm.position.y = y; + + self.powerups.retain(|p| match p { + Powerup::Health(point, size) if *point == worm.position => { + worm.health += *size; + false + }, + _ => true + }); } } } @@ -200,13 +250,18 @@ impl GameBoard { } } // Update the active worm - player.active_worm = (player.active_worm + 1) % player.worms.len(); + if player.worms.len() > 0 { + player.active_worm = (player.active_worm + 1).checked_rem(player.worms.len()).unwrap_or(0); + } } - self.outcome = match (self.players[0].worms.len(), self.players[1].worms.len()) { - (0, 0) => SimulationOutcome::Draw, - (_, 0) => SimulationOutcome::PlayerWon(0), - (0, _) => SimulationOutcome::PlayerWon(1), + self.round += 1; + + self.outcome = match (self.players[0].worms.len(), self.players[1].worms.len(), self.round > self.max_rounds) { + (0, 0, _) => SimulationOutcome::Draw, + (_, 0, _) => SimulationOutcome::PlayerWon(0), + (0, _, _) => SimulationOutcome::PlayerWon(1), + (_, _, true) => SimulationOutcome::Draw, _ => SimulationOutcome::Continue }; @@ -248,6 +303,12 @@ impl Player { .find(|w| w.id == id) } + pub fn find_worm_mut(&mut self, id: i32) -> Option<&mut Worm> { + self.worms + .iter_mut() + .find(|w| w.id == id) + } + pub fn active_worm(&self) -> &Worm { &self.worms[self.active_worm] } diff --git a/src/geometry.rs b/src/geometry.rs index dc8b096..1bcdace 100644 --- a/src/geometry.rs +++ b/src/geometry.rs @@ -2,7 +2,5 @@ mod vec; pub use self::vec::*; mod point; pub use self::point::*; -mod rect; -pub use self::rect::*; mod direction; pub use self::direction::*; diff --git a/src/geometry/rect.rs b/src/geometry/rect.rs deleted file mode 100644 index e2b1882..0000000 --- a/src/geometry/rect.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::geometry::Point2d; -use crate::geometry::Vec2d; - -#[derive(Debug, Default, Clone, Copy, PartialEq)] -pub struct Rectangle { - pub pos: Point2d, - pub size: Vec2d, -} - -impl Rectangle { - pub fn new(x: T, y: T, w: T, h: T) -> Rectangle { - Rectangle { - pos: Point2d::new(x, y), - size: Vec2d::new(w, h), - } - } - - // TODO: Constructor to build a rectangle centered at an x,y -} diff --git a/src/json.rs b/src/json.rs index 979252e..c796d2b 100644 --- a/src/json.rs +++ b/src/json.rs @@ -18,8 +18,8 @@ pub fn read_state_from_json_file(filename: &Path) -> Result> { #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] #[serde(rename_all = "camelCase")] pub struct State { - pub current_round: u32, - pub max_rounds: u32, + pub current_round: u16, + pub max_rounds: u16, pub map_size: u8, pub current_worm_id: i32, pub consecutive_do_nothing_count: u32, diff --git a/src/strategy.rs b/src/strategy.rs index db4409e..d6f92a6 100644 --- a/src/strategy.rs +++ b/src/strategy.rs @@ -2,10 +2,14 @@ use crate::command::Command; use crate::game::{GameBoard, SimulationOutcome}; use crate::geometry::*; +use std::cmp; use std::ops::*; -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap}; use time::{Duration, PreciseTime}; +use rand; +use rand::prelude::*; + pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Duration) -> Command { let mut root_node = Node { state: state.clone(), @@ -19,6 +23,11 @@ pub fn choose_move(state: &GameBoard, start_time: &PreciseTime, max_time: Durati let _ = mcts(&mut root_node); } + eprintln!("Number of simulations: {}", root_node.score_sum.visit_count); + for (command, score_sum) in &root_node.player_score_sums[0] { + eprintln!("{} = {} ({} visits)", command, score_sum.avg().val, score_sum.visit_count); + } + best_player_move(&root_node) } @@ -30,41 +39,42 @@ struct Node { children: HashMap<[Command; 2], Node>, } -impl Node { - fn score(&self) -> Score { - self.score_sum.avg() - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] struct Score { - val: i32 + val: f32 } impl AddAssign for Score { fn add_assign(&mut self, other: Self) { - self.val += other.val; + self.val = self.val + other.val; } } -impl Div for Score { +impl Div for Score { type Output = Self; - fn div(self, other: i32) -> Self { + fn div(self, other: u32) -> Self { Score { - val: self.val / other + val: self.val / other as f32 } } } +impl cmp::Eq for Score {} +impl cmp::Ord for Score { + fn cmp(&self, other: &Score) -> cmp::Ordering { + self.val.partial_cmp(&other.val).unwrap_or(cmp::Ordering::Equal) + } +} + struct ScoreSum { sum: Score, - visit_count: i32 + visit_count: u32 } impl ScoreSum { fn new() -> ScoreSum { ScoreSum { - sum: Score { val: 0 }, + sum: Score { val: 0. }, visit_count: 0 } } @@ -82,7 +92,7 @@ impl ScoreSum { impl AddAssign for ScoreSum { fn add_assign(&mut self, other: Score) { self.sum += other; - self.visit_count += 1; + self.visit_count = self.visit_count.saturating_add(1); } } @@ -108,43 +118,78 @@ fn mcts(node: &mut Node) -> Score { score } else { let commands = choose_existing(node); - let score = mcts(node.children.get_mut(&commands).unwrap()); + let score = mcts(node.children.get_mut(&commands).expect("The existing node hasn't been tried yet")); update(node, commands, score); score } } fn best_player_move(node: &Node) -> Command { - // TODO, use player_score_sums? node - .children + .player_score_sums[0] .iter() - .max_by_key(|(_k, v)| v.score()) - .map(|(k, _v)| k[0]) + .max_by_key(|(_command, score_sum)| { + score_sum.avg() + }) + .map(|(command, _score_sum)| *command) .unwrap_or(Command::DoNothing) } fn score(state: &GameBoard) -> Score { + let mutiplier = match state.outcome { + SimulationOutcome::PlayerWon(_) => 100., + _ => 1. + }; Score { - val: state.players[0].health() - state.players[1].health() + val: mutiplier * (state.players[0].health() - state.players[1].health()) as f32 } } fn rollout(state: &GameBoard) -> Score { - // TODO - Score { val: 0 } + let mut s = state.clone(); + let mut rng = rand::thread_rng(); + while s.outcome == SimulationOutcome::Continue { + let player_moves = valid_moves(&s, 0); + let opponent_moves = valid_moves(&s, 1); + + s.simulate([ + player_moves.choose(&mut rng).cloned().unwrap_or(Command::DoNothing), + opponent_moves.choose(&mut rng).cloned().unwrap_or(Command::DoNothing) + ]); + } + + score(&s) } fn choose_existing(node: &Node) -> [Command; 2] { - // TODO [ - Command::DoNothing, - Command::DoNothing + choose_one_existing(node, 0), + choose_one_existing(node, 1) ] } +fn choose_one_existing(node: &Node, player_index: usize) -> Command { + let ln_n = (node.score_sum.visit_count as f32).ln(); + let c = 100.; + let multiplier = if player_index == 0 { + 1. + } else { + -1. + }; + node.player_score_sums[player_index] + .iter() + .max_by_key(|(_command, score_sum)| { + (multiplier * (score_sum.avg().val + c * (ln_n / score_sum.visit_count as f32).sqrt())) as i32 + }) + .map(|(command, _score_sum)| *command) + .unwrap_or(Command::DoNothing) +} + + fn update(node: &mut Node, commands: [Command; 2], score: Score) { - // TODO + *node.player_score_sums[0].entry(commands[0]).or_insert(ScoreSum::new()) += score; + *node.player_score_sums[1].entry(commands[1]).or_insert(ScoreSum::new()) += score; + node.score_sum += score; } fn valid_move_combo(state: &GameBoard) -> Vec<[Command; 2]> { -- cgit v1.2.3