Added extra logging of the win ratios
[entelect-challenge-tower-defence.git] / src / strategy / monte_carlo.rs
1 use engine::command::*;
2 use engine::status::GameStatus;
3 use engine::bitwise_engine::{Player, BitwiseGameState};
4 use engine::constants::*;
5
6 use std::fmt;
7
8 use rand::{Rng, XorShiftRng, SeedableRng};
9
10 const MAX_MOVES: u16 = 400;
11
12 use time::{Duration, PreciseTime};
13
14 #[cfg(not(feature = "single-threaded"))]
15 use rayon::prelude::*;
16
17 //TODO Rethink / adjust these?
18 #[cfg(feature = "energy-cutoff")] pub const ENERGY_PRODUCTION_CUTOFF: u16 = 100;
19 #[cfg(feature = "energy-cutoff")] pub const ENERGY_STORAGE_CUTOFF: u16 = 100;
20
21 pub fn choose_move(state: &BitwiseGameState, start_time: PreciseTime, max_time: Duration) -> Command {
22     let mut command_scores = CommandScore::init_command_scores(state);
23
24     let command = {
25         let best_command_score = simulate_options_to_timeout(&mut command_scores, state, start_time, max_time);
26         match best_command_score {
27             Some(best_command_score) => best_command_score.command,
28             _ => Command::Nothing
29         }
30     };
31
32     #[cfg(feature = "benchmarking")]
33     {
34         let total_iterations: u32 = command_scores.iter().map(|c| c.attempts).sum();
35         println!("Iterations: {}", total_iterations);
36     }
37     #[cfg(feature = "debug-decisions")]
38     {
39         for score in command_scores {
40             println!("{}", score);
41         }
42     }
43
44     command
45 }
46
47 #[cfg(not(feature = "discard-poor-performers"))]
48 fn simulate_options_to_timeout(command_scores: &'a mut Vec<CommandScore>, settings: &GameSettings, state: &BitwiseGameState, start_time: PreciseTime, max_time: Duration) -> Option<&'a CommandScore> {
49     loop {
50         simulate_all_options_once(command_scores, settings, state);
51         if start_time.to(PreciseTime::now()) > max_time {
52             break;
53         }
54     }
55     command_scores.iter().max_by_key(|&c| c.win_ratio())
56 }
57
58 #[cfg(feature = "discard-poor-performers")]
59 fn simulate_options_to_timeout<'a>(command_scores: &'a mut Vec<CommandScore>, state: &BitwiseGameState, start_time: PreciseTime, max_time: Duration) -> Option<&'a CommandScore> {
60     use std::cmp;
61     let min_options = cmp::min(command_scores.len(), 5);
62     
63     let maxes = [max_time / 4, max_time / 2, max_time * 3 / 4, max_time];
64     for (i, &max) in maxes.iter().enumerate() {
65         let new_length = cmp::max(min_options, command_scores.len() / (2usize.pow(i as u32)));
66         let active_scores = &mut command_scores[0..new_length];
67         loop {
68             simulate_all_options_once(active_scores, state);
69             if start_time.to(PreciseTime::now()) > max {
70                 break;
71             }
72         }
73         active_scores.sort_unstable_by_key(|c| -c.win_ratio());
74     }
75     command_scores.first()
76 }
77
78 #[cfg(feature = "single-threaded")]
79 fn simulate_all_options_once(command_scores: &mut[CommandScore], state: &BitwiseGameState) {
80     command_scores.iter_mut()
81         .for_each(|score| {
82             let mut rng = XorShiftRng::from_seed(score.next_seed);
83             simulate_to_endstate(score, state, &mut rng);
84         });
85 }
86
87 #[cfg(not(feature = "single-threaded"))]
88 fn simulate_all_options_once(command_scores: &mut[CommandScore], state: &BitwiseGameState) {
89     command_scores.par_iter_mut()
90         .for_each(|score| {
91             let mut rng = XorShiftRng::from_seed(score.next_seed);
92             simulate_to_endstate(score, state, &mut rng);
93         });
94 }
95
96 fn simulate_to_endstate<R: Rng>(command_score: &mut CommandScore, state: &BitwiseGameState, rng: &mut R) {
97     let mut state_mut = state.clone();
98     
99     let opponent_first = random_move(&state_mut.opponent, rng);
100     let mut status = state_mut.simulate(command_score.command, opponent_first);
101     
102     for _ in 0..MAX_MOVES {
103         if status != GameStatus::Continue {
104             break;
105         }
106
107         let player_command = random_move(&state_mut.player, rng);
108         let opponent_command = random_move(&state_mut.opponent, rng);
109         status = state_mut.simulate(player_command, opponent_command);
110     }
111
112     let mut next_seed: [u8;16] = [0; 16];
113     rng.fill_bytes(&mut next_seed);
114     match status {
115         GameStatus::PlayerWon => command_score.add_victory(next_seed),
116         GameStatus::OpponentWon => command_score.add_defeat(next_seed),
117         GameStatus::Continue => command_score.add_stalemate(next_seed),
118         GameStatus::Draw => command_score.add_draw(next_seed)
119     }
120 }
121
122 fn random_move<R: Rng>(player: &Player, rng: &mut R) -> Command {
123     let all_buildings = sensible_buildings(player);
124     let free_positions_count = player.unoccupied_cell_count();
125     
126     let nothing_count = if all_buildings.len() > 2 && free_positions_count > 0 { 0 } else { 1 };
127     let iron_curtain_count = if player.can_build_iron_curtain() { 1 } else { 0 };
128         
129     let building_choice_index = rng.gen_range(0, all_buildings.len() + nothing_count + iron_curtain_count);
130
131     if building_choice_index < all_buildings.len() && free_positions_count > 0 {
132         let position_choice = rng.gen_range(0, free_positions_count);
133         Command::Build(
134             player.location_of_unoccupied_cell(position_choice),
135             all_buildings[building_choice_index]
136         )
137     }
138     else if iron_curtain_count > 0 && building_choice_index == all_buildings.len() {
139         Command::IronCurtain
140     } else {
141         Command::Nothing
142     }
143 }
144
145 #[derive(Debug)]
146 struct CommandScore {
147     command: Command,
148     victories: u32,
149     defeats: u32,
150     draws: u32,
151     stalemates: u32,
152     attempts: u32,
153     next_seed: [u8; 16]
154 }
155
156 impl CommandScore {
157     fn new(command: Command) -> CommandScore {
158         CommandScore {
159             command,
160             victories: 0,
161             defeats: 0,
162             draws: 0,
163             stalemates: 0,
164             attempts: 0,
165             next_seed: [0x7b, 0x6a, 0xe1, 0xf4, 0x41, 0x3c, 0xe9, 0x0f, 0x67, 0x81, 0x67, 0x99, 0x77, 0x0a, 0x6b, 0xda]
166         }
167     }
168
169     fn add_victory(&mut self, next_seed: [u8; 16]) {
170         self.victories += 1;
171         self.attempts += 1;
172         self.next_seed = next_seed;
173     }
174
175     fn add_defeat(&mut self, next_seed: [u8; 16]) {
176         self.defeats += 1;
177         self.attempts += 1;
178         self.next_seed = next_seed;
179     }
180
181     fn add_draw(&mut self, next_seed: [u8; 16]) {
182         self.draws += 1;
183         self.attempts += 1;
184         self.next_seed = next_seed;
185     }
186
187     fn add_stalemate(&mut self, next_seed: [u8; 16]) {
188         self.stalemates += 1;
189         self.attempts += 1;
190         self.next_seed = next_seed;
191     }
192
193     fn win_ratio(&self) -> i32 {
194         (self.victories as i32 - self.defeats as i32) * 10000 / (self.attempts as i32)
195     }
196
197     //TODO: Devalue nothing so that it doesn't stand and do nothing when it can do things
198     fn init_command_scores(state: &BitwiseGameState) -> Vec<CommandScore> {
199         let all_buildings = sensible_buildings(&state.player);
200
201         let unoccupied_cells = (0..state.player.unoccupied_cell_count()).map(|i| state.player.location_of_unoccupied_cell(i));
202
203         let building_command_count = unoccupied_cells.len()*all_buildings.len();
204         
205         let mut commands = Vec::with_capacity(building_command_count + 2);
206         commands.push(CommandScore::new(Command::Nothing));
207         if state.player.can_build_iron_curtain() {
208             commands.push(CommandScore::new(Command::IronCurtain));
209         }
210
211         for position in unoccupied_cells {
212             for &building in &all_buildings {
213                 commands.push(CommandScore::new(Command::Build(position, building)));
214             }
215         }
216
217         commands
218     }
219 }
220
221 impl fmt::Display for CommandScore {
222     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
223         write!(f, "{},{}", self.command, self.win_ratio())
224     }
225 }
226
227
228 #[cfg(not(feature = "energy-cutoff"))]
229 fn sensible_buildings(player: &Player) -> Vec<BuildingType> {
230     let mut result = Vec::with_capacity(4);
231
232     if DEFENCE_PRICE <= player.energy {
233         result.push(BuildingType::Defence);
234     }
235     if MISSILE_PRICE <= player.energy {
236         result.push(BuildingType::Attack);
237     }
238     if ENERGY_PRICE <= player.energy {
239         result.push(BuildingType::Energy);
240     }
241     if TESLA_PRICE <= player.energy && !player.has_max_teslas() {
242         result.push(BuildingType::Tesla);
243     }
244
245     result
246 }
247
248 //TODO: Heuristic that avoids building the initial energy towers all in the same row? Max energy in a row?
249 #[cfg(feature = "energy-cutoff")]
250 fn sensible_buildings(player: &Player) -> Vec<BuildingType> {
251     let mut result = Vec::with_capacity(4);
252     let needs_energy = player.energy_generated() <= ENERGY_PRODUCTION_CUTOFF ||
253         player.energy <= ENERGY_STORAGE_CUTOFF;
254
255     if DEFENCE_PRICE <= player.energy {
256         result.push(BuildingType::Defence);
257     }
258     if MISSILE_PRICE <= player.energy {
259         result.push(BuildingType::Attack);
260     }
261     if ENERGY_PRICE <= player.energy && needs_energy {
262         result.push(BuildingType::Energy);
263     }
264     if TESLA_PRICE <= player.energy && !player.has_max_teslas() {
265         result.push(BuildingType::Tesla);
266     }
267     
268     result
269 }
270