Placeholder for new heuristic based random search
[entelect-challenge-tower-defence.git] / src / strategy / monte_carlo.rs
1 use engine::command::*;
2 use engine::status::GameStatus;
3 use engine::bitwise_engine::{Player, BitwiseGameState};
4 use engine::constants::*;
5 use engine::geometry::*;
6
7 use std::fmt;
8
9 use rand::{Rng, XorShiftRng, SeedableRng};
10
11 use arrayvec::ArrayVec;
12
13 const MAX_MOVES: u16 = 400;
14 const INIT_SEED: [u8;16] = [0x7b, 0x6a, 0xe1, 0xf4, 0x41, 0x3c, 0xe9, 0x0f, 0x67, 0x81, 0x67, 0x99, 0x77, 0x0a, 0x6b, 0xda];
15
16 use time::{Duration, PreciseTime};
17
18 #[cfg(not(feature = "single-threaded"))]
19 use rayon::prelude::*;
20
21 //TODO Rethink / adjust these?
22 #[cfg(feature = "energy-cutoff")] pub const ENERGY_PRODUCTION_CUTOFF: u16 = 50;
23 #[cfg(feature = "energy-cutoff")] pub const ENERGY_STORAGE_CUTOFF: u16 = 100;
24
25 pub fn choose_move(state: &BitwiseGameState, start_time: PreciseTime, max_time: Duration) -> Command {
26     let mut command_scores = CommandScore::init_command_scores(state);
27
28     let command = {
29         let best_command_score = simulate_options_to_timeout(&mut command_scores, state, start_time, max_time);
30         match best_command_score {
31             Some(best) if !best.starts_with_nothing => best.command,
32             _ => Command::Nothing
33         }
34     };
35
36     #[cfg(feature = "benchmarking")]
37     {
38         let total_iterations: u32 = command_scores.iter().map(|c| c.attempts).sum();
39         println!("Iterations: {}", total_iterations);
40     }
41     #[cfg(feature = "debug-decisions")]
42     {
43         debug_print_choices("ENERGY", &command_scores, |score| match score.command {
44             Command::Build(p, BuildingType::Energy) => Some((p, score.win_ratio())),
45             _ => None
46         });
47         debug_print_choices("ATTACK", &command_scores, |score| match score.command {
48             Command::Build(p, BuildingType::Attack) => Some((p, score.win_ratio())),
49             _ => None
50         });
51         debug_print_choices("DEFENCE", &command_scores, |score| match score.command {
52             Command::Build(p, BuildingType::Defence) => Some((p, score.win_ratio())),
53             _ => None
54         });
55         debug_print_choices("TESLA", &command_scores, |score| match score.command {
56             Command::Build(p, BuildingType::Tesla) => Some((p, score.win_ratio())),
57             _ => None
58         });
59         
60         println!("NOTHING");
61         println!("{}", command_scores.iter().find(|c| c.command == Command::Nothing).map(|s| s.win_ratio()).unwrap_or(0));
62         println!("");
63
64         println!("IRON CURTAIN");
65         println!("{}", command_scores.iter().find(|c| c.command == Command::IronCurtain).map(|s| s.win_ratio()).unwrap_or(0));
66         println!("");
67     }
68
69     command
70 }
71
72 #[cfg(feature = "debug-decisions")]
73 fn debug_print_choices<F: FnMut(&CommandScore) -> Option<(Point, i32)>>(label: &str, command_scores: &[CommandScore], extractor: F) {
74     println!("#+NAME: {}", label);
75     println!("#+PLOT: type:3d with:pm3d");
76     let relevant_moves: Vec<(Point, i32)>  = command_scores.iter()
77         .filter_map(extractor)
78         .collect();
79     for y in 0..MAP_HEIGHT {
80         for x in 0..SINGLE_MAP_WIDTH {
81             let point = Point::new(x, y);
82             let score = relevant_moves.iter().find(|(p, _)| *p == point);
83             print!(" | {}", score.map(|(_,s)| s).cloned().unwrap_or(0));
84         }
85         println!(" |");
86     }
87     println!("");
88 }
89
90 #[cfg(not(feature = "discard-poor-performers"))]
91 fn simulate_options_to_timeout(command_scores: &'a mut Vec<CommandScore>, settings: &GameSettings, state: &BitwiseGameState, start_time: PreciseTime, max_time: Duration) -> Option<&'a CommandScore> {
92     loop {
93         simulate_all_options_once(command_scores, settings, state);
94         if start_time.to(PreciseTime::now()) > max_time {
95             break;
96         }
97     }
98     command_scores.iter().max_by_key(|&c| c.win_ratio())
99 }
100
101 #[cfg(feature = "discard-poor-performers")]
102 fn simulate_options_to_timeout<'a>(command_scores: &'a mut Vec<CommandScore>, state: &BitwiseGameState, start_time: PreciseTime, max_time: Duration) -> Option<&'a CommandScore> {
103     use std::cmp;
104     let min_options = cmp::min(command_scores.len(), 5);
105     
106     let maxes = [max_time / 4, max_time / 2, max_time * 3 / 4, max_time];
107     for (i, &max) in maxes.iter().enumerate() {
108         let new_length = cmp::max(min_options, command_scores.len() / (2usize.pow(i as u32)));
109         let active_scores = &mut command_scores[0..new_length];
110         loop {
111             simulate_all_options_once(active_scores, state);
112             if start_time.to(PreciseTime::now()) > max {
113                 break;
114             }
115         }
116         active_scores.sort_unstable_by_key(|c| -c.win_ratio());
117     }
118     command_scores.first()
119 }
120
121 #[cfg(feature = "single-threaded")]
122 fn simulate_all_options_once(command_scores: &mut[CommandScore], state: &BitwiseGameState) {
123     command_scores.iter_mut()
124         .for_each(|score| {
125             let mut rng = XorShiftRng::from_seed(score.next_seed);
126             simulate_to_endstate(score, state, &mut rng);
127         });
128 }
129
130 #[cfg(not(feature = "single-threaded"))]
131 fn simulate_all_options_once(command_scores: &mut[CommandScore], state: &BitwiseGameState) {
132     command_scores.par_iter_mut()
133         .for_each(|score| {
134             let mut rng = XorShiftRng::from_seed(score.next_seed);
135             simulate_to_endstate(score, state, &mut rng);
136         });
137 }
138
139 fn simulate_to_endstate<R: Rng>(command_score: &mut CommandScore, state: &BitwiseGameState, rng: &mut R) {
140     let mut state_mut = state.clone();
141     
142     let mut status = GameStatus::Continue; //state_mut.simulate(command_score.command, opponent_first);
143     let mut first_move_made = false;
144     
145     for _ in 0..MAX_MOVES {
146         if status != GameStatus::Continue {
147             break;
148         }
149
150         let player_command = if first_move_made {
151             random_move(&state_mut.player, rng)
152         } else {
153             let do_nothing = command_score.command.cant_build_yet(state_mut.player.energy);
154             first_move_made = !do_nothing;
155             if do_nothing { Command::Nothing } else { command_score.command }
156         };
157         let opponent_command = random_move(&state_mut.opponent, rng);
158         status = state_mut.simulate(player_command, opponent_command);
159     }
160
161     let mut next_seed: [u8;16] = [0; 16];
162     rng.fill_bytes(&mut next_seed);
163     match status {
164         GameStatus::PlayerWon => command_score.add_victory(next_seed),
165         GameStatus::OpponentWon => command_score.add_defeat(next_seed),
166         GameStatus::Continue => command_score.add_stalemate(next_seed),
167         GameStatus::Draw => command_score.add_draw(next_seed)
168     }
169 }
170
171 // TODO
172 // 1. Have a (static) array of all moves. Even invalid ones. ALL
173 // 2. Create a new CDF array, same size.
174 // 3. Loop moves
175 // 3.1. Compute PDF for move. Invalid moves are 0.
176 // 3.2. Add to last CDF value and stick in array
177 // 4. Generate random number uniformly, 0 to CDF max
178 // 5. Binary search to find random number in CDF array. Min index where CDF[index] > random
179 // 6. Lookup move in static array
180 #[cfg(feature = "heuristic-random")]
181 fn random_move<R: Rng>(player: &Player, rng: &mut R) -> Command {
182     Command::Nothing
183 }
184
185 #[cfg(not(feature = "heuristic-random"))]
186 fn random_move<R: Rng>(player: &Player, rng: &mut R) -> Command {
187     let free_positions_count = player.unoccupied_cell_count();
188
189     let open_building_spot = free_positions_count > 0;
190
191     let all_buildings = sensible_buildings(player, open_building_spot);
192
193     let iron_curtain_count = if player.can_build_iron_curtain() && player.energy >= IRON_CURTAIN_PRICE { 1 } else { 0 };
194     let nothing_count = 1;
195
196     let building_choice_index = rng.gen_range(0, all_buildings.len() + nothing_count + iron_curtain_count);
197
198     if building_choice_index < all_buildings.len() {
199         let position_choice = rng.gen_range(0, free_positions_count);
200         Command::Build(
201             player.location_of_unoccupied_cell(position_choice),
202             all_buildings[building_choice_index]
203         )
204     }
205     else if building_choice_index == all_buildings.len() {
206         Command::Nothing        
207     } else {
208         Command::IronCurtain
209     }
210 }
211
212 #[derive(Debug)]
213 struct CommandScore {
214     command: Command,
215     starts_with_nothing: bool,
216     victories: u32,
217     defeats: u32,
218     draws: u32,
219     stalemates: u32,
220     attempts: u32,
221     next_seed: [u8; 16]
222 }
223
224 impl CommandScore {
225     fn new(command: Command, starts_with_nothing: bool) -> CommandScore {
226         CommandScore {
227             command, starts_with_nothing,
228             victories: 0,
229             defeats: 0,
230             draws: 0,
231             stalemates: 0,
232             attempts: 0,
233             next_seed: INIT_SEED
234         }
235     }
236
237     fn add_victory(&mut self, next_seed: [u8; 16]) {
238         self.victories += 1;
239         self.attempts += 1;
240         self.next_seed = next_seed;
241     }
242
243     fn add_defeat(&mut self, next_seed: [u8; 16]) {
244         self.defeats += 1;
245         self.attempts += 1;
246         self.next_seed = next_seed;
247     }
248
249     fn add_draw(&mut self, next_seed: [u8; 16]) {
250         self.draws += 1;
251         self.attempts += 1;
252         self.next_seed = next_seed;
253     }
254
255     fn add_stalemate(&mut self, next_seed: [u8; 16]) {
256         self.stalemates += 1;
257         self.attempts += 1;
258         self.next_seed = next_seed;
259     }
260
261     fn win_ratio(&self) -> i32 {
262         (self.victories as i32 - self.defeats as i32) * 10000 / (self.attempts as i32)
263     }
264
265     fn init_command_scores(state: &BitwiseGameState) -> Vec<CommandScore> {
266         let unoccupied_cells_count = state.player.unoccupied_cell_count();
267         let unoccupied_cells = (0..unoccupied_cells_count)
268             .map(|i| state.player.location_of_unoccupied_cell(i));
269         let energy_generated = state.player.energy_generated();
270
271         let mut all_buildings: ArrayVec<[BuildingType; 4]> = ArrayVec::new();
272         if DEFENCE_PRICE <= state.player.energy {
273             all_buildings.push(BuildingType::Defence);
274         }
275         if MISSILE_PRICE <= state.player.energy {
276             all_buildings.push(BuildingType::Attack);
277         }
278         if ENERGY_PRICE <= state.player.energy {
279             all_buildings.push(BuildingType::Energy);
280         }
281         if !state.player.has_max_teslas() && (TESLA_PRICE.saturating_sub(state.player.energy) / energy_generated < 4) {
282             all_buildings.push(BuildingType::Tesla);
283         }
284         
285         let building_command_count = unoccupied_cells.len()*all_buildings.len();
286         
287         let mut commands = Vec::with_capacity(building_command_count + 1);
288         if state.player.can_build_iron_curtain() && IRON_CURTAIN_PRICE.saturating_sub(state.player.energy) / energy_generated < 4 {
289             commands.push(CommandScore::new(Command::IronCurtain, state.player.energy < IRON_CURTAIN_PRICE));
290         }
291
292         for position in unoccupied_cells {
293             for &building in &all_buildings {
294                 commands.push(CommandScore::new(Command::Build(position, building), building.cant_build_yet(state.player.energy)));
295             }
296         }
297
298         commands
299     }
300 }
301
302 impl fmt::Display for CommandScore {
303     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
304         write!(f, "{},{}", self.command, self.win_ratio())
305     }
306 }
307
308 #[cfg(not(feature = "energy-cutoff"))]
309 fn sensible_buildings(player: &Player, open_building_spot: bool) -> ArrayVec<[BuildingType;4]> {
310     let mut result = ArrayVec::new();
311     if !open_building_spot {
312         return result;
313     }
314
315     if DEFENCE_PRICE <= player.energy {
316         result.push(BuildingType::Defence);
317     }
318     if MISSILE_PRICE <= player.energy {
319         result.push(BuildingType::Attack);
320     }
321     if ENERGY_PRICE <= player.energy {
322         result.push(BuildingType::Energy);
323     }
324     if TESLA_PRICE <= player.energy && !player.has_max_teslas() {
325         result.push(BuildingType::Tesla);
326     }
327
328     result
329 }
330
331 #[cfg(feature = "energy-cutoff")]
332 fn sensible_buildings(player: &Player, open_building_spot: bool) -> ArrayVec<[BuildingType;4]> {
333     let mut result = ArrayVec::new();
334     if !open_building_spot {
335         return result;
336     }
337
338     let needs_energy = player.energy_generated() <= ENERGY_PRODUCTION_CUTOFF ||
339         player.energy <= ENERGY_STORAGE_CUTOFF;
340
341     if DEFENCE_PRICE <= player.energy {
342         result.push(BuildingType::Defence);
343     }
344     if MISSILE_PRICE <= player.energy {
345         result.push(BuildingType::Attack);
346     }
347     if ENERGY_PRICE <= player.energy && needs_energy {
348         result.push(BuildingType::Energy);
349     }
350     if TESLA_PRICE <= player.energy && !player.has_max_teslas() {
351         result.push(BuildingType::Tesla);
352     }
353     
354     result
355 }
356