Source code for gridworld.models.game

from typing import List, Tuple

from loguru import logger

from .agent import Agent
from .world import World


class Game:
    """
    Models a simple game where an agent tries to solve a grid world with the given configuration.

    Attributes
    ----------
    world: gridworld.models.World
        Board to be solved.
    agent: gridworld.models.Agent
        Agent that will solve the board.
    """

    def __init__(self, world_width: int, world_height: int, start_cell: int, goal_cell: int,
                 obstacles_cells: List[int]) -> None:
        """
        Builds the world from the given layout and creates the agent that will solve it.

        Parameters
        ----------
        world_width: int
            World's width in cells.
        world_height: int
            World's height in cells.
        start_cell: int
            Cell where the agent will start.
        goal_cell: int
            Cell where the agent has to go.
        obstacles_cells: list
            Cells where obstacles will be placed.
        """
        self.world = World(grid_width=world_width,
                           grid_height=world_height,
                           starting_position=start_cell,
                           goal_position=goal_cell,
                           obstacle_positions=obstacles_cells)
        self.agent = Agent()

    def play(self, policy_search_iterations: int = 200000, value_search_iterations: int = 100000,
             threshold: float = 1e-20, gamma: float = 0.8) -> Tuple[list, bool]:
        """
        Makes agent solve the board.

        The agent is first attached to the world and trained (value iteration followed by
        policy iteration), then asked to solve the board. The resulting path is printed on
        the world and the outcome is logged.

        Parameters
        ----------
        policy_search_iterations: int
            Maximum number of iterations when looking for optimal policy.
        value_search_iterations: int
            Maximum number of iterations when looking for optimal state-value function.
        threshold: float
            Minimum change that should happen to continue value search iteration.
        gamma: float
            Discount factor

        Returns
        -------
        player_positions: list
            Cells the agent followed.
        reached_goal: bool
            True if agent reached goal cell successfully, False otherwise.
        """
        logger.info("Beginning agent training")
        # The agent needs a reference to the world before any of the training steps run.
        self.agent.environment = self.world
        self.agent.run_value_iteration(value_search_iterations, threshold, gamma)
        self.agent.run_policy_iteration(policy_search_iterations, gamma)
        self.agent.plot_q_function()

        logger.info("Solving game")
        player_positions, reached_goal = self.agent.solve()
        # Only the cells strictly between the first and last positions are drawn.
        # NOTE(review): presumably the first/last entries are the start and goal cells that
        # the world renders itself; when the goal is not reached the last visited cell is
        # also left out - confirm this is intended.
        self.world.print(player_positions[1:-1])

        if reached_goal:
            # Assumes player_positions includes the starting cell, hence the "- 1" - TODO confirm.
            logger.info(f"Agent solved the problem in {len(player_positions) - 1} moves!")
        else:
            logger.info("Agent could not solve the problem!")

        return player_positions, reached_goal