Source code for ttt.training

from pathlib import Path

from loguru import logger

from .models import CPUAgent
from .play import play_game_cpu_vs_cpu
from .rewarding import rewarding


def _run_simulation(iterations: int, cpu_agent: CPUAgent, mode_1: str, mode_2: str, lr: float) -> CPUAgent:
    """
    Runs a set of games with the given parameters.

    Parameters
    ----------
    iterations: int
        Number of games to be played.
    cpu_agent: ttt.models.CPUAgent
        Agent to be trained.
    mode_1: str
        Policy of player 1. Possible values: best or random
    mode_2
        Policy of player 2. Possible values: best or random
    lr: float
        Learning rate.

    Returns
    -------
    trained_agent: ttt.models.CPUAgent
        Agent with its weights updated with training results.
    """
    for i in range(iterations):
        logger.info(f"\t\t{i}/{iterations}")
        game = play_game_cpu_vs_cpu(cpu_agent, cpu_agent, mode_1, mode_2, 1, False, False)
        rewarding(game, 1, lr)
        cpu_agent = rewarding(game, 2, lr)

    return cpu_agent


def train(
    output_path: str,
    lr: float = 0.1,
    exploration_iterations: int = 2500,
    exploitation_iterations: int = 1500,
    exploration_exploitation_iterations: int = 1000,
) -> CPUAgent:
    """
    Trains a new agent.

    Training runs three consecutive phases on the same agent: exploration
    (both players "random"), exploitation (both players "best") and
    exploration-exploitation (player 1 "best", player 2 "random"). The
    resulting agent is then saved to ``output_path``.

    Parameters
    ----------
    output_path: str
        Where to save agent's weights. E.g., weights.json. The path is made
        absolute before saving.
    lr: float
        Learning rate.
    exploration_iterations: int
        Number of iterations during exploration phase.
    exploitation_iterations: int
        Number of iterations during exploitation phase.
    exploration_exploitation_iterations: int
        Number of iterations during exploration-exploitation phase.

    Returns
    -------
    cpu_agent: ttt.models.CPUAgent
        The agent after all three phases.
    """
    cpu_agent = CPUAgent()
    logger.info("Starting CPU training")

    logger.info("\tRunning exploration")
    cpu_agent = _run_simulation(exploration_iterations, cpu_agent, "random", "random", lr)

    logger.info("\tRunning exploitation")
    cpu_agent = _run_simulation(exploitation_iterations, cpu_agent, "best", "best", lr)

    logger.info("\tRunning exploration-exploitation")
    cpu_agent = _run_simulation(exploration_exploitation_iterations, cpu_agent, "best", "random", lr)

    # Use a separate name so the ``str`` parameter is not rebound to a Path.
    destination = Path(output_path).absolute()
    # CPUAgent.save is not visible from this module; creating the parent
    # directory up front guards against losing the whole training run to a
    # missing directory, and is a no-op if the directory already exists.
    destination.parent.mkdir(parents=True, exist_ok=True)

    logger.info(f"\tFinished. Saving agent to {destination}")
    cpu_agent.save(destination)

    return cpu_agent