Source code for baybe.simulation.transfer_learning
"""Functionality for transfer learning backtesting."""from__future__importannotationsfromcopyimportdeepcopyfromtypingimportAnyimportpandasaspdfrombaybe.campaignimportCampaignfrombaybe.parametersimportTaskParameterfrombaybe.searchspaceimportSearchSpaceTypefrombaybe.simulation.scenariosimportsimulate_scenarios
def simulate_transfer_learning(
    campaign: Campaign,
    lookup: pd.DataFrame,
    /,
    *,
    batch_size: int = 1,
    n_doe_iterations: int | None = None,
    groupby: list[str] | None = None,
    n_mc_iterations: int = 1,
) -> pd.DataFrame:
    """Simulate Bayesian optimization with transfer learning.

    A wrapper around :func:`baybe.simulation.scenarios.simulate_scenarios` that
    partitions the search space into its tasks and simulates each task with the
    training data from the remaining tasks.

    **NOTE:**
    Currently, the simulation only supports purely discrete search spaces. This is
    because ``lookup`` serves both as the loop-closing element **and** as the source
    for off-task training data. For continuous (or mixed) spaces, the lookup
    mechanism would need to be either implemented as a callable (in which case the
    training data must be provided separately) or the continuous parameters would
    need to be effectively restricted to the finite number of provided lookup
    configurations. Neither is implemented at the moment.

    Args:
        campaign: See :func:`baybe.simulation.core.simulate_experiment`.
        lookup: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        batch_size: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        n_doe_iterations: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        groupby: See :func:`baybe.simulation.scenarios.simulate_scenarios`.
        n_mc_iterations: See :func:`baybe.simulation.scenarios.simulate_scenarios`.

    Returns:
        A dataframe as returned by
        :func:`baybe.simulation.scenarios.simulate_scenarios` where the different
        tasks are represented in the ``Scenario`` column.

    Raises:
        NotImplementedError: If a non-discrete search space is chosen.
    """
    # TODO: Currently, we assume a purely discrete search space
    if campaign.searchspace.type != SearchSpaceType.DISCRETE:
        raise NotImplementedError(
            "Currently, only purely discrete search spaces are supported. "
            "For details, see NOTE in the function docstring."
        )

    # TODO [16932]: Currently, we assume exactly one task parameter exists
    # Extract the single task parameter
    task_params = [p for p in campaign.parameters if isinstance(p, TaskParameter)]
    if len(task_params) > 1:
        raise NotImplementedError(
            "Currently, transfer learning supports only a single task parameter."
        )
    task_param = task_params[0]

    # Create simulation objects for all tasks
    scenarios: dict[Any, Campaign] = {}
    for task in task_param.values:
        # Create a campaign that focuses only on the current task by excluding
        # off-task configurations from the candidates list
        # TODO: Reconsider if deepcopies are required once [16605] is resolved
        campaign_task = deepcopy(campaign)
        off_task_mask = (
            campaign.searchspace.discrete.exp_rep[task_param.name] != task
        )
        # TODO [16605]: Avoid direct manipulation of metadata
        campaign_task.searchspace.discrete.metadata.loc[
            off_task_mask.values, "dont_recommend"
        ] = True

        # Use all off-task data as training data
        df_train = lookup[lookup[task_param.name] != task]
        campaign_task.add_measurements(df_train)

        # Add the task scenario
        scenarios[task] = campaign_task

    # Simulate all tasks
    return simulate_scenarios(
        scenarios,
        lookup,
        batch_size=batch_size,
        n_doe_iterations=n_doe_iterations,
        groupby=groupby,
        n_mc_iterations=n_mc_iterations,
        impute_mode="ignore",
    )
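For orientation, here is a minimal usage sketch. It is not part of the module source: the parameter, task, and target names ("Temperature", "Function", "TaskA"/"TaskB", "Yield") and the lookup values are purely illustrative, and exact constructor signatures (in particular for the objective and target) may differ between BayBE versions. The only structural requirements imposed by the function above are a purely discrete search space containing a single TaskParameter, and a lookup dataframe that also covers the off-task configurations used as training data.

# Hypothetical usage sketch for simulate_transfer_learning; all names below are
# illustrative assumptions, not taken from the module source above.
import pandas as pd

from baybe.campaign import Campaign
from baybe.objectives import SingleTargetObjective
from baybe.parameters import NumericalDiscreteParameter, TaskParameter
from baybe.searchspace import SearchSpace
from baybe.simulation.transfer_learning import simulate_transfer_learning
from baybe.targets import NumericalTarget

# A purely discrete search space containing exactly one task parameter.
parameters = [
    NumericalDiscreteParameter(name="Temperature", values=(10, 20, 30)),
    TaskParameter(name="Function", values=("TaskA", "TaskB")),
]
campaign = Campaign(
    searchspace=SearchSpace.from_product(parameters),
    objective=SingleTargetObjective(target=NumericalTarget(name="Yield", mode="MAX")),
)

# The lookup closes the loop and also provides the off-task training data,
# so it should list target values for the configurations of both tasks.
lookup = pd.DataFrame(
    {
        "Temperature": [10, 20, 30, 10, 20, 30],
        "Function": ["TaskA"] * 3 + ["TaskB"] * 3,
        "Yield": [0.1, 0.5, 0.3, 0.2, 0.6, 0.4],
    }
)

results = simulate_transfer_learning(
    campaign,
    lookup,
    batch_size=1,
    n_doe_iterations=3,
    n_mc_iterations=2,
)

# One scenario per task value, identified via the "Scenario" column.
print(results["Scenario"].unique())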