"""Hybrid recommendation routines for BotorchRecommender."""
from __future__ import annotations
import math
import warnings
from collections.abc import Callable, Iterable
from typing import TYPE_CHECKING
import numpy as np
import pandas as pd
from baybe.constraints.utils import is_cardinality_fulfilled
from baybe.exceptions import (
IncompatibilityError,
IncompatibleAcquisitionFunctionError,
MinimumCardinalityViolatedWarning,
)
from baybe.searchspace import SearchSpace
from baybe.utils.basic import flatten
from baybe.utils.dataframe import to_tensor
from baybe.utils.sampling_algorithms import sample_numerical_df
if TYPE_CHECKING:
from torch import Tensor
from baybe.recommenders.pure.bayesian.botorch.core import BotorchRecommender
[docs]
def recommend_hybrid_without_subsets(
    recommender: BotorchRecommender,
    searchspace: SearchSpace,
    candidates_exp: pd.DataFrame,
    batch_size: int,
) -> pd.DataFrame:
    """Recommend points using the ``optimize_acqf_mixed`` function of BoTorch.

    This function takes points from the discrete subspace, performs optimization
    in the continuous subspace with these points being fixed and returns the best
    found solution.

    **Important**: This performs a brute-force calculation by fixing every
    considered assignment of discrete variables and optimizing the continuous
    subspace for each of them. It is thus computationally expensive. If the
    recommender has a ``hybrid_sampler`` configured, only a sampled fraction
    (``sampling_percentage``) of the discrete candidates is considered.

    **Note**: This function implicitly assumes that discrete search space parts in
    the respective data frame come first and continuous parts come second.

    Args:
        recommender: The recommender instance.
        searchspace: The search space in which the recommendations should be made.
        candidates_exp: The experimental representation of the candidates
            of the discrete subspace.
        batch_size: The size of the calculated batch.

    Raises:
        IncompatibilityError: If the continuous subspace has interpoint
            constraints.
        IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition
            function is used with a batch size > 1.

    Returns:
        The recommended points.
    """
    assert recommender._objective is not None

    # Interpoint constraints cannot be used with optimize_acqf_mixed, see
    # https://github.com/meta-pytorch/botorch/issues/2996
    if searchspace.continuous.has_interpoint_constraints:
        raise IncompatibilityError(
            "Interpoint constraints are not available in hybrid spaces."
        )

    if (
        batch_size > 1
        and not recommender._get_acquisition_function(
            recommender._objective
        ).supports_batching
    ):
        raise IncompatibleAcquisitionFunctionError(
            f"The '{recommender.__class__.__name__}' only works with Monte Carlo "
            "acquisition functions for batch sizes > 1."
        )

    # Local imports keep torch/BoTorch out of the module import path
    import torch
    from botorch.optim import optimize_acqf_mixed

    # Transform discrete candidates
    candidates_comp = searchspace.discrete.transform(candidates_exp)

    # Potential sampling of discrete candidates. The number of samples is only
    # needed (and hence only computed) when a sampler is actually configured.
    if recommender.hybrid_sampler is not None:
        n_candidates = math.ceil(
            recommender.sampling_percentage * len(candidates_comp.index)
        )
        candidates_comp = sample_numerical_df(
            candidates_comp, n_candidates, method=recommender.hybrid_sampler
        )

    # Prepare all considered discrete configurations in the
    # List[Dict[int, float]] format expected by BoTorch.
    num_comp_columns = len(candidates_comp.columns)
    candidates_comp.columns = list(range(num_comp_columns))
    fixed_features_list = candidates_comp.to_dict("records")

    def _to_botorch_constraints(constraints):
        """Convert continuous linear constraints to BoTorch format (or None)."""
        # The continuous columns follow the discrete ones, hence the index offset.
        # NOTE: The explicit `or None` conversion is added as an additional safety
        #   net because it is unclear if the corresponding presence checks for
        #   these arguments is correctly implemented in all invoked BoTorch
        #   subroutines.
        #   For details: https://github.com/pytorch/botorch/issues/2042
        return (
            flatten(
                c.to_botorch(
                    searchspace.continuous.parameters,
                    idx_offset=num_comp_columns,
                    batch_size=batch_size if c.is_interpoint else None,
                )
                for c in constraints
            )
            or None
        )

    # Actual call of the BoTorch optimization routine
    points, _ = optimize_acqf_mixed(
        acq_function=recommender._botorch_acqf,
        bounds=torch.from_numpy(searchspace.comp_rep_bounds.to_numpy(copy=True)),
        q=batch_size,
        num_restarts=recommender.n_restarts,
        raw_samples=recommender.n_raw_samples,
        fixed_features_list=fixed_features_list,  # type: ignore[arg-type]
        equality_constraints=_to_botorch_constraints(
            searchspace.continuous.constraints_lin_eq
        ),
        inequality_constraints=_to_botorch_constraints(
            searchspace.continuous.constraints_lin_ineq
        ),
    )

    # Align candidates with search space index. Done via including the search space
    # index during the merge, which is used later for back-translation into the
    # experimental representation
    merged = pd.merge(
        pd.DataFrame(points),
        candidates_comp.reset_index(),
        on=list(candidates_comp.columns),
        how="left",
    ).set_index("index")

    # Get experimental representation of discrete part
    rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index]

    # Combine discrete and continuous parts. The continuous values are the
    # trailing columns of the optimizer output and get their parameter names back.
    rec_exp = pd.concat(
        [
            rec_disc_exp,
            merged.iloc[:, num_comp_columns:].set_axis(
                searchspace.continuous.parameter_names, axis=1
            ),
        ],
        axis=1,
    )

    return rec_exp
[docs]
def recommend_hybrid_with_subsets(
    recommender: BotorchRecommender,
    searchspace: SearchSpace,
    candidates_exp: pd.DataFrame,
    batch_size: int,
) -> pd.DataFrame:
    """Recommend from a hybrid space by optimizing over subset configurations.

    The subset configurations are obtained from ``SearchSpace.subsets()`` when
    the total number of subsets does not exceed ``max_n_subsets`` of the
    recommender, and from ``SearchSpace.sample_subsets()`` (limited to
    ``max_n_subsets``) otherwise. Each configuration consists of a mask selecting
    discrete candidates and a set of continuous parameter names that are passed
    to the cardinality enforcement of the continuous subspace.

    For every configuration, a deferred optimization task is created that runs
    :func:`recommend_hybrid_without_subsets` on the restricted problem and
    evaluates the acquisition function on the resulting batch. The tasks are
    handed to ``recommender._optimize_over_subsets``, and the first element of
    its result is returned.

    After the optimization, a ``MinimumCardinalityViolatedWarning`` is emitted if
    the continuous part of the result violates a minimum cardinality constraint.

    Args:
        recommender: The recommender instance.
        searchspace: The search space in which the recommendations should be made.
        candidates_exp: The experimental representation of the candidates
            of the discrete subspace.
        batch_size: The size of the calculated batch.

    Returns:
        The recommended points.
    """
    from attrs import evolve

    cont_space = searchspace.continuous

    # Enumerate all configurations if they fit the cap, otherwise sample them.
    # NOTE: No min_discrete_candidates filtering in hybrid spaces because
    #   optimize_acqf_mixed can produce multiple recommendations from a single
    #   discrete candidate by varying continuous parameters.
    configurations: Iterable[tuple[np.ndarray, frozenset[str]]] = (
        searchspace.subsets(candidates_exp)
        if searchspace.n_subsets <= recommender.max_n_subsets
        else searchspace.sample_subsets(candidates_exp, recommender.max_n_subsets)
    )

    def build_task(
        disc_mask: np.ndarray,
        inactive_cont_params: frozenset[str],
    ) -> Callable[[], tuple[pd.DataFrame, Tensor]]:
        # The factory binds the configuration to the task at creation time, so
        # each deferred task keeps its own mask and inactive parameter set.
        def run() -> tuple[pd.DataFrame, Tensor]:
            import torch

            disc_candidates = candidates_exp.loc[disc_mask]

            # Only touch the continuous subspace if parameters are to be
            # deactivated; otherwise reuse it unchanged.
            restricted_cont = (
                cont_space._enforce_cardinality_constraints(inactive_cont_params)
                if inactive_cont_params
                else cont_space
            )
            restricted_space = evolve(searchspace, continuous=restricted_cont)

            recommendation = recommend_hybrid_without_subsets(
                recommender, restricted_space, disc_candidates, batch_size
            )

            # Score the recommended batch (leading dimension added for the
            # acquisition function call), without tracking gradients.
            comp_rep = restricted_space.transform(recommendation)
            batch = to_tensor(comp_rep.values).unsqueeze(0)
            with torch.no_grad():
                score = recommender._botorch_acqf(batch)

            return recommendation, score

        return run

    # Lazily create one task per configuration
    tasks = (
        build_task(disc_mask, inactive_params)
        for disc_mask, inactive_params in configurations
    )
    # NOTE(review): presumably the recommender picks the task result with the
    #   best acquisition value -- confirm against `_optimize_over_subsets`.
    winner, _ = recommender._optimize_over_subsets(tasks)

    # Post-check minimum cardinality on continuous columns
    if cont_space.constraints_cardinality:
        min_cardinality_ok = is_cardinality_fulfilled(
            winner[list(cont_space.parameter_names)],
            cont_space,
            check_maximum=False,
        )
        if not min_cardinality_ok:
            warnings.warn(
                "At least one minimum cardinality constraint has been violated. "
                "This may occur when parameter ranges extend beyond zero in both "
                "directions, making the feasible region non-convex. For such "
                "parameters, minimum cardinality constraints are currently not "
                "enforced due to the complexity of the resulting optimization "
                "problem.",
                MinimumCardinalityViolatedWarning,
            )

    return winner