# Source code for baybe.parameters.utils

"""Parameter utilities."""

from collections.abc import Callable, Collection
from functools import partial
from typing import Any, TypeVar

import numpy as np
import numpy.typing as npt
import pandas as pd
from attrs import evolve

from baybe.parameters.base import Parameter
from baybe.parameters.numerical import (
    NumericalContinuousParameter,
)
from baybe.utils.interval import Interval

# Type variable restricted to `Parameter` (and its subclasses). It is used in the
# signature of `get_parameters_from_dataframe` to tie the element type of the
# returned list to the type produced by the factory / passed in explicitly.
_TParameter = TypeVar("_TParameter", bound=Parameter)


def get_parameters_from_dataframe(
    df: pd.DataFrame,
    factory: Callable[[str, Collection[Any]], _TParameter],
    parameters: Collection[_TParameter] | None = None,
) -> list[_TParameter]:
    """Create a list of parameters from a dataframe.

    Returns one parameter for each column of the given dataframe. By default,
    the parameters are created using the provided factory, which takes the name
    of the column and its unique values as arguments. However, there is also the
    possibility to provide explicit parameter objects with names matching
    specific columns of the dataframe, to bypass the parameter factory creation
    for those columns. This allows finer control, for example, to specify custom
    parameter attributes (e.g. specific optional arguments) compared to what
    would be provided by the factory. Still, the pre-specified parameters are
    validated to ensure that they are compatible with the contents of the
    dataframe.

    Args:
        df: The dataframe from which to create the parameters.
        factory: A parameter factory, creating parameter objects for the columns
            from the column name and the unique column values.
        parameters: An optional list of parameter objects to bypass the factory
            creation for columns whose names match with the parameter names.

    Returns:
        The combined parameter list, containing both the (validated)
        pre-specified parameters and the parameters inferred from the dataframe,
        in the order of the dataframe columns.

    Raises:
        ValueError: If several parameters with identical names are provided.
        ValueError: If a dataframe column contains values outside the range of
            its matching pre-specified parameter.
        ValueError: If a parameter was specified for which no match was found.
        AssertionError: If the dataframe contains a non-string column name.
    """
    # Turn the pre-specified parameters into a dict and check for duplicate names
    specified_params: dict[str, _TParameter] = {}
    if parameters is not None:
        for param in parameters:
            if param.name in specified_params:
                raise ValueError(
                    f"You provided several parameters with the name '{param.name}'."
                )
            specified_params[param.name] = param

    # Try to find a parameter match for each dataframe column. The result is
    # collected in a separate list instead of rebinding the `parameters` argument.
    collected: list[_TParameter] = []
    for name, series in df.items():
        assert isinstance(name, str), (
            "The given dataframe must only contain string-valued column names."
        )
        unique_values = series.unique()

        # Compare against `None` explicitly: relying on truthiness would silently
        # drop a pre-specified parameter whose type happens to evaluate as falsy
        # (e.g. because it defines `__bool__` or `__len__`).
        match = specified_params.pop(name, None)
        if match is None:
            # No pre-specified parameter for this column: use the factory
            collected.append(factory(name, unique_values))
            continue

        # A match was found: ensure that all column values are in range
        if not all(match.is_in_range(x) for x in unique_values):
            raise ValueError(
                f"The dataframe column '{name}' contains the values "
                f"{unique_values}, which are outside the range of {match}."
            )
        collected.append(match)

    # By now, all pre-specified parameters must have been used
    if specified_params:
        raise ValueError(
            f"For the parameter(s) {list(specified_params.keys())}, "
            f"no match could be found in the given dataframe."
        )

    return collected


def sort_parameters(parameters: Collection[Parameter]) -> tuple[Parameter, ...]:
    """Return the given parameters as a tuple ordered alphabetically by name."""
    # Work on a copy so that the caller's collection is left untouched
    ordered = list(parameters)
    ordered.sort(key=lambda parameter: parameter.name)
    return tuple(ordered)


def activate_parameter(
    parameter: NumericalContinuousParameter, thresholds: Interval
) -> NumericalContinuousParameter:
    """Force-activate a parameter by moving its bounds away from zero.

    A bound counts as inactive when :func:`is_inactive` says so for the given
    thresholds, i.e. when it lies strictly between the thresholds or equals
    zero. If exactly one of the two parameter bounds is inactive, that bound is
    replaced with the threshold on the far side of the inactive region, so that
    the resulting range no longer reaches into it. In all other cases that pass
    the checks below, the parameter is returned as is. In particular, a
    parameter whose range extends beyond the thresholds on both sides is left
    unchanged, because cutting out the inactive region would split its range
    into two pieces.

    Args:
        parameter: The parameter to be activated.
        thresholds: The considered parameter (in)activity thresholds.

    Returns:
        A copy of the parameter with adjusted bounds, or the unmodified
        parameter object itself when no bound needs to be moved.

    Raises:
        ValueError: If the threshold interval does not contain zero.
        NotImplementedError: If the thresholds are not a proper sub-interval of
            the parameter bounds. A threshold is accepted on its side only if it
            has the same sign as the corresponding bound and a strictly smaller
            magnitude, or if both the threshold and the bound are exactly zero.
    """

    def _lies_strictly_inside(value: float, reference: float) -> bool:
        """Check if the given value is a fraction of a specified reference value."""
        # A zero value is only accepted together with a zero reference; this
        # also avoids dividing by zero in the ratio test.
        return reference == 0.0 if value == 0.0 else reference / value > 1.0

    bounds = parameter.bounds
    lower_bound, upper_bound = bounds.lower, bounds.upper

    if not thresholds.contains(0.0):
        raise ValueError(
            f"The thresholds must cover zero but ({thresholds.lower}, "
            f"{thresholds.upper}) was given."
        )

    if not _lies_strictly_inside(
        thresholds.lower, lower_bound
    ) or not _lies_strictly_inside(thresholds.upper, upper_bound):
        raise NotImplementedError(
            "This function is implemented only for the case when "
            "thresholds is a proper sub-interval of the parameter bounds."
        )

    # Inactivity test with the thresholds already filled in
    bound_is_inactive = partial(
        is_inactive,
        lower_threshold=thresholds.lower,
        upper_threshold=thresholds.upper,
    )
    lower_inactive = bool(bound_is_inactive(lower_bound))
    upper_inactive = bool(bound_is_inactive(upper_bound))

    # Only the upper bound is inactive: pull it down to the lower threshold
    if upper_inactive and not lower_inactive:
        return evolve(parameter, bounds=(lower_bound, thresholds.lower))

    # Only the lower bound is inactive: push it up to the upper threshold
    if lower_inactive and not upper_inactive:
        return evolve(parameter, bounds=(thresholds.upper, upper_bound))

    # Either both bounds are already active (moving one would tear the value
    # range apart) or both are inactive.
    # NOTE(review): no error is raised here when both bounds are inactive;
    # presumably the sub-interval guard above leaves only the degenerate
    # all-zero case for that situation -- confirm that this is intended.
    return parameter


def is_inactive(
    x: npt.ArrayLike, /, lower_threshold: npt.ArrayLike, upper_threshold: npt.ArrayLike
) -> np.ndarray:
    """Determine which of the given values count as inactive (i.e. as zero).

    A value is inactive if it is exactly zero or if it lies strictly between
    the lower and the upper threshold. The threshold values themselves are
    therefore not inactive, unless they are zero.

    Args:
        x: An array-like object containing numeric values.
        lower_threshold: The (broadcastable) lower thresholds of the inactive
            regions.
        upper_threshold: The (broadcastable) upper thresholds of the inactive
            regions.

    Returns:
        A Boolean-valued numpy array indicating which elements are inactive.
    """
    values = np.asarray(x)
    lower = np.asarray(lower_threshold)
    upper = np.asarray(upper_threshold)

    # Open interval: both comparisons are strict
    inside_thresholds = (values > lower) & (values < upper)
    exactly_zero = values == 0.0
    return inside_thresholds | exactly_zero