"""Core simulation and backtesting functionality."""from__future__importannotationsimportwarningsfromcollections.abcimportCallablefromcontextlibimportnullcontextfromcopyimportdeepcopyfromfunctoolsimportpartialfromtypingimportLiteralimportnumpyasnpimportpandasaspdfrombaybe.campaignimportCampaignfrombaybe.exceptionsimportNotEnoughPointsLeftError,NothingToSimulateErrorfrombaybe.simulation.lookupimportlook_up_targetsfrombaybe.targets.enumimportTargetModefrombaybe.utils.dataframeimportadd_parameter_noisefrombaybe.utils.numericalimportDTypeFloatNumpy,closer_element,closest_elementfrombaybe.utils.randomimporttemporary_seed


def simulate_experiment(
    campaign: Campaign,
    lookup: pd.DataFrame | Callable[[pd.DataFrame], pd.DataFrame] | None = None,
    /,
    *,
    batch_size: int = 1,
    n_doe_iterations: int | None = None,
    initial_data: pd.DataFrame | None = None,
    random_seed: int | None = None,
    impute_mode: Literal[
        "error", "worst", "best", "mean", "random", "ignore"
    ] = "error",
    noise_percent: float | None = None,
) -> pd.DataFrame:
    """Simulate a Bayesian optimization loop.

    This is the most basic type of simulation: it runs a single execution of the
    loop, either for a specified number of steps or until there are no more
    configurations left to be tested.

    Args:
        campaign: The DOE setting to be simulated.
        lookup: The lookup used to close the loop, providing target values for the
            queried parameter settings.
            For details, see :func:`baybe.simulation.lookup.look_up_targets`.
        batch_size: The number of recommendations to be queried per iteration.
        n_doe_iterations: The number of iterations to run the design-of-experiments
            loop. If not specified, the simulation proceeds until there are no more
            testable configurations left.
        initial_data: The initial measurement data to be ingested before starting
            the loop.
        random_seed: An optional random seed to be used for the simulation.
        impute_mode: Specifies how a missing lookup will be handled. For details,
            see :func:`baybe.simulation.lookup.look_up_targets`. In addition to the
            choices listed there, the following option is available:

            - ``"ignore"``: The search space is stripped before recommendations are
              made so that unmeasured experiments will not be recommended.
        noise_percent: If not ``None``, relative noise of ``noise_percent`` percent
            will be applied to the parameter measurements.

    Returns:
        A dataframe ready for plotting, see the ``Note`` for details.

    Raises:
        TypeError: If a non-suitable lookup is chosen.
        ValueError: If the impute mode ``ignore`` is chosen for a non-dataframe
            lookup.
        ValueError: If a setup is provided that would run indefinitely.

    Note:
        The returned dataframe contains the following columns:

        * ``Iteration``: Corresponds to the DOE iteration (starting at 0)
        * ``Num_Experiments``: Corresponds to the running number of experiments
          performed (usually the x-axis)
        * for each target, a column ``{targetname}_IterBest``: Corresponds to the
          best result for that target at the respective iteration
        * for each target, a column ``{targetname}_CumBest``: Corresponds to the
          best result for that target up to and including the respective iteration
        * for each target, a column ``{targetname}_Measurements``: The individual
          measurements obtained for the respective target and iteration
    """
    # TODO: Use a `will_terminate` campaign property to decide if the campaign will
    #   run indefinitely or not, and allow omitting `n_doe_iterations` for the latter.

    if campaign.objective is None:
        raise ValueError(
            "The given campaign has no objective defined, hence there are no targets "
            "to be tracked."
        )

    context = temporary_seed(random_seed) if random_seed is not None else nullcontext()
    with context:
        # Validate the lookup mechanism
        if not (isinstance(lookup, (pd.DataFrame, Callable)) or (lookup is None)):
            raise TypeError(
                "The lookup can either be 'None', a pandas dataframe or a callable."
            )

        # Validate the data imputation mode
        if (impute_mode == "ignore") and (not isinstance(lookup, pd.DataFrame)):
            raise ValueError(
                "Impute mode 'ignore' is only available for dataframe lookups."
            )

        # Enforce correct float precision in lookup dataframes
        if isinstance(lookup, pd.DataFrame):
            lookup = lookup.copy()
            float_cols = lookup.select_dtypes(include=["float"]).columns
            lookup[float_cols] = lookup[float_cols].astype(DTypeFloatNumpy)

        # Clone the campaign to avoid mutating the original object
        # TODO: Reconsider if deepcopies are required once [16605] is resolved
        campaign = deepcopy(campaign)

        # Add the initial data
        if (initial_data is not None) and not initial_data.empty:
            campaign.add_measurements(initial_data)

        # For impute_mode 'ignore', do not recommend space entries that are not
        # available in the lookup
        if impute_mode == "ignore":
            campaign.toggle_discrete_candidates(
                lookup[[p.name for p in campaign.parameters]],
                exclude=True,
                complement=True,
            )

        # Run the DOE loop
        limit = n_doe_iterations or np.inf
        k_iteration = 0
        n_experiments = 0
        dfs = []
        while k_iteration < limit:
            # Get the next recommendations and corresponding measurements
            try:
                measured = campaign.recommend(batch_size=batch_size)
            except NotEnoughPointsLeftError:
                # TODO: There can still be N < batch_quantity points left in the
                #   search space. Once the recommender/strategy refactoring is
                #   completed, find an elegant way to return those.
                warnings.warn(
                    "The simulation of the campaign ended because not sufficiently "
                    "many points were left for recommendation",
                    UserWarning,
                )
                break
            # Temporary workaround to enable returning incomplete simulations
            except Exception as ex:
                warnings.warn(
                    f"An error has occurred during the simulation, "
                    f"therefore incomplete simulation results are returned. "
                    f"The error message was:\n{str(ex)}"
                )
                break

            n_experiments += len(measured)
            look_up_targets(measured, campaign.targets, lookup, impute_mode)

            # Create the summary for the current iteration and store it
            result = pd.DataFrame(
                [  # <-- this ensures that the internal lists do not get expanded
                    {
                        "Iteration": k_iteration,
                        "Num_Experiments": n_experiments,
                        **{
                            f"{target.name}_Measurements": measured[target.name].to_list()
                            for target in campaign.targets
                        },
                    }
                ]
            )
            dfs.append(result)

            # Apply optional noise to the parameter measurements
            if noise_percent:
                add_parameter_noise(
                    measured,
                    campaign.parameters,
                    noise_type="relative_percent",
                    noise_level=noise_percent,
                )

            # Update the campaign
            campaign.add_measurements(measured)

            # Update the iteration counter
            k_iteration += 1

        # Collect the iteration results
        if len(dfs) == 0:
            raise NothingToSimulateError()
        results = pd.concat(dfs, ignore_index=True)

        # Add the instantaneous and running best values for all targets
        for target in campaign.targets:
            # Define the summary functions for the current target
            if target.mode is TargetMode.MAX:
                agg_fun = np.max
                cum_fun = np.maximum.accumulate
            elif target.mode is TargetMode.MIN:
                agg_fun = np.min
                cum_fun = np.minimum.accumulate
            elif target.mode is TargetMode.MATCH:
                match_val = target.bounds.center
                agg_fun = partial(closest_element, target=match_val)
                cum_fun = lambda x: np.array(  # noqa: E731
                    np.frompyfunc(
                        partial(closer_element, target=match_val),
                        2,
                        1,
                    ).accumulate(x),
                    dtype=float,
                )

            # Add the summary columns
            measurement_col = f"{target.name}_Measurements"
            iterbest_col = f"{target.name}_IterBest"
            cumbest_col = f"{target.name}_CumBest"
            results[iterbest_col] = results[measurement_col].apply(agg_fun)
            results[cumbest_col] = cum_fun(results[iterbest_col])

    return results