# Source code for baybe.surrogates.gaussian_process.presets.edbo
"""EDBO preset for Gaussian process surrogates."""

from __future__ import annotations

import gc
from collections.abc import Collection
from typing import TYPE_CHECKING

from attrs import define
from typing_extensions import override

from baybe.kernels.basic import MaternKernel
from baybe.kernels.composite import ScaleKernel
from baybe.parameters import TaskParameter
from baybe.parameters.enum import SubstanceEncoding
from baybe.parameters.substance import SubstanceParameter
from baybe.priors.basic import GammaPrior
from baybe.searchspace.discrete import SubspaceDiscrete
from baybe.surrogates.gaussian_process.kernel_factory import KernelFactory

if TYPE_CHECKING:
    from torch import Tensor

    from baybe.kernels.base import Kernel
    from baybe.searchspace.core import SearchSpace


def _contains_encoding(
    subspace: SubspaceDiscrete, encodings: Collection[SubstanceEncoding]
) -> bool:
    """Check whether a substance parameter of the subspace uses a given encoding.

    Args:
        subspace: The discrete subspace whose parameters are inspected.
        encodings: The encodings to look for.

    Returns:
        ``True`` if at least one ``SubstanceParameter`` in ``subspace.parameters``
        has an ``encoding`` contained in ``encodings``, ``False`` otherwise.
    """
    # Parameters of any other type are skipped; stop at the first match.
    for param in subspace.parameters:
        if isinstance(param, SubstanceParameter) and param.encoding in encodings:
            return True
    return False


_EDBO_ENCODINGS = (
    SubstanceEncoding.MORDRED,
    SubstanceEncoding.RDKIT,
    SubstanceEncoding.RDKIT2DDESCRIPTORS,
)
"""Encodings relevant to EDBO logic."""
@define
class EDBOKernelFactory(KernelFactory):
    """Kernel factory for Gaussian process surrogates, adapted from EDBO.

    The factory selects lengthscale/outputscale priors and initial values based on
    the effective input dimensionality and on whether the discrete subspace
    contains a substance parameter using one of the EDBO-relevant encodings.

    References:
        * https://github.com/b-shields/edbo/blob/master/edbo/bro.py#L664
        * https://doi.org/10.1038/s41586-021-03213-y
    """

    @override
    def __call__(
        self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
    ) -> Kernel:
        """Build the scaled Matern kernel matching the given problem size.

        Args:
            searchspace: The search space; its parameters and discrete subspace
                determine which prior regime is selected.
            train_x: The training inputs; only the size of the last dimension
                is read.
            train_y: The training targets (not used by this factory).

        Returns:
            A ``ScaleKernel`` wrapping a ``MaternKernel`` with ``nu=2.5``.
        """
        # The number of task parameters is subtracted from the input column count.
        n_columns = train_x.shape[-1]
        n_task_params = sum(
            1 for p in searchspace.parameters if isinstance(p, TaskParameter)
        )
        n_effective = n_columns - n_task_params

        # The encoding-specific regimes only apply from 50 effective dimensions on.
        uses_edbo_encoding = _contains_encoding(searchspace.discrete, _EDBO_ENCODINGS)
        high_dim_encoded = uses_edbo_encoding and (n_effective >= 50)

        if n_effective < 5:
            # low D priors
            ls_prior, ls_init = GammaPrior(1.2, 1.1), 0.2
            os_prior, os_init = GammaPrior(5.0, 0.5), 8.0
        elif high_dim_encoded and n_effective < 100:
            # DFT optimized priors
            ls_prior, ls_init = GammaPrior(2.0, 0.2), 5.0
            os_prior, os_init = GammaPrior(5.0, 0.5), 8.0
        elif high_dim_encoded:
            # Mordred optimized priors
            ls_prior, ls_init = GammaPrior(2.0, 0.1), 10.0
            os_prior, os_init = GammaPrior(2.0, 0.1), 10.0
        else:
            # OHE optimized priors
            ls_prior, ls_init = GammaPrior(3.0, 1.0), 2.0
            os_prior, os_init = GammaPrior(5.0, 0.2), 20.0

        base_kernel = MaternKernel(
            nu=2.5,
            lengthscale_prior=ls_prior,
            lengthscale_initial_value=ls_init,
        )
        return ScaleKernel(
            base_kernel,
            outputscale_prior=os_prior,
            outputscale_initial_value=os_init,
        )
def _edbo_noise_factory(
    searchspace: SearchSpace, train_x: Tensor, train_y: Tensor
) -> tuple[GammaPrior, float]:
    """Create the default noise settings for the Gaussian process surrogate.

    The logic is adapted from EDBO (Experimental Design via Bayesian Optimization).
    The regime selection follows the same four-way split as the EDBO kernel
    preset; at present only the low-dimensional regime yields different values,
    the remaining three return identical settings.

    Args:
        searchspace: The search space; its parameters and discrete subspace
            determine which regime is selected.
        train_x: The training inputs; only the size of the last dimension is read.
        train_y: The training targets (not used by this function).

    Returns:
        A tuple of the noise prior and a float (presumably the initial noise
        value -- confirm against the caller).

    References:
        * https://github.com/b-shields/edbo/blob/master/edbo/bro.py#L664
        * https://doi.org/10.1038/s41586-021-03213-y
    """
    # TODO: Replace this function with a proper likelihood factory
    # The number of task parameters is subtracted from the input column count.
    n_columns = train_x.shape[-1]
    n_task_params = sum(
        1 for p in searchspace.parameters if isinstance(p, TaskParameter)
    )
    n_effective = n_columns - n_task_params

    # The encoding-specific regimes only apply from 50 effective dimensions on.
    uses_edbo_encoding = _contains_encoding(searchspace.discrete, _EDBO_ENCODINGS)
    high_dim_encoded = uses_edbo_encoding and (n_effective >= 50)

    # low D priors
    if n_effective < 5:
        return (GammaPrior(1.05, 0.5), 0.1)

    # DFT optimized priors
    if high_dim_encoded and n_effective < 100:
        return (GammaPrior(1.5, 0.1), 5.0)

    # Mordred optimized priors
    if high_dim_encoded:
        return (GammaPrior(1.5, 0.1), 5.0)

    # OHE optimized priors
    return (GammaPrior(1.5, 0.1), 5.0)


# Collect leftover original slotted classes processed by `attrs.define`
gc.collect()