Skip to content

octopus.models

Initialization module for the ``octopus.models`` package.

CatBoostCoxSurvival

Bases: SurvivalMixin, BaseEstimator

CatBoost Cox proportional hazards model for survival analysis.

Wraps CatBoostRegressor with loss_function="Cox" to produce risk scores. Accepts structured numpy array y with fields: - 'c1' (bool): event indicator (True = event observed) - 'c2' (float): duration (time to event or censoring)

Converts these to CatBoost's signed-target format internally: +t = event at time t, -t = censored at time t

Output: risk scores (log-partial hazard) where higher = higher risk.

Attributes:

Name Type Description
learning_rate float

Learning rate (shrinkage).

depth int

Depth of trees.

l2_leaf_reg float

L2 regularization coefficient.

random_strength float

Random strength for scoring splits.

rsm float

Random subspace method (fraction of features per split).

iterations int

Maximum number of boosting iterations.

allow_writing_files bool

Whether CatBoost can write temp files.

logging_level str

CatBoost logging level.

thread_count int

Number of threads for CatBoost.

task_type str

CatBoost computation device.

random_state int | None

Random seed for reproducibility.

Source code in octopus/models/wrapper_models/CatBoostCoxSurvival.py
@define(slots=False)
class CatBoostCoxSurvival(SurvivalMixin, BaseEstimator):
    """Survival model backed by CatBoost's Cox proportional hazards loss.

    Thin wrapper around ``CatBoostRegressor`` configured with
    ``loss_function="Cox"``. The target ``y`` is a structured numpy array
    with two fields:
        - 'c1' (bool): event indicator (True = event observed)
        - 'c2' (float): duration (time to event or censoring)

    Internally the pair is translated to CatBoost's signed-target
    encoding: ``+t`` means an event at time ``t``, ``-t`` means the
    observation was censored at time ``t``.

    Predictions are risk scores (log-partial hazard); higher = higher risk.

    Attributes:
        learning_rate: Learning rate (shrinkage).
        depth: Depth of trees.
        l2_leaf_reg: L2 regularization coefficient.
        random_strength: Random strength for scoring splits.
        rsm: Random subspace method (fraction of features per split).
        iterations: Maximum number of boosting iterations.
        allow_writing_files: Whether CatBoost can write temp files.
        logging_level: CatBoost logging level.
        thread_count: Number of threads for CatBoost.
        task_type: CatBoost computation device.
        random_state: Random seed for reproducibility.
    """

    learning_rate: float = 0.03
    depth: int = 6
    l2_leaf_reg: float = 3.0
    random_strength: float = 2.0
    rsm: float = 1.0
    iterations: int = 500
    allow_writing_files: bool = False
    logging_level: str = "Silent"
    thread_count: int = 1
    task_type: str = "CPU"
    random_state: int | None = None

    @property
    def feature_importances_(self) -> np.ndarray:
        """Feature importances from the fitted CatBoost model."""
        return np.asarray(self.model_.feature_importances_)

    def fit(self, X: np.ndarray, y: np.ndarray, *args, **kwargs) -> "CatBoostCoxSurvival":
        """Fit CatBoost Cox model.

        Args:
            X: Feature matrix.
            y: Structured array with 'c1' (event bool) and 'c2' (duration float).
            *args: Additional positional arguments (unused, for API compatibility).
            **kwargs: Additional keyword arguments (unused, for API compatibility).

        Returns:
            self
        """
        from catboost import CatBoostRegressor  # noqa: PLC0415

        events, durations = check_y_survival(y)

        # Collect the booster configuration from the wrapper's attributes.
        booster_kwargs = {
            "loss_function": "Cox",
            "learning_rate": self.learning_rate,
            "depth": self.depth,
            "l2_leaf_reg": self.l2_leaf_reg,
            "random_strength": self.random_strength,
            "rsm": self.rsm,
            "iterations": self.iterations,
            "allow_writing_files": self.allow_writing_files,
            "logging_level": self.logging_level,
            "thread_count": self.thread_count,
            "task_type": self.task_type,
            "random_state": self.random_state,
        }
        self.model_ = CatBoostRegressor(**booster_kwargs)

        # CatBoost signed-target encoding: +t for events, -t for censored rows.
        signed_target = np.where(events, durations, -durations)
        self.model_.fit(X, signed_target)
        return self

    def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
        """Predict risk scores. Higher = higher risk.

        Args:
            X: Feature matrix.
            **kwargs: Additional keyword arguments (unused, for API compatibility).

        Returns:
            Array of risk scores (log-partial hazard).
        """
        scores = self.model_.predict(X)
        return np.asarray(scores)

feature_importances_ property

Feature importances from the fitted CatBoost model.

fit(X, y, *args, **kwargs)

Fit CatBoost Cox model.

Parameters:

Name Type Description Default
X ndarray

Feature matrix.

required
y ndarray

Structured array with 'c1' (event bool) and 'c2' (duration float).

required
*args

Additional positional arguments (unused, for API compatibility).

()
**kwargs

Additional keyword arguments (unused, for API compatibility).

{}

Returns:

Type Description
CatBoostCoxSurvival

self

Source code in octopus/models/wrapper_models/CatBoostCoxSurvival.py
def fit(self, X: np.ndarray, y: np.ndarray, *args, **kwargs) -> "CatBoostCoxSurvival":
    """Fit CatBoost Cox model.

    Args:
        X: Feature matrix.
        y: Structured array with 'c1' (event bool) and 'c2' (duration float).
        *args: Additional positional arguments (unused, for API compatibility).
        **kwargs: Additional keyword arguments (unused, for API compatibility).

    Returns:
        self
    """
    # Deferred import (see noqa) — catboost is only needed when fitting this model.
    from catboost import CatBoostRegressor  # noqa: PLC0415

    # Split the structured target into event-indicator and duration arrays.
    event, duration = check_y_survival(y)

    # A fresh estimator is built on every fit; hyperparameters come from
    # the wrapper's attributes.
    self.model_ = CatBoostRegressor(
        loss_function="Cox",
        learning_rate=self.learning_rate,
        depth=self.depth,
        l2_leaf_reg=self.l2_leaf_reg,
        random_strength=self.random_strength,
        rsm=self.rsm,
        iterations=self.iterations,
        allow_writing_files=self.allow_writing_files,
        logging_level=self.logging_level,
        thread_count=self.thread_count,
        task_type=self.task_type,
        random_state=self.random_state,
    )

    # Convert to CatBoost signed-target format: +t = event, -t = censored
    y_signed = np.where(event, duration, -duration)
    self.model_.fit(X, y_signed)
    return self

predict(X, **kwargs)

Predict risk scores. Higher = higher risk.

Parameters:

Name Type Description Default
X ndarray

Feature matrix.

required
**kwargs

Additional keyword arguments (unused, for API compatibility).

{}

Returns:

Type Description
ndarray

Array of risk scores (log-partial hazard).

Source code in octopus/models/wrapper_models/CatBoostCoxSurvival.py
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
    """Predict risk scores. Higher = higher risk.

    Args:
        X: Feature matrix.
        **kwargs: Additional keyword arguments (unused, for API compatibility).

    Returns:
        Array of risk scores (log-partial hazard).
    """
    # np.asarray guarantees an ndarray return regardless of CatBoost's output type.
    return np.asarray(self.model_.predict(X))

CategoricalHyperparameter

Bases: Hyperparameter

Categorical Hyperparameter class.

Source code in octopus/models/hyperparameter.py
@define
class CategoricalHyperparameter(Hyperparameter):
    """Hyperparameter sampled from a fixed, non-empty list of choices."""

    choices: list[Any] = field(factory=list)

    def __attrs_post_init__(self):
        # An empty choice list would leave Optuna nothing to sample from.
        if not self.choices:
            raise ValueError("choices must be a non-empty list.")

    def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> Any:
        """Suggest a categorical value using Optuna trial."""
        return trial.suggest_categorical(choices=self.choices, name=unique_name)

suggest(trial, unique_name)

Suggest a categorical value using Optuna trial.

Source code in octopus/models/hyperparameter.py
def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> Any:
    """Suggest a categorical value using Optuna trial."""
    # unique_name disambiguates this parameter within the Optuna study.
    return trial.suggest_categorical(name=unique_name, choices=self.choices)

FIComputeMethod

Bases: StrEnum

Computation methods for feature importance calculation.

Used in model configuration (ModelConfig.feature_method), module configuration (Octo.fi_methods_bestbag, Rfe2.fi_method_rfe, Mrmr.feature_importance_method), and internal dispatch in bag and training code.

Source code in octopus/types.py
class FIComputeMethod(StrEnum):
    """Computation methods for feature importance calculation.

    Used in model configuration (``ModelConfig.feature_method``), module
    configuration (``Octo.fi_methods_bestbag``, ``Rfe2.fi_method_rfe``,
    ``Mrmr.feature_importance_method``), and internal dispatch in bag
    and training code.
    """

    # NOTE(review): member semantics inferred from names only — confirm
    # against the dispatch code in the bag/training modules.
    INTERNAL = "internal"
    PERMUTATION = "permutation"
    SHAP = "shap"
    LOFO = "lofo"
    CONSTANT = "constant"

FixedHyperparameter

Bases: Hyperparameter

Fixed Hyperparameter class.

Source code in octopus/models/hyperparameter.py
@define
class FixedHyperparameter(Hyperparameter):
    """Fixed Hyperparameter class."""

    # Constant value returned for every trial; must not be None.
    value: Any = field()

    def __attrs_post_init__(self):
        # Fail fast instead of surfacing a None value during tuning.
        if self.value is None:
            raise ValueError("value must be provided for FixedHyperparameter.")

    def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> Any:
        """Return the fixed value (no trial suggestion needed)."""
        # trial and unique_name are accepted for interface compatibility but ignored.
        return self.value

suggest(trial, unique_name)

Return the fixed value (no trial suggestion needed).

Source code in octopus/models/hyperparameter.py
def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> Any:
    """Return the fixed value (no trial suggestion needed)."""
    # trial and unique_name are accepted for interface compatibility but ignored.
    return self.value

FloatHyperparameter

Bases: Hyperparameter

Float Hyperparameter class.

Source code in octopus/models/hyperparameter.py
@define
class FloatHyperparameter(Hyperparameter):
    """Continuous hyperparameter sampled from ``[low, high]``."""

    low: float = field(validator=validators.instance_of((float, int)))
    high: float = field(validator=validators.instance_of((float, int)))
    step: float | None = field(default=None, validator=validators.optional(validators.instance_of((float, int))))
    log: bool = False

    def __attrs_post_init__(self):
        # Validate the search-space definition right after construction.
        if self.low > self.high:
            raise ValueError("Low limit must be <= high limit.")
        if self.step is None:
            return
        if self.step <= 0:
            raise ValueError("step must be greater than 0.")
        if self.log:
            raise ValueError("Both step and log cannot be selected at the same time.")

    def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> float:
        """Suggest a float value using Optuna trial."""
        # Optuna rejects step together with log, so forward exactly one of them.
        extra = {"step": self.step} if self.step is not None else {"log": self.log}
        return trial.suggest_float(name=unique_name, low=self.low, high=self.high, **extra)

suggest(trial, unique_name)

Suggest a float value using Optuna trial.

Source code in octopus/models/hyperparameter.py
def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> float:
    """Suggest a float value using Optuna trial."""
    # step and log are mutually exclusive (enforced in __attrs_post_init__),
    # so exactly one of the two call forms applies.
    if self.step is not None:
        return trial.suggest_float(name=unique_name, low=self.low, high=self.high, step=self.step)
    else:
        return trial.suggest_float(name=unique_name, low=self.low, high=self.high, log=self.log)

GPClassifierWrapper

Bases: ClassifierMixin, BaseEstimator

Wrapper for Gaussian Process Classifier.

Source code in octopus/models/wrapper_models/GaussianProcessClassifier.py
class GPClassifierWrapper(ClassifierMixin, BaseEstimator):
    """Scikit-learn-compatible wrapper around GaussianProcessClassifier."""

    _estimator_type = "classifier"

    def __init__(
        self,
        kernel: str | Kernel = "RBF",
        optimizer: Literal["fmin_l_bfgs_b"] | Callable | None = "fmin_l_bfgs_b",
        n_restarts_optimizer: int = 0,
        max_iter_predict: int = 100,
        warm_start: bool = False,
        copy_X_train: bool = True,
        random_state: int | None = None,
        multi_class: Literal["one_vs_rest", "one_vs_one"] = "one_vs_rest",
    ) -> None:
        # Store constructor arguments untouched (scikit-learn estimator convention).
        self.kernel = kernel
        self.optimizer = optimizer
        self.n_restarts_optimizer = n_restarts_optimizer
        self.max_iter_predict = max_iter_predict
        self.warm_start = warm_start
        self.copy_X_train = copy_X_train
        self.random_state = random_state
        self.multi_class = multi_class

    @property
    def classes_(self) -> np.ndarray:
        """Class labels of the fitted underlying model."""
        check_is_fitted(self, "model_")
        return self.model_.classes_  # type: ignore[return-value]

    def fit(self, X: Any, y: Any) -> "GPClassifierWrapper":
        """Validate inputs and fit the underlying Gaussian Process model."""
        X, y = check_X_y(X, y)
        self.model_ = GaussianProcessClassifier(
            kernel=self._get_kernel(self.kernel),
            optimizer=self.optimizer,
            n_restarts_optimizer=self.n_restarts_optimizer,
            max_iter_predict=self.max_iter_predict,
            warm_start=self.warm_start,
            copy_X_train=self.copy_X_train,
            random_state=self.random_state,
            multi_class=self.multi_class,
        )
        self.model_.fit(X, y)
        return self

    def predict(self, X: Any) -> np.ndarray:
        """Predict class labels with the fitted Gaussian Process model."""
        check_is_fitted(self, "model_")
        validated = check_array(X)
        return self.model_.predict(validated)

    def predict_proba(self, X: Any) -> np.ndarray:
        """Predict class probabilities with the fitted Gaussian Process model."""
        check_is_fitted(self, "model_")
        validated = check_array(X)
        return self.model_.predict_proba(validated)

    def _get_kernel(self, kernel_str: str | Kernel) -> Kernel:
        """Resolve a kernel name to a kernel instance (instances pass through)."""
        if isinstance(kernel_str, Kernel):
            return kernel_str
        factories = {"RBF": RBF, "Matern": Matern, "RationalQuadratic": RationalQuadratic}
        factory = factories.get(kernel_str)
        if factory is None:
            raise ValueError(f"Unknown kernel: {kernel_str}")
        return factory()

classes_ property

Get the class labels.

fit(X, y)

Fit the Gaussian Process model.

Source code in octopus/models/wrapper_models/GaussianProcessClassifier.py
def fit(self, X: Any, y: Any) -> "GPClassifierWrapper":
    """Fit the Gaussian Process model."""
    # Validate and convert inputs to consistent arrays before fitting.
    X, y = check_X_y(X, y)
    # Resolve a kernel name such as "RBF" to a kernel instance.
    kernel = self._get_kernel(self.kernel)
    self.model_ = GaussianProcessClassifier(
        kernel=kernel,
        optimizer=self.optimizer,
        n_restarts_optimizer=self.n_restarts_optimizer,
        max_iter_predict=self.max_iter_predict,
        warm_start=self.warm_start,
        copy_X_train=self.copy_X_train,
        random_state=self.random_state,
        multi_class=self.multi_class,
    )
    self.model_.fit(X, y)
    return self

predict(X)

Predict using the Gaussian Process model.

Source code in octopus/models/wrapper_models/GaussianProcessClassifier.py
def predict(self, X: Any) -> np.ndarray:
    """Predict using the Gaussian Process model."""
    # Require a prior fit, then validate the input matrix.
    check_is_fitted(self, "model_")
    X = check_array(X)
    return self.model_.predict(X)

predict_proba(X)

Predict class probabilities using the Gaussian Process model.

Source code in octopus/models/wrapper_models/GaussianProcessClassifier.py
def predict_proba(self, X: Any) -> np.ndarray:
    """Predict class probabilities using the Gaussian Process model."""
    # Require a prior fit, then validate the input matrix.
    check_is_fitted(self, "model_")
    X = check_array(X)
    return self.model_.predict_proba(X)

GPRegressorWrapper

Bases: RegressorMixin, BaseEstimator

Wrapper for Gaussian Process Regressor.

Source code in octopus/models/wrapper_models/GaussianProcessRegressor.py
class GPRegressorWrapper(RegressorMixin, BaseEstimator):
    """Scikit-learn-compatible wrapper around GaussianProcessRegressor."""

    _estimator_type = "regressor"

    def __init__(
        self,
        kernel: Literal["RBF", "Matern", "RationalQuadratic"] | Kernel = "RBF",
        alpha: float = 1e-10,
        optimizer: Literal["fmin_l_bfgs_b"] | Callable | None = "fmin_l_bfgs_b",
        n_restarts_optimizer: int = 0,
        normalize_y: bool = False,
        copy_X_train: bool = True,
        random_state: int | None = None,
    ) -> None:
        # Store constructor arguments untouched (scikit-learn estimator convention).
        self.kernel = kernel
        self.alpha = alpha
        self.optimizer = optimizer
        self.n_restarts_optimizer = n_restarts_optimizer
        self.normalize_y = normalize_y
        self.copy_X_train = copy_X_train
        self.random_state = random_state

    def fit(self, X: Any, y: Any) -> "GPRegressorWrapper":
        """Validate inputs and fit the underlying Gaussian Process model."""
        X, y = check_X_y(X, y, y_numeric=True)
        self.model_ = GaussianProcessRegressor(
            kernel=self._get_kernel(self.kernel),
            alpha=self.alpha,
            optimizer=self.optimizer,
            n_restarts_optimizer=self.n_restarts_optimizer,
            normalize_y=self.normalize_y,
            copy_X_train=self.copy_X_train,
            random_state=self.random_state,
        )
        self.model_.fit(X, y)
        return self

    def predict(self, X: Any) -> np.ndarray:
        """Predict with the fitted Gaussian Process model."""
        check_is_fitted(self, "model_")
        validated = check_array(X)
        return self.model_.predict(validated)  # type: ignore[return-value]

    def _get_kernel(self, kernel_str: Literal["RBF", "Matern", "RationalQuadratic"] | Kernel) -> Kernel:
        """Resolve a kernel name to a kernel instance (instances pass through)."""
        if isinstance(kernel_str, Kernel):
            return kernel_str
        factories = {"RBF": RBF, "Matern": Matern, "RationalQuadratic": RationalQuadratic}
        factory = factories.get(kernel_str)
        if factory is None:
            raise ValueError(f"Unknown kernel: {kernel_str}")
        return factory()

fit(X, y)

Fit the Gaussian Process model.

Source code in octopus/models/wrapper_models/GaussianProcessRegressor.py
def fit(self, X: Any, y: Any) -> "GPRegressorWrapper":
    """Fit the Gaussian Process model."""
    # Validate inputs; y_numeric=True enforces a numeric regression target.
    X, y = check_X_y(X, y, y_numeric=True)
    # Resolve a kernel name such as "RBF" to a kernel instance.
    kernel = self._get_kernel(self.kernel)
    self.model_ = GaussianProcessRegressor(
        kernel=kernel,
        alpha=self.alpha,
        optimizer=self.optimizer,
        n_restarts_optimizer=self.n_restarts_optimizer,
        normalize_y=self.normalize_y,
        copy_X_train=self.copy_X_train,
        random_state=self.random_state,
    )
    self.model_.fit(X, y)
    return self

predict(X)

Predict using the Gaussian Process model.

Source code in octopus/models/wrapper_models/GaussianProcessRegressor.py
def predict(self, X: Any) -> np.ndarray:
    """Predict using the Gaussian Process model."""
    # Require a prior fit, then validate the input matrix.
    check_is_fitted(self, "model_")
    X = check_array(X)
    return self.model_.predict(X)  # type: ignore[return-value]

IntHyperparameter

Bases: Hyperparameter

Integer Hyperparameter class.

Source code in octopus/models/hyperparameter.py
@define
class IntHyperparameter(Hyperparameter):
    """Integer hyperparameter sampled from ``[low, high]``."""

    low: int = field(validator=validators.instance_of(int))
    high: int = field(validator=validators.instance_of(int))
    step: int | None = field(default=None, validator=validators.optional(validators.instance_of(int)))
    log: bool = False

    def __attrs_post_init__(self):
        # Validate the search-space definition right after construction.
        if self.low > self.high:
            raise ValueError("Low limit must be <= high limit.")
        if self.step is None:
            return
        if self.step <= 0:
            raise ValueError("step must be greater than 0.")
        if self.log:
            raise ValueError("Both step and log cannot be selected at the same time.")

    def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> int:
        """Suggest an int value using Optuna trial."""
        # Optuna rejects step together with log, so forward exactly one of them.
        extra = {"step": self.step} if self.step is not None else {"log": self.log}
        return trial.suggest_int(name=unique_name, low=self.low, high=self.high, **extra)

suggest(trial, unique_name)

Suggest an int value using Optuna trial.

Source code in octopus/models/hyperparameter.py
def suggest(self, trial: optuna.trial.Trial, unique_name: str) -> int:
    """Suggest an int value using Optuna trial."""
    # step and log are mutually exclusive (enforced in __attrs_post_init__),
    # so exactly one of the two call forms applies.
    if self.step is not None:
        return trial.suggest_int(name=unique_name, low=self.low, high=self.high, step=self.step)
    else:
        return trial.suggest_int(name=unique_name, low=self.low, high=self.high, log=self.log)

MLType

Bases: StrEnum

Machine learning task types.

Source code in octopus/types.py
class MLType(StrEnum):
    """Machine learning task types."""

    BINARY = "binary"            # binary classification
    MULTICLASS = "multiclass"    # multi-class classification
    REGRESSION = "regression"    # regression
    TIMETOEVENT = "timetoevent"  # survival / time-to-event analysis

ModelConfig

Create model config.

Source code in octopus/models/config.py
@define(slots=False)
class ModelConfig:
    """Create model config."""

    # Estimator class instantiated via Models.get_instance.
    model_class: type[BaseModel]
    # How feature importance is computed for this model.
    feature_method: FIComputeMethod = field(converter=FIComputeMethod)
    # Task types this model supports (checked via supports_ml_type).
    ml_types: frozenset[MLType] = field(converter=to_ml_types_frozenset, validator=validate_ml_types)
    # Default hyperparameter search space; can be overridden per model in
    # Models.create_trial_parameters.
    hyperparameters: list[Hyperparameter] = field(validator=validate_hyperparameters)
    n_repeats: None | int = field(factory=lambda: None)
    # Name of the estimator kwarg used to inject parallelism (None = unsupported);
    # consumed by Models.create_trial_parameters.
    n_jobs: None | str = field(factory=lambda: "n_jobs")
    # Name of the estimator kwarg used to inject the random seed (None = unsupported).
    model_seed: None | str = field(factory=lambda: "model_seed")
    chpo_compatible: bool = field(default=False)
    # Optional input scaler; only StandardScaler (or no scaling) is allowed.
    scaler: None | str = field(default=None, validator=validators.in_([None, "StandardScaler"]))
    imputation_required: bool = field(default=True)
    categorical_enabled: bool = field(default=False)
    # Whether this model belongs to the default set returned by Models.get_defaults.
    default: bool = field(default=False)

    def supports_ml_type(self, ml_type: MLType) -> bool:
        """Check if this model supports the given ml_type."""
        return ml_type in self.ml_types

supports_ml_type(ml_type)

Check if this model supports the given ml_type.

Source code in octopus/models/config.py
def supports_ml_type(self, ml_type: MLType) -> bool:
    """Check if this model supports the given ml_type."""
    # Membership test against the frozenset of supported MLTypes.
    return ml_type in self.ml_types

ModelName

Bases: StrEnum

Available model names.

Use this enum for IDE autocomplete when specifying models, e.g.::

Octo(task_id=0, models=[ModelName.XGBClassifier, ModelName.CatBoostClassifier])

Plain strings still work too, but ModelName keeps call sites consistent.

Source code in octopus/types.py
class ModelName(StrEnum):
    """Available model names.

    Use this enum for IDE autocomplete when specifying models, e.g.::

        Octo(task_id=0, models=[ModelName.XGBClassifier, ModelName.CatBoostClassifier])

    Plain strings still work too, but `ModelName` keeps call sites consistent.
    """

    # NOTE: enum values are the registry keys used by Models.register /
    # Models.get_config, so each value must match its registered factory name.

    # Classification models
    ExtraTreesClassifier = "ExtraTreesClassifier"
    HistGradientBoostingClassifier = "HistGradientBoostingClassifier"
    GradientBoostingClassifier = "GradientBoostingClassifier"
    RandomForestClassifier = "RandomForestClassifier"
    XGBClassifier = "XGBClassifier"
    CatBoostClassifier = "CatBoostClassifier"
    LogisticRegressionClassifier = "LogisticRegressionClassifier"
    GaussianProcessClassifier = "GaussianProcessClassifier"

    # Regression models
    ARDRegressor = "ARDRegressor"
    CatBoostRegressor = "CatBoostRegressor"
    ElasticNetRegressor = "ElasticNetRegressor"
    ExtraTreesRegressor = "ExtraTreesRegressor"
    GaussianProcessRegressor = "GaussianProcessRegressor"
    GradientBoostingRegressor = "GradientBoostingRegressor"
    RandomForestRegressor = "RandomForestRegressor"
    RidgeRegressor = "RidgeRegressor"
    SvrRegressor = "SvrRegressor"
    XGBRegressor = "XGBRegressor"
    HistGradientBoostingRegressor = "HistGradientBoostingRegressor"
    TabularNNRegressor = "TabularNNRegressor"

    # Time-to-event (survival) models
    CatBoostCoxSurvival = "CatBoostCoxSurvival"
    XGBoostCoxSurvival = "XGBoostCoxSurvival"

Models

Central registry and inventory for models.

Source code in octopus/models/core.py
class Models:
    """Central registry and inventory for models."""

    # Internal registry: model name -> function returning ModelConfig
    _config_factories: ClassVar[dict[str, Callable[[], ModelConfig]]] = {}

    # Internal cache: model name -> ModelConfig
    _model_configs: ClassVar[dict[str, ModelConfig]] = {}

    @classmethod
    def register(cls, name: str) -> Callable[[Callable[[], ModelConfig]], Callable[[], ModelConfig]]:
        """Register a model configuration factory function under a given name.

        Args:
            name: The name to register the model under.

        Returns:
            Decorator function.

        Raises:
            ValueError: If a factory is already registered under ``name``.
        """

        def decorator(factory: Callable[[], ModelConfig]) -> Callable[[], ModelConfig]:
            # Guard against two factories claiming the same model name.
            if name in cls._config_factories:
                raise ValueError(f"Model '{name}' is already registered.")
            cls._config_factories[name] = factory
            return factory

        return decorator

    @classmethod
    def _get_registered_models(cls) -> list[ModelName]:
        """Get a list of all registered model names."""
        return [ModelName(name) for name in cls._config_factories]

    @classmethod
    def get_config(cls, name: ModelName) -> ModelConfig:
        """Get model configuration by name.

        Args:
            name: The name of the model to retrieve.

        Returns:
            The ModelConfig instance for the specified model.

        Raises:
            UnknownModelError: If no model with the specified name is found.
        """
        # Return cached config if available
        if name in cls._model_configs:
            return cls._model_configs[name]

        # Lookup factory
        factory = cls._config_factories.get(name)
        if factory is None:
            available = ", ".join(sorted(cls._config_factories.keys()))
            raise UnknownModelError(
                f"Unknown model '{name}'. Available models are: {available}. Please check the model name and try again."
            )

        # Build config via factory and enforce name consistency
        config = factory()
        # Use object.__setattr__ to bypass attrs' attribute restrictions
        object.__setattr__(config, "name", name)
        cls._model_configs[name] = config
        return config

    @classmethod
    def get_instance(cls, name: ModelName, params: dict[str, Any]):
        """Get model class by name and initialize it with the provided parameters.

        Args:
            name: The name of the model to retrieve.
            params: The parameters for model initialization.

        Returns:
            The initialized model instance.
        """
        model_config = cls.get_config(name)
        return model_config.model_class(**params)

    @classmethod
    def create_trial_parameters(
        cls,
        trial: optuna.trial.Trial,
        model_name: ModelName,
        custom_hyperparameters: dict[ModelName, list[Hyperparameter]] | None,
        n_jobs: int,
        model_seed: int,
    ) -> dict[str, Any]:
        """Create Optuna parameters for a specific model.

        Args:
            trial: The Optuna trial object.
            model_name: The name of the model to create parameters for.
            custom_hyperparameters: Optional dict mapping model names to custom hyperparameter lists.
                                   If None or model not in dict, uses default hyperparameters from config.
            n_jobs: Number of jobs for parallel execution.
            model_seed: Random seed for the model.

        Returns:
            Dictionary of parameter names to values.
        """
        # Get model configuration
        model_item = cls.get_config(model_name)

        # Resolve hyperparameters: use custom if provided, otherwise use defaults
        if custom_hyperparameters is not None and model_name in custom_hyperparameters:
            hyperparameters = custom_hyperparameters[model_name]
        else:
            hyperparameters = model_item.hyperparameters

        # Create parameters
        params: dict[str, Any] = {}

        for hp in hyperparameters:
            # get_config() always sets name, safe to access
            # Suffixing the model name keeps Optuna parameter names unique per model.
            unique_name = f"{hp.name}_{model_item.name}"  # type: ignore[attr-defined]
            params[hp.name] = hp.suggest(trial, unique_name)

        # Inject n_jobs / model_seed under the kwarg names declared in the config
        # (None means the estimator does not accept that kwarg).
        if model_item.n_jobs is not None:
            params[model_item.n_jobs] = n_jobs
        if model_item.model_seed is not None:
            params[model_item.model_seed] = model_seed

        return params

    @classmethod
    def get_models_for_type(cls, ml_type: MLType) -> list[ModelName]:
        """Get all registered model names compatible with the given ml_type.

        Args:
            ml_type: The MLType to filter by.

        Returns:
            List of model names that support the given ml_type.
        """
        # The walrus binds mn to the ModelName so the comprehension can collect it.
        return [mn for name in cls._config_factories if cls.get_config(mn := ModelName(name)).supports_ml_type(ml_type)]

    @classmethod
    def get_defaults(cls, ml_type: MLType) -> list[ModelName]:
        """Get default model names for a given ml_type.

        Args:
            ml_type: The MLType to filter by.

        Returns:
            List of default model names that support the given ml_type.

        Raises:
            ValueError: If no default models are defined for the given ml_type.
        """
        # Keep only models flagged default=True that also support the task type.
        defaults = [
            mn
            for name in cls._config_factories
            if (config := cls.get_config(mn := ModelName(name))).supports_ml_type(ml_type) and config.default
        ]
        if not defaults:
            raise ValueError(f"No default models defined for ml_type '{ml_type.value}'. Specify models explicitly.")
        return defaults

    @classmethod
    def validate_model_compatibility(cls, model_name: ModelName, ml_type: MLType) -> None:
        """Validate that a model is compatible with the given ml_type.

        Args:
            model_name: Name of the registered model.
            ml_type: The MLType to check compatibility against.

        Raises:
            ValueError: If the model does not support the given ml_type.
        """
        config = cls.get_config(model_name)
        if not config.supports_ml_type(ml_type):
            raise ValueError(
                f"Model '{model_name}' does not support ml_type '{ml_type.value}'. Supported types: {', '.join(t.value for t in config.ml_types)}"
            )

create_trial_parameters(trial, model_name, custom_hyperparameters, n_jobs, model_seed) classmethod

Create Optuna parameters for a specific model.

Parameters:

Name Type Description Default
trial Trial

The Optuna trial object.

required
model_name ModelName

The name of the model to create parameters for.

required
custom_hyperparameters dict[ModelName, list[Hyperparameter]] | None

Optional dict mapping model names to custom hyperparameter lists. If None or model not in dict, uses default hyperparameters from config.

required
n_jobs int

Number of jobs for parallel execution.

required
model_seed int

Random seed for the model.

required

Returns:

Type Description
dict[str, Any]

Dictionary of parameter names to values.

Source code in octopus/models/core.py
@classmethod
def create_trial_parameters(
    cls,
    trial: optuna.trial.Trial,
    model_name: ModelName,
    custom_hyperparameters: dict[ModelName, list[Hyperparameter]] | None,
    n_jobs: int,
    model_seed: int,
) -> dict[str, Any]:
    """Create Optuna parameters for a specific model.

    Args:
        trial: The Optuna trial object.
        model_name: The name of the model to create parameters for.
        custom_hyperparameters: Optional dict mapping model names to custom hyperparameter lists.
                               If None or model not in dict, uses default hyperparameters from config.
        n_jobs: Number of jobs for parallel execution.
        model_seed: Random seed for the model.

    Returns:
        Dictionary of parameter names to values.
    """
    # Get model configuration
    model_item = cls.get_config(model_name)

    # Resolve hyperparameters: use custom if provided, otherwise use defaults
    if custom_hyperparameters is not None and model_name in custom_hyperparameters:
        hyperparameters = custom_hyperparameters[model_name]
    else:
        hyperparameters = model_item.hyperparameters

    # Create parameters
    params: dict[str, Any] = {}

    for hp in hyperparameters:
        # get_config() always sets name, safe to access
        # Suffixing the model name keeps Optuna parameter names unique per model.
        unique_name = f"{hp.name}_{model_item.name}"  # type: ignore[attr-defined]
        params[hp.name] = hp.suggest(trial, unique_name)

    # Inject n_jobs / model_seed under the kwarg names declared in the config.
    if model_item.n_jobs is not None:
        params[model_item.n_jobs] = n_jobs
    if model_item.model_seed is not None:
        params[model_item.model_seed] = model_seed

    return params

get_config(name) classmethod

Get model configuration by name.

Parameters:

Name Type Description Default
name ModelName

The name of the model to retrieve.

required

Returns:

Type Description
ModelConfig

The ModelConfig instance for the specified model.

Raises:

Type Description
UnknownModelError

If no model with the specified name is found.

Source code in octopus/models/core.py
@classmethod
def get_config(cls, name: ModelName) -> ModelConfig:
    """Get model configuration by name.

    Args:
        name: The name of the model to retrieve.

    Returns:
        The ModelConfig instance for the specified model.

    Raises:
        UnknownModelError: If no model with the specified name is found.
    """
    # Serve from the cache when the config was already built.
    cached = cls._model_configs.get(name)
    if cached is not None:
        return cached

    if name not in cls._config_factories:
        available = ", ".join(sorted(cls._config_factories.keys()))
        raise UnknownModelError(
            f"Unknown model '{name}'. Available models are: {available}. Please check the model name and try again."
        )

    # Build the config lazily and pin its name to the registry key.
    config = cls._config_factories[name]()
    # object.__setattr__ bypasses attrs' attribute restrictions on the config.
    object.__setattr__(config, "name", name)
    cls._model_configs[name] = config
    return config

get_defaults(ml_type) classmethod

Get default model names for a given ml_type.

Parameters:

Name Type Description Default
ml_type MLType

The MLType to filter by.

required

Returns:

Type Description
list[ModelName]

List of default model names that support the given ml_type.

Raises:

Type Description
ValueError

If no default models are defined for the given ml_type.

Source code in octopus/models/core.py
@classmethod
def get_defaults(cls, ml_type: MLType) -> list[ModelName]:
    """Get default model names for a given ml_type.

    Args:
        ml_type: The MLType to filter by.

    Returns:
        List of default model names that support the given ml_type.

    Raises:
        ValueError: If no default models are defined for the given ml_type.
    """
    defaults: list[ModelName] = []
    for name in cls._config_factories:
        model_name = ModelName(name)
        config = cls.get_config(model_name)
        if config.default and config.supports_ml_type(ml_type):
            defaults.append(model_name)

    if not defaults:
        raise ValueError(f"No default models defined for ml_type '{ml_type.value}'. Specify models explicitly.")
    return defaults

get_instance(name, params) classmethod

Get model class by name and initialize it with the provided parameters.

Parameters:

Name Type Description Default
name ModelName

The name of the model to retrieve.

required
params dict[str, Any]

The parameters for model initialization.

required

Returns:

Type Description

The initialized model instance.

Source code in octopus/models/core.py
@classmethod
def get_instance(cls, name: ModelName, params: dict[str, Any]):
    """Get model class by name and initialize it with the provided parameters.

    Args:
        name: The name of the model to retrieve.
        params: The parameters for model initialization.

    Returns:
        The initialized model instance.
    """
    config = cls.get_config(name)
    model_cls = config.model_class
    return model_cls(**params)

get_models_for_type(ml_type) classmethod

Get all registered model names compatible with the given ml_type.

Parameters:

Name Type Description Default
ml_type MLType

The MLType to filter by.

required

Returns:

Type Description
list[ModelName]

List of model names that support the given ml_type.

Source code in octopus/models/core.py
@classmethod
def get_models_for_type(cls, ml_type: MLType) -> list[ModelName]:
    """Get all registered model names compatible with the given ml_type.

    Args:
        ml_type: The MLType to filter by.

    Returns:
        List of model names that support the given ml_type.
    """
    compatible: list[ModelName] = []
    for name in cls._config_factories:
        model_name = ModelName(name)
        if cls.get_config(model_name).supports_ml_type(ml_type):
            compatible.append(model_name)
    return compatible

register(name) classmethod

Register a model configuration factory function under a given name.

Parameters:

Name Type Description Default
name str

The name to register the model under.

required

Returns:

Type Description
Callable[[Callable[[], ModelConfig]], Callable[[], ModelConfig]]

Decorator function.

Source code in octopus/models/core.py
@classmethod
def register(cls, name: str) -> Callable[[Callable[[], ModelConfig]], Callable[[], ModelConfig]]:
    """Register a model configuration factory function under a given name.

    Args:
        name: The name to register the model under.

    Returns:
        Decorator function.
    """

    def _register(factory: Callable[[], ModelConfig]) -> Callable[[], ModelConfig]:
        # Guard against two factories claiming the same registry key.
        if name in cls._config_factories:
            raise ValueError(f"Model '{name}' is already registered.")
        cls._config_factories[name] = factory
        return factory

    return _register

validate_model_compatibility(model_name, ml_type) classmethod

Validate that a model is compatible with the given ml_type.

Parameters:

Name Type Description Default
model_name ModelName

Name of the registered model.

required
ml_type MLType

The MLType to check compatibility against.

required

Raises:

Type Description
ValueError

If the model does not support the given ml_type.

Source code in octopus/models/core.py
@classmethod
def validate_model_compatibility(cls, model_name: ModelName, ml_type: MLType) -> None:
    """Ensure a registered model can be used for the given ml_type.

    Args:
        model_name: Name of the registered model.
        ml_type: The MLType to check compatibility against.

    Raises:
        ValueError: If the model does not support the given ml_type.
    """
    config = cls.get_config(model_name)
    if config.supports_ml_type(ml_type):
        return
    raise ValueError(
        f"Model '{model_name}' does not support ml_type '{ml_type.value}'. Supported types: {', '.join(t.value for t in config.ml_types)}"
    )

XGBoostCoxSurvival

Bases: SurvivalMixin, BaseEstimator

XGBoost Cox proportional hazards model for survival analysis.

Wraps XGBRegressor with objective="survival:cox" to produce risk scores. Accepts structured numpy array y with fields: - 'c1' (bool): event indicator (True = event observed) - 'c2' (float): duration (time to event or censoring)

Converts these to XGBoost's signed-target format internally: +t = event at time t, -t = censored at time t

Output: risk scores (exp(margin)) where higher = higher risk.

Attributes:

Name Type Description
learning_rate float

Learning rate (shrinkage).

min_child_weight int

Minimum sum of instance weight in a child.

subsample float

Subsample ratio of training instances.

n_estimators int

Number of boosting rounds.

max_depth int

Maximum tree depth.

validate_parameters bool

Whether to validate parameters.

n_jobs int

Number of parallel threads.

random_state int | None

Random seed.

Source code in octopus/models/wrapper_models/XGBoostCoxSurvival.py
@define(slots=False)
class XGBoostCoxSurvival(SurvivalMixin, BaseEstimator):
    """XGBoost Cox proportional hazards model for survival analysis.

    Thin wrapper around ``XGBRegressor`` with ``objective="survival:cox"``.
    ``y`` is a structured numpy array with fields:
        - 'c1' (bool): event indicator (True = event observed)
        - 'c2' (float): duration (time to event or censoring)

    Internally the targets are re-encoded into XGBoost's signed format:
        +t = event observed at time t, -t = censored at time t

    Predictions are risk scores (exp(margin)); larger values mean higher risk.

    Attributes:
        learning_rate: Learning rate (shrinkage).
        min_child_weight: Minimum sum of instance weight in a child.
        subsample: Subsample ratio of training instances.
        n_estimators: Number of boosting rounds.
        max_depth: Maximum tree depth.
        validate_parameters: Whether to validate parameters.
        n_jobs: Number of parallel threads.
        random_state: Random seed.
    """

    learning_rate: float = 0.1
    min_child_weight: int = 2
    subsample: float = 1.0
    n_estimators: int = 200
    max_depth: int = 6
    validate_parameters: bool = True
    n_jobs: int = 1
    random_state: int | None = None

    @property
    def feature_importances_(self) -> np.ndarray:
        """Feature importances from the fitted XGBoost model."""
        importances = self.model_.feature_importances_
        return np.asarray(importances)

    def fit(self, X: np.ndarray, y: np.ndarray, *args, **kwargs) -> "XGBoostCoxSurvival":
        """Fit the underlying XGBoost Cox regressor.

        Args:
            X: Feature matrix.
            y: Structured array with 'c1' (event bool) and 'c2' (duration float).
            *args: Additional positional arguments (unused, for API compatibility).
            **kwargs: Additional keyword arguments (unused, for API compatibility).

        Returns:
            self
        """
        from xgboost import XGBRegressor  # noqa: PLC0415

        event, duration = check_y_survival(y)

        booster_params = dict(
            objective="survival:cox",
            eval_metric="cox-nloglik",
            learning_rate=self.learning_rate,
            min_child_weight=self.min_child_weight,
            subsample=self.subsample,
            n_estimators=self.n_estimators,
            max_depth=self.max_depth,
            validate_parameters=self.validate_parameters,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            verbosity=0,
        )
        self.model_ = XGBRegressor(**booster_params)

        # XGBoost signed-target encoding: +t = event, -t = censored.
        signed_targets = np.where(event, duration, -duration)
        self.model_.fit(X, signed_targets)
        return self

    def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
        """Predict risk scores. Higher = higher risk.

        Args:
            X: Feature matrix.
            **kwargs: Additional keyword arguments (unused, for API compatibility).

        Returns:
            Array of risk scores (exp(margin)).
        """
        scores = self.model_.predict(X)
        return np.asarray(scores)

feature_importances_ property

Feature importances from the fitted XGBoost model.

fit(X, y, *args, **kwargs)

Fit XGBoost Cox model.

Parameters:

Name Type Description Default
X ndarray

Feature matrix.

required
y ndarray

Structured array with 'c1' (event bool) and 'c2' (duration float).

required
*args

Additional positional arguments (unused, for API compatibility).

()
**kwargs

Additional keyword arguments (unused, for API compatibility).

{}

Returns:

Type Description
XGBoostCoxSurvival

self

Source code in octopus/models/wrapper_models/XGBoostCoxSurvival.py
def fit(self, X: np.ndarray, y: np.ndarray, *args, **kwargs) -> "XGBoostCoxSurvival":
    """Fit the underlying XGBoost Cox regressor.

    Args:
        X: Feature matrix.
        y: Structured array with 'c1' (event bool) and 'c2' (duration float).
        *args: Additional positional arguments (unused, for API compatibility).
        **kwargs: Additional keyword arguments (unused, for API compatibility).

    Returns:
        self
    """
    from xgboost import XGBRegressor  # noqa: PLC0415

    event, duration = check_y_survival(y)

    booster_params = dict(
        objective="survival:cox",
        eval_metric="cox-nloglik",
        learning_rate=self.learning_rate,
        min_child_weight=self.min_child_weight,
        subsample=self.subsample,
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        validate_parameters=self.validate_parameters,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
        verbosity=0,
    )
    self.model_ = XGBRegressor(**booster_params)

    # XGBoost signed-target encoding: +t = event, -t = censored.
    signed_targets = np.where(event, duration, -duration)
    self.model_.fit(X, signed_targets)
    return self

predict(X, **kwargs)

Predict risk scores. Higher = higher risk.

Parameters:

Name Type Description Default
X ndarray

Feature matrix.

required
**kwargs

Additional keyword arguments (unused, for API compatibility).

{}

Returns:

Type Description
ndarray

Array of risk scores (exp(margin)).

Source code in octopus/models/wrapper_models/XGBoostCoxSurvival.py
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
    """Predict risk scores. Higher = higher risk.

    Args:
        X: Feature matrix.
        **kwargs: Additional keyword arguments (unused, for API compatibility).

    Returns:
        Array of risk scores (exp(margin)).
    """
    scores = self.model_.predict(X)
    return np.asarray(scores)

ard_regressor()

ARD regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.ARDRegressor)
def ard_regressor() -> ModelConfig:
    """ARD regression model config."""
    search_space = [
        FloatHyperparameter(name="alpha_1", low=1e-10, high=1e-3, log=True),
        FloatHyperparameter(name="alpha_2", low=1e-10, high=1e-3, log=True),
        FloatHyperparameter(name="lambda_1", low=1e-10, high=1e-3, log=True),
        FloatHyperparameter(name="lambda_2", low=1e-10, high=1e-3, log=True),
        FloatHyperparameter(name="threshold_lambda", low=1e3, high=1e5, log=True),
        FloatHyperparameter(name="tol", low=1e-5, high=1e-1, log=True),
        FixedHyperparameter(name="fit_intercept", value=True),
    ]
    return ModelConfig(
        model_class=ARDRegression,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.PERMUTATION,
        n_repeats=2,
        chpo_compatible=False,
        scaler="StandardScaler",
        imputation_required=True,
        categorical_enabled=False,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed=None,
    )

catboost_classifier()

CatBoost classification model config.

Source code in octopus/models/classification_models.py
@Models.register(ModelName.CatBoostClassifier)
def catboost_classifier() -> ModelConfig:
    """CatBoost classification model config."""
    search_space = [
        FloatHyperparameter(name="learning_rate", low=1e-2, high=1e-1, log=True),
        IntHyperparameter(name="depth", low=3, high=10),
        FloatHyperparameter(name="l2_leaf_reg", low=2, high=10),
        FloatHyperparameter(name="random_strength", low=2, high=10),
        FloatHyperparameter(name="rsm", low=0.1, high=1),
        FixedHyperparameter(name="iterations", value=1000),
        CategoricalHyperparameter(name="auto_class_weights", choices=[None, "Balanced"]),
        FixedHyperparameter(name="allow_writing_files", value=False),
        FixedHyperparameter(name="logging_level", value="Silent"),
        FixedHyperparameter(name="thread_count", value=1),
        FixedHyperparameter(name="task_type", value="CPU"),
    ]
    return ModelConfig(
        model_class=CatBoostClassifier,
        # Multiclass excluded: SHAP explainers segfault on CatBoost multiclass models.
        ml_types=[MLType.BINARY],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        categorical_enabled=True,
        default=True,
        hyperparameters=search_space,
        n_jobs="thread_count",
        model_seed="random_state",
    )

catboost_cox_survival()

CatBoost Cox survival model config.

Source code in octopus/models/time_to_event_models.py
@Models.register(ModelName.CatBoostCoxSurvival)
def catboost_cox_survival() -> ModelConfig:
    """CatBoost Cox survival model config."""
    search_space = [
        FloatHyperparameter(name="learning_rate", low=1e-3, high=1e-1, log=True),
        IntHyperparameter(name="depth", low=3, high=10),
        FloatHyperparameter(name="l2_leaf_reg", low=2, high=10),
        FloatHyperparameter(name="random_strength", low=2, high=10),
        FloatHyperparameter(name="rsm", low=0.1, high=1),
        FixedHyperparameter(name="iterations", value=500),
        FixedHyperparameter(name="allow_writing_files", value=False),
        FixedHyperparameter(name="logging_level", value="Silent"),
        FixedHyperparameter(name="task_type", value="CPU"),
    ]
    return ModelConfig(
        model_class=CatBoostCoxSurvival,  # type: ignore[arg-type]
        ml_types=[MLType.TIMETOEVENT],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        categorical_enabled=True,
        default=True,
        hyperparameters=search_space,
        n_jobs="thread_count",
        model_seed="random_state",
    )

catboost_regressor()

CatBoost regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.CatBoostRegressor)
def catboost_regressor() -> ModelConfig:
    """CatBoost regression model config."""
    search_space = [
        FloatHyperparameter(name="learning_rate", low=1e-3, high=1e-1, log=True),
        IntHyperparameter(name="depth", low=3, high=10),
        FloatHyperparameter(name="l2_leaf_reg", low=2, high=10),
        FloatHyperparameter(name="random_strength", low=2, high=10),
        FloatHyperparameter(name="rsm", low=0.1, high=1),
        FixedHyperparameter(name="iterations", value=500),
        FixedHyperparameter(name="allow_writing_files", value=False),
        FixedHyperparameter(name="logging_level", value="Silent"),
        FixedHyperparameter(name="thread_count", value=1),
        FixedHyperparameter(name="task_type", value="CPU"),
    ]
    return ModelConfig(
        model_class=CatBoostRegressor,
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        categorical_enabled=True,
        default=True,
        hyperparameters=search_space,
        n_jobs="thread_count",
        model_seed="random_state",
    )

elastic_net_regressor()

ElasticNet regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.ElasticNetRegressor)
def elastic_net_regressor() -> ModelConfig:
    """ElasticNet regression model config."""
    search_space = [
        FloatHyperparameter(name="alpha", low=1e-10, high=1e2, log=True),
        FloatHyperparameter(name="l1_ratio", low=0, high=1, log=False),
        CategoricalHyperparameter(name="fit_intercept", choices=[True, False]),
        FloatHyperparameter(name="tol", low=1e-5, high=1e-1, log=True),
        FixedHyperparameter(name="max_iter", value=4000),
        FixedHyperparameter(name="selection", value="random"),
    ]
    return ModelConfig(
        model_class=ElasticNet,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.SHAP,
        chpo_compatible=True,
        scaler="StandardScaler",
        imputation_required=True,
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed="random_state",
    )

extra_trees_classifier()

ExtraTrees classification model config.

Source code in octopus/models/classification_models.py
@Models.register(ModelName.ExtraTreesClassifier)
def extra_trees_classifier() -> ModelConfig:
    """ExtraTrees classification model config."""
    search_space = [
        IntHyperparameter(name="max_depth", low=2, high=32),
        IntHyperparameter(name="min_samples_split", low=2, high=100),
        IntHyperparameter(name="min_samples_leaf", low=1, high=50),
        FloatHyperparameter(name="max_features", low=0.1, high=1),
        IntHyperparameter(name="n_estimators", low=100, high=500, log=False),
        CategoricalHyperparameter(name="class_weight", choices=[None, "balanced"]),
        FixedHyperparameter(name="criterion", value="entropy"),
        FixedHyperparameter(name="bootstrap", value=True),
    ]
    return ModelConfig(
        model_class=ExtraTreesClassifier,  # type: ignore[arg-type]
        ml_types=[MLType.BINARY, MLType.MULTICLASS],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=True,
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )

extra_trees_regressor()

ExtraTrees regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.ExtraTreesRegressor)
def extra_trees_regressor() -> ModelConfig:
    """ExtraTrees regression model config."""
    search_space = [
        IntHyperparameter(name="max_depth", low=2, high=32),
        IntHyperparameter(name="min_samples_split", low=2, high=100),
        IntHyperparameter(name="min_samples_leaf", low=1, high=50),
        IntHyperparameter(name="n_estimators", low=100, high=500, log=False),
        FloatHyperparameter(name="max_features", low=0.1, high=1),
    ]
    return ModelConfig(
        model_class=ExtraTreesRegressor,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=True,
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )

gaussian_process_classifier()

Gaussian process classification model config.

Source code in octopus/models/classification_models.py
@Models.register(ModelName.GaussianProcessClassifier)
def gaussian_process_classifier() -> ModelConfig:
    """Gaussian process classification model config."""
    search_space = [
        CategoricalHyperparameter(name="kernel", choices=["RBF", "Matern", "RationalQuadratic"]),
        CategoricalHyperparameter(name="optimizer", choices=["fmin_l_bfgs_b", None]),
        IntHyperparameter(name="n_restarts_optimizer", low=0, high=10, log=False),
        IntHyperparameter(name="max_iter_predict", low=50, high=200, log=False),
    ]
    return ModelConfig(
        model_class=GPClassifierWrapper,  # type: ignore[arg-type]
        ml_types=[MLType.BINARY, MLType.MULTICLASS],
        feature_method=FIComputeMethod.PERMUTATION,
        n_repeats=2,
        chpo_compatible=False,
        scaler="StandardScaler",
        imputation_required=True,
        categorical_enabled=False,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed="random_state",
    )

gaussian_process_regressor()

Gaussian process regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.GaussianProcessRegressor)
def gaussian_process_regressor() -> ModelConfig:
    """Gaussian process regression model config.

    Returns:
        ModelConfig for the GP regressor wrapper (permutation-based feature
        importance, standard scaling, imputation required).
    """
    return ModelConfig(
        model_class=GPRegressorWrapper,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.PERMUTATION,
        n_repeats=2,
        chpo_compatible=False,
        scaler="StandardScaler",
        imputation_required=True,
        categorical_enabled=False,
        hyperparameters=[
            CategoricalHyperparameter(name="kernel", choices=["RBF", "Matern", "RationalQuadratic"]),
            # Bug fix: "alpha" was listed twice; the duplicate suggested the same
            # Optuna parameter name and silently overwrote the first entry.
            FloatHyperparameter(name="alpha", low=1e-10, high=1e-1, log=True),
            CategoricalHyperparameter(name="normalize_y", choices=[True, False]),
            CategoricalHyperparameter(name="optimizer", choices=["fmin_l_bfgs_b", None]),
            IntHyperparameter(name="n_restarts_optimizer", low=0, high=10, log=False),
        ],
        n_jobs=None,
        model_seed="random_state",
    )

gradient_boosting_classifier()

Gradient boosting classification model config.

Source code in octopus/models/classification_models.py
@Models.register(ModelName.GradientBoostingClassifier)
def gradient_boosting_classifier() -> ModelConfig:
    """Gradient boosting classification model config."""
    search_space = [
        FloatHyperparameter(name="learning_rate", low=0.01, high=1, log=True),
        IntHyperparameter(name="min_samples_leaf", low=1, high=200),
        IntHyperparameter(name="max_leaf_nodes", low=3, high=2047),
        IntHyperparameter(name="max_depth", low=3, high=9, step=2),
        IntHyperparameter(name="n_estimators", low=30, high=500),
        FloatHyperparameter(name="max_features", low=0.1, high=1),
        FixedHyperparameter(name="loss", value="log_loss"),
    ]
    return ModelConfig(
        model_class=GradientBoostingClassifier,  # type: ignore[arg-type]
        ml_types=[MLType.BINARY, MLType.MULTICLASS],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=True,
        categorical_enabled=False,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed="random_state",
    )

gradient_boosting_regressor()

Gradient boost regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.GradientBoostingRegressor)
def gradient_boosting_regressor() -> ModelConfig:
    """Gradient boosting regression model config."""
    search_space = [
        FloatHyperparameter(name="learning_rate", low=0.01, high=1, log=True),
        IntHyperparameter(name="min_samples_leaf", low=1, high=200),
        IntHyperparameter(name="max_leaf_nodes", low=3, high=2047),
        IntHyperparameter(name="max_depth", low=3, high=9, step=2),
        IntHyperparameter(name="n_estimators", low=30, high=500),
        FloatHyperparameter(name="max_features", low=0.1, high=1),
        FixedHyperparameter(name="loss", value="squared_error"),
    ]
    return ModelConfig(
        model_class=GradientBoostingRegressor,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=True,
        categorical_enabled=False,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed="random_state",
    )

hist_gradient_boosting_classifier()

Histogram-based gradient boosting classification model config (scikit-learn 1.6.1).

Source code in octopus/models/classification_models.py
@Models.register(ModelName.HistGradientBoostingClassifier)
def hist_gradient_boosting_classifier() -> ModelConfig:
    """Histogram-based gradient boosting classification model config (scikit-learn 1.6.1)."""
    search_space = [
        FloatHyperparameter(name="learning_rate", low=0.01, high=0.3, log=True),
        IntHyperparameter(name="max_iter", low=50, high=1000),
        IntHyperparameter(name="max_leaf_nodes", low=7, high=256),
        IntHyperparameter(name="min_samples_leaf", low=1, high=200),
        IntHyperparameter(name="max_bins", low=16, high=255),
        FloatHyperparameter(name="l2_regularization", low=0.0, high=10.0, log=False),
        FixedHyperparameter(name="loss", value="log_loss"),
    ]
    return ModelConfig(
        model_class=HistGradientBoostingClassifier,  # type: ignore[arg-type]
        ml_types=[MLType.BINARY, MLType.MULTICLASS],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        categorical_enabled=True,
        default=True,
        hyperparameters=search_space,
        # HistGradientBoostingClassifier seeds via `random_state` (model_seed -> "random_state").
        n_jobs=None,
        model_seed="random_state",
    )

hist_gradient_boosting_regressor()

Histogram-based gradient boosting regression model class (scikit-learn 1.6.1).

Source code in octopus/models/regression_models.py
@Models.register(ModelName.HistGradientBoostingRegressor)
def hist_gradient_boosting_regressor() -> ModelConfig:
    """Histogram-based gradient boosting regression model config (scikit-learn 1.6.1)."""
    search_space = [
        FloatHyperparameter(name="learning_rate", low=0.01, high=0.3, log=True),
        IntHyperparameter(name="max_iter", low=50, high=1000),
        IntHyperparameter(name="max_leaf_nodes", low=7, high=256),
        FloatHyperparameter(name="l2_regularization", low=1e-6, high=10.0, log=True),
        IntHyperparameter(name="min_samples_leaf", low=1, high=200),
        IntHyperparameter(name="max_bins", low=16, high=255),
        FixedHyperparameter(name="loss", value="squared_error"),
    ]
    return ModelConfig(
        model_class=HistGradientBoostingRegressor,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        categorical_enabled=True,
        default=True,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed="random_state",
    )

logistic_regression_classifier()

Logistic regression classification model config.

Source code in octopus/models/classification_models.py
@Models.register(ModelName.LogisticRegressionClassifier)
def logistic_regression_classifier() -> ModelConfig:
    """Logistic regression classification model config."""
    search_space = [
        IntHyperparameter(name="max_iter", low=100, high=500),
        FloatHyperparameter(name="C", low=1e-2, high=100, log=True),
        FloatHyperparameter(name="tol", low=1e-4, high=1e-2, log=True),
        CategoricalHyperparameter(name="penalty", choices=["l2", None]),
        CategoricalHyperparameter(name="fit_intercept", choices=[True, False]),
        CategoricalHyperparameter(name="class_weight", choices=[None, "balanced"]),
        FixedHyperparameter(name="solver", value="lbfgs"),
    ]
    return ModelConfig(
        model_class=LogisticRegression,  # type: ignore[arg-type]
        ml_types=[MLType.BINARY, MLType.MULTICLASS],
        feature_method=FIComputeMethod.PERMUTATION,
        n_repeats=2,
        chpo_compatible=True,
        scaler="StandardScaler",
        imputation_required=True,
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )

random_forest_classifier()

Random forest classification model config.

Source code in octopus/models/classification_models.py
@Models.register(ModelName.RandomForestClassifier)
def random_forest_classifier() -> ModelConfig:
    """Random forest classification model config."""
    search_space = [
        IntHyperparameter(name="max_depth", low=2, high=32),
        IntHyperparameter(name="min_samples_split", low=2, high=100),
        IntHyperparameter(name="min_samples_leaf", low=1, high=50),
        FloatHyperparameter(name="max_features", low=0.1, high=1),
        IntHyperparameter(name="n_estimators", low=100, high=500, log=False),
        CategoricalHyperparameter(name="class_weight", choices=[None, "balanced"]),
    ]
    return ModelConfig(
        model_class=RandomForestClassifier,  # type: ignore[arg-type]
        ml_types=[MLType.BINARY, MLType.MULTICLASS],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=True,  # NOTE(review): possibly False — confirm before relaxing
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )

random_forest_regressor()

Random forest regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.RandomForestRegressor)
def random_forest_regressor() -> ModelConfig:
    """Build the model config for scikit-learn's random forest regressor.

    Returns:
        ModelConfig for regression tasks, using the estimator's internal
        feature importances and a tree-oriented search space.
    """
    search_space = [
        IntHyperparameter(name="max_depth", low=2, high=32),
        IntHyperparameter(name="min_samples_split", low=2, high=100),
        IntHyperparameter(name="min_samples_leaf", low=1, high=50),
        IntHyperparameter(name="n_estimators", low=100, high=500),
        FloatHyperparameter(name="max_features", low=0.1, high=1),
    ]
    return ModelConfig(
        model_class=RandomForestRegressor,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        # TODO(review): verify whether imputation is truly required here.
        imputation_required=True,
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )

ridge_regressor()

Ridge regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.RidgeRegressor)
def ridge_regressor() -> ModelConfig:
    """Build the model config for scikit-learn's Ridge regressor.

    Returns:
        ModelConfig for regression tasks, using SHAP-based feature importance
        and a small linear-model search space with a fixed solver.
    """
    search_space = [
        FloatHyperparameter(name="alpha", low=1e-5, high=1e5, log=True),
        CategoricalHyperparameter(name="fit_intercept", choices=[True, False]),
        # Solver is pinned rather than tuned.
        FixedHyperparameter(name="solver", value="svd"),
    ]
    return ModelConfig(
        model_class=Ridge,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.SHAP,
        chpo_compatible=False,
        scaler="StandardScaler",
        imputation_required=True,
        categorical_enabled=False,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed="random_state",
    )

svr_regressor()

SVR regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.SvrRegressor)
def svr_regressor() -> ModelConfig:
    """Build the model config for scikit-learn's SVR regressor.

    Returns:
        ModelConfig for regression tasks, using permutation-based feature
        importance; SVR takes no random seed, so model_seed is None.
    """
    search_space = [
        FloatHyperparameter(name="C", low=0.03125, high=32768, log=True),
        FloatHyperparameter(name="epsilon", low=0.001, high=1, log=True),
        FloatHyperparameter(name="tol", low=1e-5, high=1e-1, log=True),
    ]
    return ModelConfig(
        model_class=SVR,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.PERMUTATION,
        n_repeats=2,
        chpo_compatible=False,
        scaler="StandardScaler",
        imputation_required=True,
        categorical_enabled=False,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed=None,
    )

tabular_nn_regressor()

Tabular Neural Network regression model class with categorical embeddings.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.TabularNNRegressor)
def tabular_nn_regressor() -> ModelConfig:
    """Build the model config for the tabular neural-network regressor.

    The wrapper model supports categorical embeddings, so imputation is not
    required and categorical features are enabled.

    Returns:
        ModelConfig for regression tasks with an architecture/optimizer
        search space (hidden layer shapes, dropout, learning rate, batch size).
    """
    # Imported lazily to avoid the heavy dependency at module import time.
    from .wrapper_models.TabularNNRegressor import TabularNNRegressor  # noqa: PLC0415

    layer_options = [
        [512, 256, 128],
        [512, 256],
        [512, 128],
        [256, 256, 128],
        [256, 128, 64],
        [256, 128],
        [256, 64],
        [128, 128, 64],
        [128, 64],
        [128, 32],
    ]
    search_space = [
        CategoricalHyperparameter(name="hidden_sizes", choices=layer_options),
        FloatHyperparameter(name="dropout", low=0.0, high=0.5),
        FloatHyperparameter(name="learning_rate", low=1e-5, high=1e-2, log=True),
        FixedHyperparameter(name="weight_decay", value=1e-5),
        FixedHyperparameter(name="activation", value="elu"),
        FixedHyperparameter(name="optimizer", value="adamw"),
        CategoricalHyperparameter(name="batch_size", choices=[32, 64, 128, 256]),
        FixedHyperparameter(name="epochs", value=200),
    ]
    return ModelConfig(
        model_class=TabularNNRegressor,  # type: ignore[arg-type]
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.PERMUTATION,
        n_repeats=2,
        chpo_compatible=False,
        scaler="StandardScaler",
        imputation_required=False,
        categorical_enabled=True,
        hyperparameters=search_space,
        n_jobs=None,
        model_seed="random_state",
    )

xgb_classifier()

XGBoost classification model config.

Source code in octopus/models/classification_models.py
@Models.register(ModelName.XGBClassifier)
def xgb_classifier() -> ModelConfig:
    """Build the model config for the XGBoost classifier.

    Returns:
        ModelConfig covering binary and multiclass tasks, using XGBoost's
        internal feature importances and a boosting-oriented search space.
    """
    search_space = [
        FloatHyperparameter(name="learning_rate", low=1e-4, high=0.3, log=True),
        IntHyperparameter(name="min_child_weight", low=2, high=15),
        FloatHyperparameter(name="subsample", low=0.15, high=1.0),
        IntHyperparameter(name="n_estimators", low=30, high=200),
        IntHyperparameter(name="max_depth", low=3, high=9, step=2),
        FixedHyperparameter(name="validate_parameters", value=True),
    ]
    return ModelConfig(
        model_class=XGBClassifier,
        ml_types=[MLType.BINARY, MLType.MULTICLASS],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        # TODO(review): XGBoost can handle categoricals natively — confirm
        # whether this should be True.
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )

xgb_regressor()

XGBoost regression model class.

Source code in octopus/models/regression_models.py
@Models.register(ModelName.XGBRegressor)
def xgb_regressor() -> ModelConfig:
    """Build the model config for the XGBoost regressor.

    Returns:
        ModelConfig for regression tasks, using XGBoost's internal feature
        importances and a boosting-oriented search space including L2
        regularization ("lambda").
    """
    search_space = [
        FloatHyperparameter(name="learning_rate", low=1e-4, high=0.3, log=True),
        IntHyperparameter(name="min_child_weight", low=2, high=15),
        FloatHyperparameter(name="subsample", low=0.15, high=1.0),
        IntHyperparameter(name="n_estimators", low=30, high=500),
        IntHyperparameter(name="max_depth", low=3, high=9, step=2),
        FixedHyperparameter(name="validate_parameters", value=True),
        FloatHyperparameter(name="lambda", low=1e-8, high=1, log=True),
    ]
    return ModelConfig(
        model_class=XGBRegressor,
        ml_types=[MLType.REGRESSION],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        # TODO(review): XGBoost can handle categoricals natively — confirm
        # whether this should be True.
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )

xgboost_cox_survival()

XGBoost Cox survival model config.

Source code in octopus/models/time_to_event_models.py
@Models.register(ModelName.XGBoostCoxSurvival)
def xgboost_cox_survival() -> ModelConfig:
    """Build the model config for the XGBoost Cox survival model.

    Returns:
        ModelConfig for time-to-event tasks, wrapping the XGBoostCoxSurvival
        estimator with a boosting-oriented search space.
    """
    search_space = [
        FloatHyperparameter(name="learning_rate", low=1e-4, high=0.3, log=True),
        IntHyperparameter(name="min_child_weight", low=2, high=15),
        FloatHyperparameter(name="subsample", low=0.15, high=1.0),
        IntHyperparameter(name="n_estimators", low=30, high=500),
        IntHyperparameter(name="max_depth", low=3, high=9, step=1),
        FixedHyperparameter(name="validate_parameters", value=True),
    ]
    return ModelConfig(
        model_class=XGBoostCoxSurvival,  # type: ignore[arg-type]
        ml_types=[MLType.TIMETOEVENT],
        feature_method=FIComputeMethod.INTERNAL,
        chpo_compatible=True,
        scaler=None,
        imputation_required=False,
        categorical_enabled=False,
        default=True,
        hyperparameters=search_space,
        n_jobs="n_jobs",
        model_seed="random_state",
    )