Source code for pyepal.pal.pal_catboost

# -*- coding: utf-8 -*-
# Copyright 2022 PyePAL authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Implements a PAL class for GBDT models with virtual ensembles for the uncertainty estimates.

Note that the scaling of the hyperrectangles has been derived
for GPR models (with RBF kernels).
"""
from functools import partial

import numpy as np

from .pal_base import PALBase
from .validate_inputs import validate_catboost_models

# Public API: only the PAL class is exported; `_train_model_picklable` stays
# module-private.
__all__ = ["PALCatBoost"]


def _train_model_picklable(i, models, design_space, objectives, sampled):
    """Fit the ``i``-th model on the rows selected for objective ``i``.

    Defined at module level (rather than as a lambda or nested function) so
    it can be wrapped in ``functools.partial`` and remain picklable.

    Args:
        i (int): Index of the objective, and of the model in ``models``.
        models (list): Regressors exposing ``fit(X, y)``.
        design_space (np.array): Feature matrix; rows are indexed with
            ``sampled[:, i]``.
        objectives (np.array): Target matrix; column ``i`` is the target
            of model ``i``.
        sampled (np.array): Per-objective row selector; column ``i`` is
            used to index the rows of ``design_space`` and ``objectives``
            (presumably a boolean mask -- confirm against the caller).

    Returns:
        The same model object that is stored in ``models[i]``, after
        ``fit`` has been called on it.
    """
    estimator = models[i]
    row_selector = sampled[:, i]
    features = design_space[row_selector, :]
    targets = objectives[row_selector, i].ravel()
    estimator.fit(features, targets)
    return estimator


class PALCatBoost(PALBase):
    """PAL class for a list of CatBoost GBDT models.

    Predictive uncertainties are obtained from CatBoost virtual ensembles.
    """

    def __init__(self, *args, **kwargs):
        """Construct the PALCatBoost instance

        Args:
            X_design (np.array): Design space (feature matrix)
            models (List[CatBoostRegressor]): Machine learning models.
                You need to provide a list of CatBoost regressors.
                The regressors need to use the `RMSEWithUncertainty` loss.
            ndim (int): Number of objectives
            epsilon (Union[list, float], optional): Epsilon hyperparameter.
                Defaults to 0.01.
            delta (float, optional): Delta hyperparameter. Defaults to 0.05.
            beta_scale (float, optional): Scaling parameter for beta.
                If not equal to 1, the theoretical guarantees do not
                necessarily hold. Also note that the parametrization depends
                on the kernel type. Defaults to 1/9.
            goals (List[str], optional): If a list, provide "min" for every
                objective that shall be minimized and "max" for every
                objective that shall be maximized. Defaults to None, which
                means that the code maximizes all objectives.
            coef_var_threshold (float, optional): Use only points with
                a coefficient of variation below this threshold
                in the classification step. Defaults to 3.
            virtual_ensembles_count (int, optional): Number of virtual
                ensemble models used when predicting. Defaults to 10.
        """
        # Remove the CatBoost-specific option before delegating, so it is
        # not forwarded to the base-class constructor.
        self.virtual_ensembles_count = kwargs.pop("virtual_ensembles_count", 10)
        super().__init__(*args, **kwargs)
        self.models = validate_catboost_models(self.models, self.ndim)

    def _set_data(self):
        """Do nothing; the training data is sliced inside ``_train``."""
        pass

    def _train(self):
        """Fit every model on the sampled points of its own objective.

        ``self.models`` is replaced by the list of fitted models returned by
        ``_train_model_picklable``.
        """
        train_single_partial = partial(
            _train_model_picklable,
            models=self.models,
            design_space=self.design_space,
            objectives=self.y,
            sampled=self.sampled,
        )
        self.models = list(map(train_single_partial, range(len(self.models))))

    def _predict(self):
        """Predict mean and standard deviation over the whole design space.

        Stores one column per model in ``self._means`` and ``self.std``.
        """
        means, stds = [], []
        for model in self.models:
            pred = model.virtual_ensembles_predict(
                self.design_space,
                prediction_type="TotalUncertainty",
                # Honor the constructor argument; this used to be
                # hard-coded to 20, which silently ignored the user setting.
                virtual_ensembles_count=self.virtual_ensembles_count,
            )
            # Per the CatBoost documentation, for `RMSEWithUncertainty` the
            # columns are: mean prediction, knowledge uncertainty and data
            # uncertainty. The two variances add up to the total variance.
            std = np.sqrt(pred[:, 1] + pred[:, 2])
            mean = pred[:, 0]
            means.append(mean.reshape(-1, 1))
            stds.append(std.reshape(-1, 1))

        self._means = np.hstack(means)
        self.std = np.hstack(stds)

    def _set_hyperparameters(self):
        """Do nothing; no hyperparameter optimization is implemented."""
        # ToDo: Maybe add some optuna helper here.
        pass