
uqregressors.tuning.tuning

Tuning contains the helper function tune_hyperparams, which uses the Bayesian hyperparameter optimization framework Optuna, along with some example scoring functions.

Important features of this hyperparameter optimization method are:
  • Customizable scoring function (a sketch follows this list)
  • Customizable set of hyperparameters to be tuned
  • Customizable number of tuning iterations and search space
  • Support for cross-validation
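
A scoring function only needs the signature score_fn(estimator, X, y) -> float, where the estimator follows the same predict/alpha interface used by the examples below. As a minimal sketch of a custom score (the coverage_error name and its logic are illustrative, not part of the package):

import numpy as np

def coverage_error(estimator, X, y):
    """Absolute gap between empirical and nominal coverage, greater=False"""
    _, lower, upper = estimator.predict(X)
    coverage = np.mean((y >= lower) & (y <= upper))
    # Nominal coverage is 1 - alpha; a smaller gap is better
    return np.abs(coverage - (1 - estimator.alpha))

A function like this would be passed to tune_hyperparams with greater_is_better=False.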

interval_score(estimator, X, y)

Example interval score scoring function for hyperparameter tuning (use with greater_is_better=False)

Source code in uqregressors\tuning\tuning.py
def interval_score(estimator, X, y): 
    """
    Example interval score scoring function for hyperparameter tuning (use with greater_is_better=False)
    """
    alpha = estimator.alpha
    _, lower, upper = estimator.predict(X)
    width = upper - lower
    penalty_lower = (2 / alpha) * (lower - y) * (y < lower)
    penalty_upper = (2 / alpha) * (y - upper) * (y > upper)
    return np.mean(width + penalty_lower + penalty_upper)
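
For one observation y with predicted interval [ℓ, u] at miscoverage level α (estimator.alpha), the quantity averaged here is the standard interval (Winkler) score:

$$
S_\alpha(\ell, u, y) = (u - \ell) + \frac{2}{\alpha}(\ell - y)\,\mathbf{1}[y < \ell] + \frac{2}{\alpha}(y - u)\,\mathbf{1}[y > u]
$$

Lower is better, so pass it with greater_is_better=False.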

interval_width(estimator, X, y)

Example scoring function minimizing mean interval width, for hyperparameter tuning (use with greater_is_better=False)

Source code in uqregressors\tuning\tuning.py
def interval_width(estimator, X, y): 
    """
    Example scoring function minimizing mean interval width, for hyperparameter tuning (use with greater_is_better=False)
    """
    _, lower, upper = estimator.predict(X)
    return np.mean(upper - lower)
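
Either interval-based score plugs into tune_hyperparams the same way. A minimal sketch, where reg is any previously constructed uqregressors estimator and "tau" is an illustrative hyperparameter name:

best_est, best_score, study = tune_hyperparams(
    regressor=reg,  # any estimator with .fit, .predict, .save, and .load
    param_space={"tau": lambda t: t.suggest_float("tau", 0.01, 1.0)},
    X=X, y=y,
    score_fn=interval_width,
    greater_is_better=False,  # interval width should be minimized
)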

log_likelihood(estimator, X, y)

Example scoring function maximizing Gaussian log likelihood, for hyperparameter tuning (use with greater_is_better=True)

Source code in uqregressors\tuning\tuning.py
def log_likelihood(estimator, X, y): 
    """
    Example scoring function maximizing Gaussian log likelihood, for hyperparameter tuning (use with greater_is_better=True)
    """
    mean, lower, upper = estimator.predict(X)
    alpha = estimator.alpha 
    z = norm.ppf(1 - alpha / 2)
    std = (upper - lower) / (2 * z)
    std = np.clip(std, 1e-6, None)

    log_likelihoods = -0.5 * np.log(2 * np.pi * std**2) - 0.5 * ((y - mean) / std) ** 2
    return np.mean(log_likelihoods)
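
Because the estimator only returns an interval, the predictive standard deviation is recovered from the interval width: under a Gaussian assumption, a central (1 − α) interval has half-width z_{1−α/2} · σ, so

$$
\sigma = \frac{u - \ell}{2\,z_{1-\alpha/2}}, \qquad
\log p(y \mid \mu, \sigma) = -\tfrac{1}{2}\log\!\left(2\pi\sigma^2\right) - \frac{(y - \mu)^2}{2\sigma^2}
$$

which is the per-point quantity averaged above. Higher is better, so pass it with greater_is_better=True.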

tune_hyperparams(regressor, param_space, X, y, score_fn, greater_is_better, n_trials=20, n_splits=3, random_state=42, verbose=True)

Optimizes a scikit-learn-style regressor using Optuna.

Supports cross-validation when n_splits > 1; otherwise uses a single train/validation split.

Parameters:

  • regressor (BaseEstimator): An instance of a base regressor (must have .fit and .predict). Required.
  • param_space (dict): Dict mapping each parameter name to an Optuna suggest function (e.g., lambda t: t.suggest_float(...)). Required.
  • X (Union[Tensor, ndarray, DataFrame, Series]): Training inputs. Required.
  • y (Union[Tensor, ndarray, DataFrame, Series]): Training targets. Required.
  • score_fn (Callable(estimator, X_val, y_val) → float): Scoring function. Required.
  • greater_is_better (bool): Whether score_fn should be maximized (True) or minimized (False). Required.
  • n_trials (int): Number of Optuna trials. Default: 20.
  • n_splits (int): If > 1, uses KFold CV; otherwise a single train/validation split. Default: 3.
  • random_state (int): For reproducibility. Default: 42.
  • verbose (bool): Whether to print status messages. Default: True.

Returns:

  • Tuple[BaseEstimator, float, optuna.study.Study]: A tuple containing the estimator refit on the full data with the optimized hyperparameters, the best score found, and the Optuna study object.
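
Putting it together, a minimal sketch; MyUQRegressor, learning_rate, and hidden_size are illustrative stand-ins for a concrete uqregressors estimator and its hyperparameters:

from uqregressors.tuning.tuning import tune_hyperparams, interval_score

param_space = {
    # Each value is a function of the Optuna trial object
    "learning_rate": lambda t: t.suggest_float("learning_rate", 1e-4, 1e-1, log=True),
    "hidden_size": lambda t: t.suggest_int("hidden_size", 16, 128),
}

best_est, best_score, study = tune_hyperparams(
    regressor=MyUQRegressor(alpha=0.1),  # illustrative class; substitute a real estimator
    param_space=param_space,
    X=X, y=y,
    score_fn=interval_score,
    greater_is_better=False,  # interval score is minimized
    n_trials=30,
    n_splits=3,
)

The returned study can be inspected further, e.g., study.trials_dataframe() for a per-trial summary.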

Source code in uqregressors\tuning\tuning.py
def tune_hyperparams(
    regressor,
    param_space,
    X,
    y,
    score_fn,
    greater_is_better,
    n_trials=20,
    n_splits=3,
    random_state=42,
    verbose=True,
):
    """
    Optimizes a scikit-learn-style regressor using Optuna.

    Supports cross-validation when n_splits > 1; otherwise uses a single train/validation split.

    Args:
        regressor (BaseEstimator): An instance of a base regressor (must have .fit and .predict).
        param_space (dict): Dict mapping param name → optuna suggest function (e.g., lambda t: t.suggest_float(...)).
        X (Union[torch.Tensor, np.ndarray, pd.DataFrame, pd.Series]): Training inputs.
        y (Union[torch.Tensor, np.ndarray, pd.DataFrame, pd.Series]): Training targets.
        score_fn (Callable(estimator, X_val, y_val) → float): Scoring function.
        greater_is_better (bool): Whether score_fn should be maximized (True) or minimized (False).
        n_trials (int): Number of Optuna trials.
        n_splits (int): If >1, uses KFold CV; otherwise single train/val split.
        random_state (int): For reproducibility.
        verbose (bool): Print status messages.

    Returns:
        (Tuple[BaseEstimator, float, optuna.study.Study]): A tuple containing the estimator refit on the full data
                                                           with the optimized hyperparameters, the best score found,
                                                           and the Optuna study object
    """

    direction = "maximize" if greater_is_better else "minimize"

    def objective(trial):
        # Sample hyperparameters
        trial_params = {k: suggest_fn(trial) for k, suggest_fn in param_space.items()}

        scores = []

        if n_splits == 1:
            # Single train/val split
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=0.2, random_state=random_state
            )

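            # Clone the regressor via a save/load round-trip so each trial starts from a fresh, unfitted copy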
            with tempfile.TemporaryDirectory() as tmpdir: 
                regressor.save(tmpdir)
                estimator = regressor.__class__.load(tmpdir)

            for param_name, param_value in trial_params.items():
                setattr(estimator, param_name, param_value)

            estimator.fit(X_train, y_train)
            score = score_fn(estimator, X_val, y_val)
            scores.append(score)
        else:
            # K-fold CV
            kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
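            # NOTE: the integer-array indexing below assumes X and y support positional
            # indexing (e.g., NumPy arrays or tensors); pandas objects would need .iloc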
            for train_idx, val_idx in kf.split(X):
                X_train, X_val = X[train_idx], X[val_idx]
                y_train, y_val = y[train_idx], y[val_idx]

                with tempfile.TemporaryDirectory() as tmpdir: 
                    regressor.save(tmpdir)
                    estimator = regressor.__class__.load(tmpdir)

                for param_name, param_value in trial_params.items():
                    setattr(estimator, param_name, param_value)

                estimator.fit(X_train, y_train)
                score = score_fn(estimator, X_val, y_val)
                scores.append(score)

        mean_score = np.mean(scores)

        if verbose:
            print(f"Trial params: {trial_params} -> Score: {mean_score:.4f}")

        return mean_score

    study = optuna.create_study(direction=direction)
    study.optimize(objective, n_trials=n_trials)

    # Re-train on full data with best hyperparameters
    best_params = study.best_params

    with tempfile.TemporaryDirectory() as tmpdir: 
        regressor.save(tmpdir)
        best_estimator = regressor.__class__.load(tmpdir)

    for k, v in best_params.items():
        setattr(best_estimator, k, v)
    best_estimator.fit(X, y)

    if verbose:
        print("Best score:", study.best_value)
        print("Best hyperparameters:", best_params)

    return best_estimator, study.best_value, study