Skip to content

uqregressors.utils.file_manager

file_manager

Handles saving paths, including saving and loading models and plots.

Examples:

>>> from uqregressors.bayesian.deep_ens import DeepEnsembleRegressor
>>> from uqregressors.metrics.metrics import compute_all_metrics
>>> from uqregressors.utils.file_manager import FileManager
>>> # Instantiate File Manager
>>> BASE_PATH = "C:/.uqregressors"
>>> fm = FileManager(BASE_PATH)  # Replace with desired base path
>>> # Fit a model and compute metrics
>>> reg = DeepEnsembleRegressor()
>>> reg.fit(X_train, y_train)
>>> mean, lower, upper = reg.predict(X_test)
>>> metrics = compute_all_metrics(
...     mean, lower, upper, y_test, reg.alpha
... )
>>> # Save model and metrics
>>> save_path = fm.save_model(
...     name="Deep_Ens",
...     model=reg,
...     metrics=metrics,
...     X_train=X_train,
...     y_train=y_train,
...     X_test=X_test,
...     y_test=y_test
... )
>>> # Will save to: BASE_PATH/models/Deep_Ens
>>> # Alternatively, specify full path directly
>>> save_path = fm.save_model(path="SAVE_PATH", model=reg, ...)
>>> # Load model and metrics
>>> load_dict = fm.load_model(
...     reg.__class__, save_path, load_logs=True
... )
>>> metrics = load_dict["metrics"]
>>> loaded_model = load_dict["model"]
>>> X_test = load_dict["X_test"]

FileManager

FileManager class to handle paths and saving.

Parameters:

Name Type Description Default
base_dir str

Base directory to save files to. Defaults to a ".uqregressors" folder inside the user's home directory.

home() / '.uqregressors'

Attributes:

Name Type Description
base_dir Path

The base directory as a Path object.

model_dir Path

The directory "models" within the base_dir, where models will be saved and loaded.

Source code in uqregressors\utils\file_manager.py
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
class FileManager:
    """
    FileManager class to handle paths and saving.

    Args:
        base_dir (str): Base directory to save files to. Defaults to a folder ".uqregressors" within the user's home directory.

    Attributes:
        base_dir (Path): The base directory as a Path object.
        model_dir (Path): The directory "models" within the base_dir, where models will be saved and loaded.
    """
    def __init__(self, base_dir=Path.home() / ".uqregressors"):
        self.base_dir = Path(base_dir)
        self.model_dir = self.base_dir / "models"
        # Created eagerly so that named/timestamped saves always have a parent directory.
        self.model_dir.mkdir(parents=True, exist_ok=True)

    def get_timestamped_path(self, model_class_name: str) -> Path:
        """
        Builds a path of the form model_dir/{model_class_name}_{YYYYmmdd_HHMMSS} using the current local time.

        The directory itself is not created here.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return self.model_dir / f"{model_class_name}_{timestamp}"

    def get_named_path(self, name: str) -> Path:
        """
        Returns model_dir/name. The directory itself is not created here.
        """
        return self.model_dir / name

    def save_model(
        self, model, name=None, path=None, metrics=None, X_train=None, y_train=None, X_test=None, y_test=None
    ) -> Path:
        """
        Saves a model, along with metrics, and training and testing data

        Args:
            model (BaseEstimator): The regressor to save. Note that it must implement the save method.
            name (str): The name of the model for directory purposes. If given, the model will be saved within the directory: fm.base_dir/models/name.
            path (str): The path to the directory where the model should be saved. Only one of name or path should be given. If both are given,
                        a warning is issued and path is used. If neither is given, a directory with the model class and timestamp is created.
            metrics (dict): A dictionary of metrics to store. Can be used with uqregressors.metrics.metrics.compute_all_metrics.
                            An empty or None value results in no metrics.json file being written.
            X_train (array-like): Training features.
            y_train (array-like): Training targets.
            X_test (array-like): Testing features.
            y_test (array-like): Testing targets.

        Returns:
            (Path): The directory the model and additional artifacts were written to.

        Raises:
            AttributeError: If the model does not expose a callable ``save`` attribute.
        """
        if path is not None:
            if name is not None:
                warnings.warn(f"Both name and path given. Using path: {path}")
            # Always normalise to a Path: a plain string has no .mkdir() and cannot be joined with "/".
            path = Path(path)
        elif name is not None:
            path = self.get_named_path(name)
        else:
            path = self.get_timestamped_path(model.__class__.__name__)

        path.mkdir(parents=True, exist_ok=True)

        if not hasattr(model, "save") or not callable(model.save):
            raise AttributeError(f"{model.__class__.__name__} must implement `save(path)`")
        model.save(path)

        if metrics:
            with open(path / "metrics.json", "w", encoding="utf-8") as f:
                json.dump(metrics, f, indent=2)

        # A distinct loop variable keeps the `name` argument intact.
        arrays = (("X_train", X_train), ("y_train", y_train), ("X_test", X_test), ("y_test", y_test))
        for array_name, array in arrays:
            if array is not None:
                np.save(path / f"{array_name}.npy", np.array(array))

        print(f"Model and additional artifacts saved to: {path}")
        return path

    def load_model(self, model_class, path=None, name=None, device="cpu", load_logs=False):
        """
        Loads a model and associated metadata from path

        Args:
            model_class (BaseEstimator): The class of the model to be loaded. This should match with the class of model which was saved.
            path (str): The path to the directory in which the model and associated metadata is stored. If not given, name must be given.
            name (str): The name of the directory containing the model, resolved as fm.base_dir/models/{name}. If not given, the path must be given.
                        If both name and path are given, name takes precedence.
            device (str): The device, "cpu" or "cuda" to load the model with.
            load_logs (bool): Whether training and hyperparameter logs should be loaded along with the model so they can be accessed by code.

        Returns:
            (dict): Dictionary of loaded objects with the following keys:

                    model: The loaded model,

                    metrics: The loaded metrics or None if there is no metrics.json file,

                    X_train: The loaded training features or None if there is no X_train.npy file,

                    y_train: The loaded training targets or None if there is no y_train.npy file,

                    X_test: The loaded testing features or None if there is no X_test.npy file,

                    y_test: The loaded testing targets or None if there is no y_test.npy file.

        Raises:
            ValueError: If neither name nor path is given.
            FileNotFoundError: If the resolved directory does not exist.
            AttributeError: If model_class does not expose a callable ``load`` attribute.
        """
        if name:
            path = self.get_named_path(name)
        elif path:
            path = Path(path)
        else:
            raise ValueError("Either `name` or `path` must be specified.")

        if not path.exists():
            raise FileNotFoundError(f"Path {path} does not exist")

        if not hasattr(model_class, "load") or not callable(model_class.load):
            raise AttributeError(f"{model_class.__name__} must implement `load(path)`")

        from torch.serialization import safe_globals

        # NumPy 2.x exposes the array reconstructor under np._core; older releases
        # only expose it under np.core, so fall back when the new location is absent.
        try:
            reconstruct = np._core.multiarray._reconstruct
        except AttributeError:
            reconstruct = np.core.multiarray._reconstruct

        # Allow-list the NumPy globals while the model's own load routine runs.
        with safe_globals([reconstruct, np.ndarray, np.dtype]):
            model = model_class.load(path, device=device, load_logs=load_logs)

        def try_load(stem):
            # Optional artifact: None when the .npy file was never saved.
            f = path / f"{stem}.npy"
            return np.load(f) if f.exists() else None

        metrics = None
        metrics_path = path / "metrics.json"
        if metrics_path.exists():
            with open(metrics_path, encoding="utf-8") as f:
                metrics = json.load(f)

        return {
            "model": model,
            "metrics": metrics,
            "X_train": try_load("X_train"),
            "y_train": try_load("y_train"),
            "X_test": try_load("X_test"),
            "y_test": try_load("y_test"),
        }

    def save_plot(self, fig, model_path, filename="plot.png", show=True, subdir="plots"):
        """
        A helper method to save plots to a subdirectory within the directory in which the model would be saved.

        Args:
            fig (matplotlib.figure.Figure): The figure to be saved.
            model_path (str): The directory in which the subdir folder is created and where the image will be saved.
            filename (str): The filename of the plot to be saved, including the file extension.
            show (bool): Whether to display the plot after saving it.
            subdir (str): The subdirectory in which the plot will be saved, each image will be saved to model_path/subdir/filename .

        Returns:
            (Path): The full path of the saved image. The figure is closed before returning.
        """

        path = Path(model_path)
        plot_dir = path / subdir
        plot_dir.mkdir(parents=True, exist_ok=True)
        save_path = plot_dir / filename

        # Make `fig` the current pyplot figure before saving/showing.
        plt.figure(fig.number)
        fig.savefig(save_path, bbox_inches='tight')

        if show:
            # pyplot.show() accepts only the keyword argument `block`; passing the
            # figure positionally raises TypeError on current Matplotlib backends.
            plt.show()
        plt.close(fig)
        print(f"Plot saved to {save_path}")
        return save_path

load_model(model_class, path=None, name=None, device='cpu', load_logs=False)

Loads a model and associated metadata from path

Parameters:

Name Type Description Default
model_class BaseEstimator

The class of the model to be loaded. This should match with the class of model which was saved.

required
path str

The path to the directory in which the model and associated metadata is stored. If not given, name must be given.

None
name str

The name of the directory containing the model, resolved as fm.base_dir/models/{name}. If not given, the path must be given.

None
device str

The device, "cpu" or "cuda" to load the model with.

'cpu'
load_logs bool

Whether training and hyperparameter logs should be loaded along with the model so they can be accessed by code.

False

Returns:

Type Description
dict

Dictionary of loaded objects with the following keys:

model: The loaded model,

metrics: The loaded metrics or None if there is no metrics.json file,

X_train: The loaded training features or None if there is no X_train.npy file,

y_train: The loaded training targets or None if there is no y_train.npy file,

X_test: The loaded testing features or None if there is no X_test.npy file,

y_test: The loaded testing targets or None if there is no y_test.npy file.
Source code in uqregressors\utils\file_manager.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def load_model(self, model_class, path=None, name=None, device="cpu", load_logs=False):
    """
    Loads a model and associated metadata from path

    Args: 
        model_class (BaseEstimator): The class of the model to be loaded. This should match with the class of model which was saved. 
        path (str): The path to the directory in which the model and associated metadata is stored. If not given, name must be given. 
        name (str): The name if the directory containing the model is fm.base_dir/models/{name}. If not given, the path must be given. 
        device (str): The device, "cpu" or "cuda" to load the model with. 
        load_logs (bool): Whether training and hyperparameter logs should be loaded along with the model so they can be accessed by code.

    Returns: 
        (dict): Dictionary of loaded objects with the following keys: 

                model: The loaded model,

                metrics: The loaded metrics or None if there is no metrics.json file, 

                X_train: The loaded training features or None if there is no X_train.npy file, 

                y_train: The loaded training targets or None if there is no y_train.npy file,

                X_test: The loaded testing features or None if there is no X_test.npy file, 

                y_test: The loaded testing targets or None if there is no y_test.npy file.
    """
    if name:
        path = self.get_named_path(name)
    elif path:
        path = Path(path)
    else:
        raise ValueError("Either `name` or `path` must be specified.")

    if not path.exists():
        raise FileNotFoundError(f"Path {path} does not exist")

    if not hasattr(model_class, "load") or not callable(model_class.load):
        raise AttributeError(f"{model_class.__name__} must implement `load(path)`")

    from torch.serialization import safe_globals
    with safe_globals([np._core.multiarray._reconstruct, np.ndarray, np.dtype]):
        model = model_class.load(path, device=device, load_logs=load_logs)

    def try_load(name):
        f = path / f"{name}.npy"
        return np.load(f) if f.exists() else None

    metrics = None
    metrics_path = path / "metrics.json"
    if metrics_path.exists():
        with open(metrics_path) as f:
            metrics = json.load(f)

    return {
        "model": model,
        "metrics": metrics,
        "X_train": try_load("X_train"),
        "y_train": try_load("y_train"),
        "X_test": try_load("X_test"),
        "y_test": try_load("y_test"),
    }

save_model(model, name=None, path=None, metrics=None, X_train=None, y_train=None, X_test=None, y_test=None)

Saves a model, along with metrics, and training and testing data

Parameters:

Name Type Description Default
model BaseEstimator

The regressor to save. Note that it must implement the save method.

required
name str

The name of the model for directory purposes. If given, the model will be saved within the directory: fm.base_dir/models/name.

None
path str

The path to the directory where the model should be saved. Only one of name or path should be given. If neither is given, a directory with the model class and timestamp is created.

None
metrics dict

A dictionary of metrics to store. Can be used with uqregressors.metrics.metrics.compute_all_metrics.

None
X_train array-like

Training features.

None
y_train array-like

Training targets.

None
X_test array-like

Testing features.

None
y_test array-like

Testing targets.

None
Source code in uqregressors\utils\file_manager.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def save_model(
    self, model, name=None, path=None, metrics=None, X_train=None, y_train=None, X_test=None, y_test=None
) -> Path:
    """
    Saves a model, along with metrics, and training and testing data

    Args:
        model (BaseEstimator): The regressor to save. Note that it must implement the save method.
        name (str): The name of the model for directory purposes. If given, the model will be saved within the directory: fm.base_dir/models/name.
        path (str): The path to the directory where the model should be saved. Only one of name or path should be given. If both are given,
                    a warning is issued and path is used. If neither is given, a directory with the model class and timestamp is created.
        metrics (dict): A dictionary of metrics to store. Can be used with uqregressors.metrics.metrics.compute_all_metrics.
                        An empty or None value results in no metrics.json file being written.
        X_train (array-like): Training features.
        y_train (array-like): Training targets.
        X_test (array-like): Testing features.
        y_test (array-like): Testing targets.

    Returns:
        (Path): The directory the model and additional artifacts were written to.

    Raises:
        AttributeError: If the model does not expose a callable ``save`` attribute.
    """
    if path is not None:
        if name is not None:
            warnings.warn(f"Both name and path given. Using path: {path}")
        # Always normalise to a Path: a plain string has no .mkdir() and cannot be joined with "/".
        path = Path(path)
    elif name is not None:
        path = self.get_named_path(name)
    else:
        path = self.get_timestamped_path(model.__class__.__name__)

    path.mkdir(parents=True, exist_ok=True)

    if not hasattr(model, "save") or not callable(model.save):
        raise AttributeError(f"{model.__class__.__name__} must implement `save(path)`")
    model.save(path)

    if metrics:
        with open(path / "metrics.json", "w", encoding="utf-8") as f:
            json.dump(metrics, f, indent=2)

    # A distinct loop variable keeps the `name` argument intact.
    arrays = (("X_train", X_train), ("y_train", y_train), ("X_test", X_test), ("y_test", y_test))
    for array_name, array in arrays:
        if array is not None:
            np.save(path / f"{array_name}.npy", np.array(array))

    print(f"Model and additional artifacts saved to: {path}")
    return path

save_plot(fig, model_path, filename='plot.png', show=True, subdir='plots')

A helper method to save plots to a subdirectory within the directory in which the model would be saved.

Parameters:

Name Type Description Default
fig Figure

The figure to be saved.

required
model_path str

The directory in which the subdir folder ("plots" by default) is created and where the image will be saved.

required
filename str

The filename of the plot to be saved, including the file extension.

'plot.png'
show bool

Whether to display the plot after saving it.

True
subdir str

The subdirectory in which the plot will be saved, each image will be saved to model_path/subdir/filename .

'plots'
Source code in uqregressors\utils\file_manager.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def save_plot(self, fig, model_path, filename="plot.png", show=True, subdir="plots"):
    """
    A helper method to save plots to a subdirectory within the directory in which the model would be saved.

    Args:
        fig (matplotlib.figure.Figure): The figure to be saved.
        model_path (str): The directory in which the subdir folder is created and where the image will be saved.
        filename (str): The filename of the plot to be saved, including the file extension.
        show (bool): Whether to display the plot after saving it.
        subdir (str): The subdirectory in which the plot will be saved, each image will be saved to model_path/subdir/filename .

    Returns:
        (Path): The full path of the saved image. The figure is closed before returning.
    """

    path = Path(model_path)
    plot_dir = path / subdir
    plot_dir.mkdir(parents=True, exist_ok=True)
    save_path = plot_dir / filename

    # Make `fig` the current pyplot figure before saving/showing.
    plt.figure(fig.number)
    fig.savefig(save_path, bbox_inches='tight')

    if show:
        # pyplot.show() accepts only the keyword argument `block`; passing the
        # figure positionally raises TypeError on current Matplotlib backends.
        plt.show()
    plt.close(fig)
    print(f"Plot saved to {save_path}")
    return save_path