# Source code for flyqma.annotation.model_selection.univariate
from os.path import join, exists
from os import mkdir
import numpy as np
from ...utilities import IO
from ..classification import UnivariateMixtureClassifier
from .visualization import ModelSelectionVisualization
class SelectionIO:
    """
    Methods for saving and loading a model selection instance.

    Written as a mixin: ``save`` reads ``self.parameters``, ``self.values``
    and ``self._models`` from the host class, and ``load`` instantiates the
    host class as ``cls(values, attribute, models=models, **parameters)``.
    """

    def save(self, dirpath, image=False, **kwargs):
        """
        Save classifier to specified path.

        Writes ``parameters.json`` and ``values.npy`` into a ``models``
        subdirectory of ``dirpath``, then asks every model in
        ``self._models`` to save itself into that same subdirectory.

        Args:

            dirpath (str) - directory in which classifier is to be saved

            image (bool) - if True, save model image

            kwargs: keyword arguments for image rendering (forwarded to
            each model's ``save`` method)

        Returns:

            path (str) - model selection directory

        """
        # imported locally so the file's top-level import block is untouched
        from os import makedirs

        # create directory for model selection; exist_ok avoids the
        # check-then-create race, and missing parent directories are created
        path = join(dirpath, 'models')
        makedirs(path, exist_ok=True)

        # save parameters
        io = IO()
        io.write_json(join(path, 'parameters.json'), self.parameters)

        # save values
        np.save(join(path, 'values.npy'), self.values)

        # save models; the number of components is passed as the extension
        for n, model in self._models.items():
            model.save(path, image=image, extension=n, **kwargs)

        return path

    @staticmethod
    def load_model(path):
        """
        Load model from <path> directory.

        Args:

            path (str) - directory of a single saved classifier

        Returns:

            model - result of ``UnivariateMixtureClassifier.load(path)``

        """
        return UnivariateMixtureClassifier.load(path)

    @classmethod
    def load(cls, path):
        """
        Load model selection instance from file.

        Reads ``values.npy`` and ``parameters.json`` from ``path``, then
        loads one classifier for each number of components between
        ``min_num_components`` and ``max_num_components`` (inclusive) whose
        ``classifier_<n>`` entry exists under ``path``. Missing entries are
        skipped silently.

        Args:

            path (str) - model selection directory

        Returns:

            selector (UnivariateModelSelection derivative)

        """
        io = IO()

        # load values and parameters
        values = io.read_npy(join(path, 'values.npy'))
        parameters = io.read_json(join(path, 'parameters.json'))

        # attribute is passed positionally, so remove it from the keywords
        attribute = parameters.pop('attribute')

        # load models
        n_min = parameters['min_num_components']
        n_max = parameters['max_num_components']
        models = {}
        for num_components in range(n_min, n_max+1):
            model_path = join(path, 'classifier_{:d}'.format(num_components))
            if not exists(model_path):
                continue

            # re-attach the sample values to the loaded classifier; the
            # underlying mixture model receives them log-transformed
            model = cls.load_model(model_path)
            model._values = values
            model.model.values = np.log(values)
            models[num_components] = model

        # passing the loaded models prevents the constructor from re-fitting
        return cls(values, attribute, models=models, **parameters)
class UnivariateModelSelection(SelectionIO, ModelSelectionVisualization):
    """
    Class for performing univariate mixture model selection. The optimal
    model is chosen based on BIC score.

    One classifier is held per candidate number of mixture components, and
    both the BIC- and AIC-minimizing models are exposed as properties.
    """

    def __init__(self, values, attribute,
                 min_num_components=3,
                 max_num_components=8,
                 num_labels=3,
                 models=None):
        """
        Perform model selection by choosing the model that minimizes BIC score.

        Args:

            values (np.ndarray[float]) - 1D array of sample values

            attribute (str) - attribute label for sample values

            min_num_components (int) - minimum number of components in mixture

            max_num_components (int) - maximum number of components in mixture

            num_labels (int) - maximum number of unique labels to be assigned

            models (dict) - pre-fitted Classification instances keyed by
            number of components; if None, models are fitted on construction

        """
        self.values = values
        self.attribute = attribute
        self.min_num_components = min_num_components
        self.max_num_components = max_num_components
        self.num_labels = num_labels

        # candidate numbers of components, both bounds inclusive
        self.num_components = range(min_num_components, max_num_components+1)

        # fit models only when none were supplied (e.g. by SelectionIO.load)
        if models is None:
            models = self.fit_models()
        self._models = models

    @staticmethod
    def fit_model(values, num_components, num_labels, **kwargs):
        """
        Fit model with specified number of components.

        Args:

            values (np.ndarray[float]) - 1D array of sample values

            num_components (int) - number of components in mixture

            num_labels (int) - maximum number of unique labels to be assigned

            kwargs: keyword arguments for UnivariateMixtureClassifier

        Returns:

            model (UnivariateMixtureClassifier)

        """
        return UnivariateMixtureClassifier(values,
                                           num_components=num_components,
                                           num_labels=num_labels,
                                           **kwargs)

    def fit_models(self):
        """
        Fit model with each number of components.

        Returns:

            models (dict) - fitted models keyed by number of components

        """
        # keyword arguments shared by every fit
        kwargs = dict(attribute=self.attribute)

        # call through self so an overridden fit_model is honoured
        return {
            num_components: self.fit_model(self.values,
                                           num_components,
                                           self.num_labels, **kwargs)
            for num_components in self.num_components
        }

    @property
    def parameters(self):
        """ Dictionary of instance parameters. """
        return {
            'attribute': self.attribute,
            'min_num_components': self.min_num_components,
            'max_num_components': self.max_num_components,
            'num_labels': self.num_labels}

    @property
    def models(self):
        """ List of models ordered by number of components. """
        return [m for n, m in sorted(self._models.items())]

    @property
    def BIC(self):
        """ BIC scores for each model. """
        return np.array([model.model.BIC for model in self.models])

    @property
    def BIC_optimal(self):
        """ Model with BIC optimal number of components. """
        # self.models and self.BIC share the same ordering
        return self.models[np.argmin(self.BIC)]

    @property
    def AIC(self):
        """ AIC scores for each model. """
        return np.array([model.model.AIC for model in self.models])

    @property
    def AIC_optimal(self):
        """ Model with AIC optimal number of components. """
        # self.models and self.AIC share the same ordering
        return self.models[np.argmin(self.AIC)]