Source code for flyqma.annotation.classification.kmeans
import numpy as np
from sklearn.cluster import KMeans
from .classifiers import Classifier
class KMeansClassifier(Classifier):
    """
    K-means classifier.

    Values are clustered with k-means, clusters are renumbered in order of
    ascending centroid, and each ordered cluster is then mapped to a label
    through <groups>.

    Attributes:

        groups (dict) - {cluster_id: label_id} pairs for merging clusters

        num_components (int) - number of clusters fit by k-means

        component_to_label (vectorized func) - maps cluster_id to label_id

        model (sklearn.cluster.KMeans) - fitted kmeans object

        classifier (func) - maps value to label_id

        labels (np.ndarray[int]) - predicted labels

    Inherited attributes:

        values (array like) - basis for clustering

        attribute (str or list) - attribute(s) on which to cluster

        log (bool) - indicates whether clustering performed on log values

        cmap (matplotlib.colors.ColorMap) - colormap for label_id

        parameters (dict) - {param name: param value} pairs

        fig (matplotlib.figures.Figure) - histogram figure

    """

    def __init__(self, values,
                 num_components=3,
                 groups=None,
                 log=True,
                 **kwargs):
        """
        Instantiate k-means classifier.

        Args:

            values (array like) - basis for clustering

            num_components (int) - number of clusters

            groups (dict) - {cluster_id: label_id} pairs for merging clusters

            log (bool) - indicates whether clustering performed on log values

            kwargs: keyword arguments for Classifier parent class

        """

        # set groups and number of clusters
        if groups is None:
            # default is the identity mapping, i.e. one label per cluster
            groups = {k: k for k in range(num_components)}
        else:
            # coerce keys to int so they match the integer cluster ids
            # (e.g. when the keys arrive as strings)
            groups = {int(k): v for k, v in groups.items()}

        # NOTE(review): this counts cluster ids, not distinct label ids, so
        # merged clusters are still counted individually - confirm that this
        # is what the Classifier parent class expects.
        num_labels = len(groups)

        # instantiate classifier
        super().__init__(values, num_labels=num_labels, log=log, **kwargs)
        self.num_components = num_components
        self.component_to_label = np.vectorize(groups.get)
        self.groups = groups

        # build classifiers
        self.model = self.fit(self.values, self.num_components)
        self.classifier = self._build_value_to_groups_classifier()

        # assign group labels
        self.labels = self.classifier(self.values.reshape(-1, 1))

        # store parameters
        self.parameters.update(dict(groups=self.groups))

    @property
    def means(self):
        """ Mean of each cluster, in the order used by the kmeans object. """
        return self.model.cluster_centers_.ravel()

    def predict(self, values):
        """
        Predict which component each of <values> belongs to.

        Args:

            values (array like) - values to be assigned to a cluster

        Returns:

            components (np.ndarray[int]) - cluster index assigned by the
            kmeans object to each value. These are NOT renumbered by
            ascending centroid.

        """
        # the model is always fit to a single feature column (see fit), so
        # values are cast to the same shape before prediction
        return self.model.predict(np.asarray(values).reshape(-1, 1))

    @staticmethod
    def fit(values, n):
        """
        Fit n clusters to values.

        Args:

            values (array like) - basis for clustering

            n (int) - number of clusters

        Returns:

            model (sklearn.cluster.KMeans) - fitted kmeans object

        """
        return KMeans(n_clusters=n).fit(np.asarray(values).reshape(-1, 1))

    @staticmethod
    def _build_value_to_cluster_classifier(km):
        """
        Build classifier mapping values to sequential clusters.

        Args:

            km (sklearn.cluster.KMeans) - fitted kmeans object

        Returns:

            classifier (func) - maps values to cluster indices that are
            numbered in order of ascending centroid

        """
        centroids = km.cluster_centers_.ravel()

        # argsort lists the kmeans cluster indices from lowest to highest
        # centroid, so a cluster's position in that list is its ordered index
        km_to_ordered_dict = {
            km_index: rank
            for rank, km_index in enumerate(np.argsort(centroids))
        }
        km_to_ordered = np.vectorize(km_to_ordered_dict.get)

        def classifier(x):
            """ Assign <x> to clusters ordered by ascending centroid. """
            return km_to_ordered(km.predict(x))

        return classifier

    def _build_value_to_groups_classifier(self):
        """
        Build classifier mapping values to groups.

        Returns:

            classifier (func) - maps values to label_id by assigning each
            value to an ordered cluster, then mapping that cluster to its
            label via component_to_label

        """
        value_to_cluster = self._build_value_to_cluster_classifier(self.model)

        def classifier(x):
            """ Assign <x> to labels. """
            return self.component_to_label(value_to_cluster(x))

        return classifier