# Source code for flyqma.annotation.classification.visualization

import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, Normalize

from ...visualization import *


class MixtureVisualization:
    """
    Methods for visualizing a mixture-model based classifier.

    The methods read attributes that are not defined in this class (model,
    values, classifier, cmap, component_to_label, num_components,
    num_samples, means, build_colormap); presumably they are supplied by the
    classifier this class is combined with -- confirm against the caller.
    """

    @property
    def label_colors(self):
        """ RGB color for each model component, set by the component's label. """
        component_to_label = np.vectorize(self.component_to_label.get)
        labels = component_to_label(np.arange(self.num_components))

        # drop the last (alpha) column of the colormap output
        return self.cmap(labels)[:, :-1]

    @property
    def support(self):
        """ Model support. """
        return self.model.support

    @property
    def esupport(self):
        """ Empirical support vector (sorted values). """
        return np.sort(self.values, axis=0)

    @property
    def epdf(self):
        """
        Empirical PDF over support.

        Returns:

            left_edges (np.ndarray) - left edge of each histogram bin

            density (np.ndarray) - normalized histogram height of each bin

        """
        num_bins = self.num_samples // 25
        bins = np.linspace(self.support.min(), self.support.max(), num_bins)

        # 'density' replaces the removed 'normed' keyword; the two agree for
        # the equal-width bins used here
        counts, edges = np.histogram(self.values, bins=bins, density=True)

        # left edges pair with the where='post' step plot in plot_pdfs
        return edges[:-1], counts

    @property
    def ecdf(self):
        """ Empirical CDF over support (one evenly spaced level per value). """
        return np.linspace(0, 1, len(self.values), endpoint=False)

    @property
    def pdf(self):
        """ Model PDF over support. """
        return self.model.pdf

    @property
    def component_pdfs(self):
        """ Weighted component PDFs over support. """
        return self.model.component_pdfs

    @property
    def component_cdfs(self):
        """ Returns weighted CDF of each component over support. """
        cdfs = np.vstack([x.cdf(self.support) for x in self.model.components])

        # scale each row by the corresponding component weight
        cdfs *= self.model.weights_.reshape(-1, 1)
        return cdfs

    @property
    def support_labels(self):
        """ Labels for support vector. """
        return self.classifier(self.support.reshape(-1, 1))

    def _default_ymax(self):
        """
        Default upper limit for density axes.

        Returns 2.5 times the geometric mean of the local maxima of the model
        PDF. If the PDF has no interior local maximum, 2.5 times its global
        maximum is returned instead.
        """
        pdf = self.pdf
        maxima = pdf[argrelextrema(pdf, np.greater)]

        # without this guard the geometric mean divides by zero
        if maxima.size == 0:
            return 2.5 * pdf.max()

        return 2.5 * np.prod(maxima) ** (1 / maxima.size)

    @default_figure
    def plot_pdf(self,
                  density=1000,
                  alpha=0.5,
                  xmin=None,
                  xmax=None,
                  ymin=None,
                  ymax=None,
                  ax=None):
        """
        Plot model density function, colored by output label.

        Args:

            density, alpha, xmin, xmax, ymin - accepted but not used by this method

            ymax (float) - upper y-axis limit, if None a default is derived from the peaks of the model PDF

            ax (matplotlib.axes.AxesSubplot) - axis on which to plot

        """

        # split the support wherever the assigned label changes
        support_labels = self.support_labels
        changes = np.diff(support_labels).nonzero()[0] + 1
        breakpoints = [0] + list(changes) + [None]

        # fill the area under each segment with the color of its label
        for start, stop in zip(breakpoints[:-1], breakpoints[1:]):
            indices = slice(start, stop)
            segment_color = self.cmap(support_labels[indices])
            ax.fill_between(self.support[indices],
                            self.pdf[indices],
                            color=segment_color)

        # plot overall model pdf
        ax.plot(self.support, self.pdf, '-', c='k', lw=2)

        # format axis
        if ymax is None:
            ymax = self._default_ymax()

        ax.set_xlim(self.model.lbound, self.model.ubound)
        ax.set_ylim(0, ymax)
        ax.set_xlabel('Values', fontsize=8)
        ax.set_ylabel('Density', fontsize=8)

    @default_figure
    def plot_pdfs(self,
                  empirical=False,
                  line=True,
                  fill=True,
                  density=1000,
                  alpha=0.5,
                  cmap=None,
                  vmin=-1,
                  xmax=None,
                  ymax=None,
                  ax=None):
        """
        Plot density function for each distribution, colored by output label.

        Args:

            empirical (bool) - if True, include empirical PDF

            line (bool) - if True, draw each component PDF as a line

            fill (bool) - if True, shade the area under each component PDF

            density, xmax - accepted but not used by this method

            alpha (float) - opacity of component lines and fills

            cmap - if not None, passed with vmin to self.build_colormap, otherwise self.cmap is used

            vmin - passed to self.build_colormap when cmap is provided

            ymax (float) - upper y-axis limit, if None a default is derived from the peaks of the model PDF

            ax (matplotlib.axes.AxesSubplot) - if None, create figure

        """

        # define colormap
        if cmap is not None:
            colormap = self.build_colormap(cmap, vmin)
        else:
            colormap = self.cmap

        # plot empirical pdf
        if empirical:
            ax.step(*self.epdf, where='post', color='r', linewidth=1)

        # plot individual component pdfs
        for i, pdf in enumerate(self.component_pdfs):
            color = colormap(self.component_to_label[i])
            if line:
                ax.plot(self.support, pdf, color=color, alpha=alpha, lw=1.)
            if fill:
                ax.fill_between(self.support, pdf, facecolors=color, alpha=alpha, linewidth=1., rasterized=True)

        # plot model pdf
        ax.plot(self.support, self.pdf, '--', c='k', lw=1)

        # format axis
        if ymax is None:
            ymax = self._default_ymax()

        ax.set_ylim(0, ymax)
        ax.set_xlim(self.model.lbound, self.model.ubound)
        ax.set_xlabel('Values', fontsize=8)
        ax.set_ylabel('Density', fontsize=8)

    @default_figure
    def plot_cdfs(self,
                  log=True,
                  cmap=plt.cm.Greys,
                  ax=None,
                  **kwargs):
        """
        Plot component cumulative distribution functions as stackplot.

        Args:

            log (bool) - if False, the support and empirical support are exponentiated before plotting (presumably they are stored in log space -- confirm)

            cmap (matplotlib.colors.Colormap) - colormap used to shade components by their mean

            ax (matplotlib.axes.AxesSubplot) - axis on which to plot

            kwargs: keyword arguments for ax.stackplot

        """

        # convert to linear basis if requested
        support, esupport = self.support, self.esupport
        if not log:
            support, esupport = np.exp(support), np.exp(esupport)

        # get component CDFs
        component_cdfs = self.component_cdfs

        # stack weighted component CDFs in order of increasing mean, shading
        # each by where its mean falls within the range of the values
        means = self.means
        norm = Normalize(vmin=self.values.min(), vmax=self.values.max())
        order = np.argsort(means)
        colors = cmap(norm(means[order]))
        ax.stackplot(support, component_cdfs[order], colors=colors, **kwargs)

        # plot empirical CDF (data)
        ax.plot(esupport, self.ecdf, '-r', lw=1.)

        # plot mixture CDF
        ax.plot(support, component_cdfs.sum(axis=0), '--k', lw=1)


class BivariateMixtureVisualization:
    """
    Methods for visualizing a classifier based on a bivariate mixture model.

    One-dimensional properties refer to the first (x) margin of the model.
    """

    @property
    def support(self):
        """ Model support along the x margin. """
        return self.model.supportx

    @property
    def esupport(self):
        """ Empirical support vector (sorted first column of values). """
        first_column = self.values[:, 0]
        return np.sort(first_column)

    @property
    def pdf(self):
        """ PDF of the first model margin over support. """
        return self.model[0].pdf

    @property
    def component_pdfs(self):
        """ Weighted component PDFs of the first model margin over support. """
        return self.model[0].component_pdfs

    @property
    def component_cdfs(self):
        """ Returns weighted CDF of each component of the first margin. """
        margin = self.model[0]
        weights = margin.weights_.reshape(-1, 1)
        stacked = np.vstack([c.cdf(self.support) for c in margin.components])

        # scale each row in place by its component weight
        stacked *= weights
        return stacked

    @property
    def support_labels(self):
        """ Labels for support vector (over x margin). """
        xmargin = self.marginalize(0)
        column = self.support.reshape(-1, 1)
        return xmargin.classifier(column)

    def _plot_margins(self, fig):
        """ Draw both marginal PDFs on fig and apply the model bounds. """
        model = self.model
        colors = self.label_colors
        model.plot_margin(0, ax=fig.ax_xmargin, component_color=colors)
        model.plot_margin(1, invert=True, ax=fig.ax_ymargin, component_color=colors)
        fig.ax_xmargin.set_xlim(model.lbound, model.ubound)
        fig.ax_ymargin.set_ylim(model.lbound, model.ubound)

    @joint_figure
    def plot_bivariate_pdf(self, fig, bg='w', **kwargs):
        """ Plot bivariate PDF, with each cluster shaded by its label. """

        component_pdfs = self.model.component_pdfs
        joint = fig.ax_joint

        # normalize all components against the same global range
        scale = Normalize(component_pdfs.min(), component_pdfs.max())
        joint.set_facecolor(bg)

        # overlay one image per component, using a transparent colormap
        # built from the color of the component's label
        colors = self.label_colors
        for index, component_pdf in enumerate(component_pdfs):
            component_cmap = build_transparent_cmap(colors[index], bg=bg)
            joint.imshow(scale(component_pdf),
                         cmap=component_cmap,
                         extent=self.model.extent)

        # plot marginal pdfs
        self._plot_margins(fig)

        return fig

    @joint_figure
    def plot_bivariate_data(self, fig, bg='w', **kwargs):
        """ Plot bivariate data, with each sample shaded by its label. """

        model = self.model
        joint = fig.ax_joint

        # color each sample by its assigned label, dropping the alpha column
        sample_labels = self.classifier(model.values)
        sample_colors = self.cmap(sample_labels)[:, :-1]

        # plot the data, then format the joint axis
        joint.set_facecolor(bg)
        model.plot_data(ax=joint, c=sample_colors, **kwargs)
        joint.set_xlim(model.lbound, model.ubound)
        joint.set_ylim(model.lbound, model.ubound)
        joint.invert_yaxis()
        joint.set_yticks(joint.get_xticks())

        # plot marginal pdfs
        self._plot_margins(fig)

        return fig

    @joint_figure
    def plot_phase_space(self, fig, bg='w', **kwargs):
        """ Plot phase space, with each region shaded by its label. """

        model = self.model

        # classify every point of the model support and restore the grid shape
        grid_labels = self.classifier(model.support).reshape(model.support_size)

        # vertical extent runs from upper to lower bound
        lower, upper = model.lbound, model.ubound
        fig.ax_joint.imshow(self.cmap(grid_labels),
                            extent=(lower, upper, upper, lower))

        # plot marginal pdfs
        self._plot_margins(fig)

        return fig
# Source code for flyqma.annotation.classification.visualization

# Navigation

# Related Topics