Source code for flyeye.dynamics.resampling

import numpy as np
from .averages import subsample, get_rolling_window


class DiscResampler:
    """
    Class for evaluating the moving average across multiple discrete
    populations of cells. Measurements are resampled from pools of cells
    generated by resampling the discrete populations.

    Attributes:

        data (pd.DataFrame) - cell measurement data, including a disc_id attribute

        variable (str) - variable to be averaged

        window_size (int) - rolling window size

        resolution (int) - sampling resolution

        nbootstraps (int) - number of resampling

        confidence (float) - confidence interval, 0 to 100

        point_estimates (np.ndarray[float]) - array of point estimates within each window

    """

    def __init__(self, cells, variable,
                 window_size=250,
                 resolution=10,
                 nbootstraps=1000,
                 confidence=95):
        """
        Instantiate object for bootstrap resampling the moving average of
        <variable> across a population of eye discs. All point estimates are
        computed immediately upon instantiation.

        Args:

            cells (data.cells.Cells) - cell measurement data

            variable (str) - variable to be averaged

            window_size (int) - rolling window size

            resolution (int) - sampling resolution

            nbootstraps (int) - number of resampling

            confidence (float) - confidence interval, 0 to 100

        """
        self.data = cells.data
        self.variable = variable
        self.window_size = window_size
        self.resolution = resolution
        self.nbootstraps = nbootstraps
        self.confidence = confidence

        # must come last: generating the estimates reads every attribute above
        self.point_estimates = self.generate_point_estimates()

    def roll(self, x):
        """ Returns rolling window over <x>. """
        return get_rolling_window(x, self.window_size, self.resolution)

    def apply(self, f, x):
        """ Applies function <f> to rolling window over <x>. """
        # <f> receives one 1-D slice (one window) of the rolled array at a time
        return np.apply_along_axis(f, axis=-1, arr=self.roll(x))

    def bootstrap(self, idx, variable, f=np.mean, nbootstraps=1000):
        """
        Returns list of point estimates obtained by resampling eye discs.

        Args:

            idx (pd.Index) - indices for current window

            variable (str) - name of variable

            f (function) - statistic evaluated on each resampled pool of values

            nbootstraps (int) - number of resamplings

        Returns:

            point_estimates (list) - point estimates obtained by resampling eye discs

        """

        # positional selection of the cells that fall within the current window
        cells = self.data.iloc[idx]

        # create dictionary of values for each disc, keyed by consecutive
        # integers (rather than by disc_id) so that discs can be drawn by
        # random integer in subsample_discs
        disc_labels = cells.disc_id
        values_per_disc = {
            i: cells.loc[disc_labels == _id, variable].values
            for i, _id in enumerate(disc_labels.unique())
        }

        # compute point estimates for each subsample
        return [f(self.subsample_discs(values_per_disc))
                for _ in range(nbootstraps)]

    def subsample_discs(self, values_per_disc):
        """
        Subsample discs within the specified set of positional indices, then
        cells within them.

        Args:

            values_per_disc (dict) - dictionary of values keyed by disc indices

        Returns:

            sample (np.ndarray[float]) - sample randomized within and between discs

        """

        # subsample discs: draw as many discs as are present, with replacement
        num_discs = len(values_per_disc)
        disc_ids = np.random.randint(0, num_discs, size=num_discs)
        sampled_values = np.hstack(
            [values_per_disc[disc_id] for disc_id in disc_ids])

        # subsample cells within subsampled discs
        # NOTE(review): presumably <subsample> resamples with replacement;
        # confirm against its definition in the averages module
        return subsample(sampled_values)

    def generate_point_estimates(self):
        """
        Returns an array of point estimates within each window.

        The bootstrap is applied to each rolling window of positional row
        indices into <data>, using the mean as the point estimate.
        """

        def bootstrap_window(idx):
            return self.bootstrap(idx, self.variable, np.mean, self.nbootstraps)

        indices = np.arange(self.data.shape[0])
        return self.apply(bootstrap_window, indices)

    @property
    def mean(self):
        """
        Rolling average of variable, reported as the median (50th percentile)
        of the bootstrap point estimates within each window.
        """
        return np.percentile(self.point_estimates, q=50, axis=1)

    @property
    def confidence_interval(self):
        """
        Confidence interval for rolling average of variable. The interval is
        centered on the median and spans <confidence> percent of the bootstrap
        point estimates. The first axis of the returned array indexes the
        lower and upper bounds.
        """
        q = ((100 - self.confidence) / 2, (100 + self.confidence) / 2)
        return np.percentile(self.point_estimates, q=q, axis=1)