# Source code for flyeye.dynamics.resampling
import numpy as np
from .averages import subsample, get_rolling_window
class DiscResampler:
    """
    Class for evaluating the moving average across multiple discrete populations of cells. Measurements are resampled from pools of cells generated by resampling the discrete populations.

    Attributes:

        data (pd.DataFrame) - cell measurement data, including a disc_id attribute

        variable (str) - variable to be averaged

        window_size (int) - rolling window size

        resolution (int) - sampling resolution

        nbootstraps (int) - number of resampling

        confidence (float) - confidence interval, 0 to 100

        point_estimates (np.ndarray[float]) - array of point estimates within each window (reduced along axis 1 by the mean and confidence_interval properties; presumably one row per window and one column per bootstrap sample)

    """

    def __init__(self, cells, variable,
                 window_size=250,
                 resolution=10,
                 nbootstraps=1000,
                 confidence=95):
        """
        Instantiate object for bootstrap resampling the moving average of <variable> across a population of eye discs.

        All point estimates are computed eagerly during construction.

        Args:

            cells (data.cells.Cells) - cell measurement data

            variable (str) - variable to be averaged

            window_size (int) - rolling window size

            resolution (int) - sampling resolution

            nbootstraps (int) - number of resampling

            confidence (float) - confidence interval, 0 to 100

        """
        self.data = cells.data
        self.variable = variable
        self.window_size = window_size
        self.resolution = resolution
        self.nbootstraps = nbootstraps
        self.confidence = confidence

        # must come last: the computation reads every attribute set above
        self.point_estimates = self.generate_point_estimates()

    def roll(self, x):
        """ Returns rolling window over <x>, using the instance window_size and resolution. """
        return get_rolling_window(x, self.window_size, self.resolution)

    def apply(self, f, x):
        """
        Applies function <f> to rolling window over <x>.

        Args:

            f (callable) - function applied to each 1-D slice along the last axis of the rolling window

            x (array like) - values over which the window is rolled

        Returns:

            result (np.ndarray) - output of np.apply_along_axis over the rolling window

        """
        return np.apply_along_axis(f, axis=-1, arr=self.roll(x))

    def bootstrap(self, idx, variable, f=np.mean, nbootstraps=1000):
        """
        Returns list of point estimates obtained by resampling eye discs.

        Args:

            idx (pd.Index) - positional indices for current window

            variable (str) - name of variable

            f (callable) - statistic evaluated on each resampled pool of values

            nbootstraps (int) - number of resamplings

        Returns:

            point_estimates (list) - point estimates obtained by resampling eye discs

        """

        # select the window once, then pull out the two columns that are needed
        # so the per-disc filter below does not re-slice the whole frame
        cells = self.data.iloc[idx]
        disc_labels = cells.disc_id
        measurements = cells[variable]

        # create dictionary of values for each disc, keyed by 0..n-1 in order of
        # first appearance so that discs can be drawn by random integer
        values_per_disc = {
            i: measurements[disc_labels == _id].values
            for i, _id in enumerate(disc_labels.unique())
        }

        # compute point estimates for each subsample
        return [f(self.subsample_discs(values_per_disc))
                for _ in range(nbootstraps)]

    def subsample_discs(self, values_per_disc):
        """
        Subsample discs within the specified set of positional indices, then cells within them.

        Args:

            values_per_disc (dict) - dictionary of values keyed by disc indices (consecutive integers starting at zero)

        Returns:

            sample (np.ndarray[float]) - sample randomized within and between discs

        """

        # subsample discs: draw as many discs as exist, with replacement
        num_discs = len(values_per_disc)
        disc_ids = np.random.randint(0, num_discs, size=num_discs)
        sampled_values = np.hstack([values_per_disc[disc_id]
                                    for disc_id in disc_ids])

        # subsample cells within subsampled discs
        # NOTE(review): the resampling scheme is defined by <subsample> in
        # .averages - presumably with replacement; confirm there
        return subsample(sampled_values)

    def generate_point_estimates(self):
        """ Returns an array of point estimates (bootstrapped means of the variable) within each window. """

        def bootstrap_window(idx):
            # idx holds the positional row indices of a single window
            return self.bootstrap(idx, self.variable, np.mean, self.nbootstraps)

        # roll the window over row positions rather than values, so each
        # window can look up both the variable and the disc labels
        indices = np.arange(self.data.shape[0])
        return self.apply(bootstrap_window, indices)

    @property
    def mean(self):
        """ Rolling average of variable, taken as the 50th percentile of the point estimates within each window. """
        return np.percentile(self.point_estimates, q=50, axis=1)

    @property
    def confidence_interval(self):
        """ Confidence interval for rolling average of variable at the <confidence> level (lower bounds first, then upper bounds). """
        q = (((100-self.confidence)/2), (100+self.confidence)/2)
        return np.percentile(self.point_estimates, q=q, axis=1)