Source code for flyqma.bleedthrough.resampling
import numpy as np
from collections import Counter
def resample_uniformly(x, y, size=None, cutoff=None):
    """
    Resample X and Y uniformly in X.

    Paired samples are drawn with replacement. Each distinct x value is
    weighted by the gap to the next larger distinct x value, and that weight
    is split evenly among all samples sharing the value. The largest distinct
    x value has no following gap, so it is never drawn.

    Args:

        x, y (np.ndarray[float]) - original samples (1-D, equal length)

        size (int) - number of uniform samples, defaults to the number of original samples

        cutoff (float) - upper bound for samples, as a percentile of x (0 to 100)

    Returns:

        x, y (np.ndarray[float]) - resampled s.t. x is uniformly distributed

    Raises:

        ValueError - if x is empty, or if fewer than two distinct x values are available to define an interval

    """
    x = np.asarray(x)
    y = np.asarray(y)

    if x.size == 0:
        raise ValueError('Cannot resample from empty input.')

    if size is None:
        size = x.size

    # sort values
    order = np.argsort(x)
    xx, yy = x[order], y[order]

    # apply threshold on upper bound (no cutoff means nothing is excluded)
    if cutoff is not None:
        threshold = np.percentile(xx, cutoff)
    else:
        threshold = xx.max() + 1

    # distinct x values, the distinct-value index of every sample, and the
    # multiplicity of every distinct value (computed before thresholding)
    xunique, inverse, counts = np.unique(
        xx, return_inverse=True, return_counts=True)

    # filter points below threshold
    keep = (xx <= threshold)
    xx, yy, inverse = xx[keep], yy[keep], inverse[keep]

    # interval following each distinct value that lies below the threshold
    eligible = (xunique[:-1] <= threshold)
    intervals = np.diff(xunique)[eligible]
    if intervals.size == 0:
        raise ValueError(
            'At least two distinct x values are required to resample.')

    # normalized interval width per distinct value; values without an
    # eligible interval (e.g. the largest x) keep zero weight
    weights = np.zeros(xunique.size, dtype=float)
    weights[np.flatnonzero(eligible)] = intervals / intervals.sum()

    # split each distinct value's weight evenly among its duplicates
    p = weights[inverse] / counts[inverse]

    # generate sample
    sample_ind = np.random.choice(xx.size, size=size, p=p)
    xu, yu = xx[sample_ind], yy[sample_ind]

    return xu, yu