Source code for flyqma.bleedthrough.models

import numpy as np
import statsmodels.api as sm
import warnings


[docs]class OLS: """ Ordinary least squares model fit to X and Y data. Attributes: x, y (array like) - data model (sm.OLS) - fitted OLS model """ def __init__(self, x, y, **fit_kw): """ Instantiate ordinary least squared model. """ self.x = x self.y = y self.fit(**fit_kw)
[docs] def fit(self, **kw): """ Fit model to X and Y data. """ self.domain = np.linspace(0, self.x.max(), 10) x = sm.tools.add_constant(self.x.reshape(-1, 1)) self.model = sm.OLS(self.y, x, hasconst=None).fit()
[docs] def predict(self, x): """ Make model prediction. """ xx = sm.tools.add_constant(x.reshape(-1, 1)) return self.model.predict(xx)
[docs] def detrend(self, x, y): """ Remove linear trend from X and Y data. """ return y - self.predict(x)
[docs]class GLM(OLS): """ Generalized linear model with gamma distributed residuals and an identity link function fit to X and Y data. Attributes: model (sm.genmod.GLM) - generalized linear model domain (np.ndarray[float]) - regularly spaced x-domain Inherited attributes: x, y (array like) - data """
[docs] def fit(self, N=100000, maxiter=500, shift=0): """ Fit Gamma GLM with identity link function. Args: N (int) - number of samples used maxiter (int) - maximum number of iterations for optimization shift (float) - offset used to keep values positive """ self.domain = np.linspace(0, self.x.max(), 10) # downsample if N is not None: ind = np.random.randint(0, self.x.size, size=N) else: ind = np.arange(self.x.size) x, y = self.x[ind], self.y[ind] # construct variables xx = sm.tools.add_constant(x.reshape(-1, 1)) yy = y + shift # define model family = sm.families.Gamma(link=sm.families.links.identity()) with warnings.catch_warnings(): warnings.simplefilter("ignore") glm = sm.genmod.GLM(yy, xx, family=family) # fit model start_params = [0.1+shift, 0.5] self.model = glm.fit(start_params=start_params, maxiter=maxiter) if not self.model.converged: raise Warning('GLM did not converge.')