假设概率分布为f(x),其CDF为g(x),我们不加证明的给出,产生[0, 1)之间的随机数,并将其输入g(x)的反函数,得到的值服从f(x)分布,实现的代码如下:
import numpy as np from scipy.interpolate import interp1d, UnivariateSpline from scipy.integrate import cumtrapz, trapz #import matplotlib.pyplot as plt def get_inv_cdf(f, x=None, x_range=None): ''' Get inverse of CDF given PDF f(x) f: PDF, no need for normalization x: None or array_like, input x to evaluate f, if None, use xmin, xmax and npoints to generate x x_range: None or array_like with length 3, [xmin, xmax, npoints] xmin: the minimum for x to evaluate f, f(x)approx 0 for xxmax npoints: number of points to evaluate f return: callable inverse of CDF ''' assert x is not None or x_range is not None, 'Input x or x_range' if x is None: x = np.linspace(*x_range) else: x = np.asarray(x) y = f(x) cdf = cumtrapz(y, x) cdf = np.append(0, cdf) dcdf = np.diff(cdf) dcdf = np.append(0, dcdf) cdf = cdf[dcdf>0] # print(np.diff(cdf).min()) x = x[dcdf>0] cdf = cdf/cdf[-1] invcdf = UnivariateSpline(cdf, x, k=3, s=0, ext=3) # plt.figure() # plt.plot(x, cdf) return invcdf def get_rvs(f, x=None, x_range=None): ''' Get random variable sampler given PDF f(x) f: PDF, no need for normalization x: None or array_like, input x to evaluate f, if None, use xmin, xmax and npoints to generate x x_range: None or array_like with length 3, [xmin, xmax, npoints] xmin: the minimum for x to evaluate f, f(x)approx 0 for x xmax npoints: number of points to evaluate f return: rvs, callable random variable sampler, usage is the same as np.random.rand ''' invcdf = get_inv_cdf(f=f, x=x, x_range=x_range) def _rvs(*args, **kwargs): return invcdf(np.random.rand(*args, **kwargs)) return _rvs if __name__ == '__main__': import matplotlib.pyplot as plt def normalize(x, y): x = np.asarray(x) y = np.asarray(y) norm = trapz(y, x) return y/norm def pdf(xin): xin = np.asarray(xin) result = np.zeros_like(xin) x = xin[(xin>=1)&(xin<3)] result[(xin>=1)&(xin<3)] = x + 2*x**2 + x**3/2. return result x = np.linspace(-2, 5, 1001) y = pdf(x) inv_cdf = get_inv_cdf(pdf, x_range=[-3, 6, 1001]) rvs = get_rvs(pdf, x_range = [-3, 6, 1001]) rvs2 = get_rvs(pdf, x = np.logspace(-2, 1.2, 1001)) plt.figure() plt.hist(rvs(100000), bins=100, alpha=0.5, density=True) plt.plot(x, normalize(x, y)) plt.figure() plt.hist(rvs2(100000), bins=100, alpha=0.5, density=True) plt.plot(x, normalize(x, y)) plt.figure() plt.hist(inv_cdf(np.random.rand(100000)), bins=100, alpha=0.5, density=True) plt.plot(x, normalize(x, y)) plt.show() def gaus(x, scale, mean): return np.exp(-(x-mean)**2/2./scale**2)/np.sqrt(2*np.pi*scale**2) s1 = np.random.normal(scale=1.23, loc=0.32, size=100000) s2 = get_rvs(lambda x:gaus(x, 1.23, 0.32), x_range=[-10,10,1001])(100000) x = np.linspace(-12, 12, 1001) plt.plot(x, gaus(x, 1.23, 0.32)) plt.hist(s1, bins=200, density=True, alpha=0.5) plt.hist(s2, bins=200, density=True, alpha=0.5) plt.show()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)