Upload
yuki-oyabu
View
391
Download
9
Embed Size (px)
Citation preview
3/10/13 IPython Notebook
127.0.0.1:8888/82205841-8123-485a-9050-a1896762703c/print 1/6
Kernel regression (カーネル回帰)
Kernel regression (local constant estimator) for 1d
In [8]:
In [17]:
In [18]:
In [19]:
In [20]:
In [21]:
# NOTE(review): three import statements were fused onto one line in the
# extracted source; restored to one statement per line.
from pylab import *
from pandas import *
import random
def generate_1d_data(N, func=np.sin):
    """Return a DataFrame of N points (X, func(X)), with X evenly spaced on [0, 2*pi]."""
    xs = np.linspace(0, np.pi * 2, N)
    return DataFrame({'X': xs, 'Y': func(xs)})
def generate_1d_training_data(N, size, func=np.sin):
    """Draw `size` noisy training points from the curve generate_1d_data(N, func).

    Parameters
    ----------
    N : int
        Number of points on the underlying curve.
    size : int
        Number of points sampled from it (without replacement).
    func : callable, default np.sin

    Returns
    -------
    DataFrame with columns X, Y, sorted by index (which coincides with
    increasing X); Y carries additive uniform noise in (-0.5, 0.5].
    """
    DF = generate_1d_data(N, func)
    # Random sub-sample of the curve.  `.loc` replaces the removed
    # DataFrame.ix indexer; random.sample is given a plain list.
    df = DF.loc[random.sample(list(DF.index), size)]
    # Add uniform noise in (-0.5, 0.5].
    df['Y'] = df['Y'] + (.5 - np.random.rand(len(df)))
    # sort_index() replaces the removed no-argument DataFrame.sort().
    return df.sort_index()
def get_gaussian_kernel(h, X, x):
    """Standard-normal kernel evaluated at (X - x) / h.

    No 1/h factor is applied here; the bandwidth division is performed
    by get_gpke.
    """
    z = (X - x) / h
    return np.exp(-0.5 * z ** 2) / np.sqrt(2 * np.pi)
def get_gpke(h, X, x):
    """Generalized Product Kernel Estimator.

    Parameters
    ----------
    h : float
        Bandwidth shared by all dimensions.
    X : 2D array, shape (n, d)
        Sample points.
    x : 1D array, shape (d,)
        Evaluation point.

    Returns
    -------
    1D array of length n: the product over dimensions of per-dimension
    Gaussian kernels, divided by h ** d.
    """
    K = np.empty(X.shape)
    # One Gaussian kernel per dimension (py3 fix: range replaces xrange).
    for j in range(len(x)):
        K[:, j] = get_gaussian_kernel(h, X[:, j], x[j])
    return K.prod(axis=1) / h ** len(x)
def get_local_constant_estimator(h, X, Y, x):
    """Nadaraya-Watson (local constant) kernel regression estimate.

    Parameters:
    -----------
    h: float
        bandwidth for kernel
    Y: 1D array-like
        The dependent variable
    X: 1D or 2D array-like
        The independent variables.
    x: 1D or 2D array-like
        The point(s) at which the density is estimated

    Returns
    -------
    1D array of x.shape[0] predictions: kernel-weighted means of Y.
    """
    y = np.empty(x.shape[0])
    # py3 fix: range replaces xrange.
    for i in range(x.shape[0]):
        K = get_gpke(h, X, x[i])
        y[i] = (Y * K).sum() / K.sum()
    return y
def kernel_regression_for_1d_data(N=100, S=20, h=.5, func=np.sin):
    """Plot the true curve, S noisy samples, and the local-constant kernel fit."""
    plt.figure(figsize=(10, 5))
    # Ground-truth curve.
    truth = generate_1d_data(N, func)
    truth.plot(x='X', y='Y', style='b--', label="正解データ")
    # Noisy training sample.
    train = generate_1d_training_data(N, S, func)
    train.plot(x='X', y='Y', style='g.', label="観測データ")
    endog = np.asarray(train['Y'])
    exog = np.asarray(train['X']).reshape(S, 1)
    grid_x = np.asarray(generate_1d_data(N)['X']).reshape(N, 1)
    pred = get_local_constant_estimator(h, exog, endog, grid_x)
    plot(grid_x, pred, 'r-', label=u"予測結果")
    title(u"Kernel Regression with local constant estimator")
    legend()
    grid()
    ylim(-1.5, 1.5)
# Demo: 100 grid points, 10 noisy samples, bandwidth 0.5, sine target.
kernel_regression_for_1d_data(100, 10, .5, func=np.sin)
# NOTE(review): three import statements were fused onto one line in the
# extracted source; restored to one statement per line.
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from pandas import *
def get_factor_norm_gauss(_Mu, _Sigma):
    """Gaussian normalization constant 1 / ((2*pi)^(D/2) * |Sigma|^(1/2))."""
    dim = float(_Mu.shape[0])
    sigma_det = abs(det(_Sigma))
    return 1. / (((2 * np.pi) ** (dim * 0.5)) * (sigma_det ** 0.5))

def get_probability_gauss(_X, _Mu, _Sigma):
    """Gaussian density at _X for mean _Mu and covariance _Sigma (matrix inputs).

    Returns a 1x1 matrix when the inputs are np.matrix column vectors.
    """
    # Precision matrix: inverse of Sigma, except when Sigma is 1x1 it is
    # used as-is.  NOTE(review): skipping the inversion in the 1x1 case
    # looks suspicious — confirm this is intended.
    if _Sigma.shape != (1, 1):
        _Lambda = np.linalg.inv(_Sigma)
    else:
        _Lambda = _Sigma
    norm_factor = get_factor_norm_gauss(_Mu, _Sigma)
    diff = _X - _Mu
    return norm_factor * np.exp(-0.5 * diff.T * _Lambda * diff)
def generate_2d_gauss(Mu, Sigma):
    """Evaluate a 2-D Gaussian density on a [-5, 5) grid with step 0.25."""
    grid_x, grid_y = np.mgrid[-5:5:0.25, -5:5:0.25]
    xs = grid_x.flatten()
    ys = grid_y.flatten()
    points = [matrix(p).reshape(2, 1) for p in zip(xs, ys)]
    zs = np.asarray([get_probability_gauss(p, Mu, Sigma) for p in points]).flatten()
    return DataFrame({'X': xs, 'Y': ys, 'Z': zs})
def generate_2d_data():
    """Sum of two unit-covariance 2-D Gaussians centered at (-1.5, -1.5)
    and (1.5, 1.5), evaluated on the shared grid of generate_2d_gauss."""
    identity = matrix([1., 0., 0., 1.]).reshape(2, 2)
    mu_a = matrix([-1.5, -1.5]).reshape(2, 1)
    mu_b = matrix([1.5, 1.5]).reshape(2, 1)
    mixture = generate_2d_gauss(mu_a, identity)
    second = generate_2d_gauss(mu_b, identity)
    mixture['Z'] = mixture['Z'] + second['Z']
    return mixture
def generate_2d_training_data(size):
    """Sample `size` rows uniformly without replacement from the 2-D
    mixture produced by generate_2d_data()."""
    DF = generate_2d_data()
    # random.sample needs a plain sequence; `.loc` replaces the removed
    # DataFrame.ix indexer.
    indexes = random.sample(list(DF.index), size)
    return DF.loc[indexes]
def plot_wireframe(ax, X, Y, Z):
    """3-D wireframe plot of flattened grid data.

    Assumes len(X) is a perfect square (a flattened d x d grid).
    """
    # Fix: np.sqrt returns a float, and ndarray.reshape requires integer
    # dimensions — the float form fails on modern NumPy.
    d = int(np.sqrt(len(X)))
    X_ = np.asarray(X).reshape(d, d)
    Y_ = np.asarray(Y).reshape(d, d)
    Z_ = np.asarray(Z).reshape(d, d)
    ax.plot_wireframe(X_, Y_, Z_, rstride=1, cstride=1)
def plot_trisurf(ax, X, Y, Z):
    """3-D triangulated-surface plot of scattered (X, Y, Z) data."""
    xs, ys, zs = (np.asarray(v) for v in (X, Y, Z))
    ax.plot_trisurf(xs, ys, zs, linewidth=0.2, cmap=cm.jet, shade=True)
def kernel_regression_for_2d_data(S=200, h=0.5, func=plot_wireframe):
    """Fit and plot a 2-D local-constant kernel regression.

    Parameters
    ----------
    S : int
        Number of training samples drawn from the mixture.
    h : float
        Kernel bandwidth.  Bug fix: previously a hard-coded 0.5 was passed
        to the estimator and this argument was silently ignored.
    func : callable
        3-D plotting helper (plot_wireframe or plot_trisurf).
    """
    DF = generate_2d_data()
    df = generate_2d_training_data(S)
    data_predict = np.asarray(DF[['X', 'Y']]).reshape(len(DF), 2)
    exog = np.asarray(df[['X', 'Y']]).reshape(len(df), 2)
    endog = np.asarray(df['Z'])
    # Bug fix: pass h through instead of the hard-coded 0.5.
    estimator = get_local_constant_estimator(h, exog, endog, data_predict)
    fig = plt.figure(figsize=(14, 10))
    # Left: ground truth surface with the sampled points.
    ax = fig.add_subplot(221, projection='3d', title=u'混合ガウス分布とサンプル点')
    func(ax, DF['X'], DF['Y'], DF['Z'])
    ax.scatter(df['X'], df['Y'], df['Z'], c='r')
    # Right: kernel-regression prediction with the sampled points.
    ax = fig.add_subplot(222, projection='3d', title=u"カーネル回帰結果とサンプル点")
    func(ax, DF['X'], DF['Y'], estimator)
    ax.scatter(df['X'], df['Y'], df['Z'], c='r')
# Demo: 100 samples, bandwidth 0.1, wireframe rendering.
kernel_regression_for_2d_data(100, h=0.1, func=plot_wireframe)
3/10/13 IPython Notebook
127.0.0.1:8888/82205841-8123-485a-9050-a1896762703c/print 4/6
In [53]:
Kernel regression using statsmodels
In [29]:
Kernel regression (local linear estimator) for 1d
In [42]:
# Demo: 100 samples, bandwidth 0.5, triangulated-surface rendering.
kernel_regression_for_2d_data(100, 0.5, func=plot_trisurf)
from statsmodels.nonparametric import kernel_regression
# Compare statsmodels KernelReg (local constant 'lc' vs local linear 'll')
# against the true sine curve and the noisy observations.
# NOTE(review): the extracted source truncated both label expressions after
# `KR.reg_type.upper`; the closing `()))` is reconstructed from the intact
# copy of the same expression preserved (commented out) in this cell.
func = np.sin
DF = generate_1d_data(100, func=func)
df = generate_1d_training_data(100, 20, func=func)

plt.figure(figsize=(10, 5))
plot(DF['X'], DF['Y'], 'g--', label=u'正解データ')
plot(df['X'], df['Y'], 'r.', label=u'観測データ')

# Local constant regression with a cross-validated bandwidth.
KR = kernel_regression.KernelReg(df['Y'], df['X'], 'c', reg_type='lc')
plot(DF['X'], KR.fit(DF['X'])[0], '-',
     label=u'{2}($h$={0}, $R^2$={1})'.format(KR.bw[0], KR.r_squared(), KR.reg_type.upper()))

# Local linear regression with a cross-validated bandwidth.
KR = kernel_regression.KernelReg(df['Y'], df['X'], 'c', reg_type='ll')
plot(DF['X'], KR.fit(DF['X'])[0], '-',
     label=u'{2}($h$={0}, $R^2$={1})'.format(KR.bw[0], KR.r_squared(), KR.reg_type.upper()))

ylim(-1.5, 1.5)
grid()
legend()
Out[42]: <matplotlib.legend.Legend at 0x10e070650>
3/10/13 IPython Notebook
127.0.0.1:8888/82205841-8123-485a-9050-a1896762703c/print 5/6
Kernel regression (local linear estimator) for 2d
In [63]:
In [64]:
Out[42]: <matplotlib.legend.Legend at 0x10e070650>
# 2-D KernelReg comparison, wireframe rendering: left subplot shows the
# true mixture surface, right subplot shows the local-linear fit.
# NOTE(review): the extracted source truncated both title expressions after
# `KR.r_squared`; the closing `()))` is reconstructed from the parallel
# 1-D cell's intact format string.
DF = generate_2d_data()
df = generate_2d_training_data(200)

fig = plt.figure(figsize=(14, 10))

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='lc')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(221, projection='3d',
                     title=u"{0}($h$={1}, $R^2$={2})".format(KR.reg_type.upper(), KR.bw, KR.r_squared()))
plot_wireframe(ax, DF['X'], DF['Y'], DF['Z'])
ax.scatter(df['X'], df['Y'], df['Z'], c='r')

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='ll')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(222, projection='3d',
                     title=u"{0}($h$={1}, $R^2$={2})".format(KR.reg_type.upper(), KR.bw, KR.r_squared()))
plot_wireframe(ax, DF['X'], DF['Y'], Z)
ax.scatter(df['X'], df['Y'], df['Z'], c='r')
Out[63]: <mpl_toolkits.mplot3d.art3d.Patch3DCollection at 0x109d519d0>
# 2-D KernelReg comparison, triangulated-surface rendering: left subplot
# shows the true mixture surface, right subplot shows the local-linear fit.
# NOTE(review): the print extraction duplicated the `Z = KR.fit(...)` line
# across a page break; it appears once here.
DF = generate_2d_data()
df = generate_2d_training_data(200)

fig = plt.figure(figsize=(14, 10))

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='lc')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(221, projection='3d', title=u'{0}(bw={1})'.format(KR.reg_type, KR.bw))
plot_trisurf(ax, DF['X'], DF['Y'], DF['Z'])
ax.scatter(df['X'], df['Y'], df['Z'], c='r')

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='ll')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(222, projection='3d', title=u"{0}(bw={1})".format(KR.reg_type, KR.bw))
plot_trisurf(ax, DF['X'], DF['Y'], Z)
ax.scatter(df['X'], df['Y'], df['Z'], c='r')
Out[64]: <mpl_toolkits.mplot3d.art3d.Patch3DCollection at 0x10aea4690>