
Kernel regression with Python (DEMO)



Kernel regression

Kernel regression (local constant estimator) for 1d

In [8]:
from pylab import *
from pandas import *
import random

In [17]:
def generate_1d_data(N, func=np.sin):
    # Output data points (X, Y) that follow the sine function
    X = np.linspace(0, np.pi * 2, N)
    Y = func(X)
    return DataFrame({'X': X, 'Y': Y})

In [18]:
def generate_1d_training_data(N, size, func=np.sin):
    # Generate data points on the sine curve
    DF = generate_1d_data(N, func)
    # Sample randomly
    df = DF.ix[random.sample(DF.index, size)]
    # Add noise
    df['Y'] = df['Y'] + (.5 - np.random.rand(len(df)))
    return df.sort()

In [19]:
def get_gaussian_kernel(h, X, x):
    """calculate gaussian kernel"""
    return (np.sqrt(2 * np.pi) ** -1) * np.exp(-.5 * ((X - x) / h) ** 2)

In [20]:
def get_gpke(h, X, x):
    """calculate Generalized Product Kernel Density Estimator"""
    K = np.empty(X.shape)
    for j in xrange(len(x)):
        K[:, j] = get_gaussian_kernel(h, X[:, j], x[j])
    gpke = K.prod(axis=1) / h ** len(x)
    return gpke

In [21]:
def get_local_constant_estimator(h, X, Y, x):
    """calculate local constant estimator

    Parameters
    ----------
    h : float
        bandwidth for kernel
    Y : 1D array-like
        The dependent variable
    X : 1D or 2D array-like
        The independent variables
    x : 1D or 2D array-like
        The point(s) at which the regression is estimated
    """
    y = np.empty(x.shape[0])
    for i in xrange(x.shape[0]):
        K = get_gpke(h, X, x[i])
        y[i] = (Y * K).sum() / K.sum()
    return y
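The estimator above is the Nadaraya-Watson form: the prediction at a point x is a kernel-weighted average of the observed Y, with weights given by get_gpke. A minimal sanity check, not part of the original notebook, assuming the cells above have been run (the toy values are illustrative only):

X_toy = np.array([[0.], [1.], [2.]])
Y_toy = np.array([0., 1., 0.])
x_toy = np.array([[1.]])
# The middle observation is closest to x=1 and gets the largest
# kernel weight, so the estimate is pulled from the sample mean
# (1/3) toward Y=1; with h=1 it comes out around 0.45.
print get_local_constant_estimator(1., X_toy, Y_toy, x_toy)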

In [24]:
def kernel_regression_for_1d_data(N=100, S=20, h=.5, func=np.sin):
    fig = plt.figure(figsize=(10, 5))
    DF = generate_1d_data(N, func)
    DF.plot(x='X', y='Y', style='b--', label="ground truth")
    DF = generate_1d_training_data(N, S, func)
    DF.plot(x='X', y='Y', style='g.', label="observed data")
    Y = np.asarray(DF['Y'])
    X = np.asarray(DF['X']).reshape(S, 1)
    x = np.asarray(generate_1d_data(N)['X']).reshape(N, 1)
    y = get_local_constant_estimator(h, X, Y, x)
    plot(x, y, 'r-', label="prediction")
    title(u"Kernel Regression with local constant estimator")
    legend()
    grid()
    ylim(-1.5, 1.5)

In [27]:
kernel_regression_for_1d_data(100, 10, .5, func=np.sin)
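The bandwidth h trades bias against variance: a small h chases the noise in the samples, a large h over-smooths toward a flat mean. A quick sweep (a sketch, not in the original notebook; it reuses kernel_regression_for_1d_data from above and draws one figure per bandwidth):

for h in (.1, .5, 2.):
    kernel_regression_for_1d_data(100, 10, h, func=np.sin)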

Kernel regression (local constant estimator) for 2d

In [39]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from pandas import *

In [40]:
def get_factor_norm_gauss(_Mu, _Sigma):
    # Normalization factor of a D-dimensional Gaussian:
    # 1 / ((2*pi)^(D/2) * |Sigma|^(1/2))
    D = float(_Mu.shape[0])
    S = abs(det(_Sigma))
    numer = ((2 * np.pi) ** (D * 0.5)) * (S ** 0.5)
    denom = 1.
    return denom / numer

def get_probability_gauss(_X, _Mu, _Sigma):
    # Evaluate the density of a 2d Gaussian distribution
    _Lambda = _Sigma
    if _Sigma.shape != (1, 1):
        _Lambda = np.linalg.inv(_Sigma)
    norm_factor = get_factor_norm_gauss(_Mu, _Sigma)
    non_norm_factor = np.exp((-0.5) * (_X - _Mu).T * _Lambda * (_X - _Mu))
    return norm_factor * non_norm_factor
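As a quick check of the density code (again not in the original notebook): at _X = _Mu the exponential term is 1, so get_probability_gauss should reduce to the normalization factor, which for a 2d identity covariance is 1 / (2*pi):

Mu = matrix([0., 0.]).reshape(2, 1)
Sigma = matrix([1., 0., 0., 1.]).reshape(2, 2)
# Expect roughly 0.159 (= 1 / (2*pi)) for the identity covariance
print get_probability_gauss(Mu, Mu, Sigma)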

In [41]:
def generate_2d_gauss(Mu, Sigma):
    # Generate a 2d Gaussian distribution on a grid
    _X, _Y = np.mgrid[-5:5:0.25, -5:5:0.25]
    X = _X.flatten()
    Y = _Y.flatten()
    L = [matrix(e).reshape(2, 1) for e in zip(X, Y)]
    Z = np.asarray([get_probability_gauss(e, Mu, Sigma) for e in L]).flatten()
    return DataFrame({'X': X, 'Y': Y, 'Z': Z})

In [42]:
def generate_2d_data():
    # Generate a mixture of two 2d Gaussian distributions
    Sigma = matrix([1., 0., 0., 1.]).reshape(2, 2)
    Mu1 = matrix([-1.5, -1.5]).reshape(2, 1)
    Mu2 = matrix([1.5, 1.5]).reshape(2, 1)
    DF1 = generate_2d_gauss(Mu1, Sigma)
    DF2 = generate_2d_gauss(Mu2, Sigma)
    DF = DF1
    DF['Z'] = DF1['Z'] + DF2['Z']
    return DF

In [43]:
def generate_2d_training_data(size):
    # Sample data points from the Gaussian mixture
    DF = generate_2d_data()
    indexes = random.sample(DF.index, size)
    return DF.ix[indexes]

In [44]:
def plot_wireframe(ax, X, Y, Z):
    # 3d plot, variant 1
    d = int(np.sqrt(len(X)))  # grid side length; np.sqrt returns a float
    X_ = np.asarray(X).reshape(d, d)
    Y_ = np.asarray(Y).reshape(d, d)
    Z_ = np.asarray(Z).reshape(d, d)
    ax.plot_wireframe(X_, Y_, Z_, rstride=1, cstride=1)

In [45]:
def plot_trisurf(ax, X, Y, Z):
    # 3d plot, variant 2
    X_ = np.asarray(X)
    Y_ = np.asarray(Y)
    Z_ = np.asarray(Z)
    ax.plot_trisurf(X_, Y_, Z_, linewidth=0.2, cmap=cm.jet, shade=True)

In [46]:
def kernel_regression_for_2d_data(S=200, h=0.5, func=plot_wireframe):
    DF = generate_2d_data()
    df = generate_2d_training_data(S)
    data_predict = np.asarray(DF[['X', 'Y']]).reshape(len(DF), 2)
    exog = np.asarray(df[['X', 'Y']]).reshape(len(df), 2)
    endog = np.asarray(df['Z'])
    # Use the h argument (the original hard-coded 0.5 here, ignoring h)
    estimator = get_local_constant_estimator(h, exog, endog, data_predict)
    fig = plt.figure(figsize=(14, 10))
    ax = fig.add_subplot(221, projection='3d', title=u"Gaussian mixture and sample points")
    func(ax, DF['X'], DF['Y'], DF['Z'])
    ax.scatter(df['X'], df['Y'], df['Z'], c='r')
    ax = fig.add_subplot(222, projection='3d', title=u"kernel regression result and sample points")
    func(ax, DF['X'], DF['Y'], estimator)
    ax.scatter(df['X'], df['Y'], df['Z'], c='r')

In [57]:
kernel_regression_for_2d_data(100, h=0.1, func=plot_wireframe)
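This call and the next use h=0.1 and h=0.5, which shows how strongly the bandwidth shapes the fitted surface. A sketch for sweeping several bandwidths in one go (not in the original notebook; one figure per h):

for h in (.1, .5, 1.):
    kernel_regression_for_2d_data(100, h=h, func=plot_wireframe)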

In [53]:
kernel_regression_for_2d_data(100, 0.5, func=plot_trisurf)

Kernel regression with Statsmodels

In [29]:
from statsmodels.nonparametric import kernel_regression

Kernel regression (local linear estimator) for 1d

In [42]:
func = np.sin
DF = generate_1d_data(100, func=func)
df = generate_1d_training_data(100, 20, func=func)

fig = plt.figure(figsize=(10, 5))
plot(DF['X'], DF['Y'], 'g--', label=u'ground truth')
plot(df['X'], df['Y'], 'r.', label=u'observed data')

# A fixed-bandwidth variant, disabled in favor of the data-driven bandwidth below:
"""
KR = kernel_regression.KernelReg(df['Y'], df['X'], 'c', bw=[0.5], reg_type='lc')
plot(DF['X'], KR.fit(DF['X'])[0], '-',
     label=u'{2}($h$={0}, $R^2$={1})'.format(KR.bw[0], KR.r_squared(), KR.reg_type.upper()))

KR = kernel_regression.KernelReg(df['Y'], df['X'], 'c', bw=[0.5], reg_type='ll')
plot(DF['X'], KR.fit(DF['X'])[0], '-',
     label=u'{2}($h$={0}, $R^2$={1})'.format(KR.bw[0], KR.r_squared(), KR.reg_type.upper()))
"""

KR = kernel_regression.KernelReg(df['Y'], df['X'], 'c', reg_type='lc')
plot(DF['X'], KR.fit(DF['X'])[0], '-',
     label=u'{2}($h$={0}, $R^2$={1})'.format(KR.bw[0], KR.r_squared(), KR.reg_type.upper()))

KR = kernel_regression.KernelReg(df['Y'], df['X'], 'c', reg_type='ll')
plot(DF['X'], KR.fit(DF['X'])[0], '-',
     label=u'{2}($h$={0}, $R^2$={1})'.format(KR.bw[0], KR.r_squared(), KR.reg_type.upper()))

ylim(-1.5, 1.5)
grid()
legend()

Out[42]: <matplotlib.legend.Legend at 0x10e070650>
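When bw is omitted, as in the active calls above, KernelReg picks the bandwidth by least-squares cross-validation (the statsmodels default, bw='cv_ls'); the commented-out block fixes it at 0.5 instead. A sketch comparing the two, assuming df from the previous cell is still in scope:

KR_cv = kernel_regression.KernelReg(df['Y'], df['X'], 'c', reg_type='ll')             # data-driven bandwidth
KR_fix = kernel_regression.KernelReg(df['Y'], df['X'], 'c', reg_type='ll', bw=[0.5])  # fixed bandwidth
print KR_cv.bw, KR_cv.r_squared()
print KR_fix.bw, KR_fix.r_squared()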

Kernel regression (local linear estimator) for 2d

In [63]:
DF = generate_2d_data()
df = generate_2d_training_data(200)

fig = plt.figure(figsize=(14, 10))

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='lc')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(221, projection='3d',
                     title=u"{0}($h$={1}, $R^2$={2})".format(KR.reg_type.upper(), KR.bw, KR.r_squared()))
plot_wireframe(ax, DF['X'], DF['Y'], DF['Z'])
ax.scatter(df['X'], df['Y'], df['Z'], c='r')

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='ll')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(222, projection='3d',
                     title=u"{0}($h$={1}, $R^2$={2})".format(KR.reg_type.upper(), KR.bw, KR.r_squared()))
plot_wireframe(ax, DF['X'], DF['Y'], Z)
ax.scatter(df['X'], df['Y'], df['Z'], c='r')

Out[63]: <mpl_toolkits.mplot3d.art3d.Patch3DCollection at 0x109d519d0>

In [64]:
DF = generate_2d_data()
df = generate_2d_training_data(200)

fig = plt.figure(figsize=(14, 10))

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='lc')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(221, projection='3d', title=u'{0}(bw={1})'.format(KR.reg_type, KR.bw))
plot_trisurf(ax, DF['X'], DF['Y'], DF['Z'])
ax.scatter(df['X'], df['Y'], df['Z'], c='r')

KR = kernel_regression.KernelReg(df['Z'], df[['X', 'Y']], 'cc', reg_type='ll')
Z = KR.fit(DF[['X', 'Y']])[0]
ax = fig.add_subplot(222, projection='3d', title=u"{0}(bw={1})".format(KR.reg_type, KR.bw))
plot_trisurf(ax, DF['X'], DF['Y'], Z)
ax.scatter(df['X'], df['Y'], df['Z'], c='r')

Out[64]: <mpl_toolkits.mplot3d.art3d.Patch3DCollection at 0x10aea4690>
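A closing note: KernelReg.fit returns a pair (conditional mean, marginal effects), and the cells above keep only the mean via [0]. If my reading of the statsmodels API is right, the second element holds the estimated partial derivatives of the regression surface:

mean, mfx = KR.fit(DF[['X', 'Y']])
# mfx should have one column per regressor: the estimated
# partial derivatives of Z with respect to X and Y.
print mfx.shape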