NGBoost

Case 1: 線形データ・不均一分散

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Case 1 synthetic data: the mean of y is x itself, and the noise standard
# deviation sqrt(x) grows with x (heteroscedastic noise).
x = np.linspace(0, 10, num=1000)
sigma = np.sqrt(x)
y = norm.rvs(loc=x, scale=sigma, random_state=0)  # seeded draw
X = x[:, np.newaxis]  # single-feature column matrix for the regressor

# Scatter the samples and overlay the mean line y = x.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.plot(x, x, label="mean", color="black", alpha=.5)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
fig.show()
../../_images/338ce896cfcb5d536a57c59ba07df7864e3ba9faf0a2f57791de15cbd544bb1a.png
from ngboost import NGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Hold out 20% of the Case 1 samples for evaluation.
# NOTE: no random_state is passed, so the split (and the metrics printed
# below) are expected to differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit NGBoost with its default settings, then get both the point prediction
# and the full predictive distribution for the held-out inputs.
ngb = NGBRegressor().fit(X_train, y_train)
y_pred = ngb.predict(X_test)
y_dist = ngb.pred_dist(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# so the value is unchanged, but the conventional order keeps the call correct
# if the metric is ever swapped for an asymmetric one.
print('Test MSE', mean_squared_error(y_test, y_pred))

# Test negative log-likelihood: mean of -log p(y_test) under the predicted
# distribution.
test_NLL = -y_dist.logpdf(y_test).mean()
print('Test NLL', test_NLL)
[iter 0] loss=2.7103 val_loss=0.0000 scale=1.0000 norm=3.1009
[iter 100] loss=2.1651 val_loss=0.0000 scale=2.0000 norm=3.4747
[iter 200] loss=1.9699 val_loss=0.0000 scale=2.0000 norm=3.2506
[iter 300] loss=1.8937 val_loss=0.0000 scale=2.0000 norm=3.1638
[iter 400] loss=1.8567 val_loss=0.0000 scale=2.0000 norm=3.1042
Test MSE 5.515487108921018
Test NLL 2.3621399443256466
# Case 1: redraw the data and overlay NGBoost's predictive intervals.
fig, ax = plt.subplots()
ax.scatter(x, y, alpha=.5)
ax.plot(x, x, label="mean", color="black", alpha=.5)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()

# Sort the held-out inputs so each bound is drawn as a left-to-right curve,
# then re-predict the distribution at the sorted inputs.
# NOTE: this rebinds X_test, so it no longer lines up row-by-row with y_test.
X_test = np.sort(X_test, axis=0)
y_dist = ngb.pred_dist(X_test)
dist_params = y_dist.params

alphas = [0.05, 0.01]
colors = ["darkorange", "tomato"]
for alpha, color in zip(alphas, colors):
    # Two-sided interval: the alpha/2 and 1 - alpha/2 quantiles of a normal
    # distribution with the predicted loc and scale.
    tail = alpha / 2
    lower = norm.ppf(q=tail, loc=dist_params["loc"], scale=dist_params["scale"])
    upper = norm.ppf(q=1 - tail, loc=dist_params["loc"], scale=dist_params["scale"])

    bound_style = dict(alpha=.9, color=color, linestyle="--")
    # Only the upper bound carries a label, so each alpha appears once in the legend.
    ax.plot(X_test[:, 0], upper, label=rf"$\alpha$={alpha}", **bound_style)
    ax.plot(X_test[:, 0], lower, **bound_style)

# Rebuild the legend so it includes the interval entries added above.
ax.legend()
fig.show()
../../_images/c1e5cef50bb38f0f5555a8079ab5eb72169dfa5d8cf032aaf319481a895af5d2.png

Case 2: 非線形データ・不均一分散

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Case 2 synthetic data: a quadratic mean z with a noise standard deviation
# that oscillates with x (it stays between 5 and 15 because sin is in [-1, 1]).
x = np.linspace(0, 5, num=1000)
sigma = (np.sin(x / 1) + 2) * 5
z = 10 + x + x ** 2
y = norm.rvs(loc=z, scale=sigma, random_state=0)  # seeded draw
X = x[:, np.newaxis]  # single-feature column matrix for the regressor

# Scatter the samples and overlay the mean curve z.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.plot(x, z, label="mean", color="black", alpha=.5)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
fig.show()
../../_images/f0752cf2a310236ad5694fa8e59a26f7b53e923a189c7bfda3526afc18de334c.png
from ngboost import NGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Hold out 20% of the Case 2 samples for evaluation.
# NOTE: no random_state is passed, so the split (and the metrics printed
# below) are expected to differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit NGBoost with its default settings, then get both the point prediction
# and the full predictive distribution for the held-out inputs.
ngb = NGBRegressor().fit(X_train, y_train)
y_pred = ngb.predict(X_test)
y_dist = ngb.pred_dist(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# so the value is unchanged, but the conventional order keeps the call correct
# if the metric is ever swapped for an asymmetric one.
print('Test MSE', mean_squared_error(y_test, y_pred))

# Test negative log-likelihood: mean of -log p(y_test) under the predicted
# distribution.
test_NLL = -y_dist.logpdf(y_test).mean()
print('Test NLL', test_NLL)
[iter 0] loss=4.1088 val_loss=0.0000 scale=1.0000 norm=12.1106
[iter 100] loss=3.7391 val_loss=0.0000 scale=2.0000 norm=16.9144
[iter 200] loss=3.6265 val_loss=0.0000 scale=2.0000 norm=16.2138
[iter 300] loss=3.5779 val_loss=0.0000 scale=2.0000 norm=15.7726
[iter 400] loss=3.5399 val_loss=0.0000 scale=2.0000 norm=15.4319
Test MSE 108.47642101547953
Test NLL 3.6698761860973876
# Case 2: redraw the data and overlay NGBoost's predictive intervals.
fig, ax = plt.subplots()
ax.scatter(x, y, alpha=.5)
# The Case 2 mean is the quadratic z = 10 + x + x**2, not x itself, so the
# "mean" line must plot z (plotting x here was a copy-paste slip from Case 1).
ax.plot(x, z, color="black", alpha=.5, label="mean")
ax.set(xlabel="x", ylabel="y")
ax.legend()

# Sort the held-out inputs so each bound is drawn as a left-to-right curve,
# then re-predict the distribution at the sorted inputs.
# NOTE: this rebinds X_test, so it no longer lines up row-by-row with y_test.
X_test = np.sort(X_test, axis=0)
y_dist = ngb.pred_dist(X_test)

alphas = [0.05, 0.01]
colors = ["darkorange", "tomato"]
for alpha, color in zip(alphas, colors):
    # Two-sided interval: the 1 - alpha/2 and alpha/2 quantiles of a normal
    # distribution with the predicted loc and scale.
    upper = norm.ppf(q=1 - (alpha/2), loc=y_dist.params["loc"], scale=y_dist.params["scale"])
    lower = norm.ppf(q=(alpha/2), loc=y_dist.params["loc"], scale=y_dist.params["scale"])
    # Only the upper bound carries a label, so each alpha appears once in the legend.
    ax.plot(X_test[:, 0], upper, alpha=.9, color=color, linestyle="--", label=rf"$\alpha$={alpha}")
    ax.plot(X_test[:, 0], lower, alpha=.9, color=color, linestyle="--")

# Rebuild the legend so it includes the interval entries added above.
ax.legend()
fig.show()
../../_images/9e1881dc8f034e9a2a04959d71fcebe8fd8613a824103017638995efa1df7e4e.png