[1]:
import numpy as np
from sklearn.gaussian_process.kernels import Matern, RBF
from sklearn.tree import DecisionTreeRegressor

import plotly

from docs.plotting_utils import gen_model_barplots
from docs.data_generation import gen_cov_mat, gen_rbf_X, gen_matern_X
from spe.estimators import cp_arbitrary

Quickstart

This notebook demonstrates the basic usage of the spe package.

[2]:
## number of realizations to run
niter = 100

## data generation parameters
gsize = 10    # side length of the square spatial domain (linspace upper bound)
n = 10 ** 2   # number of observations: a 10 x 10 grid
p = 30        # number of features
s = 30        # number of nonzero coefficients in beta
delta = 0.75  # weight of the spatial kernel vs. iid noise in Sigma_Y
snr = 0.4

noise_kernel = 'matern'
noise_length_scale = 1.0
noise_nu = 0.5

X_kernel = 'matern'
X_length_scale = 5.0
X_nu = 2.5

## plot parameters
model_names = ["Decision Tree"]
est_names = ["GenCp", "KFCV", "SPCV"]

Generate toy data

[3]:
# Lay out n observation sites on a regular nx-by-ny grid over [0, gsize]^2.
nx = ny = int(np.sqrt(n))
xs = np.linspace(0, gsize, nx)
ys = np.linspace(0, gsize, ny)
grid_x, grid_y = np.meshgrid(xs, ys)
c_x = grid_x.ravel()
c_y = grid_y.ravel()
# (n, 2) array of site coordinates, one row per location
coord = np.column_stack((c_x, c_y))
[4]:
# Pick the spatial kernel for the noise covariance (None means iid noise).
if noise_kernel == 'rbf':
    kernel = RBF(length_scale=noise_length_scale)
elif noise_kernel == 'matern':
    kernel = Matern(length_scale=noise_length_scale, nu=noise_nu)
else:
    kernel = None

Sigma_t = gen_cov_mat(c_x, c_y, kernel) if kernel is not None else np.eye(n)

# Blend the spatial covariance with iid noise; delta is the spatial share.
Cov_y_ystar = delta * Sigma_t
Sigma_t = delta * Sigma_t + (1 - delta) * np.eye(n)

# Cholesky factor of Sigma_Y, used below to draw correlated noise.
Chol_y = np.linalg.cholesky(Sigma_t) if kernel is not None else np.eye(n)
[5]:
# Draw the design matrix from the configured spatial process
# (falls back to iid Gaussian features for an unrecognized kernel name).
if X_kernel == 'rbf':
    X = gen_rbf_X(c_x, c_y, p)
elif X_kernel == 'matern':
    X = gen_matern_X(c_x, c_y, p, length_scale=X_length_scale, nu=X_nu)
else:
    X = np.random.randn(n, p)

# Sparse coefficient vector: s of the p entries are nonzero.
beta = np.zeros(p)
support_idx = np.random.choice(p, size=s, replace=False)
beta[support_idx] = np.random.uniform(-1, 1, size=s)

# Response with spatially correlated noise via the Cholesky factor.
# NOTE(review): no random seed is set, so these draws differ across runs.
y = X @ beta + Chol_y @ np.random.randn(n)
[6]:
# Boolean mask selecting every location as a training point.
tr_idx = np.full(n, True)

Estimate MSE for \(Y, Y^* \overset{iid}{\sim} \mathcal{N}(\mu, \Sigma_Y)\)

[7]:
# MSE estimate for a depth-3 regression tree via cp_arbitrary, with
# Y and Y* treated as independent (no Cov_y_ystar supplied).
model = DecisionTreeRegressor(max_depth=3)
ind_est = cp_arbitrary(
    model,
    X=X,
    y=y,
    tr_idx=tr_idx,
    Chol_y=Chol_y,
    alpha=.05,
    use_trace_corr=False,
)
ind_est
[7]:
1.4930723502728087

Estimate MSE for \(\begin{pmatrix} Y \\ Y^* \end{pmatrix} \sim \mathcal{N}\left(\begin{pmatrix} \mu \\ \mu \end{pmatrix}, \begin{pmatrix}\Sigma_Y & \Sigma_{Y, Y^*} \\ \Sigma_{Y^*, Y} & \Sigma_{Y} \end{pmatrix}\right)\)

[8]:
# Same estimator, but now supplying Cov_y_ystar so that Y and Y* are
# modeled as correlated rather than independent draws.
model = DecisionTreeRegressor(max_depth=3)
corr_est = cp_arbitrary(
    model,
    X=X,
    y=y,
    tr_idx=tr_idx,
    Chol_y=Chol_y,
    Cov_y_ystar=Cov_y_ystar,
    alpha=.05,
    use_trace_corr=False,
)
corr_est
[8]:
0.6283975002810962
[9]:
plotly.offline.init_notebook_mode()

# NOTE(review): the labels "a"/"b"/"test" look like placeholders — the config
# cell defines model_names / est_names that are never used; confirm whether
# they were meant to be passed here.
bar_data = [[np.array([corr_est])]]
fig = gen_model_barplots(
    bar_data,
    ["a"],
    ["b"],
    "test",
    has_test_risk=False,
    has_elev_err=False,
)
fig.show()
[ ]: