Nothing
# compute OLS prediction from resp ~ pred - 1
project_ols <- function(resp, pred) {
as.matrix(lm(resp ~ pred - 1)$fitted.values)
}
# compute OLS coefficients from resp ~ pred - 1
get_ols_coefs <- function(resp, pred) {
as.matrix(lm(resp ~ pred - 1)$coefficients)
}
# returns the functions used to compute the conditional expectations
# and specifies if several of them are splines. If several are splines,
# the computation of the conditional expectations can be performed in
# a faster way.
get_condexp_funcs <- function(cond_method, params) {
# IF some of the nuisance parameters are estimated with splines and
# have the same additional parameters stored in params, THEN estimation can
# be performed in a faster way.
all_condexp_spline <- (cond_method == "spline")
params_identical <- FALSE
if (sum(all_condexp_spline) == 3) {
params_identical <-
identical(params[[1]], params[[2]]) &&
identical(params[[1]], params[[3]]) &&
identical(params[[2]], params[[3]])
} else if ((sum(all_condexp_spline) >= 2) && !params_identical) {
params_identical <- rep(FALSE, 3)
names(params_identical) <- c("1-2", "1-3", "2-3")
for (i in seq_len(3)) {
test_params <- as.numeric(strsplit(names(params_identical)[i], "-")[[1]])
if (!is.atomic(cond_method[[test_params[1]]]) ||
!is.atomic(cond_method[[test_params[2]]])) {
next
}
params_identical[i] <-
identical(params[[test_params[1]]], params[[test_params[2]]]) &&
(cond_method[[test_params[1]]] == "spline") &&
(cond_method[[test_params[2]]] == "spline")
}
}
# get functions to estimate conditional expectations
if (is.atomic(cond_method)) {
condexp_aa <- get(paste("condexp_", cond_method[1], sep = ""))
condexp_xx <- get(paste("condexp_", cond_method[2], sep = ""))
condexp_yy <- get(paste("condexp_", cond_method[3], sep = ""))
} else {
condexp_aa <- cond_method[[1]]
if (is.atomic(condexp_aa)) {
condexp_aa <- get(paste("condexp_", condexp_aa, sep = ""))
}
condexp_xx <- cond_method[[2]]
if (is.atomic(condexp_xx)) {
condexp_xx <- get(paste("condexp_", condexp_xx, sep = ""))
}
condexp_yy <- cond_method[[3]]
if (is.atomic(condexp_yy)) {
condexp_yy <- get(paste("condexp_", condexp_yy, sep = ""))
}
}
list(params_identical = params_identical,
all_condexp_spline = all_condexp_spline,
condexp_aa = condexp_aa,
condexp_xx = condexp_xx,
condexp_yy = condexp_yy)
}
# computes the residuals on index set I with conditional
# expectations fitted on index set Ic
residuals_samplesplit <- function(aa, ww, xx, yy, I, Ic,
cond_func_all, params) {
# split data into two folds
ww_fit <- ww[Ic, , drop = FALSE]
aa_fit <- aa[Ic, , drop = FALSE]
xx_fit <- xx[Ic, , drop = FALSE]
yy_fit <- yy[Ic, , drop = FALSE]
ww_predict <- ww[I, , drop = FALSE]
aa_predict <- aa[I, , drop = FALSE]
xx_predict <- xx[I, , drop = FALSE]
yy_predict <- yy[I, , drop = FALSE]
dim_X <- dim(xx_fit)
nn <- dim_X[1]
d <- dim_X[2]
# compute conditional expectations. If all 3 conditional expectations
# are fitted with splines, a shortcut is possible. This is encoded by
# the argument cond_func_all$all_condexp_spline. Additionally,
# it needs to be checked if the additional sets of parameters are equal,
# which is done via the variable cond_func_all$params_identical.
if ((sum(cond_func_all$all_condexp_spline) == 3) &&
(length(cond_func_all$params_identical) == 1) &&
cond_func_all$params_identical[1]) {
egW_hat_all <- condexp_spline(aa_fit = aa_fit,
xx_fit = xx_fit,
yy_fit = yy_fit,
ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[1]])
eAgW_hat <- egW_hat_all$eAgW_hat
eXgW_hat <- egW_hat_all$eXgW_hat
eYgW_hat <- egW_hat_all$eYgW_hat
} else if (sum(cond_func_all$all_condexp_spline) >= 2 &&
sum(cond_func_all$params_identical) == 1) {
if (cond_func_all$params_identical[1]) {
egW_hat_all <- condexp_spline(aa_fit = aa_fit,
xx_fit = xx_fit,
ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[1]])
eAgW_hat <- egW_hat_all$eAgW_hat
eXgW_hat <- egW_hat_all$eXgW_hat
eYgW_hat <- cond_func_all$condexp_yy(yy_fit = yy_fit, ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[3]])
} else if (cond_func_all$params_identical[2]) {
egW_hat_all <- condexp_spline(aa_fit = aa_fit,
yy_fit = yy_fit,
ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[1]])
eAgW_hat <- egW_hat_all$eAgW_hat
eYgW_hat <- egW_hat_all$eYgW_hat
eXgW_hat <- cond_func_all$condexp_xx(yy_fit = xx_fit,
ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[2]])
} else if (cond_func_all$params_identical[3]) {
egW_hat_all <- condexp_spline(xx_fit = xx_fit,
yy_fit = yy_fit,
ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[2]])
eXgW_hat <- egW_hat_all$eXgW_hat
eYgW_hat <- egW_hat_all$eYgW_hat
eAgW_hat <- cond_func_all$condexp_aa(yy_fit = aa_fit,
ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[1]])
}
} else {
eAgW_hat <- cond_func_all$condexp_aa(yy_fit = aa_fit, ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[1]])
eXgW_hat <- cond_func_all$condexp_xx(yy_fit = xx_fit, ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[2]])
eYgW_hat <- cond_func_all$condexp_yy(yy_fit = yy_fit, ww_fit = ww_fit,
ww_predict = ww_predict,
params = params[[3]])
}
# compute residuals
rA <- aa_predict - eAgW_hat
rX <- xx_predict - eXgW_hat
rY <- yy_predict - eYgW_hat
# compute projected residuals
# rX_gW = OLS rX ~ rA; rY_gW = OLS rY ~ rA
list(rX = rX,
rY = rY,
rA = rA,
rX_gW = tryCatch_WEM(project_ols(resp = rX, pred = rA),
matrix(NA, ncol = d, nrow = nn))$value,
rY_gW = tryCatch_WEM(project_ols(resp = rY, pred = rA),
matrix(NA, ncol = 1, nrow = nn))$value)
}
# computes all residual terms needed for DML and the regularization methods.
# This function performs the sample splitting step, where the observations
# are splitted into K sets of equal size if possible.
beta_get_matrices <- function(aa, ww, xx, yy, K, gamma, DML, do_DML, do_regDML,
cond_func_all, params) {
# dimensions of the data
n <- length(yy)
# the residual quantities Ra, Rx, and Ry need to be stored to estimate the
# asymptotic variance-covariance matrices afterwards
all_residuals <- all_residuals_gW <- vector(mode = "list", length = K)
n_reorder <- sample(seq_len(n), n, replace = FALSE)
folds <- if (K >= 2) {
cut(n_reorder, breaks = K, labels = FALSE)
} else {
# If K == 1, have same train and test set.
rep(1, n)
} # end folds
for (kk in seq_len(K)) {
I <- seq_len(n)[folds == kk] # test set: evaluate conditional expectations
Ic <- if (K >= 2) { # training set: estimate conditional expectations
setdiff(seq_len(n), I)
} else {
seq_len(n) # if K == 1, have same train and test set
} # end Ic
# train conditional expectations with Ic, and evaluate residuals on I.
# cond_func_all = learning method for conditional expectations
# params = additional parameters to estimate conditional expectations
residuals_samplesplit_hat <-
residuals_samplesplit(aa = aa, ww = ww,
xx = xx, yy = yy,
I = I, Ic = Ic,
cond_func_all = cond_func_all,
params = params)
all_residuals[[kk]] <- residuals_samplesplit_hat[c("rA", "rX", "rY")]
all_residuals_gW[[kk]] <- residuals_samplesplit_hat[c("rX_gW", "rY_gW")]
} # end for (kk in seq_len(K))
# return results
list(all_residuals = all_residuals,
all_residuals_gW = all_residuals_gW)
}
# estimates linear coefficient beta_0 with DML1: first estimate K individual
# estimators, then average them.
get_beta_DML1 <- function(all_residuals_gW) {
K <- length(all_residuals_gW)
d <- dim(all_residuals_gW[[1]]$rX_gW)[2]
func_tmp <- function(k, all_residuals_gW, d) {
tryCatch_WEM(get_ols_coefs(pred = all_residuals_gW[[k]]$rX_gW,
resp = all_residuals_gW[[k]]$rY_gW),
matrix(NA, nrow = d, ncol = 1))$value
}
cbind(apply(do.call(cbind,
lapply(seq_len(K),
func_tmp,
all_residuals_gW = all_residuals_gW,
d = d)),
1, mean))
}
# estimates beta_0 with DML2: average quantities first, then compute
# one final estimator with the aggregated quantities
get_beta_DML2 <- function(all_residuals_gW) {
K <- length(all_residuals_gW)
dim_rX <- dim(all_residuals_gW[[1]]$rX_gW)
d <- dim_rX[2]
n <- dim_rX[1]
mat <- matrix(0, nrow = d, ncol = d)
vec <- matrix(0, nrow = d, ncol = 1)
for (k in seq_len(K)) {
mat <- mat + crossprod(all_residuals_gW[[k]]$rX_gW, all_residuals_gW[[k]]$rX_gW) / n
vec <- vec + crossprod(all_residuals_gW[[k]]$rX_gW, all_residuals_gW[[k]]$rY_gW) / n
}
tryCatch_WEM(qr.solve(mat, vec), matrix(NA, nrow = d, ncol = 1))$value
}
# helper function to estimate beta_0 with regularized DML1 method:
# for some given gamma value, compute the DML1-version of the regularized
# parameter. That is first compute K individual estimators, and then aggregate.
# This function only computes one of the K individual estimators for
# one given value of gamma. The K corresponds to the number of sample splits.
combine_regDML1_regular_ols <- function(gamma, rX, rX_gW, rY, rY_gW, n) {
rX_gamma <- rX + (sqrt(gamma) - 1) * rX_gW
tryCatch_WEM(get_ols_coefs(pred = rX_gamma,
resp = rY + (sqrt(gamma) - 1) * rY_gW),
matrix(NA, ncol = 1, nrow = ncol(as.matrix(rX_gamma))))$value
}
# estimate beta_0 with regularized DML1 method:
# for some given gamma value, compute the DML1-version of the regularized
# parameter. That is first compute K individual estimators, and then aggregate.
get_beta_regDML1 <- function(all_residuals, all_residuals_gW, gamma) {
K <- length(all_residuals_gW)
dim_rX <- dim(all_residuals_gW[[1]]$rX_gW)
d <- dim_rX[2]
n <- dim_rX[1]
gammalen <- length(gamma)
beta_regDML1 <- matrix(0, nrow = d, ncol = gammalen)
for (k in seq_len(K)) {
beta_regDML1 <-
beta_regDML1 +
tryCatch_WEM(do.call(cbind,
lapply(gamma,
combine_regDML1_regular_ols,
rX = all_residuals[[k]]$rX,
rX_gW = all_residuals_gW[[k]]$rX_gW,
rY = all_residuals[[k]]$rY,
rY_gW = all_residuals_gW[[k]]$rY_gW,
n = n)),
matrix(NA, nrow = d, ncol = gammalen))$value
}
beta_regDML1 / K
}
# helper function to estimate beta_0 with regularized DML2 method:
# for some given value of gamma, compute the quantities of one of the K
# aggregation steps. These quantities are aggregated in the main function
# to form the final regularized estimator
get_mat_vec_regDML2 <- function(gamma, rX, rX_gW, rY, rY_gW, n) {
rX_gamma <- rX + (sqrt(gamma) - 1) * rX_gW
list(mat = crossprod(rX_gamma, rX_gamma) / n,
vec = crossprod(rX_gamma, rY + (sqrt(gamma) - 1) * rY_gW) / n)
}
# estimate beta_0 with regularized DML2 method:
# Compute the regularized estimators for all gamma values with the DML2 method.
# That is, first aggregate the K individual quantities, and then compute
# the final estimator. A final estimator is computed for all values of gamma.
get_beta_regDML2 <- function(all_residuals, all_residuals_gW, gamma) {
K <- length(all_residuals_gW)
dim_rX <- dim(all_residuals_gW[[1]]$rX_gW)
d <- dim_rX[2]
n <- dim_rX[1]
gammalen <- length(gamma)
# the dimensions 1:d in the second entry stores the matrix; the dimension
# d+1 in the second entry stores the vector
mat_vec_gamma <- array(0, dim = c(d, d + 1, gammalen))
for (k in seq_len(K)) {
mat_vec <-
sapply(gamma,
get_mat_vec_regDML2,
rX = all_residuals[[k]]$rX,
rX_gW = all_residuals_gW[[k]]$rX_gW,
rY = all_residuals[[k]]$rY,
rY_gW = all_residuals_gW[[k]]$rY_gW,
n = n)
mat_vec_gamma[, seq_len(d), ] <-
mat_vec_gamma[, seq_len(d), , drop = FALSE] +
array(unlist(mat_vec[1, ]), dim = c(d, d, gammalen))
mat_vec_gamma[, d + 1, ] <-
mat_vec_gamma[, d + 1, , drop = FALSE] +
array(unlist(mat_vec[2, ]), dim = c(d, 1, gammalen))
}
func_tmp <- function(x) {
qr.solve(x[, seq_len(d), drop = FALSE], x[, d + 1, drop = FALSE])
}
tryCatch_WEM(rbind(apply(mat_vec_gamma, 3, func_tmp)),
matrix(NA, nrow = d, ncol = gammalen))$value
}
# returns all point estimates and variances for DML and the whole gamma grid
beta_crossfit <- function(aa, ww, xx, yy, K, gamma, DML, do_DML, do_regDML,
cond_func_all, params) {
# issue a warning if sample splitting is omitted, that is, K = 1
if (K == 1) {
warning("no sample splitting performed due to K = 1")
}
# dimensions of the data
d <- ncol(xx)
gammalen <- length(gamma)
# get matrices and vectors with which the coefficient estimate
# is computed
all_matrices <- beta_get_matrices(aa = aa, ww = ww, xx = xx, yy = yy,
K = K, gamma = gamma, DML = DML,
do_DML = do_DML, do_regDML = do_regDML,
cond_func_all = cond_func_all,
params = params)
# compute estimators, variance-covariance matrices,
# and prepare results to return
to_return <- list()
# DML method (non-regularized)
if (do_DML) {
# point estimator
beta_DML <- if (DML == "DML1") { # DML1
get_beta_DML1(all_residuals_gW = all_matrices$all_residuals_gW)
} else { # DML2
get_beta_DML2(all_residuals_gW = all_matrices$all_residuals_gW)
} # end beta_DML
# get asymptotic variance of the DML point estimator.
# If necessary, compute a more stable version of it.
as_var_DML <-
tryCatch_WEM(sigma2_DML(all_residuals = all_matrices$all_residuals,
betahat = beta_DML),
matrix(NA, nrow = d, ncol = d))
# return NAs above if matrix inversion in sigma2_DML was infeasible
# if matrix inverion in the function call of sigma2_DML was not feasible,
# try if a more stable version of the asymptotic variance matrix can be
# computed.
if (!is.null(as_var_DML$error)) {
as_var_DML <-
tryCatch_WEM(sigma2_DML_stable(all_residuals = all_matrices$all_residuals,
betahat = beta_DML),
matrix(NA, nrow = d, ncol = d))
if (is.null(as_var_DML$error) & !is.null(as_var_DML$warning)) {
warning(as_var_DML$warning)
}
}
as_var_DML <- as_var_DML$value
# save results to return
to_return <- c(to_return,
list(beta_DML = as.matrix(beta_DML),
as_var_DML = as_var_DML))
}
# regDML: here, this is used for regsDML, regDMLopt and regDML for all
# gamma values
if (do_regDML) {
# point estimator for all gamma values
beta_gamma <- if (DML == "DML1") { # DML1
get_beta_regDML1(all_residuals = all_matrices$all_residuals,
all_residuals_gW = all_matrices$all_residuals_gW,
gamma = gamma)
} else { # DML2
get_beta_regDML2(all_residuals = all_matrices$all_residuals,
all_residuals_gW = all_matrices$all_residuals_gW,
gamma = gamma)
} # end beta_gamma
# asymptotic variance for all gamma values
as_var_gamma <-
array(unlist(lapply(seq_len(gammalen),
function(i) sigma2_gamma(all_residuals = all_matrices$all_residuals,
betahat = beta_gamma[, i, drop = FALSE],
gamma = gamma[i]))),
dim = c(d, d, gammalen))
# update return object
to_return <- c(to_return,
list(beta_gamma = beta_gamma, as_var_gamma = as_var_gamma))
}
to_return
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.