# R/functions.R In koekvall/crkr: Linear Models with Separable Correlation

#### Documented in boot_lrtest_modelest_sigmaprof_log_lik

```est_model <- function(Y, X, r, type = "kpcor", restr = "N", tol = 1e-16,
maxiter = 1000, verbose = FALSE)
{
#' Estimate a linear model with Kronecker-structured correlation or covariance matrix
#'
#' @param Y Matrix of dimension rc x n, each column is a (vectorized) response vector
#' @param X Matrix of dimension p x n, each column is a predictor vector
#' @param r The number of "rows" of the inverse vectorized columns of Y
#' @param type One of "kpcor", "mn", or "unstruct", indicating which model to fit
#' @param restr Allows the user to impose the matrix A or B to be diagonal. "N" for no restriction, "A"
#' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' @param tol Algorithm terminates when an iteration increases the log-likelihood less than tol
#' @param maxiter The maximum number of iterations the algorithm runs if not converging before
#' @param verbose Print additional info about iterates if TRUE (only for kpcor)
#' @return List with log-likelihood, coefficient estimates, covariance parameter estimates,
#' estimates of the covariance matrix of the coefficients, the number of iterations,
#' and and information = 0 if converged, 1 if not converged, 2 if A iterate was not
#' positive definite at an iteration, and 3 if B iterate was not positive definite
#' at an iteration.
#'
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
if(!is.matrix(Y)){stop("Y needs to be a rc x n matrix of responses")}
n <- ncol(Y)
if(!(is.matrix(X) && ncol(X) == n)){stop("X needs to be a matrix of predictors with n columns")}
if(!(is.atomic(r) && length(r) == 1)){stop("r needs to be a positive integer")}
if(r <=0 || (r != floor(r))){stop("r needs to be a positive integer")}
c <- nrow(Y) / r
if(c != floor(c)){stop("Incompatible dimensions of E and r")}
if(!is.character(type)){stop("type needs to be character and one of kpcor and mn")}
if(!(type %in% c("kpcor", "mn", "unstruct"))){stop("type needs to be character and one of kpcor, mn, or unstruct")}
if(!is.character(restr)){stop("restr needs to be character and one of N, A, B, or AB")}
if(!(restr %in% c("N", "A", "B", "AB"))){stop("restr needs to be character and one of N, A, B, or AB")}
if(type == "mn" && restr == "AB"){stop("Both matrices cannot be diagonal for the matrix normal")}
if(!(is.atomic(tol) && length(tol) == 1)){stop("tol needs to be a positive scalar")}
if(tol < 0){stop("tol needs to be a positive scalar")}
if(!(is.atomic(maxiter))){stop("maxiter needs to be a positive integer")}
if(!(length(maxiter) == 1 && maxiter > 0 && (maxiter == floor(maxiter)))){stop("maxiter needs to be a positive integer")}

beta_hat <- qr.coef(qr(t(X)), t(Y))
E <- Y - t(beta_hat) %*% X

restr <- (0:3)[c("N", "A", "B", "AB") == restr]
if(type == "unstruct"){
if(n < r*c + nrow(X)){
warning("n < q + p; using independence assumption")
Sigma <- diag(diag(tcrossprod(E)) / n)
}else{
Sigma <- tcrossprod(E) / n
}
loglik <- prof_log_lik(t(chol(Sigma)), E)
return(list("ll" = loglik, "coef" = beta_hat,
"varcov_coef" = kronecker(Sigma, qr.solve(tcrossprod(X))), "Sigma" = Sigma))
} else if(type == "kpcor"){
S <- tcrossprod(E) / n
fit <- est_sigma_rcpp(E, diag(S), r, restr, tol, maxiter, verbose)
if(fit\$info %in% c(0, 1)){
Sigma_c <- kronecker(fit\$A_c, fit\$B_c)
Sigma_c <- sweep(Sigma_c, 1, fit\$D, "/")
loglik <- prof_log_lik(Sigma_c, E)
Sigma <- tcrossprod(Sigma_c)
A <- tcrossprod(fit\$A_c)
B <- tcrossprod(fit\$B_c)
} else{
Sigma <- NA
loglik <- NA
A <- NA
B <- NA
}
return(list("ll" = loglik, "coef" = beta_hat, "A" = A, "B" = B, "D" = fit\$D,
"varcov_coef" = kronecker(Sigma, qr.solve(tcrossprod(X))),
"iter" = fit\$iter, info = fit\$info))
} else if (type == "mn"){
fit <- est_sigma_mn_rcpp(E, r, restr, tol, maxiter)
loglik <- prof_log_lik(kronecker(t(chol(fit\$A)), t(chol(fit\$B))), E)
return(list("ll" = loglik, "coef" = beta_hat, "A" = fit\$A, "B" = fit\$B,
"varcov_coef" = kronecker(kronecker(fit\$A, fit\$B), qr.solve(tcrossprod(X))), "iter" = fit\$iter))
} else{
stop("This should not happen")
}
}

#' Estimate the covariance matrix Sigma = C(A otimes B)C using kpcor or MN algorithm
#'
#' @param E Matrix of dimension rc x n, each column is a (vectorized) residual vector
#' @param r The number of "rows" of the inverse vectorized columns of E
#' @param type One of "kpcor" and "mn", indicating which algorithm to use
#' @param restr Allows the user to impose the matrix A or B to be diagonal. "N" for no restriction, "A"
#' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' @param tol Algorithm terminates when an iteration increases the log-likelihood less than tol
#' @param maxiter The maximum number of iterations the algorithm runs if not converging before
#' @param verbose Print additional info about iterates if TRUE (only for kpcor)
#' @return List with MLEs of the parameters A, B and D (D = inv(C), only for kpcor), log-likelihood at the final iterates,
#' and the numver of iterations performed.
#'
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
est_sigma <- function(E, r, type = "kpcor", restr= "N", tol = 1e-16, maxiter = 1000,
verbose = FALSE)
{
if(!is.matrix(E)){stop("E needs to be a rc x n matrix of residuals")}
n <- ncol(E)
if(!(is.atomic(r) && length(r) == 1)){stop("r needs to be a positive integer")}
if(r <=0 || (r != floor(r))){stop("r needs to be a positive integer")}
c <- nrow(E) / r
if(c != floor(c)){stop("Incompatible dimensions of E and r")}
if(!is.character(type)){stop("type needs to be character and one of kpcor and mn")}
if(!(type %in% c("kpcor", "mn"))){stop("type needs to be character and one of kpcor and mn")}
if(!is.character(restr)){stop("restr needs to be character and one of N, A, B, or AB")}
if(!(restr %in% c("N", "A", "B", "AB"))){stop("restr needs to be character and one of N, A, B, or AB")}
if(type == "mn" && restr == "AB"){stop("Both matrices cannot be diagonal for the matrix normal")}
if(!(is.atomic(tol) && length(tol) == 1)){stop("tol needs to be a positive scalar")}
if(tol < 0){stop("tol needs to be a positive scalar")}
if(!(is.atomic(maxiter))){stop("maxiter needs to be a positive integer")}
if(!(length(maxiter) == 1 && maxiter > 0 && (maxiter == floor(maxiter)))){stop("maxiter needs to be a positive integer")}

restr <- (0:3)[c("N", "A", "B", "AB") == restr]

if(type == "kpcor"){
S <- E %*% t(E) / n
fit <- est_sigma_rcpp(E, diag(S), r, restr, tol, maxiter, verbose)
} else{
fit <- est_sigma_mn_rcpp(E, r, restr, tol, maxiter)
}
return(fit)
}

#' Parametric bootstrap of the LRT for covariance matrix restriction
#'
#' @param Sigma0_c Cholesky root (lower tri.) of the null hypothesis covariance matrix: Sigma0 = C0 (A0 otimes B0) C0
#' @param X Matrix of dimension p x n, each column is a predictor vector
#' @param coef_mat Matrix of dimension p x cr of regression coefficients
#' @param r The numer of "rows" of the inverse vectorized columns of E
#' @param type Vector of length two, each element is one of "kpcor", "mn", or "unstruct", indicating the type of
#' the null (first element) and alternative (second element) hypothesis covariance matrices.
#' @param restr Vector of length 2, the first element indicates restrictions on the null hypothesis matrix
#' and the second indicates restrictions on the alternatice hypothesis matrix. "N" for no restriction, "A"
#' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' @param n_boot Number of bootstrap samples generated
#' @param n_cores Number of cores used for the boostrapping
#' @param tol Fitting algorithms terminate when an iteration increases the log-likelihood less than tol
#' @param maxiter The maximum number of iterations the algorithm runs if not converging before
#' @return Vector of bootstrapped LRTs
#'
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
boot_lrt <- function(Sigma0_c, X, coef_mat, r, type, restr, n_boot = 1000, n_cores = 4,
tol = 1e-10, maxiter = 1000)
{
if(!is.matrix(Sigma0_c) && ncol(Sigma0_c) == nrow(Sigma0_c)){
stop("Sigma0_c should be lower triangular Cholesky root")
}

if(!(is.atomic(r) && length(r) == 1 && r > 0 && (ncol(Sigma0_c) / r) == floor(ncol(Sigma0_c) / r))){
stop("r should be a positive integer that divides the dimension of Sigma0_c")
}
c <- ncol(Sigma0_c) / r

if(!is.matrix(X)){stop("X needs to be a matrix")}
n <- ncol(X)
p <- nrow(X)

if(!(is.matrix(coef_mat) && ncol(coef_mat) == c*r && nrow(coef_mat) == p)){
stop("coef_mat should has to be a matrix conformable with X and Sigma0_c")
}

if(!(is.character(type) && length(type) == 2 && sum(type %in% c("kpcor", "mn", "unstruct")) == 2)){
stop("type should be vector of 2 characters, either 'kpcor', 'mn', or 'unstruct'")
}

if(type[2] == "mn" && type[1] == "kpcor"){stop("Models not nested")}
if(type[1] == "unstruct"){stop("Models not nested")}

if(!(is.character(restr) && length(restr) == 2 && sum(restr %in% c("N", "A", "B", "AB")) == 2)){
stop("restr should be vector of 2 characters, one of 'N', 'A', 'B', or 'AB'")
}
if(restr[2] != "N" && type[2] == "unstruct"){stop("No restrictions possible for unstructured covariance matrix")}
if(restr[2] == "A" && !(restr[1] %in% c("A", "AB"))){stop("Models not nested")}
if(restr[2] == "B" && !(restr[1] %in% c("B", "AB"))){stop("Models not nested")}
if(restr[2] == "AB"){stop("Models not nested")}
if(restr[2] == restr[1] && type[1] == type[2]){stop("Models are identical")}

if(!(is.atomic(n_boot) && length(n_boot) == 1 && n_boot > 0 && n_boot == floor(n_boot))){
stop("n_boot should be a positive integer")
}

if(!(is.atomic(n_cores) && length(n_cores) == 1 && n_cores > 0 && n_cores == floor(n_cores))){
stop("n_cores should be a positive integer")
}

if(!(is.atomic(tol) && length(tol) == 1 && tol > 0)){
stop("tol should be a positive scalar")
}

if(!(is.atomic(maxiter) && length(maxiter) == 1 && maxiter > 0 && maxiter == floor(maxiter))){
stop("maxiter should be a positive integer")
}

restr <- c((0:3)[c("N", "A", "B", "AB") == restr[1]], (0:3)[c("N", "A", "B", "AB") == restr[2]])
if(type[2] == "mn"){
boot_fun <- function(.){
lrt_stat = NA
try({
Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
fit_null <- est_sigma_mn_rcpp(E_boot, r, restr[1], tol = tol, maxiter = maxiter)
fit_alt <- est_sigma_mn_rcpp(E_boot, r, restr[2], tol = tol, maxiter = maxiter)
lrt_stat <- -2 * (fit_null\$ll - fit_alt\$ll)}, silent = TRUE)
return(lrt_stat)
}
} else if(type[1] == "mn" && type[2] == "kpcor"){
boot_fun <- function(.){
lrt_stat = NA
try({
Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
S <- tcrossprod(E_boot) / n
fit_null <- est_sigma_mn_rcpp(E_boot, r, restr[1], tol, maxiter)
fit_alt <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
lrt_stat <- -2 * (fit_null\$ll - fit_alt\$ll)}, silent = TRUE)
return(lrt_stat)
}
} else if(type[1] == "mn" && type[2] == "unstruct"){
boot_fun <- function(.){
lrt_stat = NA
try({
Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
if(n < p + r * c){
S <- diag(diag(tcrossprod(E_boot)) / n)
ll_alt <- prof_log_lik(diag(sqrt(diag(S))), E_boot)
} else{
S <- tcrossprod(E_boot) / n
ll_alt <- prof_log_lik(t(chol(S)), E_boot)
}
fit_null <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
lrt_stat <- -2 * (fit_null\$ll - ll_alt)}, silent = TRUE)
return(lrt_stat)
}
} else if(type[1] == "kpcor" && type[2] == "unstruct"){
boot_fun <- function(.){
lrt_stat <- NA
try({
Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
if(n < p + r * c){
S <- diag(diag(tcrossprod(E_boot)) / n)
ll_alt <- prof_log_lik(diag(sqrt(diag(S))), E_boot)
} else{
S <- tcrossprod(E_boot) / n
ll_alt <- prof_log_lik(t(chol(S)), E_boot)
}
fit_null <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
lrt_stat <- -2 * (fit_null\$ll - ll_alt)}, silent = TRUE)
return(lrt_stat)
}
} else{
boot_fun <- function(.){
lrt_stat <- NA
try({
Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
S <- tcrossprod(E_boot) / n
fit_null <- est_sigma_rcpp(E_boot, diag(S), r, restr[1], tol = tol, maxiter = maxiter, verbose = FALSE)
fit_alt <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
lrt_stat <- -2 * (fit_null\$ll - fit_alt\$ll)}, silent = TRUE)
return(lrt_stat)
}
}
return(unlist(parallel::mclapply(1:n_boot, boot_fun, mc.cores = n_cores)))
}

prof_log_lik <- function(Sigma_c, E)
#' Calculate (profile) log-likelihood for Sigma_c, i.e. the multivariate normal likelihood
#' proportional to: -log(determinant(Sigma)) - trace{t(E) solve(Sigma) E},
#'  where Sigma =  Sigma_c t(Sigma_c).
#'
#' @param Sigma_c An rc x rc lower triangular Cholesky root to evaluate the likelihood at
#' @param E An rc x n matrix of residual vectors, where n is the number of such vectors
#' @return The likelihood value evaluated at the arguments supplied, *including constants*
#'
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
{
if(!(is.matrix(Sigma_c) && ncol(Sigma_c) == nrow(Sigma_c) && sum(Sigma_c[upper.tri(Sigma_c)]) == 0 &&
sum(diag(Sigma_c) < 0)) == 0){stop("Sigma_c should be a lower triangular Cholesky root")}
if(!(is.matrix(E) && nrow(E) == nrow(Sigma_c))){stop("E should be an rc x n matrix of residuals")}
return(prof_log_lik_rcpp(Sigma_c, E))
}

#### UNUSED FUNCTIONS
#' #' Parametric bootstrap of all model parameters
#' #'
#' #' @param Sigma0_c Cholesky root (lower tri.) of the null hypothesis covariance
#' #' matrix: Sigma0 = C0 (A0 otimes B0) C0
#' #' @param X Matrix of dimension p x n, each column is a predictor vector
#' #' @param coef_mat0 Matrix of dimension p x cr of regression coefficients in
#' #' the null hypothesis model
#' #' @param n_r The numer of "rows" of the inverse vectorized columns of Y
#' #' @param type One of "kpcor" and "mn", indicating which algorithm to use
#' #' @param restr Allows the user to impose the matrix A or B to be diagonal. "N" for no restriction, "A"
#' #' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' #' @param n_boot Number of bootstrap samples generated
#' #' @param n_cores Number of cores used for the boostrapping
#' #' @param tol Fitting algorithms terminate when an iteration increases the
#' #' log-likelihood less than tol
#' #' @param maxiter The maximum number of iterations the algorithm runs if not
#' #' converging before
#' #' @return List of lists with bootstrapped parameter estimates
#' #'
#' #' @export
#' #' @useDynLib kpcor
#' #' @importFrom Rcpp evalCpp
#' boot_params <- function(Sigma0_c, X, coef_mat0, n_r, type = "kpcor", restr = "N",
#'   n_boot = 1000, n_cores = 4, tol = 1e-10, maxiter = 1000)
#' {
#'   n_p <- nrow(X)
#'   n <- ncol(X)
#'   n_c <- ncol(Sigma0_c) / n_r
#'   # Bootstrap
#'   Y_hat0 <- crossprod(coef_mat0, X)
#'   if(type == "kpcor"){
#'     boot_fun <- function(xxx){
#'       Y_b <- Y_hat0 + Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n)
#'       ret <- list(matrix(nrow = n_p, ncol = n_c * n_r), matrix(nrow = n_p, ncol = n_p),
#'         matrix(nrow = n_r, ncol = n_r), rep(NA, n_r * n_c))
#'       try({
#'         fit_b <- kpcor::est_model(Y_b, X, n_r, restr = restr, tol = tol, maxiter = maxiter)
#'         ret[[1]] <- fit_b\$coef
#'         ret[[2]] <- fit_b\$A
#'         ret[[3]] <- fit_b\$B
#'         ret[[4]] <- fit_b\$D
#'       })
#'       return(ret)
#'     }
#'   } else{
#'     boot_fun <- function(xxx){
#'       Y_b <- Y_hat0 + Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n)
#'       ret <- list(matrix(nrow = n_p, ncol = n_c * n_r), matrix(nrow = n_p, ncol = n_p),
#'         matrix(nrow = n_r, ncol = n_r))
#'       try({
#'         fit_b <- kpcor::est_model(Y_b, X, n_r, type = "mn", restr = restr, tol = tol,
#'           maxiter = maxiter)
#'         ret[[1]] <- fit_b\$coef
#'         ret[[2]] <- fit_b\$A
#'         ret[[3]] <- fit_b\$B
#'       })
#'       return(ret)
#'     }
#'   }
#'   out <- parallel::mclapply(1:n_boot, boot_fun, mc.cores = n_cores)
#'   return(out)
#' }
```
koekvall/crkr documentation built on April 18, 2018, 11:17 p.m.