R/functions.R

Defines functions prof_log_lik boot_lrt est_sigma est_model

Documented in boot_lrt est_model est_sigma prof_log_lik

est_model <- function(Y, X, r, type = "kpcor", restr = "N", tol = 1e-16,
  maxiter = 1000, verbose = FALSE)
{
#' Estimate a linear model with Kronecker-structured correlation or covariance matrix
#'
#' @param Y Matrix of dimension rc x n, each column is a (vectorized) response vector
#' @param X Matrix of dimension p x n, each column is a predictor vector
#' @param r The number of "rows" of the inverse vectorized columns of Y
#' @param type One of "kpcor", "mn", or "unstruct", indicating which model to fit
#' @param restr Allows the user to impose the matrix A or B to be diagonal. "N" for no restriction, "A"
#' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' @param tol Algorithm terminates when an iteration increases the log-likelihood less than tol
#' @param maxiter The maximum number of iterations the algorithm runs if not converging before
#' @param verbose Print additional info about iterates if TRUE (only for kpcor)
#' @return List with log-likelihood, coefficient estimates, covariance parameter estimates,
#' estimates of the covariance matrix of the coefficients, the number of iterations,
#' and and information = 0 if converged, 1 if not converged, 2 if A iterate was not
#' positive definite at an iteration, and 3 if B iterate was not positive definite
#' at an iteration.
#'
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
  if(!is.matrix(Y)){stop("Y needs to be a rc x n matrix of responses")}
  n <- ncol(Y)
  if(!(is.matrix(X) && ncol(X) == n)){stop("X needs to be a matrix of predictors with n columns")}
  if(!(is.atomic(r) && length(r) == 1)){stop("r needs to be a positive integer")}
  if(r <=0 || (r != floor(r))){stop("r needs to be a positive integer")}
  c <- nrow(Y) / r
  if(c != floor(c)){stop("Incompatible dimensions of E and r")}
  if(!is.character(type)){stop("type needs to be character and one of kpcor and mn")}
  if(!(type %in% c("kpcor", "mn", "unstruct"))){stop("type needs to be character and one of kpcor, mn, or unstruct")}
  if(!is.character(restr)){stop("restr needs to be character and one of N, A, B, or AB")}
  if(!(restr %in% c("N", "A", "B", "AB"))){stop("restr needs to be character and one of N, A, B, or AB")}
  if(type == "mn" && restr == "AB"){stop("Both matrices cannot be diagonal for the matrix normal")}
  if(!(is.atomic(tol) && length(tol) == 1)){stop("tol needs to be a positive scalar")}
  if(tol < 0){stop("tol needs to be a positive scalar")}
  if(!(is.atomic(maxiter))){stop("maxiter needs to be a positive integer")}
  if(!(length(maxiter) == 1 && maxiter > 0 && (maxiter == floor(maxiter)))){stop("maxiter needs to be a positive integer")}

  beta_hat <- qr.coef(qr(t(X)), t(Y))
  E <- Y - t(beta_hat) %*% X

  restr <- (0:3)[c("N", "A", "B", "AB") == restr]
  if(type == "unstruct"){
    if(n < r*c + nrow(X)){
      warning("n < q + p; using independence assumption")
      Sigma <- diag(diag(tcrossprod(E)) / n)
    }else{
      Sigma <- tcrossprod(E) / n
    }
    loglik <- prof_log_lik(t(chol(Sigma)), E)
    return(list("ll" = loglik, "coef" = beta_hat,
      "varcov_coef" = kronecker(Sigma, qr.solve(tcrossprod(X))), "Sigma" = Sigma))
  } else if(type == "kpcor"){
    S <- tcrossprod(E) / n
    fit <- est_sigma_rcpp(E, diag(S), r, restr, tol, maxiter, verbose)
    if(fit$info %in% c(0, 1)){
      Sigma_c <- kronecker(fit$A_c, fit$B_c)
      Sigma_c <- sweep(Sigma_c, 1, fit$D, "/")
      loglik <- prof_log_lik(Sigma_c, E)
      Sigma <- tcrossprod(Sigma_c)
      A <- tcrossprod(fit$A_c)
      B <- tcrossprod(fit$B_c)
    } else{
      Sigma <- NA
      loglik <- NA
      A <- NA
      B <- NA
    }
    return(list("ll" = loglik, "coef" = beta_hat, "A" = A, "B" = B, "D" = fit$D,
      "varcov_coef" = kronecker(Sigma, qr.solve(tcrossprod(X))),
      "iter" = fit$iter, info = fit$info))
  } else if (type == "mn"){
    fit <- est_sigma_mn_rcpp(E, r, restr, tol, maxiter)
    loglik <- prof_log_lik(kronecker(t(chol(fit$A)), t(chol(fit$B))), E)
    return(list("ll" = loglik, "coef" = beta_hat, "A" = fit$A, "B" = fit$B,
      "varcov_coef" = kronecker(kronecker(fit$A, fit$B), qr.solve(tcrossprod(X))), "iter" = fit$iter))
  } else{
    stop("This should not happen")
  }
}

#' Estimate the covariance matrix Sigma = C(A otimes B)C using kpcor or MN algorithm
#'
#' @param E Matrix of dimension rc x n, each column is a (vectorized) residual vector
#' @param r The number of "rows" of the inverse vectorized columns of E
#' @param type One of "kpcor" and "mn", indicating which algorithm to use
#' @param restr Allows the user to impose the matrix A or B to be diagonal. "N" for no restriction, "A"
#' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' @param tol Algorithm terminates when an iteration increases the log-likelihood less than tol
#' @param maxiter The maximum number of iterations the algorithm runs if not converging before
#' @param verbose Print additional info about iterates if TRUE (only for kpcor)
#' @return List with MLEs of the parameters A, B and D (D = inv(C), only for kpcor), log-likelihood at the final iterates,
#' and the numver of iterations performed.
#'
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
est_sigma <- function(E, r, type = "kpcor", restr= "N", tol = 1e-16, maxiter = 1000, 
  verbose = FALSE)
{
  if(!is.matrix(E)){stop("E needs to be a rc x n matrix of residuals")}
  n <- ncol(E)
  if(!(is.atomic(r) && length(r) == 1)){stop("r needs to be a positive integer")}
  if(r <=0 || (r != floor(r))){stop("r needs to be a positive integer")}
  c <- nrow(E) / r
  if(c != floor(c)){stop("Incompatible dimensions of E and r")}
  if(!is.character(type)){stop("type needs to be character and one of kpcor and mn")}
  if(!(type %in% c("kpcor", "mn"))){stop("type needs to be character and one of kpcor and mn")}
  if(!is.character(restr)){stop("restr needs to be character and one of N, A, B, or AB")}
  if(!(restr %in% c("N", "A", "B", "AB"))){stop("restr needs to be character and one of N, A, B, or AB")}
  if(type == "mn" && restr == "AB"){stop("Both matrices cannot be diagonal for the matrix normal")}
  if(!(is.atomic(tol) && length(tol) == 1)){stop("tol needs to be a positive scalar")}
  if(tol < 0){stop("tol needs to be a positive scalar")}
  if(!(is.atomic(maxiter))){stop("maxiter needs to be a positive integer")}
  if(!(length(maxiter) == 1 && maxiter > 0 && (maxiter == floor(maxiter)))){stop("maxiter needs to be a positive integer")}

  restr <- (0:3)[c("N", "A", "B", "AB") == restr]

  if(type == "kpcor"){
    S <- E %*% t(E) / n
    fit <- est_sigma_rcpp(E, diag(S), r, restr, tol, maxiter, verbose)
  } else{
    fit <- est_sigma_mn_rcpp(E, r, restr, tol, maxiter)
  }
  return(fit)
}

#' Parametric bootstrap of the LRT for covariance matrix restriction
#'
#' @param Sigma0_c Cholesky root (lower tri.) of the null hypothesis covariance matrix: Sigma0 = C0 (A0 otimes B0) C0
#' @param X Matrix of dimension p x n, each column is a predictor vector
#' @param coef_mat Matrix of dimension p x cr of regression coefficients
#' @param r The numer of "rows" of the inverse vectorized columns of E
#' @param type Vector of length two, each element is one of "kpcor", "mn", or "unstruct", indicating the type of
#' the null (first element) and alternative (second element) hypothesis covariance matrices.
#' @param restr Vector of length 2, the first element indicates restrictions on the null hypothesis matrix
#' and the second indicates restrictions on the alternatice hypothesis matrix. "N" for no restriction, "A"
#' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' @param n_boot Number of bootstrap samples generated
#' @param n_cores Number of cores used for the boostrapping
#' @param tol Fitting algorithms terminate when an iteration increases the log-likelihood less than tol
#' @param maxiter The maximum number of iterations the algorithm runs if not converging before
#' @return Vector of bootstrapped LRTs
#'
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
boot_lrt <- function(Sigma0_c, X, coef_mat, r, type, restr, n_boot = 1000, n_cores = 4,
                     tol = 1e-10, maxiter = 1000)
{
    if(!is.matrix(Sigma0_c) && ncol(Sigma0_c) == nrow(Sigma0_c)){
      stop("Sigma0_c should be lower triangular Cholesky root")
    }

    if(!(is.atomic(r) && length(r) == 1 && r > 0 && (ncol(Sigma0_c) / r) == floor(ncol(Sigma0_c) / r))){
      stop("r should be a positive integer that divides the dimension of Sigma0_c")
    }
    c <- ncol(Sigma0_c) / r
    
    if(!is.matrix(X)){stop("X needs to be a matrix")}
    n <- ncol(X)
    p <- nrow(X)

    if(!(is.matrix(coef_mat) && ncol(coef_mat) == c*r && nrow(coef_mat) == p)){
      stop("coef_mat should has to be a matrix conformable with X and Sigma0_c")
    }

    if(!(is.character(type) && length(type) == 2 && sum(type %in% c("kpcor", "mn", "unstruct")) == 2)){
      stop("type should be vector of 2 characters, either 'kpcor', 'mn', or 'unstruct'")
    }

    if(type[2] == "mn" && type[1] == "kpcor"){stop("Models not nested")}
    if(type[1] == "unstruct"){stop("Models not nested")}

    if(!(is.character(restr) && length(restr) == 2 && sum(restr %in% c("N", "A", "B", "AB")) == 2)){
      stop("restr should be vector of 2 characters, one of 'N', 'A', 'B', or 'AB'")
    }
    if(restr[2] != "N" && type[2] == "unstruct"){stop("No restrictions possible for unstructured covariance matrix")}
    if(restr[2] == "A" && !(restr[1] %in% c("A", "AB"))){stop("Models not nested")}
    if(restr[2] == "B" && !(restr[1] %in% c("B", "AB"))){stop("Models not nested")}
    if(restr[2] == "AB"){stop("Models not nested")}
    if(restr[2] == restr[1] && type[1] == type[2]){stop("Models are identical")}

    if(!(is.atomic(n_boot) && length(n_boot) == 1 && n_boot > 0 && n_boot == floor(n_boot))){
      stop("n_boot should be a positive integer")
    }

    if(!(is.atomic(n_cores) && length(n_cores) == 1 && n_cores > 0 && n_cores == floor(n_cores))){
      stop("n_cores should be a positive integer")
    }

    if(!(is.atomic(tol) && length(tol) == 1 && tol > 0)){
      stop("tol should be a positive scalar")
    }

    if(!(is.atomic(maxiter) && length(maxiter) == 1 && maxiter > 0 && maxiter == floor(maxiter))){
      stop("maxiter should be a positive integer")
    }

    restr <- c((0:3)[c("N", "A", "B", "AB") == restr[1]], (0:3)[c("N", "A", "B", "AB") == restr[2]])
    if(type[2] == "mn"){
      boot_fun <- function(.){
        lrt_stat = NA
        try({
        Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
        E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
        fit_null <- est_sigma_mn_rcpp(E_boot, r, restr[1], tol = tol, maxiter = maxiter)
        fit_alt <- est_sigma_mn_rcpp(E_boot, r, restr[2], tol = tol, maxiter = maxiter)
        lrt_stat <- -2 * (fit_null$ll - fit_alt$ll)}, silent = TRUE)
        return(lrt_stat)
      }
    } else if(type[1] == "mn" && type[2] == "kpcor"){
      boot_fun <- function(.){
        lrt_stat = NA
        try({
        Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
        E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
        S <- tcrossprod(E_boot) / n
        fit_null <- est_sigma_mn_rcpp(E_boot, r, restr[1], tol, maxiter)
        fit_alt <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
        lrt_stat <- -2 * (fit_null$ll - fit_alt$ll)}, silent = TRUE)
        return(lrt_stat)
      }
    } else if(type[1] == "mn" && type[2] == "unstruct"){
      boot_fun <- function(.){
        lrt_stat = NA
        try({
          Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
          E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
          if(n < p + r * c){
            S <- diag(diag(tcrossprod(E_boot)) / n)
            ll_alt <- prof_log_lik(diag(sqrt(diag(S))), E_boot)
          } else{
            S <- tcrossprod(E_boot) / n
            ll_alt <- prof_log_lik(t(chol(S)), E_boot)
          }
          fit_null <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
          lrt_stat <- -2 * (fit_null$ll - ll_alt)}, silent = TRUE)
        return(lrt_stat)
      }
    } else if(type[1] == "kpcor" && type[2] == "unstruct"){
      boot_fun <- function(.){
        lrt_stat <- NA
        try({
          Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
          E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
          if(n < p + r * c){
            S <- diag(diag(tcrossprod(E_boot)) / n)
            ll_alt <- prof_log_lik(diag(sqrt(diag(S))), E_boot)
          } else{
            S <- tcrossprod(E_boot) / n
            ll_alt <- prof_log_lik(t(chol(S)), E_boot)
          }
          fit_null <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
          lrt_stat <- -2 * (fit_null$ll - ll_alt)}, silent = TRUE)
        return(lrt_stat)
      }
    } else{
      boot_fun <- function(.){
        lrt_stat <- NA
        try({
        Y_boot <- Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n) + crossprod(coef_mat, X)
        E_boot <- Y_boot - crossprod(qr.solve(tcrossprod(X), tcrossprod(X, Y_boot)), X)
        S <- tcrossprod(E_boot) / n
        fit_null <- est_sigma_rcpp(E_boot, diag(S), r, restr[1], tol = tol, maxiter = maxiter, verbose = FALSE)
        fit_alt <- est_sigma_rcpp(E_boot, diag(S), r, restr[2], tol = tol, maxiter = maxiter, verbose = FALSE)
        lrt_stat <- -2 * (fit_null$ll - fit_alt$ll)}, silent = TRUE)
        return(lrt_stat)
      }
    }
    return(unlist(parallel::mclapply(1:n_boot, boot_fun, mc.cores = n_cores)))
}

prof_log_lik <- function(Sigma_c, E)
#' Calculate (profile) log-likelihood for Sigma_c, i.e. the multivariate normal likelihood 
#' proportional to: -log(determinant(Sigma)) - trace{t(E) solve(Sigma) E},
#'  where Sigma =  Sigma_c t(Sigma_c).
#' 
#' @param Sigma_c An rc x rc lower triangular Cholesky root to evaluate the likelihood at
#' @param E An rc x n matrix of residual vectors, where n is the number of such vectors
#' @return The likelihood value evaluated at the arguments supplied, *including constants*
#' 
#' @export
#' @useDynLib kpcor
#' @importFrom Rcpp evalCpp
{
  if(!(is.matrix(Sigma_c) && ncol(Sigma_c) == nrow(Sigma_c) && sum(Sigma_c[upper.tri(Sigma_c)]) == 0 &&
    sum(diag(Sigma_c) < 0)) == 0){stop("Sigma_c should be a lower triangular Cholesky root")}
  if(!(is.matrix(E) && nrow(E) == nrow(Sigma_c))){stop("E should be an rc x n matrix of residuals")}
  return(prof_log_lik_rcpp(Sigma_c, E))
}

#### UNUSED FUNCTIONS
#' #' Parametric bootstrap of all model parameters
#' #'
#' #' @param Sigma0_c Cholesky root (lower tri.) of the null hypothesis covariance
#' #' matrix: Sigma0 = C0 (A0 otimes B0) C0
#' #' @param X Matrix of dimension p x n, each column is a predictor vector
#' #' @param coef_mat0 Matrix of dimension p x cr of regression coefficients in
#' #' the null hypothesis model
#' #' @param n_r The numer of "rows" of the inverse vectorized columns of Y
#' #' @param type One of "kpcor" and "mn", indicating which algorithm to use
#' #' @param restr Allows the user to impose the matrix A or B to be diagonal. "N" for no restriction, "A"
#' #' for diagonal A, "B" for diagonal B, and "AB" for both (only for kpcor).
#' #' @param n_boot Number of bootstrap samples generated
#' #' @param n_cores Number of cores used for the boostrapping
#' #' @param tol Fitting algorithms terminate when an iteration increases the
#' #' log-likelihood less than tol
#' #' @param maxiter The maximum number of iterations the algorithm runs if not
#' #' converging before
#' #' @return List of lists with bootstrapped parameter estimates
#' #'
#' #' @export
#' #' @useDynLib kpcor
#' #' @importFrom Rcpp evalCpp
#' boot_params <- function(Sigma0_c, X, coef_mat0, n_r, type = "kpcor", restr = "N", 
#'   n_boot = 1000, n_cores = 4, tol = 1e-10, maxiter = 1000)
#' {
#'   # ADD INPUT CHECKING
#'   n_p <- nrow(X)
#'   n <- ncol(X)
#'   n_c <- ncol(Sigma0_c) / n_r
#'   # Bootstrap
#'   Y_hat0 <- crossprod(coef_mat0, X)
#'   if(type == "kpcor"){
#'     boot_fun <- function(xxx){
#'       Y_b <- Y_hat0 + Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n)
#'       ret <- list(matrix(nrow = n_p, ncol = n_c * n_r), matrix(nrow = n_p, ncol = n_p), 
#'         matrix(nrow = n_r, ncol = n_r), rep(NA, n_r * n_c))
#'       try({
#'         fit_b <- kpcor::est_model(Y_b, X, n_r, restr = restr, tol = tol, maxiter = maxiter)
#'         ret[[1]] <- fit_b$coef
#'         ret[[2]] <- fit_b$A
#'         ret[[3]] <- fit_b$B
#'         ret[[4]] <- fit_b$D
#'       })
#'       return(ret)
#'     }
#'   } else{
#'     boot_fun <- function(xxx){
#'       Y_b <- Y_hat0 + Sigma0_c %*% matrix(stats::rnorm(n * nrow(Sigma0_c)), ncol = n)
#'       ret <- list(matrix(nrow = n_p, ncol = n_c * n_r), matrix(nrow = n_p, ncol = n_p), 
#'         matrix(nrow = n_r, ncol = n_r))
#'       try({
#'         fit_b <- kpcor::est_model(Y_b, X, n_r, type = "mn", restr = restr, tol = tol,
#'           maxiter = maxiter)
#'         ret[[1]] <- fit_b$coef
#'         ret[[2]] <- fit_b$A
#'         ret[[3]] <- fit_b$B
#'       })
#'       return(ret)
#'     }
#'   }
#'   out <- parallel::mclapply(1:n_boot, boot_fun, mc.cores = n_cores)
#'   return(out)
#' }
koekvall/crkr documentation built on Sept. 22, 2018, 6:28 p.m.