R/marginal.R

Defines functions marginal_plink marginal

Documented in marginal marginal_plink

#' Marginal (SQ/CP) approach
#'
#' @description
#' The \code{marginal} function performs a trait-by-trait univariate test for latent interactions
#' using the squared residuals and cross products.
#'
#' @param y matrix of traits (n observations by k traits)
#' @param x matrix of SNPs (n observations by m SNPs)
#' @param adjustment matrix of covariates to adjust traits
#' @param pop_struct matrix of PCs that captures population structure
#'
#' @return
#' A data frame of p-values where the columns are the cross products/squared residuals
#' and the rows are SNPs.
#'
#' @examples
#' # set seed
#' set.seed(123)
#'
#' # Generate SNPs and traits
#' X <- matrix(rbinom(10*2, size = 2, prob = 0.25), ncol = 2)
#' Y <- matrix(rnorm(10*4), ncol = 4)
#'
#' out <- marginal(Y, X)
#'
#' @seealso \code{\link{marginal_plink}}
#' @export
marginal <- function(y,
                     x,
                     adjustment = NULL,
                     pop_struct = NULL) {
  if (!(is.matrix(y) & is.matrix(x))) {
    stop("Inputs must be matrices.")
  }
  if (!(nrow(y) == nrow(x))) {
    stop("Observations are rows for each input.")
  }
  if (anyNA(y)) {
    stop("Current implementation requires no NAs in trait matrix. Need to adjust inputs accordingly ... feature will be added soon.")
  }

  if (is.null(pop_struct)) {
    h = matrix(1, nrow = nrow(y), ncol = 1)
  } else {
    if (!(is.matrix(pop_struct) & (nrow(y) == nrow(pop_struct)))){
      stop("pop_struct must be matrix and rows are observations.")
    }
    h = cbind(1, pop_struct)
  }
  if (is.null(adjustment)) {
    y <- .quick_lm_cpp(Xs = h, Ys = y)
   } else {
    if (!(is.matrix(adjustment) & (nrow(y) == nrow(adjustment)))){
      stop("pop_struct must be matrix and rows are observations.")
    }
    y <- .quick_lm_cpp(Xs = cbind(h, adjustment), Ys = y)
  }

  m <- ncol(x)
  tot_terms = choose(ncol(y), 2) + ncol(y)
  p <- matrix(nrow = m, ncol = tot_terms)
  ind <- !is.na(x)
  for (i in 1:m) {
    indtmp <- ind[,i] # remove NAs from genotypes
    xtmp <- x[indtmp, i, drop=F]
    ytmp <- y[indtmp,, drop = F]
    htmp <- h[indtmp,, drop = F]
    p[i,] <- .marginal_internal(Xs = xtmp, Ys = ytmp, Hs = htmp)
  }
  # generate names
  t <- colnames(y)
  if (is.null(t)) t <- paste0("col", 1:ncol(y))
  cpterms <- outer(t, t, FUN = function(x,y) paste0(x,"_", y))
  tmp <- upper.tri(cpterms)
  cpterms <- c(cpterms[tmp], diag(cpterms))
  colnames(p) <- cpterms
  as.data.frame(p)
}

#' Marginal (SQ/CP) approach
#'
#' @description
#' The \code{marginal_plink} function performs a trait-by-trait univariate test for latent interactions
#' using the squared residuals and cross products. This function is suitable for large
#' datasets (e.g., UK Biobank) in plink format. Note that our code to process plink files builds from the
#' \code{\link{genio}} R package.
#'
#' @param y matrix of traits (n observations by k traits)
#' @param file path to plink files
#' @param adjustment matrix of covariates to adjust traits
#' @param pop_struct matrix of PCs that captures population structure
#' @param verbose If TRUE (default) print progress.
#'
#' @return
#' A data frame of p-values where the columns are the cross products/squared residuals
#' and the rows are SNPs.
#'
#' @examples
#' # set seed
#' set.seed(123)
#'
#' # Path to plink files
#' file <- system.file("extdata", 'sample.bed', package = "genio", mustWork = TRUE)
#'
#' # Generate trait expression
#' Y <- matrix(rnorm(10*4), ncol = 4)
#'
#' out <- marginal_plink(Y, file = file)
#'
#' @seealso \code{\link{marginal_plink}}
#' @export
marginal_plink <- function(y,
                         file,
                         adjustment = NULL,
                         pop_struct = NULL,
                         verbose = TRUE) {
  if (missing(file))
    stop('Output file path is required!')
  file <- sub("\\.bed$", "", file)
  require_files_plink(file)
  file_bim = paste0(file, ".bim")
  file_fam = paste0(file, ".fam")
  file_bed = paste0(file, ".bed")

  ## Get number of snps/ind
  bim <- genio::read_bim(file_bim)
  fam <- genio::read_fam(file_fam)
  n_ind <- nrow(fam);
  m_loci <- nrow(bim);

  if (verbose)
    message('Reading: ', file_bed)
  file <- path.expand(file_bed)

  if ( !file.exists( file_bed ) )
    stop( 'File does not exist: ', file_bed )

  if (!(is.matrix(y))) {
    stop("Inputs must be matrices.")
  }

  if (is.null(pop_struct)) {
    h = matrix(1, nrow = nrow(y), ncol = 1)
  } else {
    if (!(is.matrix(pop_struct) & (nrow(y) == nrow(pop_struct)))){
      stop("pop_struct must be matrix and rows are observations.")
    }
    h = cbind(1, pop_struct)
  }

  if (is.null(adjustment)) {
    y <- .quick_lm_cpp(Xs = h, Ys = y)
  } else {
    if (!(is.matrix(adjustment) & (nrow(y) == nrow(adjustment)))){
      stop("pop_struct must be matrix and rows are observations.")
    }
    y <- .quick_lm_cpp(Xs = cbind(h, adjustment), Ys = y)
  }

  tot_terms = choose(ncol(y), 2) + ncol(y)
  lit_out <- .marginal_bed_cpp(file = file_bed, m_loci = m_loci, n_ind = n_ind, sY = y, sH = h, tot = tot_terms, verbose)
  P <- lit_out$P

  t <- colnames(y)
  if (is.null(t)) t <- paste0("col", 1:ncol(y))
  cpterms <- outer(t, t, FUN = function(x,y) paste0(x,"_", y))
  tmp <- upper.tri(cpterms)
  cpterms <- c(cpterms[tmp], diag(cpterms))
  colnames(P) <- cpterms
  P <- as.data.frame(P);
  bim <- bim[,-3] # drop posg
  bim$maf <- lit_out$MAF
  id <- bim$maf > 0.5
  bim$maf[id] <- 1 - bim$maf[id]
  out <- cbind(bim, P)
  return(out)
}

Try the lit package in your browser

Any scripts or data that you put into this service are public.

lit documentation built on Aug. 8, 2025, 6:17 p.m.