Nothing
#' Marginal (SQ/CP) approach
#'
#' @description
#' The \code{marginal} function performs a trait-by-trait univariate test for latent interactions
#' using the squared residuals and cross products.
#'
#' @param y matrix of traits (n observations by k traits)
#' @param x matrix of SNPs (n observations by m SNPs)
#' @param adjustment matrix of covariates to adjust traits
#' @param pop_struct matrix of PCs that captures population structure
#'
#' @return
#' A data frame of p-values where the columns are the cross products/squared residuals
#' and the rows are SNPs.
#'
#' @examples
#' # set seed
#' set.seed(123)
#'
#' # Generate SNPs and traits
#' X <- matrix(rbinom(10*2, size = 2, prob = 0.25), ncol = 2)
#' Y <- matrix(rnorm(10*4), ncol = 4)
#'
#' out <- marginal(Y, X)
#'
#' @seealso \code{\link{marginal_plink}}
#' @export
marginal <- function(y,
x,
adjustment = NULL,
pop_struct = NULL) {
if (!(is.matrix(y) & is.matrix(x))) {
stop("Inputs must be matrices.")
}
if (!(nrow(y) == nrow(x))) {
stop("Observations are rows for each input.")
}
if (anyNA(y)) {
stop("Current implementation requires no NAs in trait matrix. Need to adjust inputs accordingly ... feature will be added soon.")
}
if (is.null(pop_struct)) {
h = matrix(1, nrow = nrow(y), ncol = 1)
} else {
if (!(is.matrix(pop_struct) & (nrow(y) == nrow(pop_struct)))){
stop("pop_struct must be matrix and rows are observations.")
}
h = cbind(1, pop_struct)
}
if (is.null(adjustment)) {
y <- .quick_lm_cpp(Xs = h, Ys = y)
} else {
if (!(is.matrix(adjustment) & (nrow(y) == nrow(adjustment)))){
stop("pop_struct must be matrix and rows are observations.")
}
y <- .quick_lm_cpp(Xs = cbind(h, adjustment), Ys = y)
}
m <- ncol(x)
tot_terms = choose(ncol(y), 2) + ncol(y)
p <- matrix(nrow = m, ncol = tot_terms)
ind <- !is.na(x)
for (i in 1:m) {
indtmp <- ind[,i] # remove NAs from genotypes
xtmp <- x[indtmp, i, drop=F]
ytmp <- y[indtmp,, drop = F]
htmp <- h[indtmp,, drop = F]
p[i,] <- .marginal_internal(Xs = xtmp, Ys = ytmp, Hs = htmp)
}
# generate names
t <- colnames(y)
if (is.null(t)) t <- paste0("col", 1:ncol(y))
cpterms <- outer(t, t, FUN = function(x,y) paste0(x,"_", y))
tmp <- upper.tri(cpterms)
cpterms <- c(cpterms[tmp], diag(cpterms))
colnames(p) <- cpterms
as.data.frame(p)
}
#' Marginal (SQ/CP) approach
#'
#' @description
#' The \code{marginal_plink} function performs a trait-by-trait univariate test for latent interactions
#' using the squared residuals and cross products. This function is suitable for large
#' datasets (e.g., UK Biobank) in plink format. Note that our code to process plink files builds from the
#' \code{\link{genio}} R package.
#'
#' @param y matrix of traits (n observations by k traits)
#' @param file path to plink files
#' @param adjustment matrix of covariates to adjust traits
#' @param pop_struct matrix of PCs that captures population structure
#' @param verbose If TRUE (default) print progress.
#'
#' @return
#' A data frame of p-values where the columns are the cross products/squared residuals
#' and the rows are SNPs.
#'
#' @examples
#' # set seed
#' set.seed(123)
#'
#' # Path to plink files
#' file <- system.file("extdata", 'sample.bed', package = "genio", mustWork = TRUE)
#'
#' # Generate trait expression
#' Y <- matrix(rnorm(10*4), ncol = 4)
#'
#' out <- marginal_plink(Y, file = file)
#'
#' @seealso \code{\link{marginal_plink}}
#' @export
marginal_plink <- function(y,
file,
adjustment = NULL,
pop_struct = NULL,
verbose = TRUE) {
if (missing(file))
stop('Output file path is required!')
file <- sub("\\.bed$", "", file)
require_files_plink(file)
file_bim = paste0(file, ".bim")
file_fam = paste0(file, ".fam")
file_bed = paste0(file, ".bed")
## Get number of snps/ind
bim <- genio::read_bim(file_bim)
fam <- genio::read_fam(file_fam)
n_ind <- nrow(fam);
m_loci <- nrow(bim);
if (verbose)
message('Reading: ', file_bed)
file <- path.expand(file_bed)
if ( !file.exists( file_bed ) )
stop( 'File does not exist: ', file_bed )
if (!(is.matrix(y))) {
stop("Inputs must be matrices.")
}
if (is.null(pop_struct)) {
h = matrix(1, nrow = nrow(y), ncol = 1)
} else {
if (!(is.matrix(pop_struct) & (nrow(y) == nrow(pop_struct)))){
stop("pop_struct must be matrix and rows are observations.")
}
h = cbind(1, pop_struct)
}
if (is.null(adjustment)) {
y <- .quick_lm_cpp(Xs = h, Ys = y)
} else {
if (!(is.matrix(adjustment) & (nrow(y) == nrow(adjustment)))){
stop("pop_struct must be matrix and rows are observations.")
}
y <- .quick_lm_cpp(Xs = cbind(h, adjustment), Ys = y)
}
tot_terms = choose(ncol(y), 2) + ncol(y)
lit_out <- .marginal_bed_cpp(file = file_bed, m_loci = m_loci, n_ind = n_ind, sY = y, sH = h, tot = tot_terms, verbose)
P <- lit_out$P
t <- colnames(y)
if (is.null(t)) t <- paste0("col", 1:ncol(y))
cpterms <- outer(t, t, FUN = function(x,y) paste0(x,"_", y))
tmp <- upper.tri(cpterms)
cpterms <- c(cpterms[tmp], diag(cpterms))
colnames(P) <- cpterms
P <- as.data.frame(P);
bim <- bim[,-3] # drop posg
bim$maf <- lit_out$MAF
id <- bim$maf > 0.5
bim$maf[id] <- 1 - bim$maf[id]
out <- cbind(bim, P)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.