library(snpStats)
library(corpcor)
library(Matrix)
library(mvtnorm)
#' Code to sample multivariate norm
#' \code{mvs_perm} sample from a multivariate normal distribution
#'
#' @param m a vector - mean values
#' @param sigma a matrix - a positive definite covariance matrix
#' @param n a scalar - number of samples to generate (default 2)
#' @return a matrix of sampled values
mvs_perm<-function(m,sigma,n=2){
if(!is.matrix(sigma))
stop("sigma parameter is not a matrix")
if(!corpcor::is.positive.definite(sigma,,method="chol"))
stop("sigma is not positive definite")
rd<-mvtnorm::rmvnorm(n,mean=m,sigma=sigma,method="chol")
t(rd)
}
#' Code to compute sigma - genotype covariance matrix
#' \code{mvs_sigma} create a geneotype convariance matrix
#'
#' @param r a matrix of pairwise \eqn{r^2} measures generated by snpStats
#' @param quiet boolean whether to return warnings messages default=TRUE
#' @return a positive definite approximation of covariance matrix
mvs_sigma<-function(r,quiet=TRUE){
diag(r)<-1
if(any(is.na(r)))
if(!quiet)
message(sprintf("Found %s where R^2 is NA",sum(is.na(r))))
r[is.na(r)]<-0
return(methods::as(corpcor::make.positive.definite(r),"Matrix"))
}
#' convert a vcf file to snpMatrix object
#' \code{vcf2snpmatrix} convert a vcf file to snpMatrix object
#'
#' @param vcf a scalar - path to vcf file
#' @param bcft a scalar - path to bcftools binary
#' @param region_file a scalar - path to a file satisfying -R bcftools criteria (optional)
#' @param quiet a boolean - if set to false then debug information shown (default = FALSE)
#' @return a list with two slots. sm is the snpMatrix object and info is a data.table describes SNPs
#' @export
vcf2snpmatrix <- function(vcf,bcft,region_file,quiet=TRUE){
CHROM <- POS <- pid <- NULL
header_cmd <- sprintf("%s view -h %s",bcft,vcf)
if(!quiet)
message(header_cmd)
my.pipe<-pipe(header_cmd)
header<-utils::tail(scan(my.pipe,what=character(),sep="\n",quiet=TRUE),n=1)
close(my.pipe)
cnames<-unlist(strsplit(header,"\t"))
if(!missing(region_file)){
vcftools_cmd<-sprintf("%s view -R %s -O v %s | grep -v '^#'",bcft,region_file ,vcf)
}else{
vcftools_cmd<-sprintf("%s view -O v %s | grep -v '^#'",bcft,vcf)
}
if(!quiet)
message(vcftools_cmd)
tmp<-fread(vcftools_cmd)
setnames(tmp,cnames)
gt<-tmp[,10:ncol(tmp),with=FALSE]
if(nrow(gt)==0)
return(NA)
info<-tmp[,1:9,with=FALSE]
setnames(info,'#CHROM','CHROM')
if(!quiet)
message("Creating snpMatrix obj")
sm<-apply(gt,1,function(x) sub("0\\|0","1",x))
sm<-apply(sm,1,function(x) sub("(0\\|1)|(1\\|0)","2",x))
sm<-apply(sm,1,function(x) sub("1\\|1","3",x))
## set anything else to a missing value
sm<-t(apply(sm,1,function(x) as.raw(sub("[0-9]\\|[0-9]","0",x))))
info[,pid:=paste(CHROM,POS,sep=':')]
colnames(sm)<-info$pid
rownames(sm)<-colnames(gt)
return(list(sm=methods::new("SnpMatrix", sm),info=info))
}
#' simulate betas for an ld block
#' \code{simulate_beta} use the multivariate normal to simulate realistic betas
#'
#' @param sm a snpMatrix object
#' @param lor a vector - observed betas
#' @param se_lor a vector - standard error of betas
#' @param lor_shrink a vector - shrinkage values to use to adjust betas (default 1 no shrinkage)
#' @param n_sims a scalar - number of simulations to conduct
#' @return a matrix of simulated betas
simulate_beta <- function(sm,lor,se_lor,lor_shrink=1,n_sims){
beta_hat<-lor * lor_shrink
if(length(lor)==1)
return(t(stats::rnorm(n_sims,mean=beta_hat,sd=se_lor)))
# compute R statistic
r<-snpStats::ld(sm,sm,stats="R")
# compute closest pos-def covariance matrix
r<-as.matrix(mvs_sigma(Matrix::Matrix(r)))
## for beta the covariance matrix is estimates by sigma x SE * SE^T
cov_se<-tcrossprod(se_lor)
cov.beta<-cov_se * r
## simulate beta
return(mvs_perm(beta_hat,cov.beta,n=n_sims))
}
#' covariance matrix of betas for an ld block
#' \code{cov_beta}
#'
#' @param sm a snpMatrix object
#' @param se_lor a vector - standard error of betas
#' @return a covariance matrix for beta
cov_beta <- function(sm,se_lor){
#beta_hat <- lor * lor_shrink
if(length(se_lor)==1)
return(se_lor^2)
# compute R statistic
#r<-ld(sm,sm,stats="R.squared")
r<-snpStats::ld(sm,sm,stats="R")
# compute closest pos-def covariance matrix
r<-as.matrix(mvs_sigma(Matrix::Matrix(r)))
## for beta the covariance matrix is estimates by sigma x SE * SE^T
cov_se<-tcrossprod(se_lor)
return(cov_se * r)
}
#' simulate betas for a study
#' \code{simulate_study} use the multivariate normal to simulate realistic betas for a study
#'
#' @param DT a data.table - as returned by \code{\link{get_gwas_data}}
#' @param ref_gt_dir scalar - path to a dir of R objects named CHR_1kg.RData containing reference GT in snpMatrix format
#' @param shrink_beta scalar - Whether to use Bayesian shrinkage of betas (default = TRUE)
#' @param n_sims a scalar - number of simulations (default 10)
#' @param quiet a scalar - boolean whether to show progress messages
#' @return a DT of n_sims simulated studies for projection
#' @export
simulate_study <- function(DT,ref_gt_dir,shrink_beta=TRUE,n_sims=10,quiet=TRUE){
pid <- value <- trait <- variable <- NULL
s.DT <- split(DT,DT$chr)
all.chr <- lapply(names(s.DT),function(chr){
if(!quiet)
message(sprintf("Processing %s",chr))
ss.file<-file.path(ref_gt_dir,sprintf("%s.RDS",chr))
message(file.path(ref_gt_dir,sprintf("%s.RDS",chr)))
sm <- readRDS(ss.file)
#sm<-get(load(ss.file))
## there are sometimes duplicates that we need to remove
info <- data.table(pid=colnames(sm),order=1:ncol(sm))
dup.idx <- which(duplicated(info$pid))
#dup.idx<-which(duplicated(obj$info$pid))
if(length(dup.idx)>0){
if(!quiet)
message(sprintf("Warning removing %d duplicated SNPs",length(dup.idx)))
#sm$info<-sm$info[-dup.idx,]
#sm$sm <- sm$sm[,-dup.idx]
info <- info[-dup.idx,]
sm <- sm[,-dup.idx]
}
info$order <- 1:nrow(info)
#sm$info$order<-1:nrow(sm$info)
# by ld block
by.ld <- split(s.DT[[chr]],s.DT[[chr]]$ld.block)
chr.sims <- lapply(names(by.ld),function(block){
if(!quiet)
message(sprintf("Processing %s",block))
dat <- by.ld[[block]]
setkey(dat,pid)
#info <-sm$info[pid %in% dat$pid ,.(pid,order)]
linfo <- info[pid %in% dat$pid ,list(pid,order)]
setkey(linfo,pid)
dat <- dat[linfo][order(order),]
if(shrink_beta){
## compute beta shrinkage
shrink <- with(dat,wakefield_null_pp(p.val,maf,n,n1/n))
#M <- with(dat,simulate_beta(sm$sm[,order],log(or),emp_se,shrink,n_sims))
M <- with(dat,simulate_beta(sm[,order],log(or),beta_se,shrink,n_sims))
}else{
#M <- with(dat,simulate_beta(sm$sm[,order],log(or),emp_se,1,n_sims))
M <- with(dat,simulate_beta(sm[,order],log(or),beta_se,1,n_sims))
}
sims <- cbind(dat,M)
sims <- melt(sims,id.vars=names(dat))
return(sims[,c('or','trait'):=list(exp(value),paste(trait,variable,sep='_'))][,names(dat),with=FALSE])
})
chr.sims<-rbindlist(chr.sims)
})
all.chr<-rbindlist(all.chr)
setkey(all.chr,pid)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.