#'@title Rank SNPs by pval, prune for LD
#'@description Selects a set of top SNP based on p-value that are in minimal LD.
#'Performs the same function as top_snps_pval_ld but with different input format.
#'@param pairwise_ld Should have four columns (rowsnp, colsnp r^2, chunk)
#'@param ld_chunk_files List of RDS files containing pairwise ld. Provide either pairwise_ld or ld_chunk_files.
#'@param pvals Vector of pvalues (optional)
#'@param snp_list List of snps
#'@param ld_chunks chunk number for each SNP.
#'If chunk has missing values, those SNPs are assumed not in LD with any other SNPs.
#'@param pval_thresh Maximum pvalue
#'@param max_snps Maximum number of SNPs to retain
#'@param ld_thresh Maximum pairwise correlation between SNPs
#'@export
top_snps_pval_ld2 <- function(pairwise_ld, ld_chunk_files,
pvals, snp_list, ld_chunks,
pval_thresh = Inf, r2_thresh=0.01,
cores=parallel::detectCores()-1){
if(missing(pairwise_ld) & missing(ld_chunk_files)) stop("Please provide either pairwise_ld or ld_chunk_files\n")
if(!missing(pairwise_ld)){
names(pairwise_ld) <- c("rowsnp", "colsnp", "r2", "ld_chunk")
pwld <- T
}else{
pwld <- F
}
if(is.finite(pval_thresh) & missing(pvals)){
stop("pval_thresh is given but pvals is missing.\n")
}
p <- length(snp_list)
cat("You have provided information for ", p, " snps.\n")
stopifnot(length(ld_chunks) == p)
#dat <- data.frame("snp"=snp_list, "ld_chunk"=ld_chunks)
if(missing(pvals)){
o <- sample(1:p, size=p, replace=FALSE)
cat("No p-values are provided so SNPs will be ordered randomly.\n")
}else{
stopifnot(length(pvals)==p)
o <- order(pvals, decreasing=F)
ix <- sum(pvals < pval_thresh)
cat(ix, " SNPs pass p-value threshold.\n")
o <- o[1:ix]
#dat$pval <- pvals
}
#dat$order <- o
if(anyNA(ld_chunks)){
nasnps <- subset(snp_list, is.na(ld_chunks))
if(!missing(pvals)){
napvals <- subset(pvals, is.na(ld_chunks))
pvals <- subset(pvals, !is.na(ld_chunks))
keep <- subset(nasnps, napvals < pval_thresh)
}else{
keep <- nasnps
}
snp_list <- subset(snp_list, !is.na(ld_chunks))
ld_chunks <- subset(ld_chunks, !is.na(ld_chunks))
}else{
keep <- c()
}
chunks <- sort(unique(ld_chunks[o]))
cl <- makeCluster(cores, type="FORK")
keep_c <- parSapply(cl=cl, chunks, FUN=function(c){
k <- c()
cat("chunk ", c, "..")
o_c <- subset(o, ld_chunks[o]==c)
if(pwld){
ld_c <- subset(pairwise_ld, ld_chunk == c)
}else{
ld_c <- readRDS(ld_chunk_files[c])
}
while(length(o_c) > 0){
k <- c(k, snp_list[o_c[1]])
snp <- snp_list[o_c[1]]
myld <- subset(ld_c, rowsnp==snp | colsnp==snp)
if(any(myld$r2 > r2_thresh)){
remove_snps <- subset(myld, r2 > r2_thresh) %>%
with(., unique( c(rowsnp,colsnp)))
remove_ix <- which(snp_list %in% remove_snps)
}else{
remove_ix <- o_c[1]
remove_snps <- snp_list[o_c[1]]
}
ld_c <- subset(ld_c, !rowsnp %in% remove_snps & !colsnp %in% remove_snps)
o_c <- subset(o_c, !o_c %in% remove_ix)
cat(length(o_c), " ")
}
return(k)
})
stopCluster(cl)
keep <- c(keep, unlist(keep_c))
if(!missing(pvals)){
keep_pvals <- pvals[snp_list %in% keep]
o <- order(keep_pvals, decreasing=FALSE)
keep <- keep[o]
}
return(keep)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.