#'@title Rank SNPs by pval, prune for LD
#'@description Selects a set of top SNP based on p-value that are in minimal LD.
#'Performs the same function as top_snps_pval_ld but with different input format.
#'@param ld_files List of h5 files containing pairwise ld. One for each chromosome.
#'@param snp_info_rds RDS file with snp info
#'@param pvals Vector of pvalues (optional)
#'@param snps List of snps
#'@param r2_thresh r^2 threshold for pruning
#'@param pval_thresh Maximum pvalue
#'@param cores Number of cores to use
#'@export
top_snps_pval_ld2_new <- function(ld_files, snp_info_rds,
snps, pval_cols=c(),
pval_thresh = Inf, r2_thresh=0.01,
cores=parallel::detectCores()-1){
if(is.finite(pval_thresh) & length(pval_cols)==0){
stop("pval_thresh is given but pvals is missing.\n")
}
p1 <- nrow(snps)
stopifnot("SNP" %in% names(snps))
if(length(pval_cols)==0){
snps <- mutate(snps, p = sample(1:p1, size=p1, replace=FALSE)/p1)
cat("No p-values are provided so SNPs will be ordered randomly.\n")
pval_cols <- c("p")
pval_thresh <- Inf
}
cat("You have provided information for ", p1, " snps.\n")
k <- length(pval_cols)
snp_info <- readRDS(snp_info_rds)
snps <- snps %>% mutate(min_pval = apply(snps[, pval_cols], 1, min)) %>%
filter(min_pval < pval_thresh)
snp_info <- right_join(snp_info, snps, by="SNP")
chrom_chunk <- snp_info %>% filter(!is.na(region_id)) %>%
group_by(region_id) %>%
summarize(chr = first(chr)) %>%
select(chr, region_id)
cl <- makeCluster(cores, type="PSOCK")
clusterExport(cl, varlist=c("chrom_chunk","pval_thresh", "pval_cols", "k",
"r2_thresh", "ld_files", "snp_info"), envir=environment())
clusterEvalQ(cl, library(plyr))
clusterEvalQ(cl, library(dplyr))
keep <- parApply(cl, chrom_chunk, 1, FUN=function(y){
#keep <- apply(chrom_chunk, 1, FUN=function(y){
my_snp_info <- filter(snp_info, region_id == as.numeric(y[2]))
ld <- EigenH5::read_df_h5(ld_files[as.numeric(y[1])], paste0("LD_DF/", y[2], "/LD")) %>%
filter(colSNP %in% my_snp_info$SNP & rowSNP %in% my_snp_info$SNP)
o_c <- select(my_snp_info, pval_cols) %>%
alply(., 2, function(x){
order(unlist(x), decreasing=FALSE)[seq(sum(x < pval_thresh))]
})
snps_c <- my_snp_info$SNP
keep_c <- llply(o_c, function(x){
k_c <- c()
while(length(x) > 0){
snp <- snps_c[x[1]]
k_c <- c(k_c, snp)
if(length(x) == 1){
x <- c()
next
}
myld <- subset(ld, rowSNP==snp | colSNP==snp)
if(any(myld$r2 > r2_thresh)){
remove_snps <- subset(myld, r2 > r2_thresh) %>%
with(., unique( c(rowSNP,colSNP)))
remove_ix <- which(snps_c %in% remove_snps)
}else{
remove_ix <- x[1]
}
x <- subset(x, !x %in% remove_ix)
}
return(k_c)
})
return(keep_c)
})
stopCluster(cl)
if(k == 1){
keep <- unlist(keep)
return(keep)
}
keep_list <- list()
for(i in seq(k)){
keep_list[[i]] <- lapply(keep, function(x){x[[i]]}) %>% unlist()
}
return(keep_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.