#'Generate the list of available GWAS summary statistics
#'
#'Returns list of IDs of available GWAS summary statistics. The data is obtained from [IEU GWAS database](https://gwas.mrcieu.ac.uk/).
#'
#' @param id_exp ID for the exposure in IEU GWAS database.
#' @param id_out ID for the outome in IEU GWAS database.
#' @param type Set a default list of IDs.
#' \itemize{
#' \item default: Returns list of IDs of GWAS summary statistics generated by many different consortia and generated from UKB.
#' \item broad: In addition to the default list, it returns list of IDs of GWAS summary data on protein, metabolite, and eQTL levels, as well as GWAS summary statistics imported from the EBI database.
#' \item eqtl: Returns list of IDs of eQTLGen 2019 results, comprising all cis and some trans regions of gene expression in whole blood.
#' \item metabolite: Returns list of IDs of metabolite studies including human blood / immune system / circulating metabolites.
#' \item protein: Returns list of IDs of GWAS summary data on protein levels.
#' \item ubm: Returns list of IDs of GWAS summary data on brain region volumes.
#' }
#'
#'
#' @export
#' @return List
# choose background dataset list
background_ids <- function(id_exp=id_exp, id_out=id_out, type=c("default", "broad", "eqtl", "metabolite", "protein", "ubm")[1]){
stopifnot(type %in% c("default", "broad", "eqtl", "metabolite", "protein", "ubm"))
ao <- suppressMessages(TwoSampleMR::available_outcomes())
if(type[1] == "default") {
ids <- subset(ao) %>%
dplyr::arrange(dplyr::desc(sample_size)) %>%
dplyr::filter(nsnp > 100000) %>%
dplyr::filter(!duplicated(trait), mr == 1) %>%
dplyr::filter(grepl("ukb-b|ieu-a|ebi-a", id)) %>%
dplyr::filter(! id %in% c(id_exp, id_out))
ids <- subset(ids, !(category == "NA" & unit == "NA"))
id_list <- ids$id
message("Using default list of ", nrow(ids), " traits")
}
if(type[1] == "broad"){
ids <- subset(ao) %>%
dplyr::arrange(dplyr::desc(sample_size)) %>%
dplyr::filter(nsnp > 100000) %>%
dplyr::filter(!duplicated(trait), mr == 1) %>%
dplyr::filter(!grepl("ukb-a|bbj-a|ieu-b|finn-a", id)) %>%
dplyr::filter(! id %in% c(id_exp, id_out))
ids <- subset(ids, !(category == "NA" & unit == "NA"))
id_list <- ids$id
message("Using default list of ", nrow(ids), " traits")
}
if(type[1] == "eqtl") {
ids <- subset(ao) %>%
dplyr::arrange(dplyr::desc(sample_size)) %>%
dplyr::filter(nsnp > 100000) %>%
dplyr::filter(!duplicated(trait), mr == 1) %>%
dplyr::filter(grepl("eqtl-a", id)) %>%
dplyr::filter(! id %in% c(id_exp, id_out))
ids <- subset(ids, !(category == "NA" & unit == "NA"))
id_list <- ids$id
message("Using default list of ", nrow(ids), " traits")
}
if(type[1] == "metabolite") {
ids <- subset(ao) %>%
dplyr::arrange(dplyr::desc(sample_size)) %>%
dplyr::filter(nsnp > 100000) %>%
dplyr::filter(!duplicated(trait), mr == 1) %>%
dplyr::filter(grepl("met", id)) %>%
dplyr::filter(! id %in% c(id_exp, id_out))
ids <- subset(ids, !(category == "NA" & unit == "NA"))
id_list <- ids$id
message("Using default list of ", nrow(ids), " traits")
}
if(type[1] == "protein") {
ids <- subset(ao) %>%
dplyr::arrange(dplyr::desc(sample_size)) %>%
dplyr::filter(nsnp > 100000) %>%
dplyr::filter(!duplicated(trait), mr == 1) %>%
dplyr::filter(grepl("prot", id)) %>%
dplyr::filter(! id %in% c(id_exp, id_out))
ids <- subset(ids, !(category == "NA" & unit == "NA"))
id_list <- ids$id
message("Using default list of ", nrow(ids), " traits")
}
if(type[1] == "ubm") {
ids <- subset(ao) %>%
dplyr::arrange(dplyr::desc(sample_size)) %>%
dplyr::filter(nsnp > 100000) %>%
dplyr::filter(!duplicated(trait), mr == 1) %>%
dplyr::filter(grepl("ubm", id)) %>%
dplyr::filter(! id %in% c(id_exp, id_out))
ids <- subset(ids, !(category == "NA" & unit == "NA"))
id_list <- ids$id
message("Using default list of ", nrow(ids), " traits")
}
return(id_list)
}
#' Generate background data for IOS calculation
#'
#' Generates harmonised dataset of SNP-exposure and SNP-other traits that are possibly associated with suspicious SNPs. It also calculates R squared of SNP-exposure and/or SNP-other traits.
#'
#' @param exp Dataset of the instruments for the exposure, obtained using \code{extract_instruments}
#' @param id_bg List obtained using \code{background_ids}
#'
#'
#' @export
#' @return Data frame
# make background dataset
#bg_dat <- make_background(snplist = exp_dat$SNP, id_bg = id_bg)
make_background <- function(exp = exp_dat, id_bg = id_bg) {
#-----------------------------------------------------------------------------------------------------
#Generate background dataset which includes phenotypes associated with outliers (suspicious SNPs).
bdat <- extract_phewas(snplist = exp$SNP, id_bg = id_bg, nsnp_per_chunk = 10)
#bdat2 <- TwoSampleMR::extract_outcome_data(snps = exp$SNP, outcomes = id_bg, proxies = FALSE)
#Retrive info of "sample_size", "ncase", "ncontrol", "unit", "sd" (category?) from IEU GWAS database
bdat <- TwoSampleMR::add_metadata(bdat, cols = c("sample_size", "ncase", "ncontrol", "unit", "sd"))
exp <- TwoSampleMR::add_metadata(exp, cols = c("sample_size", "ncase", "ncontrol", "unit", "sd"))
#-----------------------------------------------------------------------------------------------------
#Data cleaning:: Remove rows if both of units and sd are missing to calculate R squared
#remove_na_dat <- function(dat = dat, what = "outcome"){
# if(length(unique(dat[[paste0("units.", what)]])) >= 1)
# {
# dat <- subset(dat, !(dat[[paste0("units.", what)]] == "NA") |!is.na(dat[[paste0("sd.", what)]]))
# }
# return(dat)
# }
#bdat <- remove_na_dat(bdat, "outcome")
#exp <- remove_na_dat(exp, "exposure")
#-----------------------------------------------------------------------------------------------------
#Caculate R square
bdat <- suppressMessages(TwoSampleMR::add_rsq(bdat))
exp <- suppressMessages(TwoSampleMR::add_rsq(exp))
#-----------------------------------------------------------------------------------------------------
#Merge two datasets - exposure data and background data
bdat <- merge(bdat, subset(exp, select=c(SNP, rsq.exposure), by="SNP"))
bdat$r2_ratio <- bdat$rsq.outcome / bdat$rsq.exposure
bdat <- subset(bdat, !duplicated(subset(bdat, select=c(SNP, id.outcome))))
return(bdat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.