require(tidyverse) require(foreach) require(igraph) require(devtools) require(egg) require(GGally) require(WWER) require(rslurm) require(ggupset)
file_pattern_gencor_male = "/gpfs/commons/projects/UKBB/sumstats/filtered/*.male_filtered.tsv.gz" file_pattern_gencor_female = "/gpfs/commons/projects/UKBB/sumstats/filtered/*.female_filtered.tsv.gz" save_select_sumstats = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/select_sumstats.Rdata" save_fit_sumstats = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/fit_sumstats.Rdata" p_thresh = 5e-8 file_pattern_snp_list = "/gpfs/commons/projects/UKBB/sumstats/clumped/*/all.snps" save_snps = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-8.Rdata" save_tce_res = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-8_5inst.Rdata" rg_result_file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/rg_results.Rdata"
sumstats <- read_sumstats_neale(file_pattern = file_pattern_gencor_male) save(sumstats, file = save_select_sumstats) rm(sumstats) sumstats <- read_sumstats_neale(file_pattern = file_pattern_gencor_female) save(sumstats, file = save_fit_sumstats) rm(sumstats)
load(save_select_sumstats) snps_to_use <- read_snp_list(file_pattern_snp_list) selected_snps <- select_snps(sumstats, snps_to_use, p_thresh = p_thresh, verbose = TRUE) save(selected_snps, file = save_snps)
load(save_fit_sumstats) load(save_snps) tce_res <- fit_tce(sumstats, selected_snps, mr_method = "egger_w", min_instruments = 5, verbose = TRUE) save(tce_res, file = save_tce_res)
pivot_tce <- function(R_tce, SE_tce, N_obs){ plot_data <- tidyr::pivot_longer( as_tibble(R_tce, rownames="Exposure"), -Exposure, names_to="Outcome", values_to = "R_tce") plot_data$SE_tce <- tidyr::pivot_longer( as_tibble(SE_tce, rownames="Exposure"), -Exposure, names_to="Outcome", values_to = "SE_tce")[["SE_tce"]] plot_data$N_obs <- tidyr::pivot_longer( as_tibble(N_obs, rownames="Exposure"), -Exposure, names_to="Outcome", values_to = "N_obs")[["N_obs"]] plot_data <- plot_data %>% dplyr::mutate( Z_score = R_tce/SE_tce, p_val = 2 * (1 - stats::pnorm(abs(Z_score)))) plot_data$p_val[is.infinite(plot_data$Z_score)] = 1 plot_data$p_val[is.na(plot_data$Z_score)] = 1 return(plot_data) } load(save_tce_res) load(rg_result_file)
# Remove rows that are all NaN. # keep_rows_n <- apply(tce_res_new$R_tce, 1, function(x){sum(!is.na(x)) > 1}) # R_tce_n <- tce_res_new$R_tce[keep_rows_n,] # SE_tce_n <- tce_res_new$SE_tce[keep_rows_n,] # N_obs_n <- tce_res_new$N_obs[keep_rows_n,] keep_rows <- apply(tce_res$R_tce, 1, function(x){sum(!is.na(x)) > 1}) R_tce <- tce_res$R_tce[keep_rows,] SE_tce <- tce_res$SE_tce[keep_rows,] N_obs <- tce_res$N_obs[keep_rows,] row_phenotypes <- rownames(R_tce) col_phenotypes <- colnames(R_tce) N_phenos <- nrow(R_tce) data <- pivot_tce(R_tce, SE_tce, N_obs) %>% dplyr::left_join(select(rg_results, -c(starts_with("mf"))), by = c("Exposure" = "phenotype.1", "Outcome" = "phenotype.2")) rho <- matrix(rg_results$rg, nrow = length(unique(rg_results$phenotype.1))) rho[abs(rho) > 1] = 1 ord_rho <- hclust(as.dist(sqrt(2*(1-abs(rho)))))$order all_phenotypes <- rg_results$phenotype.2[1:nrow(rho)] ordered_phenotypes <- all_phenotypes[ord_rho] row_phenotypes <- intersect(ordered_phenotypes, row_phenotypes) # Preserves order of argument 1. col_phenotypes <- intersect(ordered_phenotypes, col_phenotypes) # Preserves order of argument 1. # rho <- matrix(data$rg, nrow = length(phenotypes)) # rho[abs(rho) > 1] = 1 # ord_rho <- hclust(as.dist(sqrt(2*(1-abs(rho)))))$order n_nans = sum(is.na(R_tce)) perc_nans = n_nans/(N_phenos*N_phenos - N_phenos) num_nominal_sig = sum(data$p_val < 0.05, na.rm = TRUE) data$p_fdr <- stats::p.adjust(data$p_val, method="fdr") num_fwer_sig = sum(stats::p.adjust(data$p_val, method="holm") < 0.05, na.rm=TRUE) num_fdr_sig = sum(data$p_fdr < 0.05, na.rm=TRUE) ggplot(data, aes(x=p_val)) + geom_density() data %>% ggplot(aes(x=N_obs)) + geom_density() + scale_x_continuous(trans="log10") ggplot(data, aes(x=SE_tce)) + geom_density() + xlim(0, 1) ggplot(data, aes(x=R_tce)) + geom_density() + xlim(-1, 1) data %>% filter(p_fdr < 0.05) %>% ggplot(aes(x=R_tce)) + geom_density() + xlim(-1, 1)
theme_set(theme_gray()) g1 <- ggplot(data, aes(y=Exposure, x=Outcome)) + geom_tile(aes(fill=rg)) + scale_x_discrete(limits = col_phenotypes, labels = NULL) + scale_y_discrete(limits = rev(row_phenotypes), labels = NULL) + scale_fill_gradient2() + labs(x = NULL, y = NULL, title = "A) Genetic correlation") + # guides(fill = FALSE) + theme(plot.title = element_text(size=10), axis.ticks=element_blank()) + coord_fixed() g2 <- data %>% mutate(R_tce_fill = if_else(abs(R_tce) > 1, sign(R_tce)*1.0, R_tce)) %>% ggplot(aes(y=Exposure, x=Outcome)) + geom_tile(aes(fill=R_tce_fill)) + scale_x_discrete(limits = col_phenotypes, labels = NULL) + scale_y_discrete(limits = rev(row_phenotypes), labels = NULL) + scale_fill_gradient2(name = "CE") + labs(x = NULL, y = NULL, title = "B) Estimated causal effect") + # guides(fill = FALSE) + theme(plot.title = element_text(size=10), axis.ticks=element_blank()) + coord_fixed() plot = ggarrange(g1, g2, nrow=2, left = "Exposure", bottom = "Outcome") ggsave("../Figures/wwer_fig5.pdf", device = "pdf", plot = plot, dpi = 300)
tibble("logpv" = c(0:20), "correlation" = map_dbl(exp(-c(0:20)), ~ cor(data$R_tce[data$p_fdr < .x], data$rg[data$p_fdr < .x]))) %>% ggplot(aes(x = logpv, y=correlation)) + geom_line() + scale_x_continuous(trans=c("log1p"))
# I manually edited the list from the preprocessing script to include short descriptions. load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_phenotypes_sd.Rdata") descriptions <- dplyr::select(selected_phenos, phenotype, short_description) data <- data %>% dplyr::left_join(descriptions, by = c("Exposure" = "phenotype")) %>% dplyr::left_join(descriptions, by = c("Outcome" = "phenotype"), suffix = c(".1", ".2"))
data %>% group_by(Exposure) %>% mutate(n_sig_eff = sum(p_fdr < 0.05)) %>% distinct(Exposure, short_description.1, n_sig_eff) %>% arrange(desc(n_sig_eff))
# 98, 99, 103, 104, 105, 106, 119, 120, 122, 124 334-335, 400 imid = selected_phenos$phenotype[c(98, 99, 103, 104, 105, 106, 124, 334, 335, 400)] # 207-233, 235, 237-253 biomarkers = selected_phenos$phenotype[c(208:228, 233:234, 236, 238:254)] subset_bio_col = filter(selected_phenos, phenotype %in% biomarkers, phenotype %in% col_phenotypes) subset_bio_row = filter(selected_phenos, phenotype %in% biomarkers, phenotype %in% row_phenotypes) subset_imid_col = filter(selected_phenos, phenotype %in% imid, phenotype %in% col_phenotypes) subset_imid_row = filter(selected_phenos, phenotype %in% imid, phenotype %in% row_phenotypes) bio_order = intersect(ordered_phenotypes, biomarkers) bio_order = map_int(bio_order, ~ which(.x == subset_bio_col$phenotype)) sd_order_bio = subset_bio_col$short_description[bio_order] imid_order_col = intersect(ordered_phenotypes, subset_imid_col$phenotype) imid_order_row = intersect(ordered_phenotypes, subset_imid_row$phenotype) imid_order_col = map_int(imid_order_col, ~ which(.x == subset_imid_col$phenotype)) imid_order_row = map_int(imid_order_row, ~ which(.x == subset_imid_row$phenotype)) sd_order_imid_col = subset_imid_col$short_description[imid_order_col] sd_order_imid_row = subset_imid_row$short_description[imid_order_row]
theme_set(theme_classic()) data_both <- data %>% dplyr::filter(Exposure %in% biomarkers || Outcome %in% imid) data_bd <- data %>% dplyr::filter(Exposure %in% biomarkers, Outcome %in% imid) %>% dplyr::mutate(sig_level = if_else(p_fdr < 0.0005, "3", if_else(p_fdr < 0.005, "2", if_else(p_fdr < 0.05, "1", "0")))) data_db <- data %>% dplyr::filter(Exposure %in% imid, Outcome %in% biomarkers) %>% dplyr::mutate(sig_level = if_else(p_fdr < 0.0005, "3", if_else(p_fdr < 0.005, "2", if_else(p_fdr < 0.05, "1", "0")))) g1 <- data_bd %>% dplyr::mutate(R_tce = ifelse(SE_tce < 0.5, R_tce, NA)) %>% ggplot(aes(y=short_description.1, x=short_description.2)) + geom_tile(aes(fill=R_tce)) + scale_fill_gradient2(limits = c(-0.4, 0.4)) + geom_point(aes(size = sig_level))+ scale_size_manual(values=c(NA, 0.5, 1.5, 2.5)) + labs(x = NULL, y = NULL, title = "A) Blood traits as exposures, IMID as outcomes") + guides(fill=F, size=F) + scale_y_discrete(limits = sd_order_bio) + scale_x_discrete(limits = sd_order_imid_col) + theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1), axis.line.x = element_blank(), axis.line.y = element_blank()) g2 <- data_db %>% dplyr::mutate(R_tce = ifelse(SE_tce < 0.5, R_tce, NA)) %>% ggplot(aes(y=short_description.1, x=short_description.2)) + geom_tile(aes(fill=R_tce)) + scale_fill_gradient2(limits = c(-0.4, 0.4)) + geom_point(data = data_db[data_db$sig_level != "0",], aes(size = sig_level)) + scale_size_manual(values=c("1" = 0.5, "2" = 1.5, "3" = 2.5), labels = c("q < 0.05", "q < 0.005", "q < 0.0005")) + labs(x = NULL, y = NULL, title = "B) IMID as exposures, blood traits as outcomes", fill = "CE", size = "Significance") + # guides(fill=F) + scale_y_discrete(limits = sd_order_imid_row) + scale_x_discrete(limits = sd_order_bio) + theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1), axis.line.x = element_blank(), axis.line.y = element_blank()) plot <- ggarrange(g1, g2, nrow=1, widths = c(0.9, 1.1), bottom = "Outcome", left = "Exposure") ggsave("../Figures/wwer_fig4.pdf", device = "pdf", plot = plot, dpi = 300)
data_bd %>% group_by(Exposure) %>% mutate(n_sig_eff = sum(p_fdr < 0.05)) %>% distinct(Exposure, short_description.1, n_sig_eff) %>% arrange(desc(n_sig_eff)) data_db %>% group_by(Exposure) %>% mutate(n_sig_eff = sum(p_fdr < 0.05)) %>% distinct(Exposure, short_description.1, n_sig_eff) %>% arrange(desc(n_sig_eff))
sig_bd = data_bd %>% arrange(Exposure, p_fdr) %>% filter(p_fdr < 0.05) %>% select(Exposure, Outcome, short_description.1, short_description.2, p_fdr, R_tce, SE_tce, rg) %>% rename(SD_Exposure = short_description.1, SD_Outcome = short_description.2, "CE" = R_tce, "SE" = SE_tce) sig_db = data_db %>% arrange(Exposure, p_fdr) %>% filter(p_fdr < 0.05) %>% select(Exposure, Outcome, short_description.1, short_description.2, p_fdr, R_tce, SE_tce, rg) %>% rename(SD_Exposure = short_description.1, SD_Outcome = short_description.2, "CE" = R_tce, "SE" = SE_tce) sig_both <- sig_bd %>% inner_join(sig_db, by = c("Exposure" = "Outcome", "Outcome" = "Exposure")) %>% rename("A" = Exposure, "B" = Outcome, "CE (A to B)" = CE.x, "SE (A to B)" = SE.x, "p_fdr (A to B)" = p_fdr.x, "CE (B to A)" = CE.y, "SE (B to A)" = SE.y, "p_fdr (B to A)" = p_fdr.y, "rg" = rg.y) %>% select(-rg.x)
has_inst <- unique(data$Exposure) st_phenos <- dplyr::select(selected_phenos, phenotype, description, short_description, Neff, mf_rg, mf_rg_se, ci, z0) %>% dplyr::mutate(has_inst = phenotype %in% has_inst) st_bt_imid <- rbind(dplyr::filter(st_phenos, phenotype %in% imid), dplyr::filter(st_phenos, phenotype %in% biomarkers)) outfile <- "supp_tables_ukbb.xlsx" supp_tables <- openxlsx::createWorkbook() openxlsx::addWorksheet(supp_tables, "ST18") openxlsx::addWorksheet(supp_tables, "ST19") openxlsx::addWorksheet(supp_tables, "ST20") openxlsx::addWorksheet(supp_tables, "ST21") openxlsx::addWorksheet(supp_tables, "ST22") openxlsx::writeData(supp_tables, "ST18", st_bt_imid) openxlsx::writeData(supp_tables, "ST19", sig_bd) openxlsx::writeData(supp_tables, "ST20", sig_db) openxlsx::writeData(supp_tables, "ST21", st_phenos) openxlsx::writeData(supp_tables, "ST22", data) openxlsx::saveWorkbook(wb = supp_tables, file = outfile, overwrite = TRUE)
# Comparative analysis running additional methods on a subset of the phenotypes. load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_phenotypes_sd.Rdata") # 98, 99, 103, 104, 105, 106, 119, 120, 122, 124 334-335, 400 imid = selected_phenos$phenotype[c(98, 99, 103, 104, 105, 106, 124, 334, 335, 400)] # 207-233, 235, 237-253 biomarkers = selected_phenos$phenotype[c(208:228, 233:234, 236, 238:254)] load(save_select_sumstats) sumstats$beta_hat <- dplyr::select(sumstats$beta_hat, all_of(imid), all_of(biomarkers)) sumstats$se_hat <- dplyr::select(sumstats$se_hat, all_of(imid), all_of(biomarkers)) save(sumstats, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/select_sumstats_bbimid_only.Rdata") rm(sumstats) load(save_fit_sumstats) sumstats$beta_hat <- dplyr::select(sumstats$beta_hat, all_of(imid), all_of(biomarkers)) sumstats$se_hat <- dplyr::select(sumstats$se_hat, all_of(imid), all_of(biomarkers)) save(sumstats, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/fit_sumstats_bbimid_only.Rdata") rm(sumstats)
load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/select_sumstats_bbimid_only.Rdata") snps_to_use <- read_snp_list(file_pattern_snp_list) selected_wwer <- WWER::select_snps(sumstats, snps_to_use) selected_std <- WWER::select_snps(sumstats, snps_to_use, weight = F, filter = NULL) selected_cause <- WWER::select_snps(sumstats, snps_to_use, weight = F, filter = NULL, p_thresh = 1e-4) rm(sumstats)
load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/fit_sumstats_bbimid_only.Rdata") # tce_res_wwer <- fit_tce(sumstats, selected_wwer, mr_method = "egger_w") # tce_res_mbe <- fit_tce(sumstats, selected_std, mr_method = "mbe", min_instruments = 5, verbose = TRUE) # tce_res_mrmix <- fit_tce(sumstats, selected_std, mr_method = "mr_mix", min_instruments = 5, verbose = TRUE) # tce_res_cause <- fit_tce(sumstats, selected_cause, mr_method = "cause", min_instruments = 5, verbose = TRUE) # rslurm::slurm_call( # fit_tce, # params = list(sumstats = sumstats, selected_snps = selected_wwer, mr_method = "egger_w"), # jobname = "real_data_comp_wwer", # add_objects = c("sumstats", "selected_wwer"), # slurm_options = list(time = "1:00:00", mem = "16G"), # submit = T) # # rslurm::slurm_call( # fit_tce, # params = list(sumstats = sumstats, selected_snps = selected_std, mr_method = "mbe"), # jobname = "real_data_comp_mbe", # add_objects = c("sumstats", "selected_std"), # slurm_options = list(time = "8:00:00", mem = "16G"), # submit = T) # rslurm::slurm_call( # fit_tce, # params = list(sumstats = sumstats, selected_snps = selected_std, mr_method = "mr_mix"), # jobname = "real_data_comp_mrmix", # add_objects = c("sumstats", "selected_std"), # slurm_options = list(time = "72:00:00", mem = "32G", "cpus-per-task" = "4"), # submit = T) # rslurm::slurm_call( # fit_tce, # params = list(sumstats = sumstats, selected_snps = selected_std, mr_method = "ivw"), # jobname = "real_data_comp_ivw", # add_objects = c("sumstats", "selected_std"), # slurm_options = list(time = "4:00:00", mem = "16G"), # submit = T) # # rslurm::slurm_call( # fit_tce, # params = list(sumstats = sumstats, selected_snps = selected_std, mr_method = "egger"), # jobname = "real_data_comp_egger", # add_objects = c("sumstats", "selected_std"), # slurm_options = list(time = "4:00:00", mem = "16G"), # submit = T) # # snps_to_use <- read_snp_list(file_pattern_snp_list) # selected_snps <- select_snps(sumstats, snps_to_use, p_thresh = p_thresh, verbose = TRUE) # save(selected_snps, file = save_snps)
wwer_res <- rslurm::get_slurm_out(rslurm::slurm_job(jobname = "real_data_comp_wwer", nodes = 1), outtype = "raw") mbe_res <- rslurm::get_slurm_out(rslurm::slurm_job(jobname = "real_data_comp_mbe", nodes = 1), outtype = "raw") mrmix_res <- rslurm::get_slurm_out(rslurm::slurm_job(jobname = "real_data_comp_mrmix", nodes = 1), outtype = "raw") ivw_res <- rslurm::get_slurm_out(rslurm::slurm_job(jobname = "real_data_comp_ivw", nodes = 1), outtype = "raw") egger_res <- rslurm::get_slurm_out(rslurm::slurm_job(jobname = "real_data_comp_egger", nodes = 1), outtype = "raw") wwer_res <- pivot_tce(wwer_res$R_tce, wwer_res$SE_tce, wwer_res$N_obs) mbe_res <- pivot_tce(mbe_res$R_tce, mbe_res$SE_tce, mbe_res$N_obs) mrmix_res <- pivot_tce(mrmix_res$R_tce, mrmix_res$SE_tce, mrmix_res$N_obs) ivw_res <- pivot_tce(ivw_res$R_tce, ivw_res$SE_tce, ivw_res$N_obs) egger_res <- pivot_tce(egger_res$R_tce, egger_res$SE_tce, egger_res$N_obs)
p_test <- 0.05/574 make_upset_res <- function(p_val_egger, p_val_mbe, p_val_mrmix, p_val_ivw, p_val){ res = c() if(p_val_egger < p_test) res = c(res, "Egger") if(p_val_mbe < p_test) res = c(res, "MBE") if(p_val_mrmix < p_test) res = c(res, "MrMix") if(p_val_ivw < p_test) res = c(res, "IVW") if(p_val < p_test) res = c(res, "WWER") return(res) } jacc_index <- function(p_A, p_B, thresh){ set_A <- which(p_A < thresh) set_B <- which(p_B < thresh) num <- length(intersect(set_A, set_B)) denom <- length(union(set_A, set_B)) return(num/denom) } all_res <- egger_res %>% dplyr::left_join(mbe_res, by = c("Exposure", "Outcome"), suffix = c("_egger", "_mbe")) %>% dplyr::left_join(mrmix_res, by = c("Exposure", "Outcome")) %>% dplyr::left_join(ivw_res, by = c("Exposure", "Outcome"), suffix = c("_mrmix", "_ivw")) %>% dplyr::left_join(wwer_res, by = c("Exposure", "Outcome")) %>% dplyr::filter(Exposure != Outcome) all_res <- dplyr::filter(all_res, ((Exposure %in% biomarkers)&(Outcome %in% imid))|(Exposure %in% imid)&(Outcome %in% biomarkers), !is.na(R_tce)) all_res$sig_methods <- dplyr::select(all_res, starts_with("p_val")) %>% purrr::pmap(make_upset_res) all_res$n_sig_methods <- purrr::map_int(all_res$sig_methods, length) methods <- c("Egger", "MBE", "MrMix", "IVW", "WWER") jacc_data <- dplyr::select(all_res, starts_with("p_val")) colnames(jacc_data) <- methods pairs <- expand.grid(methods, methods) pairs$jacc_index <- purrr::map2_dbl(pairs$Var1, pairs$Var2, ~ ifelse(.x == .y, NA, jacc_index(jacc_data[[.x]], jacc_data[[.y]], thresh = p_test)))
theme_set(theme_classic()) g1 <- all_res %>% filter(n_sig_methods > 0) %>% ggplot(aes(x = sig_methods)) + geom_bar() + scale_x_upset() + labs(x = "Methods", y = "Count", title = "A)") g2 <- pairs %>% ggplot(aes(y=Var1, x=Var2)) + geom_tile(aes(fill=jacc_index)) + scale_fill_gradient() + labs(x = NULL, y = NULL, fill = "Jacccard Index", title = "B)") + theme(axis.ticks = element_blank(), axis.line = element_blank()) ggarrange(g1, g2, nrow = 1, widths = c(1, 0.6))
# Analysis adjusting p-value to test effect on estimate. load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/select_sumstats_bbimid_only.Rdata") snps_to_use <- read_snp_list(file_pattern_snp_list) selected_wwer_8 <- WWER::select_snps(sumstats, snps_to_use, p_thresh = 5e-08) selected_wwer_10 <- WWER::select_snps(sumstats, snps_to_use, p_thresh = 5e-10) selected_wwer_14 <- WWER::select_snps(sumstats, snps_to_use, p_thresh = 5e-14) selected_wwer_20 <- WWER::select_snps(sumstats, snps_to_use, p_thresh = 5e-20) selected_wwer_28 <- WWER::select_snps(sumstats, snps_to_use, p_thresh = 5e-28) rm(sumstats)
load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/fit_sumstats_bbimid_only.Rdata") tce_res_wwer_8 <- fit_tce(sumstats, selected_wwer_8, mr_method = "egger_w") tce_res_wwer_10 <- fit_tce(sumstats, selected_wwer_10, mr_method = "egger_w") tce_res_wwer_14 <- fit_tce(sumstats, selected_wwer_14, mr_method = "egger_w") tce_res_wwer_20 <- fit_tce(sumstats, selected_wwer_20, mr_method = "egger_w") tce_res_wwer_28 <- fit_tce(sumstats, selected_wwer_28, mr_method = "egger_w") rm(sumstats)
load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_phenotypes_sd.Rdata") imid = selected_phenos$phenotype[c(98, 99, 103, 104, 105, 106, 124, 334, 335, 400)] biomarkers = selected_phenos$phenotype[c(208:228, 233:234, 236, 238:254)] imid_names = c("DVT", "Asthma", "Psoriasis", "Diverticulitis", "Crohns", "Osteoarthritis", "Diabetes", "Emphysema", "Hayfever", "UC") bm_names = c("WBCC", "RBCC", "HC %", "MCV", "MCHC", "RBC DW", "Plt cnt", "Plt crt", "MPV", "PDW", "Lymph cnt", "Mono cnt", "Baso cnt", "Lymph %", "Mono %", "Eosin %", "Baso %", "MRV", "MSCV", "IRF", "HLSR %", "Albumin", "Alkaline PP", "Alanine AT", "Asparate AT", "Urea", "Calcium", "Cholesterol", "Creatinine", "CRP", "Gamma GT", "Glucose", "HBA1C", "HDL chol", "IGF1", "Phosphate", "SHBG", "Tot Prot", "TG", "Urate", "Vit D")
res_wide_8 = pivot_tce(tce_res_wwer_8$R_tce, tce_res_wwer_8$SE_tce, tce_res_wwer_8$N_obs) res_wide_10 = pivot_tce(tce_res_wwer_10$R_tce, tce_res_wwer_10$SE_tce, tce_res_wwer_10$N_obs) res_wide_14 = pivot_tce(tce_res_wwer_14$R_tce, tce_res_wwer_14$SE_tce, tce_res_wwer_14$N_obs) res_wide_20 = pivot_tce(tce_res_wwer_20$R_tce, tce_res_wwer_20$SE_tce, tce_res_wwer_20$N_obs) res_wide_28 = pivot_tce(tce_res_wwer_28$R_tce, tce_res_wwer_28$SE_tce, tce_res_wwer_28$N_obs) res_p_comp = dplyr::bind_rows("5e-08" = res_wide_8, "5e-10" = res_wide_10, "5e-14" = res_wide_14, "5e-20" = res_wide_20, "5e-28" = res_wide_28, .id = "thresh") %>% filter( ((Exposure %in% imid) & !(Outcome %in% imid)) | ((Exposure %in% biomarkers) & !(Outcome %in% biomarkers))) %>% left_join(tibble(code = c(imid, biomarkers), name = c(imid_names, bm_names)), by = c("Exposure" = "code")) %>% left_join(tibble(code = c(imid, biomarkers), name = c(imid_names, bm_names)), by = c("Outcome" = "code")) %>% dplyr::mutate(pair = paste(name.x, name.y, sep = ", ")) save(res_p_comp, file="/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/res_p_comp.Rdata")
load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/res_p_comp.Rdata") p_test <- 0.05/574 sig_pairs <- (res_p_comp %>% filter(thresh=="5e-08", p_val < p_test))$pair res_p_comp_sig <- res_p_comp %>% dplyr::filter(pair %in% sig_pairs)
res_p_comp_sig %>% ggplot() + facet_wrap(facets = "pair", ncol = 6) + geom_pointrange(aes(thresh, abs(R_tce), ymin = abs(R_tce) - 1.96*SE_tce, ymax = abs(R_tce) + 1.96*SE_tce), fatten = 2) + coord_cartesian(ylim = c(0, 0.5)) + scale_x_discrete(limits=rev) + labs(x = "p-value threshold", y = "|R|") + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
res_wide_8_sig <- filter(res_p_comp_sig, thresh == "5e-08") res_wide_8_sig$R_tce_28 <- filter(res_p_comp_sig, thresh == "5e-28")$R_tce res_wide_8_sig <- res_wide_8_sig %>% dplyr::mutate(delta = sign(R_tce)*(R_tce - R_tce_28) , prop_delta = delta/abs(R_tce))
load(save_select_sumstats) snps_to_use <- read_snp_list(file_pattern_snp_list) selected_snps_10 <- select_snps(sumstats, snps_to_use, p_thresh = 5e-10, verbose = TRUE) selected_snps_14 <- select_snps(sumstats, snps_to_use, p_thresh = 5e-14, verbose = TRUE) selected_snps_20 <- select_snps(sumstats, snps_to_use, p_thresh = 5e-20, verbose = TRUE) selected_snps_28 <- select_snps(sumstats, snps_to_use, p_thresh = 5e-28, verbose = TRUE) save(selected_snps_10, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-10.Rdata") save(selected_snps_14, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-14.Rdata") save(selected_snps_20, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-20.Rdata") save(selected_snps_28, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-28.Rdata")
load(save_fit_sumstats) load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-10.Rdata") load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-14.Rdata") load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-20.Rdata") load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/selected_snps_5e-28.Rdata") tce_res_10 <- fit_tce(sumstats, selected_snps_10, mr_method = "egger_w", min_instruments = 5, verbose = TRUE) tce_res_14 <- fit_tce(sumstats, selected_snps_14, mr_method = "egger_w", min_instruments = 5, verbose = TRUE) tce_res_20 <- fit_tce(sumstats, selected_snps_20, mr_method = "egger_w", min_instruments = 5, verbose = TRUE) tce_res_28 <- fit_tce(sumstats, selected_snps_28, mr_method = "egger_w", min_instruments = 5, verbose = TRUE) save(tce_res_10, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-10.Rdata") save(tce_res_14, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-14.Rdata") save(tce_res_20, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-20.Rdata") save(tce_res_28, file = "/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-28.Rdata")
load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-10.Rdata") load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-14.Rdata") load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-20.Rdata") load("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/tce_res_5e-28.Rdata") res_wide_10 = pivot_tce(tce_res_10$R_tce, tce_res_10$SE_tce, tce_res_10$N_obs) %>% filter(Exposure %in% row_phenotypes) res_wide_14 = pivot_tce(tce_res_14$R_tce, tce_res_14$SE_tce, tce_res_14$N_obs) %>% filter(Exposure %in% row_phenotypes) res_wide_20 = pivot_tce(tce_res_20$R_tce, tce_res_20$SE_tce, tce_res_20$N_obs) %>% filter(Exposure %in% row_phenotypes) res_wide_28 = pivot_tce(tce_res_28$R_tce, tce_res_28$SE_tce, tce_res_28$N_obs) %>% filter(Exposure %in% row_phenotypes) data$R_tce_10 <- res_wide_10$R_tce data$SE_tce_10 <- res_wide_10$SE_tce data$R_tce_14 <- res_wide_14$R_tce data$SE_tce_14 <- res_wide_14$SE_tce data$R_tce_20 <- res_wide_20$R_tce data$SE_tce_20 <- res_wide_20$SE_tce data$R_tce_28 <- res_wide_28$R_tce data$SE_tce_28 <- res_wide_28$SE_tce data_sig <- data %>% filter(p_fdr < 0.05) %>% dplyr::mutate(delta = sign(R_tce)*(R_tce - R_tce_28) , prop_delta = delta/abs(R_tce))
theme_set(theme_classic()) data_all = dplyr::bind_rows( "5e-08" = select(data, names(res_wide_10)), "5e-10" = res_wide_10, "5e-14" = res_wide_14, "5e-20" = res_wide_20, "5e-28" = res_wide_28, .id = "thresh") mentioned_pairs <- readr::read_csv("/gpfs/commons/home/bbrown/ukbb_network/saved_rdata/mentioned_pairs.csv")%>% dplyr::mutate(code_pair = paste(Exposure, Outcome, sep = ", ")) unique_phenos <- sort(unique(c(mentioned_pairs$Exposure, mentioned_pairs$Outcome))) names = c("Semi-skim", "Skim", "Butter", "Non-butter", "Salt", "Dietary changes", "Father death", "Nervous feelings", "DVT", "Asthma", "Psoriasis", "Diverticulitis", "Crohns", "Osteoarthritis", "Sit height", "IoF: Heart", "IoF: emphysema", "IoM: diabetes", "BMI", "Overall health", "Water mass", "WBCC", "RBCC", "MCV", "Plateletcrit", "MPV", "PDW", "Lymph cnt", "Lymph %", "Mono %", "Eos %", "MRV", "MSCV", "Albumin", "Aspartate AT", "Cholesterol", "CRP", "Glucose", "HBA1C", "HDL chol", "IGF-1", "TG", "Urate", "Vit D", "Hip circ", "Emphysema", "Allergy", "Asprin", "Ibuprofen", "UC") mentioned_pairs <- mentioned_pairs %>% left_join(tibble(code = unique_phenos, name = names), by = c("Exposure" = "code")) %>% left_join(tibble(code = unique_phenos, name = names), by = c("Outcome" = "code")) %>% dplyr::mutate(pair = paste(name.x, name.y, sep = ", ")) data_all <- data_all %>% left_join(tibble(code = unique_phenos, name = names), by = c("Exposure" = "code")) %>% left_join(tibble(code = unique_phenos, name = names), by = c("Outcome" = "code")) %>% dplyr::mutate(pair = paste(name.x, name.y, sep = ", ")) data_all %>% filter(pair %in% mentioned_pairs$pair) %>% ggplot() + facet_wrap(facets = "pair", ncol = 6) + geom_pointrange(aes(thresh, abs(R_tce), ymin = abs(R_tce) - 1.96*SE_tce, ymax = abs(R_tce) + 1.96*SE_tce), fatten = 2) + coord_cartesian(ylim = c(0, 0.5)) + scale_x_discrete(limits=rev) + labs(x = "p-value threshold", y = "|R|") + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
data_all %>% filter(thresh == "5e-08") %>% mutate(R_tce_28 = res_wide_28$R_tce, delta = sign(R_tce)*(R_tce - R_tce_28) , prop_delta = delta/abs(R_tce), code_pair = paste(Exposure, Outcome, sep = ", ")) %>% filter(code_pair %in% mentioned_pairs$code_pair) %>% view()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.