knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options(rmarkdown.html_vignette.check_title = FALSE)
source('../R/load-data.R') source('../R/make-tables.R') source('../R/temp-fig1.R') source('../R/cox-array.R') source('../R/elastic-net.R') source('../R/random-forest.R') source('../R/wgcna.R') source('../R/linear-array.R') source('../R/linear-elastic-net.R') source('../R/linear-random-forest.R') library(magrittr) set.seed(101010) doParallel::registerDoParallel(12) future::plan(future::multisession, workers = 8)
path <- function(...) { return(file.path('..', 'inst', 'extdata', ...)) } label.path <- path('study-data', 'proteinMapping.csv') study.path <- path('study-data', 'ARICmaster_121820.dta') soma5.path <- path('study-data', 'ARICsomaV5.txt') soma3.path <- path('study-data', 'ARICsomaV3.txt') mods5.path <- path('aux-data', 'visit-five') mods3.path <- path('aux-data', 'visit-three') addtl.vars <- path('study-data', 'ARIC_newvar_010621.dta') addtl.mods <- path('aux-data', 'visit-five', 'adtlvars.csv') echov.mods <- path('aux-data', 'visit-five', 'echovars.csv')
labels <- get.labels(label.path, sid, uniprot.full.name, entrezgenesymbol, flag2 == 0) adjust.vars <- readr::read_csv(file.path(mods5.path, 'adjusted.csv')) aric.master <- haven::read_dta(study.path) soma.data.5 <- get.soma(soma5.path, labels = labels) soma.data.3 <- get.soma(soma3.path, labels = labels) proteins <- labels$term time <- "fuptime" outc <- "hfref" adjust <- adjust.vars$Variable[-1] vars <- c(proteins, adjust, time, outc)
visit.five <- get.visit(soma.data = soma.data.5, path.mods = mods5.path, master = aric.master) table1 <- make.tables(visit.five, time, outc, path('aux-data', 'table-template.csv')) res <- consort('.', mods5.path, soma.data.5, visit.five, labels, time, outc)
visit.three <- get.visit(soma.data = soma.data.3, path.mods = mods3.path, master = aric.master) table2 <- make.tables(visit.three, time, outc, path('aux-data', 'table-template.csv')) res2 <- consort('.', mods3.path, soma.data.3, visit.three, labels, time, outc)
res
knitr::kable(table1)
res2
knitr::kable(table2)
num.var <- numeric.id(visit.five, adjust) fifth.visit <- visit.five %>% get.scaled(num.var, proteins) third.visit <- visit.three %>% get.scaled(num.var, proteins)
fifth <- fifth.visit %>% dplyr::select(dplyr::all_of(vars)) univariate.results <- cox.arry(proteins, fifth, time, outc, adjust, 1)
# fifth.ref <- fifth.visit %>% # dplyr::select(dplyr::all_of(c(proteins, adjust, time, 'hfpef'))) # # fifth.pef <- fifth.visit %>% # dplyr::select(dplyr::all_of(c(proteins, adjust, time, 'hfref'))) # # univariate.results.ref <- # cox.arry(proteins, fifth.ref, # time, 'hfpef', adjust, 1) # # univariate.results.pef <- # cox.arry(proteins, fifth.pef, # time, 'hfref', adjust, 2) # # plot.data.refpef <- # dplyr::bind_rows(univariate.results.ref, # univariate.results.pef) %>% # dplyr::mutate(Name = labels[term, "name"]) # # bonferroni.ref <- # terms.to.keep(plot.data.refpef, # .pval = 0.05/length(proteins), 1) # # # bf.visit.five.ref <- bonferroni.ref$data %>% # dplyr::filter(src == 1) %>% # dplyr::arrange(pval) %>% # dplyr::left_join(univariate.results.pef %>% # dplyr::select(term, desc), # by = "term", # suffix = c(".pEF", ".rEF")) %>% # dplyr::mutate(Name = substr(Name, 1, 25)) %>% # dplyr::select(term, Name, desc.pEF, desc.rEF, ) # # names(bf.visit.five.ref) <- c("SeqId", "Name", "HR Visit 5 pEF", "HR Visit 5 rEF") # # bf.visit.five.ref %>% write.csv('~/Desktop/pef.csv', row.names=F) # # # # plot.data.visit.five.refpef <- # dplyr::left_join(univariate.results.ref, # univariate.results.pef, # by = "term", # suffix = c(".pEF", ".rEF")) %>% # dplyr::mutate(Name = substr(labels[term, 'name'], 1, 25)) # # volcanoPlot.temp(plot.data.visit.five.refpef, # length(proteins), # length(proteins), # suffix = c(".rEF", ".pEF"), # title = "ARIC Visit 5 Incident HFpEF (pEF HR on x-axis)", # subtitle = "Color = rEF Significance, Shape = pEF Significance") #
# # pdv5 <- plot.data.visit.five.refpef %>% # dplyr::mutate(pv1 = cut(pval.rEF, breaks = c(0, 0.05/4877, 0.05, 1), labels = c("royalblue", "red2", "grey30")), # pv1 = as.character(pv1), # pv2 = cut(pval.pEF, breaks = c(0, 0.05/4877, 0.05, 1), labels = c(17, 20, 4)), # pv2 = as.numeric(levels(pv2))[pv2]) %>% # dplyr::group_by(pv1, pv2) %>% # dplyr::group_split() # # par(pty = "s") # j <- rep(c(rgb(76/255, 76/255, 76/255, 0.1), rgb(238/255,0,0, 0.2), rgb(0, 35/255, 102/255, 0.6)), each = 3) # # for (i in 1:length(pdv5)) { # print(j[i]) # plot(log(pdv5[[i]]$hazr.rEF), log(pdv5[[i]]$hazr.pEF), col = j[i], pch = pdv5[[i]]$pv2[1], # xlim = c(-0.5, 0.75), # ylim = c(-0.5, 0.75), # xlab = "", # ylab = "", # asp = 1) # par(new = T) # } # title(main = "Beta vs Beta Visit 5 rEF vs Visit 5 pEF", # xlab = "Visit 5 rEF Beta", # ylab = "Visit 5 pEF Beta") # # abline(h=0, lty=c(2), col = c("black")) # abline(v=0, lty=c(2), col = c("black")) # abline(a=0, b=1, lty=c(2), col = c("black")) #
third <- third.visit %>% dplyr::select(dplyr::all_of(vars)) validation.results <- cox.arry(proteins, third, time, outc, adjust, 2)
plot.data <- dplyr::bind_rows(univariate.results, validation.results) %>% dplyr::mutate(Name = labels[term, "name"]) bonferroni <- terms.to.keep(plot.data, .pval = 0.05/length(proteins), 1) falsediscr <- terms.to.keep(plot.data, .pval = 0.05, 1) retained <- terms.to.keep(plot.data, .pval = 0.05, 2, .terms = bonferroni$kept) retained2 <- terms.to.keep(plot.data, .pval = 0.05/length(bonferroni$kept), 2, .terms = bonferroni$kept)
"<=== Sig at BF Visit 5 ===> Proteins: " %>% paste0(length(bonferroni$kept)) %>% print() "<=== Sig at BF Visit 5 " %>% paste0("FDR Visit 3 ===> Proteins: ", length(retained$kept)) %>% print() "<=== Sig at FDR Visit 5 ===> Proteins: " %>% paste0(length(falsediscr$kept)) %>% print()
bf.visit.five <- bonferroni$data %>% dplyr::filter(src == 1) %>% dplyr::arrange(pval) %>% dplyr::left_join(validation.results %>% dplyr::select(term, desc), by = "term", suffix = c(".V5", ".V3")) %>% dplyr::mutate(r = dplyr::if_else(term %in% retained$kept, "+", ""), Name = substr(Name, 1, 25)) %>% dplyr::select(term, Name, desc.V5, desc.V3, r) names(bf.visit.five) <- c("SeqId", "Name", "HR Visit 5", "HR Visit 3", "Repl")
plot.data.visit.five <- dplyr::left_join(univariate.results, validation.results, by = "term", suffix = c(".V5", ".V3")) %>% dplyr::mutate(Name = substr(labels[term, 'name'], 1, 25)) volcanoPlot(plot.data.visit.five, length(proteins), length(bonferroni$kept), suffix = c(".V5", ".V3"), title = "ARIC Visit 5 Incident HF", subtitle = "Color = V5 Significance, Shape = V3 Significance")
fifth.lasso <- elastic.net(fifth.visit, bonferroni$kept, adjust, time, outc) fifth.score <- get.score(fifth.visit, fifth.lasso$coefs, adjust, time, outc) third.score <- get.score(third.visit, fifth.lasso$coefs, adjust, time, outc)
random.forest.list <- filter.rf(fifth, time, outc, 30, adjust) min.tree <- select.min.tree(random.forest.list) third.visit.rf <- predict.new.tree(third, min.tree$rand.tree, time, outc) fifth.score.rf <- rf.tree.score(fifth, min.tree$rand.tree, time, outc) third.score.rf <- rf.tree.score(third, third.visit.rf, time, outc) p <- plot.var.imp(min.tree$rand.tree, labels)
wg.data <- fifth.visit %>% dplyr::select(dplyr::contains("SeqId")) sft <- soft.threshold(wg.data) clustered.tree <- hierTree(wg.data, sft)
eigenGenes <- calculate.eigengenes(wg.data, clustered.tree$dynCl) plot.new.colors(clustered.tree$htree, clustered.tree$dynCl, eigenGenes$mergedColors, c("Old Colors", "New Colors"))
eigenData <- eigenGenes$mergedMEs %>% dplyr::mutate_all(scale, T, T) %>% dplyr::bind_cols(fifth.visit %>% dplyr::select(dplyr::all_of(c(adjust, time, outc, "hfref", "hfpef")))) mod.summary <- summarize.modules(wg.data, eigenGenes$mergedColors, sft, labels) %>% dplyr::mutate(color = paste0("ME", color)) module.incident.hf <- cox.arry(mod.summary$color, eigenData, time, outc, adjust, .src = 1) %>% dplyr::left_join(mod.summary, by = c("term" = "color")) %>% dplyr::select(term, Name, desc, dplyr::everything()) module.incident.ref <- cox.arry(mod.summary$color, eigenData, time, "hfref", adjust, .src = 1) %>% dplyr::left_join(mod.summary, by = c("term" = "color")) %>% dplyr::select(term, Name, desc, dplyr::everything()) module.incident.pef <- cox.arry(mod.summary$color, eigenData, time, "hfpef", adjust, .src = 1) %>% dplyr::left_join(mod.summary, by = c("term" = "color")) %>% dplyr::select(term, Name, desc, dplyr::everything())
echo.vars <- readr::read_csv(echov.mods)[[1]][c(-1, -2)] adtl.vars <- readr::read_csv(addtl.mods)[[1]][c(-1)] fifth.visit.adtl <- add.data(new.path = addtl.vars, old.data = fifth.visit, mods = addtl.mods) fifth.visit.echo <- add.data(new.path = study.path, old.data = fifth.visit.adtl, mods = echov.mods) num.var.adtl <- numeric.id(fifth.visit.echo, c(echo.vars, adtl.vars)) fifth.visit.echo.scaled <- get.scaled(fifth.visit.echo, num.var.adtl) univariate.echo.models <- lin.arry.aggr(proteins, fifth.visit.echo.scaled, echo.vars, adjust, labels) numeric.echo.regression <- univariate.echo.models$numeric table.echo.regression <- univariate.echo.models$character univariate.echo.terms.bonf <- lin.reg.terms.to.keep(univariate.echo.models$all.res, 4877)
univariate.echo.models.unscaled.alladj <- lin.arry.aggr(proteins, fifth.visit.echo, echo.vars, adjust, labels) univariate.echo.models.unscaled.minadj <- lin.arry.aggr(proteins, fifth.visit.echo, echo.vars, c("age", "race", "sex"), labels) univariate.echo.models.scaled.minadj <- lin.arry.aggr(proteins, fifth.visit.echo.scaled, echo.vars, c("age", "race", "sex"), labels) uni.echo.terms.bonf.unscaled.all <- lin.reg.terms.to.keep(univariate.echo.models.unscaled.alladj$all.res, 4877) uni.echo.terms.bonf.unscaled.min <- lin.reg.terms.to.keep(univariate.echo.models.unscaled.minadj, 4877) uni.echo.terms.bonf.scaled.all <- lin.reg.terms.to.keep(univariate.echo.models, 4877) uni.echo.terms.bonf.scaled.min <- lin.reg.terms.to.keep(univariate.echo.models.scaled.minadj, 4877)
min.adj <- c("race", "sex", "age") echo.enet.unscaled.all <- linear.elastic.net.array(fifth.visit.echo, adjust, uni.echo.terms.bonf.unscaled.all) %>% linear.get.score.enet.temp(fifth.visit.echo, adjust) echo.enet.unscaled.min<- linear.elastic.net.array(fifth.visit.echo, min.adj, uni.echo.terms.bonf.unscaled.min) %>% linear.get.score.enet.temp(fifth.visit.echo, min.adj) echo.enet.scaled.min <- linear.elastic.net.array(fifth.visit.echo.scaled, min.adj, uni.echo.terms.bonf.scaled.min) %>% linear.get.score.enet.temp(fifth.visit.echo.scaled, min.adj) echo.enet.scaled.all <- linear.elastic.net.array(fifth.visit.echo.scaled, adjust, uni.echo.terms.bonf.scaled.all) %>% linear.get.score.enet.temp(fifth.visit.echo.scaled, adjust)
univ.echo.models.genes <- dplyr::bind_cols(fifth.visit.echo.scaled, eigenData %>% dplyr::select(-all_of(c(adjust, time, outc, "hfref", "hfpef")))) dyn.colors <- names(eigenData %>% dplyr::select(-all_of(c(adjust, time, outc, "hfref", "hfpef")))) module.lin.arr <- lin.arry.aggr(dyn.colors, univ.echo.models.genes, echo.vars, adjust, labels) echo.heatmap <- module.lin.arr$numeric %>% dplyr::select(term, dplyr::ends_with("estm")) %>% tibble::column_to_rownames("term") %>% data.matrix()
top.candidates.echos <- univariate.echo.terms.bonf$candidates %>% purrr::flatten_chr() %>% table() %>% sort(decreasing = TRUE) top.candidates.echos.names <- top.candidates.echos names(top.candidates.echos.names) <- labels[names(top.candidates.echos), "name"] %>% substr(1, 40) full.cands <- names(top.candidates.echos)[which(top.candidates.echos > 4)]
echo.elastic.nets <- linear.elastic.net.array(fifth.visit.echo.scaled, adjust, univariate.echo.terms.bonf) %>% linear.get.score.enet(fifth.visit.echo.scaled, adjust)
echo.randomforests <- linear.filter.rf.array(fifth.visit.echo.scaled, echo.vars, 30, adjust)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.