knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

options(rmarkdown.html_vignette.check_title = FALSE)
source('../R/load-data.R')
source('../R/make-tables.R')
source('../R/temp-fig1.R')
source('../R/cox-array.R')
source('../R/elastic-net.R')
source('../R/random-forest.R')
source('../R/wgcna.R')
source('../R/linear-array.R')
source('../R/linear-elastic-net.R')
source('../R/linear-random-forest.R')

library(magrittr)

set.seed(101010)
doParallel::registerDoParallel(12)
future::plan(future::multisession, workers = 8)
path <- function(...) { 
  return(file.path('..', 'inst', 'extdata', ...))  
}

label.path <- path('study-data', 'proteinMapping.csv')
study.path <- path('study-data', 'ARICmaster_121820.dta')

soma5.path <- path('study-data', 'ARICsomaV5.txt')
soma3.path <- path('study-data', 'ARICsomaV3.txt')

mods5.path <- path('aux-data', 'visit-five')
mods3.path <- path('aux-data', 'visit-three')

addtl.vars <- path('study-data', 'ARIC_newvar_010621.dta')
addtl.mods <- path('aux-data', 'visit-five', 'adtlvars.csv')
echov.mods <- path('aux-data', 'visit-five', 'echovars.csv')
labels <-
  get.labels(label.path, sid,
             uniprot.full.name,
             entrezgenesymbol,
             flag2 == 0)

adjust.vars <- 
  readr::read_csv(file.path(mods5.path, 
                            'adjusted.csv'))

aric.master <- 
  haven::read_dta(study.path)

soma.data.5 <-
  get.soma(soma5.path, labels = labels)

soma.data.3 <-
  get.soma(soma3.path, labels = labels)

proteins <- labels$term
time <- "fuptime"
outc <- "hfref"

adjust <- adjust.vars$Variable[-1]
vars <- c(proteins, adjust, time, outc)
visit.five <- 
  get.visit(soma.data = soma.data.5,
            path.mods = mods5.path,
            master = aric.master)

table1 <- 
  make.tables(visit.five, 
              time, outc, 
              path('aux-data', 
                   'table-template.csv'))

res <- 
  consort('.', mods5.path, soma.data.5, 
          visit.five, labels, time, outc)
visit.three <- 
  get.visit(soma.data = soma.data.3,
            path.mods = mods3.path,
            master = aric.master)

table2 <- 
  make.tables(visit.three,
              time, outc,
              path('aux-data',
                   'table-template.csv'))

res2 <- 
  consort('.', mods3.path, soma.data.3, 
          visit.three, labels, time, outc)
res
knitr::kable(table1)
res2
knitr::kable(table2)
num.var <- numeric.id(visit.five, adjust)

fifth.visit <- visit.five  %>%
  get.scaled(num.var, proteins)

third.visit <- visit.three %>%
  get.scaled(num.var, proteins)
fifth <- fifth.visit %>% 
  dplyr::select(dplyr::all_of(vars))

univariate.results <-
  cox.arry(proteins, fifth, 
           time, outc, adjust, 1)
# fifth.ref <- fifth.visit %>% 
#   dplyr::select(dplyr::all_of(c(proteins, adjust, time, 'hfpef')))
# 
# fifth.pef <- fifth.visit %>% 
#   dplyr::select(dplyr::all_of(c(proteins, adjust, time, 'hfref')))
# 
# univariate.results.ref <-
#   cox.arry(proteins, fifth.ref, 
#            time, 'hfpef', adjust, 1)
# 
# univariate.results.pef <-
#   cox.arry(proteins, fifth.pef, 
#            time, 'hfref', adjust, 2)
# 
# plot.data.refpef <-
#   dplyr::bind_rows(univariate.results.ref,
#                    univariate.results.pef) %>%
#   dplyr::mutate(Name = labels[term, "name"])
# 
# bonferroni.ref <- 
#   terms.to.keep(plot.data.refpef,
#                 .pval = 0.05/length(proteins), 1)
# 
# 
# bf.visit.five.ref <- bonferroni.ref$data %>% 
#   dplyr::filter(src == 1) %>% 
#   dplyr::arrange(pval) %>%
#   dplyr::left_join(univariate.results.pef %>% 
#                      dplyr::select(term, desc),
#                    by = "term",
#                    suffix = c(".pEF", ".rEF")) %>%
#   dplyr::mutate(Name = substr(Name, 1, 25)) %>%
#   dplyr::select(term, Name, desc.pEF, desc.rEF, )
# 
# names(bf.visit.five.ref) <- c("SeqId", "Name", "HR Visit 5 pEF", "HR Visit 5 rEF")
# 
# bf.visit.five.ref %>% write.csv('~/Desktop/pef.csv', row.names=F)
# # 
# 
# plot.data.visit.five.refpef <- 
#   dplyr::left_join(univariate.results.ref,
#                    univariate.results.pef,
#                    by = "term",
#                    suffix = c(".pEF", ".rEF")) %>%
#   dplyr::mutate(Name = substr(labels[term, 'name'], 1, 25))
# 
# volcanoPlot.temp(plot.data.visit.five.refpef, 
#             length(proteins),
#             length(proteins),
#             suffix = c(".rEF", ".pEF"), 
#             title = "ARIC Visit 5 Incident HFpEF (pEF HR on x-axis)",
#             subtitle = "Color = rEF Significance, Shape = pEF Significance") 
# 
# 
# pdv5 <- plot.data.visit.five.refpef %>%
#   dplyr::mutate(pv1 = cut(pval.rEF, breaks = c(0, 0.05/4877, 0.05, 1), labels = c("royalblue", "red2", "grey30")),
#                 pv1 = as.character(pv1),
#                 pv2 = cut(pval.pEF, breaks = c(0, 0.05/4877, 0.05, 1), labels = c(17, 20, 4)),
#                 pv2 = as.numeric(levels(pv2))[pv2]) %>%
#   dplyr::group_by(pv1, pv2) %>%
#   dplyr::group_split()
# 
# par(pty = "s")
# j <- rep(c(rgb(76/255, 76/255, 76/255, 0.1), rgb(238/255,0,0, 0.2), rgb(0, 35/255, 102/255, 0.6)), each = 3)
# 
# for (i in 1:length(pdv5)) {
#   print(j[i])
#   plot(log(pdv5[[i]]$hazr.rEF), log(pdv5[[i]]$hazr.pEF), col = j[i], pch = pdv5[[i]]$pv2[1],
#        xlim = c(-0.5, 0.75),
#        ylim = c(-0.5, 0.75),
#        xlab = "",
#        ylab = "",
#        asp = 1)
#   par(new = T)
# }
# title(main = "Beta vs Beta Visit 5 rEF vs Visit 5 pEF",
#       xlab = "Visit 5 rEF Beta",
#       ylab = "Visit 5 pEF Beta")
# 
# abline(h=0, lty=c(2), col = c("black"))
# abline(v=0, lty=c(2), col = c("black"))
# abline(a=0, b=1, lty=c(2), col = c("black"))
# 
third <- third.visit %>% 
  dplyr::select(dplyr::all_of(vars))

validation.results <- 
  cox.arry(proteins, third, 
           time, outc,  adjust, 2)
plot.data <-
  dplyr::bind_rows(univariate.results,
                   validation.results) %>%
  dplyr::mutate(Name = labels[term, "name"])

bonferroni <- 
  terms.to.keep(plot.data,
                .pval = 0.05/length(proteins), 1)
falsediscr <- 
  terms.to.keep(plot.data, 
                .pval = 0.05, 1)

retained <-
  terms.to.keep(plot.data, 
                .pval = 0.05, 2, 
                .terms = bonferroni$kept)

retained2 <-
  terms.to.keep(plot.data, 
                .pval = 0.05/length(bonferroni$kept), 2, 
                .terms = bonferroni$kept)
"<=== Sig at BF Visit 5 ===> Proteins: " %>%
  paste0(length(bonferroni$kept)) %>%
  print()

"<=== Sig at BF Visit 5 " %>% 
  paste0("FDR Visit 3 ===> Proteins: ",
         length(retained$kept)) %>%
  print()

"<=== Sig at FDR Visit 5 ===> Proteins: " %>%
  paste0(length(falsediscr$kept)) %>%
  print()
bf.visit.five <- bonferroni$data %>% 
  dplyr::filter(src == 1) %>% 
  dplyr::arrange(pval) %>%
  dplyr::left_join(validation.results %>% 
                     dplyr::select(term, desc),
                   by = "term",
                   suffix = c(".V5", ".V3")) %>%
  dplyr::mutate(r = dplyr::if_else(term %in% retained$kept, 
                                   "+", ""),
                Name = substr(Name, 1, 25)) %>%
  dplyr::select(term, Name, desc.V5, desc.V3, r)

names(bf.visit.five) <- c("SeqId", "Name", "HR Visit 5", "HR Visit 3", "Repl")
plot.data.visit.five <- 
  dplyr::left_join(univariate.results,
                   validation.results,
                   by = "term",
                   suffix = c(".V5", ".V3")) %>%
  dplyr::mutate(Name = substr(labels[term, 'name'], 1, 25))

volcanoPlot(plot.data.visit.five, 
            length(proteins),
            length(bonferroni$kept),
            suffix = c(".V5", ".V3"), 
            title = "ARIC Visit 5 Incident HF",
            subtitle = "Color = V5 Significance, Shape = V3 Significance") 
fifth.lasso <- elastic.net(fifth.visit, 
                           bonferroni$kept, 
                           adjust, time, outc)

fifth.score <- get.score(fifth.visit, 
                         fifth.lasso$coefs, 
                         adjust, time, outc)

third.score <- get.score(third.visit, 
                         fifth.lasso$coefs, 
                         adjust, time, outc)
random.forest.list <- 
  filter.rf(fifth, time, outc, 30, adjust)

min.tree <- select.min.tree(random.forest.list)

third.visit.rf <- 
  predict.new.tree(third, min.tree$rand.tree, 
                   time, outc)

fifth.score.rf <- 
  rf.tree.score(fifth, min.tree$rand.tree,
                time, outc)

third.score.rf <- 
  rf.tree.score(third, third.visit.rf, 
                time, outc)

p <- plot.var.imp(min.tree$rand.tree, labels)
wg.data <- fifth.visit %>%
  dplyr::select(dplyr::contains("SeqId"))

sft <- soft.threshold(wg.data)
clustered.tree <- hierTree(wg.data, sft)
eigenGenes <- 
  calculate.eigengenes(wg.data, clustered.tree$dynCl)

plot.new.colors(clustered.tree$htree, 
                clustered.tree$dynCl, 
                eigenGenes$mergedColors,
                c("Old Colors", "New Colors"))
eigenData <- eigenGenes$mergedMEs %>%
  dplyr::mutate_all(scale, T, T) %>%
  dplyr::bind_cols(fifth.visit %>% 
    dplyr::select(dplyr::all_of(c(adjust, time, outc, "hfref", "hfpef"))))

mod.summary <- 
  summarize.modules(wg.data, 
                    eigenGenes$mergedColors,
                    sft, labels) %>%
  dplyr::mutate(color = paste0("ME", color))

module.incident.hf <- cox.arry(mod.summary$color,
                               eigenData, time, outc, 
                               adjust, .src = 1) %>%
  dplyr::left_join(mod.summary, by = c("term" = "color")) %>%
  dplyr::select(term, Name, desc, dplyr::everything())

module.incident.ref <- cox.arry(mod.summary$color,
                               eigenData, time, "hfref", 
                               adjust, .src = 1) %>%
  dplyr::left_join(mod.summary, by = c("term" = "color")) %>%
  dplyr::select(term, Name, desc, dplyr::everything())


module.incident.pef <- cox.arry(mod.summary$color,
                               eigenData, time, "hfpef", 
                               adjust, .src = 1) %>%
  dplyr::left_join(mod.summary, by = c("term" = "color")) %>%
  dplyr::select(term, Name, desc, dplyr::everything())
echo.vars <- readr::read_csv(echov.mods)[[1]][c(-1, -2)]
adtl.vars <- readr::read_csv(addtl.mods)[[1]][c(-1)]

fifth.visit.adtl <- 
  add.data(new.path = addtl.vars, 
           old.data = fifth.visit,
           mods = addtl.mods)

fifth.visit.echo <- 
  add.data(new.path = study.path,
           old.data = fifth.visit.adtl,
           mods = echov.mods) 

num.var.adtl <- 
  numeric.id(fifth.visit.echo,
             c(echo.vars, 
               adtl.vars))

fifth.visit.echo.scaled <- 
  get.scaled(fifth.visit.echo, 
             num.var.adtl)

univariate.echo.models <- 
  lin.arry.aggr(proteins,
                fifth.visit.echo.scaled,
                echo.vars, adjust, labels) 

numeric.echo.regression <- 
  univariate.echo.models$numeric

table.echo.regression <- 
  univariate.echo.models$character 

univariate.echo.terms.bonf <- 
  lin.reg.terms.to.keep(univariate.echo.models$all.res, 4877)
univariate.echo.models.unscaled.alladj <- 
  lin.arry.aggr(proteins,
                fifth.visit.echo,
                echo.vars, adjust, labels) 

univariate.echo.models.unscaled.minadj <- 
  lin.arry.aggr(proteins,
                fifth.visit.echo,
                echo.vars, c("age", "race", "sex"), labels) 

univariate.echo.models.scaled.minadj <- 
  lin.arry.aggr(proteins,
                fifth.visit.echo.scaled,
                echo.vars, c("age", "race", "sex"), labels) 


uni.echo.terms.bonf.unscaled.all <- 
  lin.reg.terms.to.keep(univariate.echo.models.unscaled.alladj$all.res, 4877)

uni.echo.terms.bonf.unscaled.min <- 
  lin.reg.terms.to.keep(univariate.echo.models.unscaled.minadj, 4877)

uni.echo.terms.bonf.scaled.all <- 
  lin.reg.terms.to.keep(univariate.echo.models, 4877)

uni.echo.terms.bonf.scaled.min <- 
  lin.reg.terms.to.keep(univariate.echo.models.scaled.minadj, 4877)
min.adj <- c("race", "sex", "age")

echo.enet.unscaled.all <- 
  linear.elastic.net.array(fifth.visit.echo, adjust,
                           uni.echo.terms.bonf.unscaled.all) %>%
  linear.get.score.enet.temp(fifth.visit.echo, adjust)

echo.enet.unscaled.min<- 
  linear.elastic.net.array(fifth.visit.echo, min.adj,
                           uni.echo.terms.bonf.unscaled.min) %>%
  linear.get.score.enet.temp(fifth.visit.echo, min.adj)

echo.enet.scaled.min <- 
  linear.elastic.net.array(fifth.visit.echo.scaled, min.adj,
                           uni.echo.terms.bonf.scaled.min) %>%
  linear.get.score.enet.temp(fifth.visit.echo.scaled, min.adj)

echo.enet.scaled.all <- 
  linear.elastic.net.array(fifth.visit.echo.scaled, adjust,
                           uni.echo.terms.bonf.scaled.all) %>%
  linear.get.score.enet.temp(fifth.visit.echo.scaled, adjust)
univ.echo.models.genes <-
  dplyr::bind_cols(fifth.visit.echo.scaled, 
                   eigenData %>% dplyr::select(-all_of(c(adjust, time, outc, "hfref", "hfpef"))))

dyn.colors <- names(eigenData %>% dplyr::select(-all_of(c(adjust, time, outc, "hfref", "hfpef"))))
module.lin.arr <- lin.arry.aggr(dyn.colors, univ.echo.models.genes, echo.vars, adjust, labels)

echo.heatmap <- module.lin.arr$numeric %>% dplyr::select(term, dplyr::ends_with("estm")) %>%
  tibble::column_to_rownames("term") %>% data.matrix()
top.candidates.echos <- 
  univariate.echo.terms.bonf$candidates %>% 
  purrr::flatten_chr() %>% 
  table() %>% 
  sort(decreasing = TRUE)

top.candidates.echos.names <-
  top.candidates.echos

names(top.candidates.echos.names) <- 
  labels[names(top.candidates.echos), "name"] %>%
  substr(1, 40)

full.cands <- 
  names(top.candidates.echos)[which(top.candidates.echos > 4)]
echo.elastic.nets <- 
  linear.elastic.net.array(fifth.visit.echo.scaled, adjust,
                           univariate.echo.terms.bonf) %>%
  linear.get.score.enet(fifth.visit.echo.scaled, adjust)
echo.randomforests <- 
  linear.filter.rf.array(fifth.visit.echo.scaled,
                         echo.vars, 30, adjust)


pranavdorbala/proteomicsHF documentation built on March 9, 2021, 12:22 a.m.