knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

options(rmarkdown.html_vignette.check_title = FALSE)
source('../R/load-data.R')
source('../R/make-tables.R')
source('../R/temp-fig1.R')
source('../R/cox-array.R')
source('../R/elastic-net.R')
source('../R/random-forest.R')
source('../R/wgcna.R')

library(magrittr)

set.seed(101010)

doParallel::registerDoParallel(12)
future::plan(future::multisession, workers = 8)
path <- function(...) { 
  return(file.path('..', 'inst', 'extdata', ...))  
}

label.path <- path('proteinMapping.csv')
study.path <- path('ARICmaster_121820.dta')

soma.path <- path('somaV5.txt')
mods.path <- path('visit-five')
labels <-
  get.labels(label.path, sid,
             uniprot.full.name,
             entrezgenesymbol,
             flag2 == 0)

adjust.vars <- 
  file.path(mods.path, 'adjusted.csv') %>%
  readr::read_csv()

HUNT.master <- 
  haven::read_dta(study.path)

soma.data <- get.soma(soma.path, labels)

proteins <- labels$term

time <- "fuptime"
outc <- "hfdiag"

adjust <- adjust.vars$Variable[-1]
vars <- c(proteins, adjust, time, outc)
HUNT.data <- 
  get.visit(soma.data = soma.data,
            path.mods = mods.path,
            master = HUNT.master)


table1.hunt <- 
  make.tables(HUNT.data, 
              time, outc, 
              path('table-template.csv'))

res.hunt <- 
  consort('.', mods.path, soma.data, 
          HUNT.data, labels, time, outc)
res.hunt
knitr::kable(table1.hunt)
res.aric    <- readRDS(file = 'resAric.rds')
table1.aric <- readRDS(file = 'table1Aric.rds')
res.aric
knitr::kable(table1.aric)
num.var <- numeric.id(HUNT.data, adjust)

data.hunt <- HUNT.data  %>%
  get.scaled(num.var, proteins)

hunt <- data.hunt %>% 
  dplyr::select(dplyr::all_of(vars))

validation.results <-
  cox.arry(proteins, hunt,a 
           time, outc, adjust, 2)
validation.results

univariate.results <- 
  readRDS(file = 'univariateResults.rds')
plot.data <-
  dplyr::bind_rows(univariate.results,
                   validation.results) %>%
  dplyr::mutate(Name = labels[term, "name"])

bonferroni <- 
  terms.to.keep(plot.data,
                .pval = 0.05/length(proteins), 1)


retained <-
  terms.to.keep(plot.data, 
                .pval = 0.05, 2, 
                .terms = bonferroni$kept)
bonferroni2 <- 
  terms.to.keep(plot.data,
                .pval = 0.05/length(proteins), 2)

falsediscr <- 
  terms.to.keep(plot.data, 
                .pval = 0.05, 1)


retained2 <-
  terms.to.keep(plot.data, 
                .pval = 0.05, 1, 
                .terms = bonferroni2$kept)
"<=== Sig at BF Visit 5 ===> Proteins: " %>%
  paste0(length(bonferroni$kept)) %>%
  print()

"<=== Sig at BF Visit 5" %>% 
  paste0("FDR HUNT ===> Proteins: ",
         length(retained$kept)) %>%
  print()

"<=== Sig at BF HUNT ===> Proteins: " %>%
  paste0(length(bonferroni2$kept)) %>%
  print()

"<=== Sig at BF HUNT" %>% 
  paste0("FDR Visit 5 ===> Proteins: ",
         length(retained2$kept)) %>%
  print()
bf.visit.five <- bonferroni$data %>% 
  dplyr::filter(src == 1) %>% 
  dplyr::arrange(pval) %>%
  dplyr::left_join(validation.results %>% 
                     dplyr::select(term, desc),
                   by = "term",
                   suffix = c(".V5", ".HUNT")) %>%
  dplyr::select(term, Name, desc.V5, desc.HUNT) %>%
  dplyr::mutate(r = dplyr::if_else(term %in% retained$kept, 
                                   "+", ""))

bf.visit.five
plot.data.visit.five <- 
  dplyr::left_join(univariate.results,
                   validation.results,
                   by = "term",
                   suffix = c(".V5", ".HUNT"))

volcanoPlot(plot.data.visit.five, 
            length(proteins),
            length(bonferroni$kept), 
            suffix = c(".V5", ".HUNT"))
volcanoPlot(plot.data.visit.five, 
            length(proteins),
            length(bonferroni$kept), 
            suffix = c(".HUNT", ".V5"))
fifth.lasso.coefs <- readRDS(file = 'aricLassoCoefs.rds')

HUNT.lasso <- elastic.net(data.hunt, 
                          bonferroni$kept, 
                          adjust, time, outc)

HUNT.lasso.score <- get.score(data.hunt, 
                              fifth.lasso.coefs, 
                              adjust, time, outc)
visit.five.vars <- readRDS(file = 'rfsrcTerms.rds')

### DONT SEND THESE VARIABLES ## 

random.forest.list <- 
  filter.rf(hunt, time, outc, 30, adjust)

min.tree <- select.min.tree(random.forest.list)

hunt.score.rf <- 
  rf.tree.score(hunt, min.tree$rand.tree,
                time, outc)

valid.tree <- 
  valid.rf(hunt, time, outc,
           visit.five.vars)

p  <- plot.var.imp(min.tree$rand.tree,  labels)
p1 <- plot.var.imp(valid.tree$rand.tree, labels)
wg.data <- data.hunt %>%
  dplyr::select(dplyr::contains("SeqId"))

sft <- soft.threshold(wg.data)
clustered.tree <- hierTree(wg.data, sft)
dynamicColors <- readRDS(file = 'AricDynCls.rds')

eigen.genes.validation <- 
  calculate.eigengenes(wg.data, dynamicColors)

plot.new.colors(clustered.tree$htree, 
                dynamicColors, 
                eigenGenes$mergedColors,
                c("Visit 5 Module Assignment",
                  "HUNT Module Assignment Colors"))
eigen.data.valid <- eigen.genes.validation$mergedMEs %>%
  dplyr::mutate_all(scale, T, T) %>%
  dplyr::bind_cols(data.hunt %>% 
    dplyr::select(dplyr::all_of(c(adjust, time, outc))))

mod.summary.valid <- 
  summarize.modules(wg.data, 
                    dynamicColors,
                    sft, labels) %>%
  dplyr::mutate(color = paste0("ME", color))

module.incident.hf.valid <- cox.arry(mod.summary$color,
                                     eigen.data.valid, time, outc, 
                                     adjust, .src = 1) %>%
  dplyr::left_join(mod.summary.valid, by = c("term" = "color")) %>%
  dplyr::select(term, Name, desc, dplyr::everything())
saveRDS(list(validation.results, plot.data, bf.visit.five), 
        file = "univ.rds")

validation.results
plot.data
bf.visit.five
saveRDS(list(HUNT.lasso, 
             HUNT.lasso.score), 
        file = "enet.rds")

saveRDS(list(p, p1), 
        file = "rfSRCgraphs.rds")

saveRDS(list(HUNT.lasso, 
             HUNT.lasso.score),
        file = "huntLASSO.rds")

saveRDS(list(mod.summary.valid,
             module.incident.hf.valid),
        file = "RFOutput.rds")

saveRDS(list(valid = eigen.data.valid,
             modls = mod.summary.valid,
             module.incident.valid, 
             hierTree) %>%
        file = "wgcna.rmd")
# 
# eigenGenes <- 
#   calculate.eigengenes(wg.data, clustered.tree$dynCl)
# 
# plot.new.colors(clustered.tree$htree, 
#                 clustered.tree$dynCl, 
#                 eigenGenes$mergedColors,
#                 c("Old Colors", "New Colors"))
# 
# 
# eigenData <- eigenGenes$mergedMEs %>%
#   dplyr::mutate_all(scale, T, T) %>%
#   dplyr::bind_cols(data.hunt %>% 
#     dplyr::select(dplyr::all_of(c(adjust, time,outc))))
# 
# mod.summary <- 
#   summarize.modules(wg.data, 
#                     eigenGenes$mergedColors,
#                     sft, labels) %>%
#   dplyr::mutate(color = paste0("ME", color))
# 
# module.incident.hf <- cox.arry(mod.summary$color,
#                                eigenData, time, outc, 
#                                adjust, .src = 1) %>%
#   dplyr::left_join(mod.summary, by = c("term" = "color")) %>%
#   dplyr::select(term, Name, desc, dplyr::everything())
# 


pranavdorbala/proteomicsHF documentation built on March 9, 2021, 12:22 a.m.