knitr::opts_chunk$set(echo = TRUE)
#setwd("E:/BMIQ MAV/19-07-30-split_cohort_and_run_season/reports/tG_summer")
knitr::opts_knit$set(root.dir = here::here())
getwd()
library(GenABEL)
library(ggplot2)
library(missMethyl)
library(limma) 
library(IlluminaHumanMethylationEPICanno.ilm10b4.hg19)
library(DMRcate)
library(tidyverse)
library(kableExtra)
library(formattable)
library(Hmisc)
dir.create(paste0("./reports", "/", params$name))
data_output <-      paste0("./reports", "/", params$name, "/")

raw_data <-         paste0(params$raw_results)
combat_data_raw <-  readRDS(paste0(params$combat_data)) 
pheno_nona_raw <-   readRDS(paste0(params$pheno_nona))
pheno_forpcs <-     readRDS(paste0(params$pheno_forpcs))
corrplot <-         read.table(paste0(params$corr_plot), 
                               sep = "\t", 
                               header=T
                               )

###### ALSO FILL IN SvA PLOT PATH BELOW #######

cols<-c(
  "Row.names",
  "logFC",
  "AveExpr",
  "t",
  "P.Value",
  "adj.P.Val",
  "B",
  "chr",
  "pos",
  "strand",
  "Name",
  "Relation_to_Island",
  "UCSC_RefGene_Name",
  "UCSC_RefGene_Accession",
  "UCSC_RefGene_Group",
  "Regulatory_Feature_Group"
)

# # For tables saved as NON_TSV
# var_raw<-read_tsv(paste0(raw_data), 
#          col_names = F,
#          skip = 1, 
#          na = character()
#          )
# var<-var_raw[,-1] 


# For tables saved as TSV
var_raw<-readr::read_tsv(paste0(raw_data), 
         col_names = T,
         na = character()
         )

var<-var_raw

# ----------------------------------------------------------

#colnames(var)<-cols
var <- var %>% arrange(adj.P.Val) %>% rowid_to_column("Rank")
var$UCSC_RefGene_Name<-sub(";.*", "", var$UCSC_RefGene_Name)
var$UCSC_RefGene_Group<-sub(";.*", "", var$UCSC_RefGene_Group)


Lambdas & covariates {.tabset .tabset-fade .tabset-pills}

Lambdas

result_list <- tibble(
  "analysis" = params$name,
  "pcs" = params$pcs,
  "samples" = nrow(pheno_nona_raw),
  "Lambda" = (estlambda(var$P.Value, method  =  "median"))$estimate,
  "FDR<0.25" = sum(var$adj.P.Val < 0.25),
  "FDR<0.20" = sum(var$adj.P.Val < 0.20),
  "FDR<0.15" = sum(var$adj.P.Val < 0.15),
  "FDR<0.10" = sum(var$adj.P.Val < 0.10),
  "FDR<0.05" = sum(var$adj.P.Val < 0.05)
)

result_list


Covariates

tibble::enframe(colnames(pheno_forpcs)) %>% 
  slice(-1) %>% 
  select(-"name") %>% 
  rowid_to_column(var = "Number") %>% 
  rename("Covariate" = "value") 

Top 500 array hits

var %>%
  #mutate_at(7, funs(round(., 3))) %>%
  arrange(adj.P.Val) %>%
  dplyr::slice(1:500) %>%
  DT::datatable(
    filter = 'top',
    rownames = FALSE,
    extensions = list('FixedColumns' = NULL,
                      'Buttons' = NULL),
    options = list(pageLength = 10,
                   dom = 'Bfrtip',
                   scrollX = TRUE,
                   fixedColumns = TRUE,
                   columnDefs = list(
                     list(
                       className = 'dt-center', 
                       targets = 0:(ncol(var)-1))
                     ),
                   buttons = list(
                     list(
                       extend = 'colvisGroup',
                       text = 'simplified',
                       show = c(0,2,3,6,7,13,15),
                       hide = c(1,4,5,8,9,10,11,12,14,16)
                       ),
                     list(
                       extend = 'colvisGroup',
                       text = 'show all',
                       show = c(0:16)
                       ),
                     list(
                       extend = 'colvis',
                       text = 'see columns',
                       collectionLayout= 'fixed two-column'
                       )
                     )
    )
  ) %>%
  DT::formatRound(c('logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val', 'B'), 5) %>%
  DT::formatStyle(columns = c(1:14, 16:17), `text-align` = 'center') %>%
  DT::formatStyle(
    'adj.P.Val',
    #color = DT::styleInterval(0.25, c('black', 'Black')),
    backgroundColor = DT::styleInterval(0.25, c('#b3de69', '')),
    fontWeight = DT::styleInterval(0.25, c('bold', ''))
  ) %>%
  DT::formatStyle(
    'AveExpr',
    backgroundColor = DT::styleInterval(0.80, c('', '#fbb4ae')),
    fontWeight = DT::styleInterval(0.80, c('', 'bold'))
  )


Top 100 named hits {.tabset .tabset-fade .tabset-pills}

Top 100 named genes

named_hits <- var[!(is.na(var$UCSC_RefGene_Name) | var$UCSC_RefGene_Name == ""),]
top100_named_genes <-
  named_hits %>%
  arrange(adj.P.Val) %>%
  slice(1:100) %>%
  dplyr::select(UCSC_RefGene_Name)
top100_named_genes %>%
  dplyr::rename("Top 100 genes" = "UCSC_RefGene_Name") %>% 
  #mutate_at(7, funs(round(., 3))) %>%
  DT::datatable(
    filter = 'top',
    rownames = FALSE,
    extensions = list('FixedColumns' = NULL,
                      'Buttons' = NULL),
    options = list(pageLength = 10,
                   dom = 'Brtip',
                   fixedColumns = TRUE,
                   columnDefs = list(
                     list(
                       className = 'dt-center', 
                       targets = "_all")
                   ),
                   buttons = 'copy'
                   )
    )


Top 100 cgs from named hits

top100_cgs <- 
  named_hits %>%
  arrange(adj.P.Val) %>%
  slice(1:100) %>%
  dplyr::select(Row.names) 
top100_cgs %>%
  dplyr::rename("Top 100 cgs" = "Row.names") %>% 
  #mutate_at(7, funs(round(., 3))) %>%
  DT::datatable(
    filter = 'top',
    rownames = FALSE,
    extensions = list('FixedColumns' = NULL,
                      'Buttons' = NULL),
    options = list(pageLength = 10,
                   dom = 'Brtip',
                   fixedColumns = TRUE,
                   columnDefs = list(
                     list(
                       className = 'dt-center', 
                       targets = "_all")
                   ),
                   buttons = 'copy'
                   )
    )


Significant named genes

named_hits <- var[!(is.na(var$UCSC_RefGene_Name) | var$UCSC_RefGene_Name == ""),]
sig_genenames <-
  named_hits %>%
  arrange(adj.P.Val) %>%
  filter(adj.P.Val <0.25) %>%
  dplyr::select(UCSC_RefGene_Name)
sig_genenames %>%
  dplyr::rename("Significant hits associated with named genes" = "UCSC_RefGene_Name") %>% 
  #mutate_at(7, funs(round(., 3))) %>%
  DT::datatable(
    filter = 'top',
    rownames = FALSE,
    extensions = list('FixedColumns' = NULL,
                      'Buttons' = NULL),
    options = list(pageLength = 10,
                   dom = 'Brtip',
                   fixedColumns = TRUE,
                   columnDefs = list(
                     list(
                       className = 'dt-center', 
                       targets = "_all")
                   ),
                   buttons = 'copy'
                   )
    )


Significant cgs from named hits

sig_cgs <-
  var %>%
  arrange(adj.P.Val) %>%
  filter(adj.P.Val <0.25) %>%
  dplyr::select(Row.names)
sig_cgs %>%
  dplyr::rename("Significant cgs" = "Row.names") %>% 
  DT::datatable(
    filter = 'top',
    rownames = FALSE,
    extensions = list('FixedColumns' = NULL,
                      'Buttons' = NULL),
    options = list(pageLength = 10,
                   dom = 'Brtip',
                   fixedColumns = TRUE,
                   columnDefs = list(
                     list(
                       className = 'dt-center', 
                       targets = "_all")
                   ),
                   buttons = 'copy'
                   )
    )


Universe (without duplicates)

var_univ <- var_raw[!(is.na(var_raw$UCSC_RefGene_Name) | var_raw$UCSC_RefGene_Name == ""),]
var_univ$UCSC_RefGene_Name <- sub(";.*", "", var_univ$UCSC_RefGene_Name)
gene_names <- unique(var_univ$UCSC_RefGene_Name)
gene_names <- tibble::enframe(gene_names, name = NULL, value = "Universe")
readr::write_csv(gene_names, "./universe.csv", col_names = T)

Lambdas and qqplot {.tabset .tabset-fade .tabset-pills}

qqplot and nominal p.values

var_ps <- var$P.Value

gg_qqplot <- function(var_ps, ci = 0.95) {
  n  <- length(var_ps)
  df <- data.frame(
    observed = -log10(sort(var_ps)),
    expected = -log10(ppoints(n)),
    clower   = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)),
    cupper   = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1))
  )
  log10Pe <- expression(paste("Expected -log"[10], plain(P)))
  log10Po <- expression(paste("Observed -log"[10], plain(P)))
  ggplot(df) +
    geom_point(aes(expected, observed), shape = 1, size = 3) +
    geom_abline(intercept = 0, slope = 1, alpha = 0.5) +
    geom_line(aes(expected, cupper), linetype = 2) +
    geom_line(aes(expected, clower), linetype = 2) +
    xlab(log10Pe) +
    ylab(log10Po) +
    ggtitle("qqplot")
}
gg_qqplot(var_ps)
ggplot(var, aes(x=P.Value)) + 
  geom_histogram() +
  ggtitle("P-values: var")


Correlation plot

corrplot %>% 
  rownames_to_column() %>% 
  DT::datatable(filter = 'top',
                rownames = FALSE,
                extensions = 'FixedColumns',
                options = list(
                  pageLength = nrow(corrplot), 
                  dom = 't',
                  scrollX = TRUE,
                  fixedColumns = TRUE
                  )
                ) %>% 
  DT::formatSignif((colnames(corrplot)), digits = 3) %>% 
  DT::formatStyle(columns = 2:ncol(corrplot), `text-align` = 'center') %>% 
  DT::formatStyle((colnames(corrplot)),
    #color = DT::styleInterval(0.25, c('black', 'Black')),
    backgroundColor = DT::styleInterval(0.05, c('#fb9a99', '#b2df8a'))
    #fontWeight = DT::styleInterval(0.25, c('', ''))
  )


Other plots

Pie chart data {.tabset .tabset-fade .tabset-pills}

## Relation to island 
pie_rel_to_island_all <-
  var %>% 
  count(Relation_to_Island) %>% 
  arrange(desc(n)) 

pie_region_100 <-
  var[!(is.na(var$Relation_to_Island) | var$Relation_to_Island == ""),]
pie_rel_to_island_100 <-
  pie_region_100 %>%
  arrange(adj.P.Val) %>%
  dplyr::slice(1:100) %>%
  dplyr::select(Relation_to_Island) %>%
  count(Relation_to_Island) %>%
  arrange(desc(n))
  #rename(Relation_to_Island_100 = Relation_to_Island) %>% 


## RefGene group
pie_ref_group_all <-
  var %>% 
  count(UCSC_RefGene_Group) %>% 
  arrange(desc(n))
pie_ref_group_all$UCSC_RefGene_Group<-sub("^$", "Unknown", pie_ref_group_all$UCSC_RefGene_Group)

pie_region_100 <-
  var[!(is.na(var$UCSC_RefGene_Group) | var$UCSC_RefGene_Group == ""),]
pie_ref_group_100 <-
  pie_region_100 %>%
  arrange(adj.P.Val) %>%
  dplyr::slice(1:100) %>%
  dplyr::select(UCSC_RefGene_Group) %>%
  count(UCSC_RefGene_Group) %>% 
  arrange(desc(n)) 
  #rename(UCSC_RefGene_Group_100 = UCSC_RefGene_Group) %>% 
pie_ref_group_100$UCSC_RefGene_Group<-sub("^$", "Unknown", pie_ref_group_100$UCSC_RefGene_Group)


Relationship to island

colours <- list('#b3de69','#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#fccde5')
plotly::plot_ly() %>%
  plotly::add_pie(data = pie_rel_to_island_100,
                  labels = ~ Relation_to_Island,
                  values = ~ n,
                  type = "pie",
                  #title = list(text = "bums",
                  #             position = "top right",
                  #             font = list(size = 40)),
                  textposition = "outside",
                  textinfo = "label+percent",
                  outsidetextfont = list(family = "Helvetica",
                                         size = 14),
                  marker = list(colors = colours,
                                line = list(color = '#FFFFFF', width = 3)),
                  showlegend = FALSE,
                  sort = TRUE,
                  direction = "clockwise",
                  domain = list(x = c(0, 0.35), y = c(0, 0.7))
                  ) %>%
  plotly::add_pie(data = pie_rel_to_island_all,
                  labels = ~ Relation_to_Island,
                  values = ~ n,
                  type = "pie",
                  #title = list(text = "bums",
                  #             position = "top right",
                  #             font = list(size = 40)),
                  textposition = "outside",
                  textinfo = "label+percent",
                  outsidetextfont = list(family = "Helvetica",
                                         size = 14),
                  marker = list(colors = colours,
                                line = list(color = '#FFFFFF', width = 3)),
                  showlegend = FALSE,
                  sort = TRUE,
                  direction = "clockwise",
                  domain = list(x = c(0.60, 0.95), y = c(0, 0.7))
                  )


Refgene group

colours <- list('#b3de69','#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#fccde5')
plotly::plot_ly() %>%
  plotly::add_pie(data = pie_ref_group_100,
                  labels = ~ UCSC_RefGene_Group,
                  values = ~ n,
                  type = "pie",
                  #title = list(text = "bums",
                  #             position = "top right",
                  #             font = list(size = 40)),
                  textposition = "outside",
                  textinfo = "label+percent",
                  outsidetextfont = list(family = "Helvetica",
                                         size = 14),
                  marker = list(colors = colours,
                                line = list(color = '#FFFFFF', width = 3)),
                  showlegend = FALSE,
                  sort = TRUE,
                  direction = "clockwise",
                  domain = list(x = c(0, 0.35), y = c(0, 0.7))
                  ) %>%
  plotly::add_pie(data = pie_ref_group_all,
                  labels = ~ UCSC_RefGene_Group,
                  values = ~ n,
                  type = "pie",
                  #title = list(text = "bums",
                  #             position = "top right",
                  #             font = list(size = 40)),
                  textposition = "outside",
                  textinfo = "label+percent",
                  outsidetextfont = list(family = "Helvetica",
                                         size = 14),
                  marker = list(colors = colours,
                                line = list(color = '#FFFFFF', width = 3)),
                  showlegend = FALSE,
                  sort = TRUE,
                  direction = "clockwise",
                  domain = list(x = c(0.60, 0.95), y = c(0, 0.7))
                  )

missMethyl {.tabset .tabset-pills}

Universe<-var$Row.names
Sig_0.25<-var$Row.names[var$adj.P.Val<0.25]
top100_named_genes <- var[!(is.na(var$UCSC_RefGene_Name) | var$UCSC_RefGene_Name == ""),]
top_100_cpgs <- top100_named_genes %>% arrange(adj.P.Val) %>%  dplyr::slice(1:100) %>% pull(Row.names)

#FDR<0.25
results_0.25_go <- epicR::mm_sig_cpgs(Sig_0.25)

## 1:100
#top_100_cpgs<-as.character(top_100_cpgs) Tests gene ontology enrichment for
#significant CpGs from Illumina's Infinium HumanMethylation450 or
#MethylationEPIC array, taking into account the differing number of probes per
#gene present on the array.
Universe<-as.character(Universe)
gst_top100_go<-gometh(sig.cpg=top_100_cpgs, 
                      all.cpg = Universe, 
                      collection = "GO", 
                      array.type = "EPIC"
                      )
colnames(gst_top100_go)[2]<-"Term"

### Results 1:100 - topGO pulls the most significant from the pack
results_top100_go <- topGO(gst_top100_go, number = 50)
results_top100_go <- 
  results_top100_go %>% 
  arrange(ONTOLOGY)

Significant hits (FDR<0.25)

epicR::mm_plot_0.25(results_0.25_go)

Top 100 cpgs

results_top100_go %>% 
  DT::datatable(filter = 'top',
                rownames = FALSE,
                extensions = 'FixedColumns',
                options = list(
                  pageLength = 10, 
                  dom = 'tpf',
                  scrollX = TRUE,
                  fixedColumns = TRUE
                  )
                ) %>% 
  DT::formatStyle(columns = c("ONTOLOGY", "N", "DE", "P.DE", "FDR"), 
                  `text-align` = 'center') %>%
  DT::formatSignif(columns = c("P.DE", "FDR"), digits = 3) 


fluentin44/epicR documentation built on June 29, 2022, 6:39 a.m.