A collection of functions for placental DNA methylation analysis.
remotes::install_github('wvictor14/plomics')
library(plyr) library(tidyr) library(dplyr) library(ggplot2)
Computes pairwise linear models between several variables.
library(minfiData) library(plomics) # load example data data(RGsetEx) # calculate pcs on the data betas <- getBeta(RGsetEx) pc_obj <- prcomp(t(na.omit(betas)), center = T, scale = T) # get pc scores for each sample rotated <- pc_obj$x #rsquared rsq <- lmmatrix(dep = rotated, ind = as.data.frame(pData(RGsetEx)[,c('Sample_Group', 'age', 'sex', 'status')])) #pvalue pva <- lmmatrix(dep = rotated, ind = as.data.frame(pData(RGsetEx)[,c('Sample_Group', 'age', 'sex', 'status')]), metric = 'Pvalue') ##### plot # reshape first rsq_plot <- rsq %>% as.data.frame() %>% # add dep variables mutate(dep = rownames(rsq)) %>% # reshape gather(PC, rsquared, -dep) pva_plot <- pva %>% as.data.frame() %>% # add dep variables mutate(dep = rownames(rsq)) %>% # reshape gather(PC, pval, -dep) %>% # pvalue categories mutate(pval_cat = case_when( pval > 0.05 ~ '> 0.05', pval < 0.05 & pval > 0.01 ~ '< 0.05', pval < 0.01 & pval > 0.001 ~ '< 0.01', pval < 0.001 ~ '< 0.001' )) ggplot(rsq_plot, aes(x = PC, y = dep, fill = rsquared)) + geom_tile() + theme_bw() + scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) + scale_fill_gradientn(colours=c("white", "#ffffcc", "#41b6c4", "#2c7fb8", "#253494"), breaks = c(0,0.5,1), limits = c(0,1), guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")) ggplot(pva_plot, aes(x = PC, y = dep, fill = pval_cat)) + geom_tile() + theme_bw() + scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) + scale_fill_manual(values = c('> 0.05' = 'white', '< 0.05' = '#fee8c8', '< 0.01' = '#fdbb84', '< 0.001' = '#e34a33'))
To test if covariates are confounding each other, we need to pairwise tests of independence between covariates. If at least one covariate is numeric, we can use linear regression. Otherwise if both covariates are categorical, then a chi squared test must be used.
variables <- data.frame( row = c(1, 1, 2, 2, 3, 3), column = c(1, 1, 1, 2, 2, 2), Sex = c('m', 'm', 'm', 'f', 'f', 'f'), age = c(18, 19, 18, 27, 30, 16), ethnicity = c('AF', 'AF', 'AF', 'EU', 'EU', 'AS') ) tests <- pairtest(variables) # make categories tests <- tests %>% mutate(pval_cat = if_else(p.value < 0.001, '< 0.001', if_else(p.value < 0.01, '< 0.01', if_else(p.value < 0.05, '< 0.05', '<1')))) tests # plot heatmap of associations ggplot(tests, aes(x=Row, y = Column, fill = pval_cat)) + geom_tile(col = 'grey') + theme(panel.background = element_blank()) + scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0)) + scale_fill_manual(values = c('> 0.05' = 'white', '< 0.05' = '#fee8c8', '< 0.01' = '#fdbb84', '< 0.001' = '#e34a33'))
findsentrix
takes a vector of sentrix IDs (chip identifiers) and searches a directory for IDAT
files that match. The returned data frame contains two columns: (1) the sentrix ID (2) unique file
paths for each idat that matches the sentrix ID.
Here is an example using the robinson lab master sample sheet:
# read in master sample sheet ss <- readxl::read_xlsx('Z:/ROBLAB6 Infinium450k John/Master_Sample_Sheet.xlsx') ## specify idat directory idat_dir <- 'Z:/ROBLAB6 Infinium450k John/EPIC Raw data/' ss <- ss %>% # Take the first 6 EPIC samples dplyr::arrange(desc(Platform)) %>% dplyr::slice(1:6) %>% dplyr::select(Sample_Name, Sentrix_ID, Sentrix_Position) %>% # create sentrix column dplyr::mutate(Sentrix = paste0(Sentrix_ID, '_', Sentrix_Position))
We created a sentrix ID by taking the chip serial number (confusingly named as "sentrix_ID") and pasting this to the position identifier ("Sentrix_Position"):
Sentrix: r ss$Sentrix[1]
Sentrix Chip Number: r as.character(ss$Sentrix_ID[1])
Sentrix Position on chip: r ss$Sentrix_Position[1]
Now we can use findsentrix
to find the filepaths for idats that match each sentrix identifier:
idatfiles <- findsentrix(sentrix = ss$Sentrix, directory = idat_dir) idatfiles # join all matches, retaining unmatched and multiple matched IDs ss <- ss %>% dplyr::full_join(idatfiles, by = 'Sentrix')
Finally, we can load idats using this dataframe:
## Now you can load in these samples with minfi::read.metharray.exp rgset <- minfi::read.metharray.exp(targets = as.data.frame(ss), verbose = T) rgset
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.