knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6, fig.height = 3, fig.align = "center", tidy.opts=list(width.cutoff=80), tidy = TRUE )
Install these packages if you don't already have them:
install.packages("tidyverse") install.packages("readxl") # You'll need Bioconductor to get DESeq2 install.packages("BiocManager") BiocManager::install('DESeq2') # Install mpraprofiler itself devtools::install_github("WeirauchLab/mpraprofiler")
Then, load these libraries:
suppressMessages(library(tidyverse)) suppressMessages(library(readxl)) suppressMessages(library(DESeq2)) library(mpraprofiler)
library(tidyverse) library(readxl) library(DESeq2) library(mpraprofiler)
All the sample data are in the data folder, which is the SLE MPRA dataset
# Read all the data # Here we use data from the package. You can replace with your own data. ext_data <- system.file("extdata", package = "mpraprofiler") file_list <- list.files(ext_data, pattern = "*.xlsx") file_loc <- paste(ext_data, file_list, sep = "/") raw_counts <- read_xlsx(file_loc[2], sheet = "Raw MPRA count data ") # Only include variants with >30 unique tags filter_snps <- raw_counts %>% filter(Unique_Tag >=30) %>% pull(Variant) %>% unique() filter_counts <- raw_counts %>% filter(Variant %in% filter_snps) # Build DESeq2 required matrix cts <- filter_counts[, 6:ncol(filter_counts)] row.names(cts) <- filter_counts$Oligo_ID cts <- as.matrix(cts) # This is wgat the required format for DESeq2 looks like head(cts)
You may refer to DESeq2
package for more details
Create the design matrix for DESeq2.
coldata <- data.frame("condition" = c("ctrl", rep("sle",3)), row.names = colnames(cts)) # check the design matrix has the same order as the count matrix. all(rownames(coldata) == colnames(cts))
Perform DESeq2 analysis.
# library("DESeq2") dds <- DESeqDataSetFromMatrix( countData = cts, colData = coldata, design = ~condition ) dds <- DESeq(dds) # sle vs ctrl result dds_result <- results(dds, contrast=c("condition","sle","ctrl")) # plot fold_enhancer_plot(dds_result, xmin = 0.5)
dds_result_all <- as.data.frame(dds_result) # only care about the SLE variant anno <- read_xlsx(file_loc[1]) sle_variant <- anno %>% filter(SLE_Variant==1) %>% pull(Variant) %>% unique() dds_result_sle <- dds_result_all %>% rownames_to_column("Oligo_ID") %>% left_join(raw_counts[,1:4], by = "Oligo_ID") %>% filter(Variant %in% sle_variant) # 853 enhancer alleles dds_sle_enhancer <- dds_result_sle %>% filter(log2FoldChange>=log2(1.5)& padj< 0.05) dds_enAllele <- dds_sle_enhancer %>% pull(Oligo_ID) %>% unique() length(dds_enAllele) # 482 enhancer variant dds_enVar <- dds_sle_enhancer %>% pull(Variant) %>% unique() length(dds_enVar)
# just use the dataset prepared for DESeq2 cts <- cts + 0.5 # this step calculates the non-ref vs ref ratio for all the variants and sample cts_nr_r_ratio <- nr_r_ratio(cts) head(cts_nr_r_ratio)
# you need to indicate the exp data, ctrl data and the annotation data allelic_all <- allelic_compare(cts_nr_r_ratio[, 2:4], cts_nr_r_ratio[, 1], cts_nr_r_ratio[, c("snp", "compare_nrvsr")]) # exp, ctrl, annotation
max_fold <- max_fold(dds_result_all)
# the allelic log2 normalization data (allelic_compare) and the list of enhancer variant. allelic_p <- allelic_compare_p(allelic_all, dds_enVar)
allelic_data <- allelic_p %>% filter(pFDR < 0.05 & (log2_aver >= log2(1.25) | log2_aver <= -log2(1.25))) # allelic enVar allelic_enVar <- allelic_data %>% pull(snp) %>% unique()
# only sle plot_data <- inner_join(allelic_all,max_fold, by = "snp") %>% left_join(allelic_p[,c("snp","p_value", "pFDR")], by = "snp") %>% filter(snp %in% sle_variant) %>% mutate(pos = if_else(snp %in% allelic_enVar & (log2_aver >= log2(1.25) | log2_aver <= -log2(1.25)), 2, if_else(snp %in% allelic_enVar & (log2_aver < log2(1.25) & log2_aver > -log2(1.25)), 1,0))) # with indicator line allelic_enhancer_dot_plot(plot_data, log2 = "log2_aver", max_fold = "max_fold", label = "pos")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.