library(peprDnaStability) library(dplyr) library(tidyr) library(ggplot2) library(knitr)
gel_data_dir <- "stability_example" gel_metadata <- "stability_example/MG001_stability_metadata.csv" ## number of gel lanes ran number_of_lanes <- 12
gel_design <- read.csv(gel_metadata) %>% mutate(gel = sub(pattern = ".*_Gel_0",replacement = "Gel-", x = gel), gel = sub(pattern = ".jpg",replacement = "", x = gel))
ggplot(gel_design) + geom_raster(aes(x = gel, y = lane, fill = condition), alpha = 0.5) + geom_text(aes(x = gel, y = lane, label = vial)) + facet_wrap(~gel,nrow = 1, scales = "free_x") + theme_bw() + theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "bottom", legend.direction = "horizontal") + labs(y = "Lane", fill = "Conditions")
Images were croped using preview. Below are the quality control plots for processing the raw image files.
Rdat_files <- list.files(path = gel_data_dir,pattern = "Rdata",full.names = TRUE) lane_labels <- paste0("L",c(paste0("0",1:9),10:12))[1:number_of_lanes] for(gel_dat in Rdat_files){ dat_list <- process_gel(gel_dat) print(gel_dat) annotated_gel(image_mat = dat_list['image_dat'], lane_edges_df = dat_list[5][[1]]$lane_edges_df , peaks_df = dat_list['marker_dat'], lane_labels = lane_labels) }
batch_data <- batch_process_gels(gel_data_dir) ## combine data frames intensity_df <- dplyr::data_frame() markers_df <- dplyr::data_frame() binned_df <- dplyr::data_frame() for(x in batch_data){ gel_name <- x$gel intensity_df <- x$intensity_dat %>% dplyr::mutate(gel = gel_name) %>% dplyr::bind_rows(intensity_df) markers_df <- x$marker_dat %>% dplyr::mutate(gel = gel_name) %>% dplyr::bind_rows(markers_df) binned_df <- x$bin_dat %>% dplyr::mutate(gel = gel_name) %>% dplyr::bind_rows(binned_df) } ## adding metadata meta <- read.csv(gel_metadata,stringsAsFactors = F) %>% filter(condition != "Blank") %>% mutate(lane =gsub("L","", lane), lane = as.numeric(lane), lane = ifelse(lane < 10, paste0("L0", lane), paste0("L", lane))) intensity <- intensity_df %>% mutate(lane =gsub("L","", lane), lane = as.numeric(lane), lane = ifelse(lane < 10, paste0("L0", lane), paste0("L", lane))) %>% left_join(meta) markers <- markers_df %>% mutate(lane =gsub("L","", lane), lane = as.numeric(lane), lane = ifelse(lane < 10, paste0("L0", lane), paste0("L", lane))) %>% left_join(meta) binned <- binned_df %>% mutate(lane =gsub("L","", lane), lane = as.numeric(lane), lane = ifelse(lane < 10, paste0("L0", lane), paste0("L", lane))) %>% left_join(meta) highlow_bin <- binned %>% filter(!is.na(vial), bins %in% c("23.1-9.42", "48.5-23.1", "97-48.5","194-97")) %>% mutate(bins = ifelse(bins %in% c("23.1-9.42", "48.5-23.1"), "low","high")) %>% group_by(lane, bins, gel, vial, condition) %>% summarise(bin_intensity = sum(bin_intensity)) highlow_ratio <- highlow_bin %>% spread(bins, bin_intensity) %>% mutate(bin_ratio = high/(low + high)) highlow_ratio_control<- highlow_ratio %>% filter(condition == "Control")
control_ratio <- highlow_ratio_control %>% group_by(gel) %>% summarise(control_ratio = mean(bin_ratio)) highlow_diff <- highlow_ratio %>% left_join(control_ratio) %>% mutate(ratio_diff = bin_ratio - control_ratio) %>% filter(condition != "Control" & condition != "Ladder") %>% mutate(gel = sub(pattern = ".*Gel_0",replacement = "", x = gel), gel = sub(".jpg","", gel)) ggplot(highlow_diff) + geom_point(aes(x = condition, y = ratio_diff, color = gel)) + labs(x = "Treatment", y = "Ratio Difference from Control") + theme_bw()
make_one_sided_ttest_df <- function (dat, value, gelAsFactor = TRUE) { tt_df <- data_frame() ## Pairwise comparison to control for each bin and each condition for(bin in unique(dat$bins)){ df <- dat %>% filter(bins == bin) y <- df %>% filter(condition == "Control") %>% .[[value]] for(treatment in unique(df$condition[df$condition != "Control"])){ x <-df %>% filter(condition == treatment) %>% .[[value]] tt <- t.test(x,y,alternative = "less") tt_df <- data_frame(size_bins = bin, condition = treatment, tstatistic = tt$statistic, pvalue = tt$p.value) %>% bind_rows(tt_df) } } tt_df %>% mutate(sig = ifelse(pvalue < 0.05/n(),TRUE,FALSE)) }
tt_df <- highlow_ratio %>% mutate(bins = "H") %>% filter(condition != "Ladder") %>% make_one_sided_ttest_df(value = "bin_ratio", gelAsFactor = FALSE) tt_table <- tt_df %>% mutate(pvalue = round(pvalue, 4) %>% as.character(), pvalue = ifelse(sig == TRUE, paste0("__",pvalue,"__"),pvalue), pvalue = ifelse(pvalue == "__0__", "__<0.0000__", pvalue), tstatistic = round(tstatistic, 2)) %>% select(-sig) %>% spread(size_bins, pvalue) colnames(tt_table) <- c("Treatment", "T-Statistic", "p-value") kable(tt_table, row.names = FALSE, caption = "P-value of one sided t-tests comparing proprotion of DNA in high and low molecular weight bins between treatment and control. Significant p-values (alpha = 0.0125) are indicated in bold.")
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.