# below: yaml for html without distill # output: # html_document: # smart: no # theme: flatly # toc: true # toc_float: true
knitr::opts_chunk$set(echo = FALSE) # run code, echo = False will only show output # knitr::opts_chunk$set(include = FALSE) # show nothing library(here) library(readxl) library(plotly) # loaded first so that it doesn't become the default filter library(tidyverse) library(car) library(FSA) # for Dunn's Test library(factoextra) library(FactoMineR) #PCA library(umap) # for UMAP library(pvclust) library(dendextend) # for color_labels library(corrr) library(ggthemes) library(cowplot) library(patchwork) library(rgl) # for vis_pca_3D library(ggnewscale) # for newscale so that mk_hclust # library(scales) # for overiding scientific notation on dendrogram y axis library(ggpmisc) # for labeling through stat_poly_eq theme_set(ggplot2::theme_minimal()) devtools::load_all()
## Load in data ==== load(here("data", "ionic.rds")) load(here("data", "tevc.rds")) load(here("data", "tecc.rds")) load(here("data", "mrna.rds")) epsp <- read.csv(here("data", "epsp.csv")) %>% as_tibble() metadata <- read_excel(here("inst", "extdata", "ManualDataEphys02.xlsx"), sheet = "MetadataEphys") # tables from Northcutt 2016 and annotations mRNAInfo <- readxl::read_excel(here("inst", "extdata", "mRNAInfo.xlsx"))
# ionic$Condition # # tevc # # tecc # mrna$Time # # epsp # metadata$Condition # # mRNAInfo ionic <- ionic %>% mutate(Condition = case_when(Condition == "Baseline" ~ "0h", Condition == "Compensated" ~ "1h", Condition == "Delayed" ~ "24h" )) mrna <- mrna %>% mutate(Time = case_when(Time == "Baseline" ~ "0h", Time == "Compensated" ~ "1h", Time == "Delayed" ~ "24h" )) metadata <- metadata %>% mutate(Condition = case_when(Condition == "Baseline" ~ "0h", Condition == "Compensated" ~ "1h", Condition == "Delayed" ~ "24h" ))
## Add Experiment/Cell to metadata ==== metadata <- metadata %>% mutate(ABFType = case_when(Type %in% c("tevc") ~ "gjvc", # Type %in% c("igj","igj_50", "gj") ~ "gjcc", # Type %in% c("epsp", "epsp_50") ~ "epsp", Type %in% c("fi_50", "fi") ~ "fi")) %>% # make expected file names mutate(num_zeros = str_length(Recording)) %>% mutate(num_zeros = ifelse(num_zeros == 1, "000", ifelse(num_zeros == 2, "00", ifelse(num_zeros == 3, "0", NA)))) %>% mutate(FileName = paste0(Experiment, "_", num_zeros, as.character(Recording), ".abf" )) %>% select(-num_zeros) #TODO there are only two Type == "gj". Replace these with igj|igj_50 # Experiment Page Recording Type TEA Condition Include Notes In4 In9 ABFType FileName # <chr> <dbl> <dbl> <chr> <dbl> <chr> <lgl> <lgl> <dbl> <dbl> <chr> <chr> # 1 190906 0 32 gj 24 Delayed NA NA 5 NA NA 190906_0032.abf # 2 190906 0 50 gj 24 Delayed NA NA 4 NA NA 190906_0050.abf ### use a subset of metadata cols to add experiment/cell data to other dfs #### small_metadata <- metadata %>% select(Experiment, FileName, In4, In9) %>% gather("Channel", "Cell", c("In4", "In9")) small_metadata2 <- metadata %>% select(Experiment, Type, FileName, In4, In9) %>% gather("Channel", "Cell", c("In4", "In9"))
## Add Experiment/Cell to tecc ==== tecc <- tecc %>% mutate(PreSyn = case_when( (abs(R1S3Mean) > abs(R1S6Mean)) ~ Signal1, (abs(R1S3Mean) < abs(R1S6Mean)) ~ Signal4 ), vrest = case_when( (abs(R1S3Mean) > abs(R1S6Mean)) ~ R1S1Baseline, (abs(R1S3Mean) < abs(R1S6Mean)) ~ R1S4Baseline )) %>% mutate(PreSyn = case_when( PreSyn == "IN 9" ~ "In9", PreSyn == "IN 4" ~ "In4" )) %>% select(FileName, PreSyn, vrest, #Signal1, Signal4, R1S1Baseline, R1S4Baseline, cc, r11, r1#, r12, rc # gap junction measures we'll get from voltage clamp ) %>% distinct() %>% rename(Channel = PreSyn) tecc <- left_join(tecc, spread(small_metadata, Channel, Cell)) %>% mutate(Cell = case_when(Channel == "In9" ~ In9, Channel == "In4" ~ In4), PrePost = case_when(Channel == "In9" ~ paste0("cc.", as.character(In9), "_", as.character(In4)), Channel == "In4" ~ paste0("cc.", as.character(In4), "_", as.character(In9))) ) %>% filter(!is.na(Cell) & !is.na(In4) & !is.na(In9)) %>% filter(In4 %in% c(3, 4, 5) & In9 %in% c(3, 4, 5)) %>% select(-In4, -In9) %>% filter(!is.na(cc)) %>% spread(PrePost, cc)
## Add Experiment/Cell to tevc ==== tevc <- tevc %>% select(FileName, PreSyn, MedianIg) %>% distinct() %>% mutate(PreSyn = case_when( PreSyn == "IN 9" ~ "In9", PreSyn == "IN 4" ~ "In4" )) %>% rename(Channel = PreSyn, ig = MedianIg) tevc <- left_join(tevc, spread(small_metadata, Channel, Cell)) %>% mutate(Cell = case_when(Channel == "In9" ~ In9, Channel == "In4" ~ In4), PrePost = case_when(Channel == "In9" ~ paste0("ig.", as.character(In9), "_", as.character(In4)), Channel == "In4" ~ paste0("ig.", as.character(In4), "_", as.character(In9))) ) %>% filter(!is.na(Cell) & !is.na(In4) & !is.na(In9)) %>% filter(In4 %in% c(3, 4, 5) & In9 %in% c(3, 4, 5)) %>% select(-In4, -In9) %>% filter(!is.na(ig)) %>% spread(PrePost, ig) # is conductance symmetric? # tevc_plt <- tevc %>% # select(-FileName, -Channel, -Cell) %>% # group_by(Experiment) %>% # mutate_all(median, na.rm = T) %>% # distinct() # # tevc_plt %>% # ggplot(aes(x = ig.3_4, y = ig.4_3))+ # geom_point() # # tevc_plt %>% # ggplot(aes(x = ig.3_5, y = ig.5_3))+ # geom_point() # # tevc_plt %>% # ggplot(aes(x = ig.4_5, y = ig.5_4))+ # geom_point() tevc[is.na(tevc$ig.3_4), "ig.3_4"] <- tevc[is.na(tevc$ig.3_4), "ig.4_3"] tevc[is.na(tevc$ig.3_5), "ig.3_5"] <- tevc[is.na(tevc$ig.3_5), "ig.5_3"] tevc[is.na(tevc$ig.4_5), "ig.4_5"] <- tevc[is.na(tevc$ig.4_5), "ig.5_4"] tevc <- tevc %>% select(-ig.4_3, -ig.5_3, -ig.5_4)
## Add Experment/Cell to epsp ==== epsp <- left_join(epsp, small_metadata2) # TODO How do we best caputre variace? find range between max/min across all sweeps when calculating auc? # note that there are plenty of replicates here # and that there are not epsp_50's for each experiment. These are likely sufficiently close to -50. To be on the safe side, we'll only consider epsp not epsp_50 epsp %>% group_by(Experiment, Cell, Type) %>% filter(Key != "predicted") %>% # filter(Sweep == 1 & Key == "predicted") %>% tally() %>% spread(Type, n) %>% mutate(epsp = ifelse(is.na(epsp), NA, T), epsp_50 = ifelse(is.na(epsp_50), NA, T)) epsp %>% group_by(Experiment, Cell, Key) %>% filter(Type == "epsp") %>% mutate( Cor.IQR = IQR(Cor, na.rm = T), Min.V.IQR = IQR(Min.V, na.rm = T), Max.Amplitude.IQR = IQR(Max.Amplitude, na.rm = T), AUC.IQR = IQR(AUC, na.rm = T) ) %>% mutate( Cor = median(Cor, na.rm = T), Min.V = median(Min.V, na.rm = T), Max.Amplitude = median(Max.Amplitude, na.rm = T), AUC = median(AUC, na.rm = T) )
# Relabel Brian's cells ---- ## temp <- mrna[mrna$Source == "Brian" & mrna$Pharm == "AP" & mrna$Time == "24h", "Cell"]%>% unlist() %>% str_remove(pattern = "#") %>% str_split(pattern = " ") %>% transpose() mrna[mrna$Source == "Brian" & mrna$Pharm == "AP" & mrna$Time == "24h", "Experiment"] <- unlist(transpose(temp[[1]])) mrna[mrna$Source == "Brian" & mrna$Pharm == "AP" & mrna$Time == "24h", "Cell"] <- unlist(transpose(temp[[2]])) %>% str_remove(pattern = "LC") ## temp <- mrna[mrna$Source == "Brian" & mrna$Pharm == "None" & mrna$Time == "0h", "Cell"] %>% unlist() %>% str_remove(pattern = "Control #") mrna[mrna$Source == "Brian" & mrna$Pharm == "None" & mrna$Time == "0h", "Experiment"] <- temp mrna[mrna$Source == "Brian" & mrna$Pharm == "None" & mrna$Time == "0h", "Cell"] <- NA ## temp <- mrna[mrna$Source == "Brian" & mrna$Pharm == "None" & mrna$Time == "24h", "Cell"] %>% unlist() %>% str_split(pattern = " ") %>% transpose() mrna[mrna$Source == "Brian" & mrna$Pharm == "None" & mrna$Time == "24h", "Experiment"] <- unlist(transpose(temp[[1]])) mrna[mrna$Source == "Brian" & mrna$Pharm == "None" & mrna$Time == "24h", "Cell"] <- unlist(transpose(temp[[2]])) %>% str_remove(pattern = "LC") ## temp <- mrna[mrna$Source == "Brian" & mrna$Pharm == "TEA" & mrna$Time == "1h", "Cell"] %>% unlist() %>% str_remove(pattern = "TEA #") mrna[mrna$Source == "Brian" & mrna$Pharm == "TEA" & mrna$Time == "1h", "Experiment"] <- temp mrna[mrna$Source == "Brian" & mrna$Pharm == "TEA" & mrna$Time == "1h", "Cell"] <- NA ## temp <- mrna[mrna$Source == "Brian" & mrna$Pharm == "TEA" & mrna$Time == "24h", "Cell"] %>% unlist() %>% str_remove(pattern = "TEA24-") mrna[mrna$Source == "Brian" & mrna$Pharm == "TEA" & mrna$Time == "24h", "Experiment"] <- temp mrna[mrna$Source == "Brian" & mrna$Pharm == "TEA" & mrna$Time == "24h", "Cell"] <- NA ## temp <- mrna[mrna$Source == "Brian" & mrna$Pharm == "TTX", "Cell"] %>% unlist() %>% str_split(pattern = "-") %>% transpose() mrna[mrna$Source == "Brian" & mrna$Pharm == "TTX", "Experiment"] <- unlist(transpose(temp[[1]])) mrna[mrna$Source == "Brian" & mrna$Pharm == "TTX", "Cell"] <- unlist(transpose(temp[[2]])) ## deduplicate mrna mrna <- mrna %>% gather(key = "mrna", value = "count", names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))]) %>% filter(mrna != c("htr2b", "kainate2b", "nmda1a", "nmda2b", "mglur7", "trpm1", "machrb", "htr1b")) %>% # drop fully missing mrnas group_by(Source, Pharm, Time, Experiment, Cell, mrna) %>% mutate(count = median(count, na.rm = T)) %>% ungroup() %>% distinct() %>% spread(key = "mrna", value = "count")
# Was there activity on the EC? metadata <- full_join(metadata, tibble::tribble( ~Experiment, ~ExtracellularActive, "190808a", TRUE, "190830", TRUE, "190903", TRUE, "190904", TRUE, "190905", TRUE, "190906", TRUE, "190907", TRUE, "190915", TRUE, "190916a", TRUE, "190917a", NA, # File not found. Entry not found in notebook either. "190917", TRUE, "190918", TRUE, "190924", TRUE, "190924a", TRUE, "191002", TRUE, "191004", TRUE, "190926", TRUE, "190927", FALSE, "190927a", TRUE, "190930", FALSE, "190930a", FALSE, "191001", TRUE ) ) metadata %>% select(Experiment, Condition, ExtracellularActive) %>% filter(Condition %in% c("0h", "1h", "24h")) %>% distinct() %>% group_by(Condition, ExtracellularActive) %>% tally() %>% filter(!is.na(ExtracellularActive)) %>% pivot_wider(names_from = "ExtracellularActive", values_from = "n")
small_metadata <- metadata[, c("Experiment", "FileName", "Condition")] M_ionic<- ionic %>% mutate(Cell = as.character(Cell)) #Ihtk.0 Ihtk.Slope Ia.0 Ia.Slope M_tecc <- left_join(tecc, small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell)) # QC drop all non positive resistances, all ccs outside 0-1 M_tecc <- M_tecc %>% mutate( # cc = ifelse(cc > 0 & cc < 1, cc, NA), r11 = ifelse(r11 >0, r11, NA), r1 = ifelse(r1 >0, r1, NA), # r12 = ifelse(r12 >0, r12, NA), # rc = ifelse(rc >0, rc, NA) cc.3_4 = ifelse(cc.3_4 > 0 & cc.3_4 < 1, cc.3_4, NA), cc.3_5 = ifelse(cc.3_5 > 0 & cc.3_5 < 1, cc.3_5, NA), cc.4_3 = ifelse(cc.4_3 > 0 & cc.4_3 < 1, cc.4_3, NA), cc.4_5 = ifelse(cc.4_5 > 0 & cc.4_5 < 1, cc.4_5, NA), cc.5_3 = ifelse(cc.5_3 > 0 & cc.5_3 < 1, cc.5_3, NA), cc.5_4 = ifelse(cc.5_4 > 0 & cc.5_4 < 1, cc.5_4, NA) ) #%>% # mutate(rc = rc^-1) %>% # rename(rc.ig = rc) #vrest cc r11 r12 r1 rc M_tevc <- left_join(tevc, small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell)) #ig # M_epsp <- left_join(epsp, small_metadata) %>% # select(-FileName, -Type) %>% # mutate(Cell = as.character(Cell)) %>% # mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs", # Key %in% c("predicted") ~ "Sim")) %>% # pivot_wider(names_from = "Key", values_from = c("Min.V", "Max.Amplitude", "AUC")) %>% # mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% # mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim, # AUC_Delta = AUC_Obs - AUC_Sim) %>% # group_by(Condition, Experiment, Cell) %>% # select(-Min.V_Sim) %>% # mutate_at(c("Cor", # "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", # "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta", # "AUC_Obs", "AUC_Sim", "AUC_Delta"), median, na.rm = T) %>% # distinct() %>% # ungroup() M_epsp <- left_join(epsp, small_metadata) %>% filter(Type == "epsp") %>% select(-FileName, -Type) %>% mutate(Cell = as.character(Cell)) %>% mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs", Key %in% c("predicted") ~ "Sim")) %>% pivot_wider(names_from = "Key", values_from = c("Min.V", "Max.Amplitude", "AUC")) %>% mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim, AUC_Delta = AUC_Obs - AUC_Sim) %>% group_by(Condition, Experiment, Cell) %>% select(-Min.V_Sim) %>% mutate( Cor.IQR = IQR(Cor, na.rm = T), Min.V_Obs.IQR = IQR(Min.V_Obs, na.rm = T), Max.Amplitude_Obs.IQR = IQR(Max.Amplitude_Obs, na.rm = T), Max.Amplitude_Sim.IQR = IQR(Max.Amplitude_Sim, na.rm = T), Max.Amplitude_Delta.IQR = IQR(Max.Amplitude_Delta, na.rm = T), AUC_Obs.IQR = IQR(AUC_Obs, na.rm = T), AUC_Sim.IQR = IQR(AUC_Sim, na.rm = T), AUC_Delta.IQR = IQR(AUC_Delta, na.rm = T) ) %>% mutate_at(c("Cor", "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta", "AUC_Obs", "AUC_Sim", "AUC_Delta"), median, na.rm = T) %>% select(-Sweep) %>% distinct() %>% ungroup() #Sweep Cor Key Min.V Max.Amplitude AUC M_mrna <- left_join(mrna[mrna$Source == "Daniel", ], small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell)) #Set up data cols to help organize full dataset ionic_cols <- c("Ihtk.0", "Ihtk.Slope", "Ihtk.0.s", "Ihtk.Slope.s", "Ia.0", "Ia.Slope", "Ia.0.s", "Ia.Slope.s") tecc_cols <- c("vrest", "r11", "r1", "cc.3_4", "cc.3_5", "cc.4_3", "cc.4_5", "cc.5_3", "cc.5_4")#, "r12", "rc.ig") # ignore rc, ig is a more direct measure tevc_cols <- c("ig.3_4", "ig.3_5", "ig.4_5") epsp_cols <- c("Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta") mrna_cols <- names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))] M_all <- full_join(M_ionic, M_tecc) %>% full_join(M_tevc) %>% full_join(M_epsp) %>% full_join(M_mrna) reordered_names <- c(names(M_all)[!(names(M_all) %in% c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols))], c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols)) M_all <- M_all[, reordered_names] %>% filter(Condition %in% c("0h", "1h", "24h") & Cell %in% c(3, 4, 5)) %>% distinct() # #TODO this needs some prep work. We'll want to capture the variance in these measures. # # Not clear how to do that. # epsp %>% ggplot(aes(x = FileName, Max.Amplitude, color = interaction(Experiment, Cell)))+ # geom_line()+ # geom_point()+ # facet_grid(.~Key) #TODO move this to top # QC data by completion rate, having variance, and median mrna abundance M_all_outliers <- M_all M_all_skim <- M_all %>% skimr::skim() M_all_skim <- M_all_skim[M_all_skim$complete_rate >= 0.19 & M_all_skim$numeric.sd > 0, ] M_all_skim <- M_all_skim[!(M_all_skim$skim_variable %in% mrna_cols) | ((M_all_skim$skim_variable %in% mrna_cols) & M_all_skim$numeric.p50 >= 50 ), ] M_all <- M_all[, names(M_all) %in% c( "Condition", "Cell", "Experiment", "Channel", "Sweep", "Source", "Pharm", "Time", "Sample", M_all_skim$skim_variable[!is.na(M_all_skim$skim_variable)]) ] # update cols now that data is QCed ionic_cols <- c("Ihtk.0", "Ihtk.Slope", "Ihtk.0.s", "Ihtk.Slope.s", "Ia.0", "Ia.Slope", "Ia.0.s", "Ia.Slope.s")[ionic_cols %in% names(M_all)] tecc_cols <- c("vrest", "r11", "r1", "cc.3_4", "cc.3_5", "cc.4_3", "cc.4_5", "cc.5_3", "cc.5_4")[tecc_cols %in% names(M_all)] #, "r12", "rc.ig") # ignore rc, ig is a more direct measure tevc_cols <- c("ig.3_4", "ig.3_5", "ig.4_5")[tevc_cols %in% names(M_all)] epsp_cols <- c("Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta")[epsp_cols %in% names(M_all)] mrna_cols <- mrna_cols[mrna_cols %in% names(M_all)]
"Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
temp <- mrna %>% select(Pharm, Time, Source) %>% distinct() temp %>% group_by(Pharm, Source) %>% mutate(exists = "T") %>% spread(Time, exists) mrna %>% filter( (Source == "Daniel") | (Source == "Brian" & Pharm == "TEA") | (Source == "Brian" & Pharm == "None" & Time == "0h") ) %>% distinct() %>% group_by(Pharm, Time, Source) %>% tally()
metadata %>% select(Experiment, Condition, ExtracellularActive) %>% filter(Condition %in% c("0h", "1h", "24h")) %>% distinct() %>% group_by(Condition, ExtracellularActive) %>% tally() %>% filter(!is.na(ExtracellularActive)) %>% pivot_wider(names_from = "ExtracellularActive", values_from = "n")
M_all %>% select(Condition, Experiment, Cell) %>% distinct() %>% group_by(Condition) %>% tally() M_all %>% select(Condition, Experiment) %>% distinct() %>% group_by(Condition) %>% tally()
mrna_cols <- c( "nav", "cav1", "cav2", "bkkca", "shaker", "shal", "shab", "shaw1", "shaw2", "inx1", "inx2", "inx3" #"inx4", "inx5", #<- inx5 is likely not expressed ) memb_cols <- c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ihtk.0.s", "Ihtk.Slope.s", "Ia.0", "Ia.Slope", "Ia.0.s", "Ia.Slope.s") epsp_cols <- c("Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", "Cor.IQR", "Min.V_Obs.IQR", "Max.Amplitude_Obs.IQR", #"Max.Amplitude_Sim.IQR", "AUC_Obs.IQR", #"AUC_Sim.IQR", "Max.Amplitude_Delta.IQR", "AUC_Delta.IQR")
# select the mRNAs present in both Brian's data and mine. mrna_b <-select(mrna, Source, Pharm, Time, Experiment, Cell, nav, cav1, cav2, bkkca, shaker, shal, shab, shaw1, shaw2, inx1, inx2, inx3 #inx4, inx5, #<- inx5 is likely not expressed ) %>% filter(Source == "Brian") # mutate(Pharm = ifelse(Source == "Daniel" & Time == "0h", "None", Pharm)) mrna_b %>% skimr::skim() mrna_b %>% group_by(Time, Pharm, Source) %>% tally() mrna_b$uid <- as.character(seq(1, nrow(mrna_b))) mrna_b <- mrna_b %>% select(-Experiment, -Cell) %>% unite(Group, Source, Pharm, Time) mrna_b <- mrna_b[, c("Group", "uid", names(mrna_b)[!(names(mrna_b) %in% c("Group", "uid"))]) ] # Remove Outliers mrna_b_winxiqr_list <- map(unique(mrna_b$Group), function(group){ temp <- mrna_b[mrna_b$Group == group, ] dvs <- names(mrna_b)[!(names(mrna_b) %in% c("Group", "uid"))] for (dv_col in dvs){ temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA) } return(temp) }) mrna_b_winxiqr <- do.call(rbind, mrna_b_winxiqr_list) # Median interpolate missing mrna_b_winxiqr_interp <- imputeMissings::impute(mrna_b_winxiqr, method = "median/mode")
treatment <- unique(mrna_b_winxiqr$Group)[1] mrna1 <- "inx1" mrna2 <- "inx2" M <- mrna_b_winxiqr %>% filter(Group == treatment) #%>% select(uid, mrna1, mrna2) ## setup mrnas <- c("nav", "cav1", "cav2", "bkkca", "shaker", "shal", "shab", "shaw1", "shaw2", "inx1", "inx2", "inx3") ### Prep cor df mrna_cors <- list() for(i in seq_along(mrnas)){ for(j in seq(i, length(mrnas))){ if(i != j){ mrna_cors[[(length(mrna_cors)+1)]] <- c(mrnas[i], mrnas[j]) } } } mrna_cors <- data.frame(do.call(rbind, mrna_cors)) names(mrna_cors) <- c("mrna1", "mrna2") mrna_cors["Cor"] <- NA mrna_cors["ep"] <- NA ### Fill cor df for(i in seq(1, nrow(mrna_cors))){ mrna1 <- mrna_cors[i, "mrna1"] mrna2 <- mrna_cors[i, "mrna2"] # is there a significant correlation? cor_obs <- cor(M[, c(mrna1)], M[, c(mrna2)], use = "pairwise.complete.obs", method = "pearson") cor_obs <- as.numeric(cor_obs) niter <- 1000 cor_sim <- unlist(map(seq(1, niter), function(i){ # return(cor(M[[mrna1]], sample(M[[mrna2]]) ) ) cor_obs <- cor(M[, c(mrna1)], sample(unlist(M[, c(mrna2)])), use = "pairwise.complete.obs", method = "pearson") return(unlist(cor_obs)) })) cor_ep <- mean(c(cor_obs, abs(cor_sim)) >= cor_obs) mrna_cors[i, "Cor"] <- cor_obs mrna_cors[i, "ep"] <- cor_ep } # fix levels mrna_cors$mrna1 <- factor(mrna_cors$mrna1, levels = mrnas) mrna_cors$mrna2 <- factor(mrna_cors$mrna2, levels = mrnas) # install.packages("GGally")
## Data prep # mrna_cors %>% # ggplot(aes(x = 0, y = 0))+ # geom_tile(aes(fill = Cor) )+ # geom_label(aes(label = Cor))+ # facet_grid(mrna1 ~ mrna2) # get the right color for the background fill_val <- mrna_cors[( (mrna_cors$mrna1 == mrna1 & mrna_cors$mrna2 == mrna2) & (mrna_cors$ep < 0.05)), "Cor"] mk_cor_scatter <- function(M, mrna1, mrna2, fill_val){ if( length(fill_val) == 0 ){ # if there's a non-significant correlation, don't add fit. fill_val <- "#00000000" plt <- M %>% ggplot(aes_string(x = mrna1, mrna2))+ # stat_poly_line(formula = x ~ y, color = "black") + geom_point(alpha = 0.5)+ # stat_poly_eq(aes(label = paste(after_stat(eq.label), # after_stat(rr.label), sep = "*\", \"*")))+ theme_void()+ theme(#plot.background = element_rect(fill = fill_val), axis.title.x = element_text(face = "italic"), axis.title.y = element_text(face = "italic")) } else { # if there's a significant correlation, add the fit. vals <- seq(-1, to = 1, length.out = 7) fill_val <- RColorBrewer::brewer.pal(9, "PuOr")[seq(2, 8)][(fill_val < vals)][1] plt <- M %>% ggplot(aes_string(x = mrna1, mrna2))+ geom_rect(aes(xmin=min(M$mrna1, na.rm = T), xmax=max(M$mrna1, na.rm = T), ymin=min(M$mrna2, na.rm = T), ymax=max(M$mrna2, na.rm = T) ), fill=fill_val)+ stat_poly_line(formula = x ~ y, color = "black") + geom_point(alpha = 0.5)+ # stat_poly_eq(aes(label = paste(after_stat(eq.label), # after_stat(rr.label), sep = "*\", \"*")))+ theme_void()+ theme(plot.background = element_rect(fill = fill_val), axis.title.x = element_text(face = "italic"), axis.title.y = element_text(face = "italic")) } return(plt) } mk_cor_scatter(M = M, mrna1 = mrna1, mrna2 = mrna2, fill_val = fill_val) # make all the plots plt_list <- list() for(i in seq(1, length(mrnas))){ for(j in seq(i, length(mrnas))){ if(i != j){ mrna1 = mrnas[i] mrna2 = mrnas[j] fill_val <- mrna_cors[( (mrna_cors$mrna1 == mrna1 & mrna_cors$mrna2 == mrna2) & (mrna_cors$ep < 0.05)), "Cor"] print(fill_val) plt_list[[(1+length(plt_list))]] <- mk_cor_scatter( M = M, mrna1 = mrna1, mrna2 = mrna2, fill_val = fill_val) } } } # plt_list library(GGally) side_len <- (length(mrnas) -1) pm <- ggmatrix( map(seq(1 , side_len**2), function(i){}), nrow = side_len, ncol = side_len, # xAxisLabels = c("A", "B", "C"), # yAxisLabels = c("D", "E"), title = "Matrix Title" ) diagoffset <- 0 colcount <- 1 rowcount <- 1 for(i in seq_along(plt_list)){ pm[rowcount, colcount] <- plt_list[[i]] if(colcount == side_len){ diagoffset <- diagoffset + 1 colcount <- 1 + diagoffset rowcount <- rowcount + 1 } else { colcount <- colcount + 1 } } # pm[1, 2] <- plt_list[[1]] pm # # # ggpairs(select(M, -Group, -uid), # upper = list(continuous = "cor"), # lower = list(na ="na"), # diag = list( na = "naDiag"))
plot_grid(plt_list) library(patchwork) # quick and dirty get all the plots ordered patchwork_string <- c() ith <- 1 for(i in seq(1, length(mrnas))){ for(j in seq(1, length(mrnas))){ if(i == j){ } else if(j > i){ patchwork_string <- c(patchwork_string, paste0("plt_list[[", as.character(ith), "]]")) ith <- ith+1 } else { patchwork_string <- c(patchwork_string, "plot_spacer()") } } } patchwork_string <- paste(patchwork_string, collapse = "+") # patchwork_string+patchwork::plot_layout(ncol = length(mrnas)) plt_list[[1]]+plt_list[[2]]+plt_list[[3]]+plt_list[[4]]+plt_list[[5]]+plt_list[[6]]+plt_list[[7]]+plt_list[[8]]+plt_list[[9]]+plt_list[[10]]+plt_list[[11]]+plot_spacer()+plt_list[[12]]+plt_list[[13]]+plt_list[[14]]+plt_list[[15]]+plt_list[[16]]+plt_list[[17]]+plt_list[[18]]+plt_list[[19]]+plt_list[[20]]+plt_list[[21]]+plot_spacer()+plot_spacer()+plt_list[[22]]+plt_list[[23]]+plt_list[[24]]+plt_list[[25]]+plt_list[[26]]+plt_list[[27]]+plt_list[[28]]+plt_list[[29]]+plt_list[[30]]+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[31]]+plt_list[[32]]+plt_list[[33]]+plt_list[[34]]+plt_list[[35]]+plt_list[[36]]+plt_list[[37]]+plt_list[[38]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[39]]+plt_list[[40]]+plt_list[[41]]+plt_list[[42]]+plt_list[[43]]+plt_list[[44]]+plt_list[[45]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[46]]+plt_list[[47]]+plt_list[[48]]+plt_list[[49]]+plt_list[[50]]+plt_list[[51]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[52]]+plt_list[[53]]+plt_list[[54]]+plt_list[[55]]+plt_list[[56]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[57]]+plt_list[[58]]+plt_list[[59]]+plt_list[[60]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[61]]+plt_list[[62]]+plt_list[[63]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[64]]+plt_list[[65]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[66]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+patchwork::plot_layout(ncol = length(mrnas)) # as_tbl_graph(mrna_cors) %>% # ggraph(layout = 'linear', circular = TRUE) + # geom_edge_arc(aes(colour = ep)) + # coord_fixed()
library(ggraph) library(igraph) temp <- mutate(mrna_cors, from = as.numeric(as.factor(mrna1)), to = as.numeric(as.factor(mrna2)) ) # graph <- graph_from_data_frame(temp[, c("from", "to")]) # graph <- temp %>% # graph_from_data_frame(directed = FALSE) # # # Not specifying the layout - defaults to "auto" # ggraph(graph) + # # geom_edge_link(aes(colour = factor(Cor))) + # geom_edge_link(aes(edge_alpha = 1-ep)) + # geom_node_point() # = = = = # Create a tidy data frame of correlations tidy_cors <- M[, mrnas] %>% correlate() %>% stretch() tidy_cors["ep"] <- NA for(i in seq(1, nrow(mrna_cors))){ mrna1 <- mrna_cors[i, "mrna1"] mrna2 <- mrna_cors[i, "mrna2"] tidy_cors[((tidy_cors$x == mrna1) & (tidy_cors$y == mrna2)) | ( (tidy_cors$x == mrna2) & (tidy_cors$y == mrna1) ) , "ep"] <- mrna_cors[i, "ep"] } # eps1 <- full_join( # tidy_cors, # rename(select(mrna_cors, -Cor), x = mrna1, y = mrna2) # ) %>% # filter(!is.na(ep)) # # eps2 <- full_join( # tidy_cors, # rename(select(mrna_cors, -Cor), x = mrna2, y = mrna1) # ) # # full_join( # tidy_cors, # eps1) %>% full_join(eps2) # Convert correlations stronger than some value # to an undirected graph object # graph_cors <- tidy_cors %>% # filter(abs(r) > 0.7) %>% # graph_from_data_frame(directed = FALSE) # # # Plot # ggraph(graph_cors) + # geom_edge_link() + # geom_node_point() + # geom_node_text(aes(label = name), repel = TRUE) + # theme_graph() # = = = = #ref https://www.data-imaginist.com/2017/ggraph-introduction-layouts/ graph_cors <- tidy_cors %>% # mutate(bool = case_when(ep < 0.05 ~ TRUE, # ep >= 0.05 ~ FALSE)) %>% # filter(abs(ep) < 0.05) %>% graph_from_data_frame(directed = FALSE) graph_cors_sig <- tidy_cors %>% mutate(r = case_when(ep < 0.05 ~ r, ep >= 0.05~ 0)) %>% graph_from_data_frame(directed = FALSE) custom_network_plot <- ggraph(graph_cors_sig, layout = 'linear', circular = TRUE) + geom_edge_link(aes(edge_alpha = 1/ep, edge_width = 1/ep, color = r)) + guides(edge_alpha = "none", edge_width = "none") + scale_edge_colour_gradientn(limits = c(-1, 1), colors = c("#B35806", "White","#542788")) + geom_node_point(color = "gray", size = 5) + geom_node_text(aes(label = name), repel = TRUE) + theme_graph() + labs(title = "")
All correlations in Brian's data
library(ggcorrplot) cor_plt_list <- list() for (temp.group in c("Brian_All", unique(mrna_b_winxiqr$Group))){ print(temp.group) if (temp.group == "Brian_All"){ cor_df <- select(mrna_b_winxiqr, -Group, -uid) } else { cor_df <- select(filter(mrna_b_winxiqr, Group == temp.group), -Group, -uid) } # p.mat <- cor_pmat(cor_df) cor_df_bin <- cor(cor_df, use = "pairwise.complete.obs") # bin the correlations so there are fewer colors used in the figure cor_bins <- seq(-1, 1, length.out = 8) for (i in 1:(length(cor_bins)-1)){ # use the average bin value # cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- (cor_bins[i] + cor_bins[i+1]) # use the more extreme value to shade with cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]] } cor_plt_list[[(length(cor_plt_list)+1)]] <- ggcorrplot( cor_df_bin, # p.mat = p.mat, insig = "blank", type = "upper", outline.col = "white", colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)] )+ labs(subtitle = temp.group)+ theme(legend.position = "") } cor_plt_list[[3]] + cor_plt_list[[5]] + plot_spacer() + plot_spacer() + cor_plt_list[[4]] + cor_plt_list[[6]] + cor_plt_list[[2]]+ cor_plt_list[[7]] + plot_layout(ncol = 4) # cor_plt_list[[1]] + cor_plt_list[[3]] + cor_plt_list[[4]] + cor_plt_list[[5]] + cor_plt_list[[6]] + # plot_spacer() + plot_spacer() + cor_plt_list[[2]] + plot_spacer() + cor_plt_list[[7]] + plot_layout(ncol = 4) #plot_layout(ncol = 5)
Correlation multiplot
cor_df_list <- map(c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"), function(temp.group){ cor_df <- select(filter(mrna_b_winxiqr, Group == temp.group), -Group, -uid) cor_df <- cor(cor_df, use = "pairwise.complete.obs") cor_df_bin <- cor_df # bin the correlations so there are fewer colors used in the figure cor_bins <- seq(-1, 1, length.out = 8) for (i in 1:(length(cor_bins)-1)){ # use the more extreme value to shade with cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]] } return(list(cor = cor_df, cor_bin = cor_df_bin)) }) temp <- map(seq_along(cor_df_list), function(i){ temp <- cor_df_list[[i]][[1]] temp <- as.data.frame(temp) temp %>% rownames_to_column() %>% gather("colname", "Cor", names(temp)) %>% mutate(Group = c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h")[i]) }) temp <- do.call(rbind, temp) temp_fill <- map(seq_along(cor_df_list), function(i){ temp <- cor_df_list[[i]][[2]] temp <- as.data.frame(temp) temp %>% rownames_to_column() %>% gather("colname", "Cor", names(temp)) %>% mutate(Group = c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h")[i]) }) temp_fill <- do.call(rbind, temp_fill) %>% rename(CorFill = Cor) cor_col_plt <- full_join(temp, temp_fill) %>% # filter(rowname == "bkkca", colname == "cav1") %>% mutate(x = case_when(Group == "Brian_None_0h" ~ "0 Hours", Group == "Brian_TEA_1h" ~ "1 Hour", Group == "Brian_TEA_24h" ~ "24 Hours") ) %>% mutate(Cor = ifelse(rowname == colname, NA, Cor)) %>% ggplot(aes(x = x, y = Cor, fill = Cor))+ geom_col()+ geom_hline(yintercept = 0)+ facet_grid(rowname ~ colname)+ scale_fill_distiller(palette = "PuOr", direction = 1)+ coord_cartesian(ylim = c(-1,1))+ labs(x = "")+ scale_y_continuous(breaks = c(-0.5, 0.5))+ theme(legend.position = "", # panel.grid.major.y = element_line() # axis.ticks.length.y.left = element_line(), axis.text.x = element_text(angle = 90)) cor_bin_fill_ref_plot <- ggplot(data.frame(Cor = seq(-1, 1, length.out = 8)), aes(x = Cor, y = 0, fill = Cor))+ geom_tile(aes(height = 2/8))+ geom_text(aes(label = round(Cor, digits = 1), size = 1))+ scale_fill_distiller(palette = "PuOr", direction = 1)+ theme_void()+ theme(legend.position = "")+ coord_fixed()+ plot_spacer() correlogram_set <- (cor_plt_list[[3]]+labs(subtitle= "0 Hours") + cor_plt_list[[5]]+labs(subtitle= "1 Hours") + cor_plt_list[[6]]+labs(subtitle= "24 Hours")) figure_w <- c(1) figure_h <- c(1/3,1,5) # (cor_bin_fill_ref_plot + plot_spacer()+ plot_spacer() ) / cor_bin_fill_ref_plot / correlogram_set / cor_col_plt + plot_layout(widths = figure_w, heights = figure_h) # (cor_plt_list[[3]]+labs(subtitle= "0 Hours") + # cor_plt_list[[5]]+labs(subtitle= "1 Hours") + # cor_plt_list[[6]]+labs(subtitle= "24 Hours") + # cor_bin_fill_ref_plot + plot_spacer() + plot_spacer() + # plot_spacer() + plot_spacer()+ plot_spacer()) | # cor_col_plt+ # plot_layout(widths = figure_w, heights = figure_h) # cm_multiplier = 15 # ggsave("BrianCorMulti.svg", # width = sum(figure_w)*cm_multiplier, # height = sum(figure_h)*cm_multiplier, # # units = "cm", # limitsize = F)
Corrplots with non-sig dropped
cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){ df = mrna_b_winxiqr %>% mutate(Condition = Group) %>% mutate(Condition = case_when(Condition == "Brian_None_0h" ~ "0h", Condition == "Brian_TEA_1h" ~ "1h", Condition == "Brian_TEA_24h" ~ "24h")) df = df[df$Condition == Time, c(mrna_cols)] corr <- cor(df, use = "pairwise.complete.obs", method = "spearman") p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman") cor_df_bin <- corr cor_bins <- seq(-1, 1, length.out = 8) for (i in 1:(length(cor_bins)-1)){ # use the more extreme value to shade with cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]] } ggcorrplot( cor_df_bin, p.mat = p.mat, insig = "blank", type = "upper", outline.col = "white", colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)] )+ labs(subtitle = Time)+ theme(legend.position = ""#, # axis.text.x = element_text(angle = 90) ) }) # plot_grid(plotlist = cor_plt_list) (cor_plt_list[[1]]+theme( axis.text.x = element_text(angle = 90, vjust = 0))) + (cor_plt_list[[2]]+theme( axis.text.x = element_text(angle = 90, vjust = 0), # axis.text.x = element_blank(), axis.text.y = element_blank()))+ (cor_plt_list[[3]]+theme( axis.text.x = element_text(angle = 90, vjust = 0), # axis.text.x = element_blank(), axis.text.y = element_blank()))
cor_list_df <- map(unique(mrna_b_winxiqr$Group), function(group){ # temp <- cor(select(mrna_b_winxiqr[mrna_b_winxiqr$Group == group, ], -Group, -uid), use = "pairwise.complete.obs") %>% as.data.frame() temp <- corrr::correlate(select(mrna_b_winxiqr[mrna_b_winxiqr$Group == group, ], -Group, -uid), use = "pairwise.complete.obs") %>% corrr::shave() %>% as.data.frame() temp$row <- rownames(temp) temp <- gather(temp, "col", "Corr", names(temp)[!(names(temp) %in% c("term", "row"))]) temp$Group <- group return(temp) }) cor_list_df <- do.call(rbind, cor_list_df) # cor_list_df$Group %>% unique() ecdf_corr_list <- map( list( list("Brian_None_0h", "Brian_TEA_1h"), list("Brian_TEA_1h", "Brian_TEA_24h"), list("Brian_None_0h", "Brian_TEA_24h"), list("Brian_None_0h", "Brian_None_24h"), list("Brian_None_0h", "Brian_TEA_24h"), list("Brian_None_0h", "Brian_TEA_1h"), list("Brian_None_24h", "Brian_AP_24h"), list("Brian_None_24h", "Brian_TEA_24h"), list("Brian_None_24h", "Brian_TTX_24h") ), function(group_comparison){ plot_ecdf_ks( df = cor_list_df[cor_list_df$Group %in% group_comparison, ], data.col = "Corr", group.col = "Group", group1 = group_comparison[1], group2 = group_comparison[2], colors = c("black", "red")) }) # cowplot::plot_grid(plotlist = ecdf_corr_list) ecdf_corr_list[[1]] + labs(subtitle = "", x = "") + ecdf_corr_list[[2]] + labs(subtitle = "", x = "") + theme(legend.position = "top")+ ecdf_corr_list[[3]] + labs(subtitle = "", x = "") # ggsave(plot = last_plot(), # filename = here("officer_output", "ContrastSource_ecdfs.tiff"), # width = 11.5, height = 4.76) # list("Brian_None_0h", "Brian_TEA_1h"), # list("Brian_TEA_1h", "Brian_TEA_24h"), # list("Brian_None_0h", "Brian_TEA_24h"), plot_ecdf_ks( df = cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_1h"), ], data.col = "Corr", group.col = "Group", group1 = "Brian_None_0h", group2 = "Brian_TEA_1h", colors = c("black", "orange"))+ labs(title = "0 Hours vs 1 Hour", subtitle = "p = 0.44", x = "")+ plot_ecdf_ks( df = cor_list_df[cor_list_df$Group %in% list("Brian_TEA_1h", "Brian_TEA_24h"), ], data.col = "Corr", group.col = "Group", group1 = "Brian_TEA_1h", group2 = "Brian_TEA_24h", colors = c("orange", "red"))+ labs(title = "1 Hour vs 24 Hours", subtitle = "p < 0.01", x = "")+ plot_ecdf_ks( df = cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_24h"), ], data.col = "Corr", group.col = "Group", group1 = "Brian_None_0h", group2 = "Brian_TEA_24h", colors = c("black", "red"))+ labs(title = "0 Hours vs 24 Hours", subtitle = "p < 0.01", x = "") ## what's happening in the high correlations at zero? temp <- cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"), ] %>% spread(Group, Corr) %>% filter(Brian_None_0h >=0.66) %>% gather(Group, Corr, c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h")) plot_ecdf_ks( df = temp[temp$Group %in% list("Brian_None_0h", "Brian_TEA_1h"), ], data.col = "Corr", group.col = "Group", group1 = "Brian_None_0h", group2 = "Brian_TEA_1h", colors = c("black", "orange"))+ plot_ecdf_ks( df = temp[temp$Group %in% list("Brian_TEA_1h", "Brian_TEA_24h"), ], data.col = "Corr", group.col = "Group", group1 = "Brian_TEA_1h", group2 = "Brian_TEA_24h", colors = c("orange", "red"))+ plot_ecdf_ks( df = temp[temp$Group %in% list("Brian_None_0h", "Brian_TEA_24h"), ], data.col = "Corr", group.col = "Group", group1 = "Brian_None_0h", group2 = "Brian_TEA_24h", colors = c("black", "red"))
# refresh NO median interpolation mrna_b_winxiqr <- do.call(rbind, mrna_b_winxiqr_list) mrna_b_winxiqr_to_test <- mrna_b_winxiqr %>% separate(Group, c("Source", "Pharm", "Time")) %>% select(-Source) %>% distinct() library(broom)
cav2, shab, and shaker change over time.
# desired tests # time in control temp <- mrna_b_winxiqr_to_test %>% filter(Pharm %in% c("None")) temp %>% group_by(Time) %>% tally() temp_ys <- names(select(temp, -Time, -Pharm, -uid)) temp_fms <- map(temp_ys, function(temp_y){ fm <- lm(as.formula(paste0(temp_y," ~ Time")), data = temp) res <- broom::tidy(fm) res$y <- temp_y return(res) }) temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)") temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% mutate(p.adj = p.adjust(p.value, method = "holm")) %>% mutate(stars = case_when(p.adj < 0.001 ~ "***", p.adj >= 0.001 & p.adj < 0.01 ~ "**", p.adj >= 0.01 & p.adj < 0.05 ~ "*", p.adj >= 0.05 & p.adj < 0.1 ~ ".", p.adj >= 0.1 ~ " " )) temp_p
temp %>% pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% # group_by(Time) %>% ggplot(aes(x= Time, y= Count, fill = mRNA))+ geom_boxplot()+ geom_point()+ ggthemes::theme_clean()+ scale_fill_brewer()+ facet_grid(.~mRNA)
bkkca expression, inx1 expression, and maybe inx3 expression
# time in TEA temp <- mrna_b_winxiqr_to_test %>% filter(Pharm %in% c("TEA")) temp %>% group_by(Time) %>% tally() temp_ys <- names(select(temp, -Time, -Pharm, -uid)) temp_fms <- map(temp_ys, function(temp_y){ fm <- lm(as.formula(paste0(temp_y," ~ Time")), data = temp) res <- broom::tidy(fm) res$y <- temp_y return(res) }) temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)") temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% mutate(p.adj = p.adjust(p.value, method = "holm")) %>% mutate(stars = case_when(p.adj < 0.001 ~ "***", p.adj >= 0.001 & p.adj < 0.01 ~ "**", p.adj >= 0.01 & p.adj < 0.05 ~ "*", p.adj >= 0.05 & p.adj < 0.1 ~ ".", p.adj >= 0.1 ~ " " )) temp_p
temp %>% pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% # group_by(Time) %>% ggplot(aes(x= Time, y= Count, fill = mRNA))+ geom_boxplot()+ geom_point()+ ggthemes::theme_clean()+ scale_fill_brewer()+ facet_grid(.~mRNA)
# steady state differences? # ap vs TEA vs ttx vs control temp <- mrna_b_winxiqr_to_test %>% filter(Time %in% c("24h")) temp %>% group_by(Pharm) %>% tally() temp_fms <- map(temp_ys, function(temp_y){ fm <- lm(as.formula(paste0(temp_y," ~ Pharm")), data = temp) res <- broom::tidy(fm) res$y <- temp_y return(res) }) temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)") temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% mutate(p.adj = p.adjust(p.value, method = "holm")) %>% mutate(stars = case_when(p.adj < 0.001 ~ "***", p.adj >= 0.001 & p.adj < 0.01 ~ "**", p.adj >= 0.01 & p.adj < 0.05 ~ "*", p.adj >= 0.05 & p.adj < 0.1 ~ ".", p.adj >= 0.1 ~ " " )) temp_p
temp %>% pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% mutate(Pharm = factor(Pharm, levels = c("None", "TEA", "AP", "TTX"))) %>% ggplot(aes(x= Pharm, y= Count, fill = mRNA))+ geom_boxplot()+ geom_point()+ ggthemes::theme_clean()+ scale_fill_brewer()+ facet_wrap(.~mRNA, scales = "free_y")
library(ggpubr) plot_grid(plotlist = map(seq_along(unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))), function(i){ temp %>% pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))[i] ) %>% mutate(Pharm = factor(Pharm, levels = c("None", "TEA", "AP", "TTX"))) %>% ggboxplot(x = "Pharm", y = "Count", fill = "Pharm", width = 0.2, palette = RColorBrewer::brewer.pal(4, "Set1") )+ geom_point(position = position_jitter(width = 0.05))+ stat_compare_means(comparisons = list( c("None", "TEA"), c("None", "AP"), c("None", "TTX"), c("TEA", "AP") ), label = "p.signif")+ # Add significance levels stat_compare_means(label.y = 0)+ theme(legend.position = "", axis.title.y = element_text(face = "italic"))+ labs(x = "", y = as.character(unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))[i])) }) )
We've seen that at least some mRNAs change, presumably due to time and time incubated in the treatment.
Can we replicate these effects? When we focus on TEA do we get changes in the same transcripts?
# select the mRNAs present in both Brian's data and mine. mrna_rep <-select(mrna, Source, Pharm, Time, Experiment, Cell, bkkca, cav1, cav2, inx1, inx2, inx3, inx4, #inx5, #<- inx5 is likely not expressed nav, shab, shaker, shal, shaw1, shaw2) %>% filter( (Source == "Brian" & Pharm %in% c("None", "TEA")) | Source == "Daniel" ) %>% filter(!(Source == "Brian" & Pharm %in% c("None") & Time == "24h")) %>% mutate(Pharm = ifelse(Source == "Daniel" & Time == "0h", "None", Pharm)) %>% select(-Pharm) %>% distinct() mrna_rep %>% skimr::skim() mrna_rep %>% group_by(Time, Source) %>% tally() %>% spread("Time", "n") mrna_rep$uid <- as.character(seq(1, nrow(mrna_rep))) mrna_rep <- mrna_rep %>% select(-Experiment, -Cell) %>% unite(Group, Source, Time) mrna_rep <- mrna_rep[, c("Group", "uid", names(mrna_rep)[!(names(mrna_rep) %in% c("Group", "uid"))]) ] # Remove Outliers mrna_rep_winxiqr_list <- map(unique(mrna_rep$Group), function(group){ temp <- mrna_rep[mrna_rep$Group == group, ] dvs <- names(mrna_rep)[!(names(mrna_rep) %in% c("Group", "uid"))] for (dv_col in dvs){ temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA) } return(temp) }) mrna_rep_winxiqr <- do.call(rbind, mrna_rep_winxiqr_list) mrna_rep_winxiqr <- mrna_rep_winxiqr %>% separate("Group", into = c("Source", "Time"), sep = "_")
plt_df <- data.frame( cor = round(cor_df[rw, cl], digits = 2), rad = abs(round(cor_df[rw, cl], digits = 2)), cor_col = cor_color_df[rw, cl] )
# print( # plt_df # ) plt <- ggplot(plt_df)+ geom_circle(aes(x0 = 0, y0 = 0, r = rad), color = plt_df$cor_col, fill = plt_df$cor_col)+ geom_text(aes(x = 0, y = 1.25, label = cor))+ theme_void()+ lims(y = c(-1.5, 1.5), x = c(-1.5, 1.5))+ coord_fixed()
## Get correlations ==== cor_groupings <- expand.grid( Source = c("Brian", "Daniel"), Time = c("0h", "1h", "24h") ) cor_list <- map(seq(1, nrow(cor_groupings)), function(i){ cor_df <- filter(mrna_rep_winxiqr, Source == cor_groupings[i, "Source"], Time == cor_groupings[i, "Time"]) %>% select(all_of(mrna_cols)) cor_df <- cor(cor_df, use = "pairwise.complete.obs", method = "pearson") # Shave to diagonal for (j in seq(1, nrow(cor_df))){ cor_df[j, 1:j] <- NA } cor_df <- as.data.frame(cor_df) %>% mutate(x = rownames(cor_df)) cor_df <- pivot_longer(cor_df, cols = names(select(cor_df, -x)), names_to = "y", values_to = "Corr") %>% mutate( Source = cor_groupings[i, "Source"], Time = cor_groupings[i, "Time"] ) %>% select(Source, Time, x, y, Corr) return(cor_df) }) cor_comp_df <- do.call(rbind, cor_list) %>% filter(!is.na(Corr)) # add numeric version of the factor to use geom circle cor_comp_df <- cor_comp_df %>% mutate(x.num = as.numeric(factor(x, levels = mrna_cols)), y.num = as.numeric(factor(y, levels = mrna_cols))) ## Plotting ==== ### Correlogram #### library(ggforce) # for geom_circle # Corrgram, but in ggplot compare_corgrams_plt <- cor_comp_df %>% mutate(Source = factor(Source, levels = c("Brian", "Daniel"), labels = c("Pilot", "Replicate"))) %>% # df$supp <- factor(df$supp, levels = c("OJ", "VC"), # labels = c("Orange Juice", "Vitamin C") # ) ggplot()+ geom_circle(aes(x0 = x.num, y0 = y.num, r = abs(Corr)/2, fill = Corr))+ scale_fill_distiller(palette = "PuOr", direction = 1, limits=c(-1,1))+ scale_x_continuous( name = "", position = "top", breaks = 1:length(mrna_cols), labels = mrna_cols)+ scale_y_continuous( name = "", breaks = 1:length(mrna_cols), labels = mrna_cols)+ facet_grid(Source~Time)+ theme( panel.grid.major = element_line(size = 0.5, linetype = 'solid', colour = NA), axis.text.x = element_text(angle = 45, hjust = -0., vjust = .5, face = "italic"), axis.text.y = element_text(face = "italic"), legend.position = "right", strip.placement = "outside")+ coord_fixed() ### Compare correlations across time #### # make dataset amenable to faceting cor_comp_df_for_points <- cor_comp_df %>% spread(Time, Corr) temp_1 <- cor_comp_df_for_points %>% select(Source, x, y, x.num, y.num, `0h`, `24h`) %>% rename(x_time = `0h`, y_time = `24h`) %>% mutate(facet_x = "0h vs 24h") temp_2 <- cor_comp_df_for_points %>% select(Source, x, y, x.num, y.num, `0h`, `1h`) %>% rename(x_time = `0h`, y_time = `1h`) %>% mutate(facet_x = "0h vs 1h") temp_3 <- cor_comp_df_for_points %>% select(Source, x, y, x.num, y.num, `1h`, `24h`) %>% rename(x_time = `1h`, y_time = `24h`) %>% mutate(facet_x = "1h vs 24h") cor_comp_df_for_points <- rbind(rbind(temp_1, temp_2), temp_3) cor_comp_df_for_points <- cor_comp_df_for_points %>% mutate(facet_x = factor(facet_x, levels = c( "0h vs 24h", "0h vs 1h", "1h vs 24h" ))) # plot library(ggsci) # for scale_color_aaas library(ggpubr) # for stat_cor and stat_regline compare_cor_scatter_plt <- cor_comp_df_for_points %>% mutate(Source = factor(Source, levels = c("Brian", "Daniel"), labels = c("Pilot", "Replicate"))) %>% ggplot(aes(x = x_time, y = y_time, color = Source))+ geom_abline(intercept = 0, slope = 1)+ geom_hline(yintercept = 0, color = "darkgray")+ geom_vline(xintercept = 0, color = "darkgray")+ geom_smooth(method = "lm", se = F, fullrange = T)+ geom_point(alpha = 0.4)+ scale_color_aaas()+ labs(title = "", x = "", y = "")+ xlim( c(-1, 1))+ ylim( c(-1, 1))+ facet_grid(Source~facet_x)+ stat_cor(label.x = -.61, label.y = -.8, size = 2.5) + stat_regline_equation(label.x = -.61, label.y = -1, size = 2.5)+ theme(legend.position = "")+ coord_fixed() ### Compare ECDFs across time #### # current_Source = "Brian" # time1 = "0h" # time2 = "1h" ecdf_list <- map(list( list(current_Source = "Brian", time1 = "0h", time2 = "24h"), list(current_Source = "Brian", time1 = "0h", time2 = "1h"), list(current_Source = "Brian", time1 = "1h", time2 = "24h"), list(current_Source = "Daniel", time1 = "0h", time2 = "24h"), list(current_Source = "Daniel", time1 = "0h", time2 = "1h"), list(current_Source = "Daniel", time1 = "1h", time2 = "24h") ), function(input_list){ # input_list = list(current_Source = "Brian", time1 = "0h", time2 = "24h") current_Source = input_list$current_Source time1 = input_list$time1 time2 = input_list$time2 data1 <- filter(cor_comp_df, Source == current_Source & Time == time1 ) %>% select(Corr) %>% unlist() data2 <- filter(cor_comp_df, Source == current_Source & Time == time2 ) %>% select(Corr) %>% unlist() ecdf1 <- ecdf(data1) ecdf2 <- ecdf(data2) # used to get the most extreme difference between the two samples MostExtremeDiff <- seq(min(data1, data2, na.rm = T), max(data1, data2, na.rm = T), length.out = length(data1)) x0 <- MostExtremeDiff[which(abs(ecdf1(MostExtremeDiff) - ecdf2(MostExtremeDiff)) == max(abs(ecdf1(MostExtremeDiff) - ecdf2(MostExtremeDiff))))] y0 <- ecdf1(x0) y1 <- ecdf2(x0) # Run two sided KS test on data test.res <- ks.test(data1, data2) # Make output df graph.df <- data.frame(data1, data2) %>% gather(Condition, Value, 1:2) %>% mutate(Condition = case_when(Condition == "data1" ~ time1, Condition == "data2" ~ time2), x0_extreme = x0[1], y0_extreme = y0[1], y1_extreme = y1[1], Source = current_Source, Contrast = paste0(time1 , " vs ", time2), test_stat = test.res$statistic, test_p = test.res$p.value ) return(graph.df) }) ecdf_list_df <- do.call(rbind, ecdf_list) ecdf_list_df <- ecdf_list_df %>% mutate(Contrast = factor(Contrast, levels = c("0h vs 24h", "0h vs 1h", "1h vs 24h"))) compare_cor_ecdf_plt <- ecdf_list_df %>% mutate(Source = factor(Source, levels = c("Brian", "Daniel"), labels = c("Pilot", "Replicate"))) %>% ggplot()+ geom_segment(aes(x = x0_extreme, y = y0_extreme, xend = x0_extreme, yend = y1_extreme), # linetype = "dashed", color = "darkgray", size = 1)+ geom_point(aes(x = x0_extreme , y= y0_extreme), color="darkgray", size=2) + geom_point(aes(x = x0_extreme , y= y1_extreme), color="darkgray", size=2) + stat_ecdf(aes(x = Value, group = Condition, color = Condition))+ geom_label(aes(x = -0.75, y = 0.75, label = paste0( "KS = ", as.character(round(test_stat, 3)), "\np = ", as.character(round(test_p, 3)) )), size = 2)+ labs(x = "Sample", y = "ECDF"#, # subtitle = paste("K-S Test", as.character(group1), "vs", as.character(group2), # "\np-value:", as.character(round(test.res$p.value, digits = 4))) )+ scale_color_brewer(palette = "Set2" )+ xlim( c(-1, 1))+ ylim( c(0, 1))+ theme_minimal()+ theme(legend.position = "right")+ facet_grid(Source~Contrast)+ coord_fixed() # scale_color_manual(values = colors)#+ # theme(text=element_text(family="Calibri Light", size=14)) ### Corr Table #### cor_comp_df %>% spread(Source, Corr) %>% filter(Brian >= 0.7 & Daniel >= 0.7) %>% gather(Source, Corr, c("Brian", "Daniel")) %>% # spread(Time, Corr) %>% pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% select(-x.num, -y.num) %>% select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h) library(flextable) # make display table cor_comp_ft <- cor_comp_df %>% mutate(Corr = round(Corr, digits = 3)) %>% spread(Source, Corr) %>% # mutate(Group = case_when( (Brian > 0.7) & (Daniel > 0.7) ~ "Both", # (Brian > 0.7) & !(Daniel > 0.7) ~ "Pilot", # !(Brian > 0.7) & (Daniel > 0.7) ~ "Replicate", # !(Brian > 0.7) & !(Daniel > 0.7) ~ "Neither" # )) %>% gather(Source, Corr, c("Brian", "Daniel")) %>% pivot_wider(names_from = c("Source", "Time"), values_from = c("Corr")) %>% #, "Group")) %>% select(-x.num, -y.num) %>% select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h) %>% flextable() cor_comp_ft cor_comp_ft <- cor_comp_ft %>% theme_vanilla() cor_comp_ft <- italic(cor_comp_ft, ~ y == y, ~ y, italic = TRUE) cor_comp_ft <- italic(cor_comp_ft, ~ x == x, ~ x, italic = TRUE) cor_comp_ft <- bold(cor_comp_ft, ~ Brian_0h >= 0.7, ~ Brian_0h, bold = TRUE) cor_comp_ft <- bold(cor_comp_ft, ~ Brian_1h >= 0.7, ~ Brian_1h, bold = TRUE) cor_comp_ft <- bold(cor_comp_ft, ~ Brian_24h >= 0.7, ~ Brian_24h, bold = TRUE) cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_0h >= 0.7, ~ Daniel_0h, bold = TRUE) cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_1h >= 0.7, ~ Daniel_1h, bold = TRUE) cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_24h >= 0.7, ~ Daniel_24h, bold = TRUE) color_sig <- "#a1d99b" cor_comp_ft <- bg(cor_comp_ft, ~ Brian_0h >= 0.7, ~ Brian_0h, bg = color_sig) cor_comp_ft <- bg(cor_comp_ft, ~ Brian_1h >= 0.7, ~ Brian_1h, bg = color_sig) cor_comp_ft <- bg(cor_comp_ft, ~ Brian_24h >= 0.7, ~ Brian_24h, bg = color_sig) cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_0h >= 0.7, ~ Daniel_0h, bg = color_sig) cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_1h >= 0.7, ~ Daniel_1h, bg = color_sig) cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_24h >= 0.7, ~ Daniel_24h, bg = color_sig) cor_comp_ft <- set_header_labels(cor_comp_ft, values = list( x = "", y = "", Brian_0h = "Pilot 0h", Brian_1h = "Pilot 1h", Brian_24h = "Pilot 24h", Daniel_0h = "Replicate 0h", Daniel_1h = "Replicate 1h", Daniel_24h = "Replicate 24h" ) ) cor_comp_ft <- autofit(cor_comp_ft) cor_comp_ft pptx_file <- "./output/officer_output/CorComparison.pptx" save_as_pptx("my table" = cor_comp_ft, path = pptx_file) docx_file <- "./output/officer_output/CorComparison.docx" save_as_docx("my table" = cor_comp_ft, path = docx_file) # make counts tables partial_count_table <- cor_comp_df %>% mutate(Corr = ifelse(Corr > 0.7, T, F)) %>% spread(Source, Corr) %>% mutate(Group = case_when(Brian & Daniel ~ "Both", Brian & !Daniel ~ "Pilot", !Brian & Daniel ~ "Replicate", !Brian & !Daniel ~ "Neither" )) %>% mutate(Group = factor(Group, levels = c("Both", "Pilot", "Replicate", "Neither"))) cor_change_count_ft <- partial_count_table %>% select(Time, x, y, Group) %>% group_by(Time, Group) %>% tally() %>% spread(Time, n) %>% flextable() cor_change_count_ft cor_change_count_ft <- cor_change_count_ft %>% theme_vanilla() cor_change_count_ft <- set_header_labels(cor_change_count_ft, values = list( Group = "R >= 0.7" ) ) cor_change_count_ft <- autofit(cor_change_count_ft) cor_change_count_ft pptx_file <- "./output/officer_output/CorComparisonCount.pptx" save_as_pptx("my table" = cor_change_count_ft, path = pptx_file) docx_file <- "./output/officer_output/CorComparisonCount.docx" save_as_docx("my table" = cor_change_count_ft, path = docx_file) partial_count_table %>% select(Time, x, y, Group) %>% group_by(Time, Group) %>% spread(Time, Group) partial_count_table %>% select(Time, x, y, Group) %>% unite(cor, x, y, sep = " vs ") %>% ggplot(aes(x=Time, y=cor, fill = Group))+ geom_raster()+ scale_fill_manual(values = c("#631879", "#EE0000", "#3B4992", "White"))+ labs(y = "")+ theme(axis.text.y = element_text(face = "italic")) cor_comp_df %>% spread(Source, Corr) %>% filter(Brian >= 0.7 ) %>% ## | Daniel >= 0.7) %>% gather(Source, Corr, c("Brian", "Daniel")) %>% # spread(Time, Corr) %>% pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% select(-x.num, -y.num) %>% select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h) cor_comp_df %>% spread(Source, Corr) %>% filter(Daniel >= 0.7 ) %>% ## | Daniel >= 0.7) %>% gather(Source, Corr, c("Brian", "Daniel")) %>% # spread(Time, Corr) %>% pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% select(-x.num, -y.num) %>% select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h) cor_comp_df %>% ggplot(aes(x = Corr, color = Time, fill = Time))+ geom_density(alpha = 0.3)+ facet_grid(Source~.) # compare_cor_table <- # cor_comp_df %>% # spread(Source, Corr) %>% # # filter(Brian >= 0.7 | Daniel >= 0.7) %>% # gather(Source, Corr, c("Brian", "Daniel")) %>% # # spread(Time, Corr) %>% # pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% # select(-x.num, -y.num) %>% # select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h) %>% # mutate(Diff_0h = Brian_0h - Daniel_0h, # Diff_1h = Brian_1h - Daniel_1h, # Diff_24h = Brian_24h - Daniel_24h) %>% # ggplot()+ # geom_point(aes(x = Brian_0h, y = Daniel_0h), color = "black") library(gridExtra) ggsave(here("output", "officer_output", "compare_corgrams_plt.tiff"), compare_corgrams_plt, width = 8.5, units = "in") ggsave(here("output", "officer_output", "compare_cor_scatter_plt.tiff"), compare_cor_scatter_plt, width = 8.5, units = "in") ggsave(here("output", "officer_output", "compare_cor_ecdf_plt.tiff"), compare_cor_ecdf_plt, width = 8.5, units = "in")
Make a just in case set of tables
unique_comparisons <- list() for (i in seq_along(mrna_cols)){ for (j in i:length(mrna_cols) ){ unique_comparisons[[length(unique_comparisons)+1]] <- data.frame(x = mrna_cols[i], y = mrna_cols[j]) } } unique_comparisons <- do.call(rbind, unique_comparisons) unique_comparisons <- unique_comparisons %>% filter(x != y) unique_comparisons$slope.p <- NA # Ancova pb = txtProgressBar(min = 0, max = nrow(unique_comparisons), initial = 0, style = 3, char = '█') for (i in 1:nrow(unique_comparisons)){ setTxtProgressBar(pb,i) temp <- filter(mrna_rep_winxiqr, Source == "Daniel") %>% select(unique_comparisons[i, "x"], unique_comparisons[i, "y"], "Time") fm <- lm( as.formula(paste(unique_comparisons[i, "y"], "~", unique_comparisons[i, "x"], "* Time") ), data = temp ) unique_comparisons[i, "slope.p"] <- broom::tidy(car::Anova(fm))[3, "p.value"] } # Pearson unique_comparisons <- rbind(mutate(unique_comparisons, Time = "0h"), mutate(unique_comparisons, Time = "1h")) %>% rbind(., mutate(unique_comparisons, Time = "24h")) unique_comparisons$cor <- NA unique_comparisons$t <- NA unique_comparisons$p.value <- NA unique_comparisons$df <- NA pb = txtProgressBar(min = 0, max = nrow(unique_comparisons), initial = 0, style = 3, char = '█') for (i in 1:nrow(unique_comparisons)){ setTxtProgressBar(pb,i) temp <- filter(mrna_rep_winxiqr, Source == "Daniel", Time == unique_comparisons[i, "Time"]) %>% select(unique_comparisons[i, "x"], unique_comparisons[i, "y"]) # Cors res <- cor.test(x = temp[[unique_comparisons[i, "x"]]], y = temp[[unique_comparisons[i, "y"]]], method = "pearson", use = "pairwise.complete.obs") res <- broom::tidy(res)[, c("estimate", "statistic", "p.value", "parameter")] names(res) <- c("cor", "t", "p.value", "df") unique_comparisons[i, c("cor", "t", "cor.p", "df")] <- res } # full ref # useful ref cor_use_ft <- unique_comparisons %>% select(x, y, Time, slope.p, cor, cor.p) %>% #fdr adj slope p mutate(slope.p = p.adjust(slope.p, method = "fdr")) %>% pivot_wider(names_from = "Time", values_from = c("cor.p", "cor")) %>% mutate( cor.p_0h = p.adjust(cor.p_0h, method = "fdr"), cor.p_1h = p.adjust(cor.p_1h, method = "fdr"), cor.p_24h = p.adjust(cor.p_24h, method = "fdr") ) %>% mutate_at(.vars = c( "slope.p", "cor.p_0h", "cor.p_1h", "cor.p_24h", "cor_0h", "cor_1h", "cor_24h"), .funs = round, digits = 3) %>% flextable() cor_use_ft <- cor_use_ft %>% theme_vanilla() cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ x, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ y, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ slope.p, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor.p_0h, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor_0h, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor.p_1h, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor_1h, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor.p_24h, bold = TRUE) cor_use_ft <- bold(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor_24h, bold = TRUE) color_sig <- "#a1d99b" cor_use_ft <- bg(cor_use_ft, ~ slope.p <= 0.05, ~ slope.p, bg = color_sig) cor_use_ft <- bg(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor.p_0h, bg = color_sig) cor_use_ft <- bg(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor_0h, bg = color_sig) cor_use_ft <- bg(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor.p_1h, bg = color_sig) cor_use_ft <- bg(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor_1h, bg = color_sig) cor_use_ft <- bg(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor.p_24h, bg = color_sig) cor_use_ft <- bg(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor_24h, bg = color_sig) cor_use_ft <- set_header_labels(cor_use_ft, values = list( x = "", y = "", slope.p = "ANCOVA p", cor.p_0h = "p 0h", cor.p_1h = "p 1h", cor.p_24h = "p 24h", cor_0h = "R 0h", cor_1h = "R 1h", cor_24h = "R 24h" ) ) cor_use_ft <- autofit(cor_use_ft) cor_use_ft pptx_file <- "./output/officer_output/DanielAncova.pptx" save_as_pptx("my table" = cor_use_ft, path = pptx_file) docx_file <- "./output/officer_output/DanielAncova.docx" save_as_docx("my table" = cor_use_ft, path = docx_file)
df_for_cor_of_cors <- do.call(rbind, cor_list) %>% filter(!is.na(Corr))%>% spread(Time, Corr) # df_for_cor_of_cors %>% # ggplot(aes(x = `0h`, y=`1h`, group = Source, color = Source, fill = Source))+ # geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+ # geom_smooth(method = "lm", fullrange = T)+ # geom_point()+ # geom_point(shape = 1, color = "black") # # geom_point(aes(x = `0h`, y=`24h`), color = "red") # # df_for_cor_of_cors %>% # ggplot(aes(x = `0h`, y=`24h`, group = Source, color = Source, fill = Source))+ # geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+ # geom_smooth(method = "lm", fullrange = T)+ # geom_point()+ # geom_point(shape = 1, color = "black") df_for_cor_of_cors %>% gather("Time", "Corr", c("0h", "1h", "24h")) %>% spread("Source", "Corr") %>% ggplot(aes(x = Brian, y = Daniel, group = Time, color = Time, fill = Time))+ geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+ geom_smooth(method = "lm", fullrange = T)+ geom_point()+ geom_point(shape = 1, color = "black")+ facet_grid(.~Time)+ coord_fixed() # correlation between us df_for_cor_of_cors %>% gather("Time", "Corr", c("0h", "1h", "24h")) %>% spread("Source", "Corr") %>% group_by(Time) %>% summarise(CorrCorr = cor(Brian, Daniel, method = "spearman")) %>% ggplot(aes(x = Time, y = CorrCorr, color = CorrCorr))+ geom_hline(yintercept = 0)+ geom_segment(aes(xend = Time, yend = 0), size= 1)+ geom_point(size= 2)+ geom_point(size= 2, shape = 1, color = "black")+ scale_color_gradient2( low = RColorBrewer::brewer.pal(9, "PuOr")[1], high = RColorBrewer::brewer.pal(9, "PuOr")[9])+ lims(y = c(-1, 1))+ theme(legend.position = "") set.seed(8978979) resample_cor_of_cor <- map(seq(1, 1001), function(i){ if (i == 1){ res <- df_for_cor_of_cors %>% ungroup() %>% gather("Time", "Corr", c("0h", "1h", "24h")) %>% spread("Source", "Corr") %>% group_by(Time) %>% summarise(CorrCorr = cor(Brian, Daniel, method = "spearman")) } else { res <- df_for_cor_of_cors %>% ungroup() %>% gather("Time", "Corr", c("0h", "1h", "24h")) %>% mutate(Corr = sample(Corr)) %>% spread("Source", "Corr") %>% group_by(Time) %>% summarise(CorrCorr = cor(Brian, Daniel, method = "spearman")) } res$i <- i return(res) }) resample_cor_of_cor <- do.call(rbind, resample_cor_of_cor) resample_cor_of_cor %>% mutate(obs = case_when(Time == "0h" ~ as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "0h"), CorrCorr)), Time == "1h" ~ as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "1h"), CorrCorr)), Time == "24h" ~ as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "24h"), CorrCorr)))) %>% mutate(bool = CorrCorr >= obs) %>% group_by(Time) %>% mutate(ep = mean(bool)) %>% ggplot(aes(x = CorrCorr, color = Time, fill = Time))+ geom_segment(aes(x = obs, xend = obs, y = 0, yend = 3))+ geom_density(alpha = 0.2)+ facet_grid(Time~.) # Time CorrCorr i obs bool ep # <chr> <dbl> <int> <dbl> <lgl> <dbl> # 1 0h 0.176 1 0.176 TRUE 0.0549 # 2 1h 0.380 1 0.380 TRUE 0.000999 # 3 24h 0.128 1 0.128 TRUE 0.124 # spread(Time, CorrCorr) %>%
Contrast correlation thresholds -- did they fall in similar ranges? Bin correlations and examine with a jaccard index
n_bins <- 5 library('clusteval') # for cluster_similarity(x, y, similarity="jaccard") set.seed(9080) cor_jaccard_list <- map(c("0h", "1h", "24h"), function(Time){ temp <- cor_comp_df[cor_comp_df$Time == Time, ] %>% mutate(Bins = cut(Corr, breaks = seq(-1, 1, length.out = n_bins))) %>% select(-Corr) %>% pivot_wider(names_from = "Source", values_from = "Bins") obs_jaccard <- cluster_similarity(temp$Brian, temp$Daniel, similarity="jaccard") null_jaccard <- map(1:10000, function(i){ cluster_similarity(sample(temp$Brian, replace = F), temp$Daniel, similarity="jaccard") }) %>% unlist() temp <- with(density(null_jaccard), data.frame(x, y)) temp <- temp %>% mutate(xmax = max(x), obs = obs_jaccard, ep = mean(null_jaccard >= obs_jaccard)) return(temp) }) plot_grid(plotlist = map(cor_jaccard_list, function(temp){ ggplot(data = temp, aes(x = x, y = y))+ geom_line()+ geom_vline(xintercept = temp$obs[1])+ geom_ribbon(data = temp[temp$x > temp$obs, ], aes(xmin = obs, xmax = xmax, ymin = 0, ymax = y))+ labs(subtitle = paste(" ep=", as.character(temp$ep[1]))) }), labels = c("0h", "1h", "24h"), nrow = 1)
# calc p values with kruskal-Wallis and follow it up with a Dunn's #### mrna_rep_winxiqr <- mrna_rep_winxiqr %>% pivot_longer( names_to = "mRNA", values_to = "Count", cols = names(select(mrna_rep_winxiqr, -Source, -Time, -uid))) stats <- expand.grid( Source = c("Brian", "Daniel"), # Time = c("0h", "1h", "24h"), mRNA = unique(mrna_rep_winxiqr$mRNA), Main = NA, BvsC = NA, BvsD = NA, CvsD = NA ) for (i in 1:nrow(stats)){ t_Source <- stats[i, "Source"] t_mRNA <- stats[i, "mRNA"] temp <- mrna_rep_winxiqr %>% filter(Source == t_Source & mRNA == t_mRNA & !is.na(Count)) %>% mutate(Time = as.factor(Time)) # Avoid coering to factor in dunnTest() stats[i, "Main"] <- kruskal.test(Count ~ Time, temp)$p.value # i = 1 dunn_res <- dunnTest(Count ~ Time, data = temp, method = "bonferroni") # bonferroni and hs produce the same result since it's within a single round of kruskal-wallis #FIXME -- Is bonferroni ideal here or would we be better off with sidak, holm, or hs? dunn_res <- dunn_res$res stats[i, "BvsC"] <- filter(dunn_res, Comparison == "0h - 1h")$P.adj stats[i, "BvsD"] <- filter(dunn_res, Comparison == "0h - 24h")$P.adj stats[i, "CvsD"] <- filter(dunn_res, Comparison == "1h - 24h")$P.adj } # get medians and prepare to plot #### mrna_rep_medians <- mrna_rep_winxiqr %>% group_by(Source, Time, mRNA) %>% summarise(MedianCount = median(Count, na.rm =T)) %>% arrange(Source, mRNA, Time) mrna_rep_medians <- mrna_rep_medians %>% mutate( MedianCount2 = MedianCount, id = case_when(Time == "0h" ~ 1, Time == "1h" ~ 2, Time == "24h" ~ 3)) %>% pivot_wider(names_from = "Time", values_from = "MedianCount") %>% rename(MedianCount = MedianCount2) %>% group_by(Source, mRNA) %>% mutate(`0h` = median(`0h`, na.rm = T), `1h` = median(`1h`, na.rm = T), `24h` = median(`24h`, na.rm = T)) %>% ungroup() %>% mutate(updown12 = ifelse(`0h` < `1h`, "up", "down"), updown13 = ifelse(`0h` < `24h`, "up", "down"), updown23 = ifelse(`1h` < `24h`, "up", "down")) # combine stats and prepared plotting df#### mrna_rep_medians <- full_join(mrna_rep_medians, stats) %>% mutate(color1 = ifelse(Main < 0.05 & BvsC < 0.05, updown12, "none"), color2 = ifelse(Main < 0.05 & CvsD < 0.05, updown23, "none")) %>% mutate(color1 = factor(color1, levels = c("down", "none", "up")), color2 = factor(color2, levels = c("down", "none", "up"))) # plot #### temp <- mrna_rep_winxiqr %>% spread("Time", "Count") ggplot(mrna_rep_medians)+ geom_point(data = temp, aes(x = 0.5, y = `0h`), shape = "-")+ geom_point(data = temp, aes(x = 1.5, y = `1h`), shape = "-")+ geom_point(data = temp, aes(x = 2.5, y = `24h`), shape = "-")+ geom_segment(aes(x = 0, xend = 1, y = `0h`, yend = `0h`), size = 1)+ geom_segment(aes(x = 1, xend = 2, y = `1h`, yend = `1h`, color = color1), size = 1)+ geom_segment(aes(x = 2, xend = 3, y = `24h`, yend = `24h`, color = color2), size = 1)+ geom_segment(aes(x = 1, xend = 1, y = `0h`, yend = `1h`, color = color1), arrow = arrow(length = unit(0.03, "npc")), size = 1)+ geom_segment(aes(x = 2, xend = 2, y = `1h`, yend = `24h`, color = color2), arrow = arrow(length = unit(0.03, "npc")), size = 1)+ scale_color_manual(values = c("red", "gray", "green"))+ theme_clean()+ theme(legend.position = "")+ facet_wrap(Source ~ mRNA, scales = "free_y", nrow = 2)
How similar are the results?
temp <- mrna_rep_medians %>% select(Source, mRNA, starts_with("updown"), Main, BvsC, BvsD, CvsD) %>% distinct() # Convert the results to catagories based on what our conclusion would be # Is there a main effect? # Significantly different group? # What direction is it different in? temp <- temp %>% # If there is no main effect all comparisons are in the NA group mutate( BvsC = ifelse(Main >= 0.05, NA, BvsC), BvsD = ifelse(Main >= 0.05, NA, BvsD), CvsD = ifelse(Main >= 0.05, NA, CvsD) ) %>% # If there is no post hoc effect that comparison is in the NA group mutate( BvsC = ifelse(BvsC >= 0.05, NA, BvsC), BvsD = ifelse(BvsD >= 0.05, NA, BvsD), CvsD = ifelse(CvsD >= 0.05, NA, CvsD) ) %>% # Re-write lack of a change as 'none' instead of NA # if there _is_ a post hoc effect then assign that cell the sign of change mutate( BvsC = ifelse(is.na(BvsC), 'none', updown12 ), BvsD = ifelse(is.na(BvsD), 'none', updown13 ), CvsD = ifelse(is.na(CvsD), 'none', updown23 ) ) %>% select(Source, mRNA, BvsC, BvsD, CvsD ) temp <- temp %>% pivot_longer(cols = c("BvsC", "BvsD", "CvsD")) %>% mutate(value = factor(value, levels = c("down", "none", "up"))) %>% pivot_wider(names_from = "Source", values_from = "value") temp %>% pivot_longer(c("Brian", "Daniel"), names_to = "Source", values_to = "value") %>% ggplot(aes(x = Source, y = value, color = Source, group = Source))+ geom_hline(yintercept = c("none"))+ geom_segment(aes(xend = Source, yend = "none"), size = 3, lineend = "butt")+ facet_grid(name~mRNA)+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), legend.position = "") obs_jaccard <- cluster_similarity(temp$Brian, temp$Daniel, similarity="jaccard") null_jaccard <- map(1:10000, function(i){ cluster_similarity(sample(temp$Brian, replace = F), temp$Daniel, similarity="jaccard") }) %>% unlist() temp <- with(density(null_jaccard), data.frame(x, y)) temp <- temp %>% mutate(xmax = max(x), obs = obs_jaccard, ep = mean(null_jaccard >= obs_jaccard)) ggplot(data = temp, aes(x = x, y = y))+ geom_line()+ geom_vline(xintercept = temp$obs[1])+ geom_ribbon(data = temp[temp$x > temp$obs, ], aes(xmin = obs, xmax = xmax, ymin = 0, ymax = y))+ labs(subtitle = paste(" ep=", as.character(temp$ep[1])))
# Copied from above ---- M_winxiqr_list <- map(c("0h", "1h", "24h"), function(condition){ temp <- M_all[M_all$Condition == condition, ] dvs <- names(M_all)[!(names(M_all) %in% c("Condition", "Cell", "Experiment", "Channel", "Sweep", "Source", "Pharm", "Time", "Sample"))] for (dv_col in dvs){ temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA) } return(temp) }) M_winxiqr <- do.call(rbind, M_winxiqr_list) M_winxiqr <- M_winxiqr %>% # select(-Sweep) %>% group_by(Condition, Cell, Experiment, Channel, Source, Pharm, Time, Sample) %>% mutate_all(median, na.rm = T) %>% ungroup() %>% distinct() # Median imputation ---- M_winxiqr <- M_winxiqr %>% select(-Channel, -Source, -Pharm, -Time, -Sample) M_winxiqr <- M_winxiqr %>% group_by(Condition, Experiment, Cell) %>% mutate_all(median, na.rm = T) %>% ungroup() %>% distinct() library(imputeMissings) M_winxiqr <- imputeMissings::impute(M_winxiqr, method = "median/mode") for_label_Condition <- M_winxiqr$Condition rownames(M_winxiqr) <- paste(M_winxiqr$Experiment, M_winxiqr$Cell, sep = "-")
list_for_pca <- list( M_winxiqr[M_winxiqr$Condition == "0h", names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]], M_winxiqr[M_winxiqr$Condition == "1h", names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]], M_winxiqr[M_winxiqr$Condition == "24h", names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]] ) pca_summary_descriptions <- map(list_for_pca, function(df){ mk_pca_multi_plt(input.df = df, scree.max.y = 100) }) pca_summary_descriptions <- map(list_for_pca, function(df){ mk_pca_multi_plt(input.df = df, scree.max.y = 100) }) # {p4|p0}/{p1+p2+p3} {pca_summary_descriptions[[1]]$scree | pca_summary_descriptions[[2]]$scree | pca_summary_descriptions[[3]]$scree}/ {pca_summary_descriptions[[1]]$biplt | pca_summary_descriptions[[2]]$biplt | pca_summary_descriptions[[3]]$biplt} # {pca_summary_descriptions[[1]]$cont1 | pca_summary_descriptions[[2]]$cont1 | pca_summary_descriptions[[3]]$cont1} ggsave(plot = last_plot(), filename = here("output", "officer_output", "PCA_Contrast.tiff"), width = 11.5, height = 4.76)
3d
## mrna only ==== vis_pca_3D_plotly( input.df = M_winxiqr[, names(M_winxiqr)[!(names(M_winxiqr) %in% c( "Condition", "Cell", "Experiment", "vrest", "r11", "r1", "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope" ))]], color.by = unlist(select(M_winxiqr, Condition)), use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"), point.size = 3, point.opacity = 1, dropdowns = T, dropdown.width = 1.5, dropdown.opacity = 0.5, ellipse.opacity = 0#.02 ) ## membrane only ==== vis_pca_3D_plotly( input.df = M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c( # "Condition", "Cell", "Experiment", "vrest", "r11", "r1", # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", # "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", # "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope" ))]], color.by = unlist(select(M_winxiqr, Condition)), use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"), point.size = 3, point.opacity = 1, dropdowns = T, dropdown.width = 1.5, dropdown.opacity = 0.5, ellipse.opacity = 0#.02 ) ## excitability only ==== vis_pca_3D_plotly( input.df = M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c( # "Condition", "Cell", "Experiment", # "vrest", "r11", "r1", # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"#, # "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope" ))]], color.by = unlist(select(M_winxiqr, Condition)), use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"), point.size = 3, point.opacity = 1, dropdowns = T, dropdown.width = 1.5, dropdown.opacity = 0.5, ellipse.opacity = 0#.02 )
set.seed(16069) temp.umap = umap(select(M_winxiqr, -Condition, -Cell, -Experiment, -cc.3_4, -cc.3_5, -ig.3_4, -ig.3_5)) temp <- temp.umap$layout %>% as.tibble() col_by_df <- M_winxiqr[, "Condition"] # plot with convex hulls df <- temp df$Group <- col_by_df find_hull <- function(df) df[chull(df$V1, df$V2), ] hulls <- plyr::ddply(df, "Group", find_hull) ggplot(df, aes(V1, V2, color = Group, fill = Group))+ geom_polygon(data = hulls, alpha = 0.1) + geom_point()+ scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ coord_fixed() ## mrna only ==== temp.umap = umap( M_winxiqr[, names(M_winxiqr)[!(names(M_winxiqr) %in% c( "Condition", "Cell", "Experiment", "vrest", "r11", "r1", "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope" ))]] ) temp <- temp.umap$layout %>% as.tibble() col_by_df <- M_winxiqr[, "Condition"] # plot with convex hulls df <- temp df$Group <- col_by_df find_hull <- function(df) df[chull(df$V1, df$V2), ] hulls <- plyr::ddply(df, "Group", find_hull) umap_mrna <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+ geom_polygon(data = hulls, alpha = 0.1) + geom_point()+ scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ theme(legend.position = "bottom") # coord_fixed() ## membrane only ==== temp.umap = umap( M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c( # "Condition", "Cell", "Experiment", "vrest", "r11", "r1", # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", # "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", # "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope" ))]] ) temp <- temp.umap$layout %>% as.tibble() col_by_df <- M_winxiqr[, "Condition"] # plot with convex hulls df <- temp df$Group <- col_by_df find_hull <- function(df) df[chull(df$V1, df$V2), ] hulls <- plyr::ddply(df, "Group", find_hull) umap_membrane <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+ geom_polygon(data = hulls, alpha = 0.1) + geom_point()+ scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ theme(legend.position = "bottom") # coord_fixed() ## excitability only ==== temp.umap = umap( M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c( # "Condition", "Cell", "Experiment", # "vrest", "r11", "r1", # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"#, # "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope" ))]] ) temp <- temp.umap$layout %>% as.tibble() col_by_df <- M_winxiqr[, "Condition"] # plot with convex hulls df <- temp df$Group <- col_by_df find_hull <- function(df) df[chull(df$V1, df$V2), ] hulls <- plyr::ddply(df, "Group", find_hull) umap_excite <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+ geom_polygon(data = hulls, alpha = 0.1) + geom_point()+ scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+ theme(legend.position = "bottom") # coord_fixed() umap_mrna + umap_membrane + umap_excite
mk_hclust_plts <- function( df = unite(M_winxiqr, uid, Experiment, Cell, sep = "-"), cluster_by = c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"), uid_col = "uid", n_clusters = 3, true_groups = "Condition", true_colors = RColorBrewer::brewer.pal(3, "Set2"), cluster_colors = RColorBrewer::brewer.pal(3, "Set1") ){ df <- as.data.frame(df) if (!exists("cluster_colors")){ cluster_colors = rainbow(n_clusters) } ## prep # move uid to rowname row.names(df) <- df[[uid_col]] df_groups <- select(df, all_of(true_groups)) df <- df[, cluster_by] ## Cluster cluster <- pvclust(t(df), method.hclust = "ward.D2", method.dist = "correlation", use.cor = "pairwise.complete.obs") ## make dendrogram #### dend <- cluster$hclust %>% as.dendrogram() # iteratively coloring the labels is a workaround to get the "true" groups shown dend_labs <- rownames_to_column(df_groups, var = "rownames")[, ] dend_labs <- full_join(data.frame(rownames = labels(dend)), dend_labs) for(i in seq_along(unique(df_groups[[true_groups]]))){ true_group <- unique(df_groups[[true_groups]])[i] true_color <- true_colors[i] dend <- dend %>% dendextend::color_labels( col = true_color, labels = dend_labs[dend_labs[[true_groups]] == true_group, "rownames"]) } dend <- dend %>% set("branches_k_color", k = n_clusters, value = cluster_colors ) %>% set("branches_lwd", 0.7) %>% set("labels_cex", 0.6) dend_cluster_only <- dend %>% set("labels_colors", k = n_clusters, value = cluster_colors) %>% as.ggdend() dend <- dend %>% as.ggdend() plt_dend_cluster_only <- ggplot(dend_cluster_only)+ theme(axis.ticks.y = element_line(), axis.text.y = element_text(), axis.line.y = element_line()) plt_dend <- ggplot(dend)+ theme(axis.ticks.y = element_line(), axis.text.y = element_text(), axis.line.y = element_line()) ## Add reality ribbon with or without clustering result #### groups_to_plt <- full_join( as.data.frame(dend$labels), rownames_to_column(var = "label", df_groups)) plt_grouping <- groups_to_plt %>% ggplot(aes_string(x="x", y="0", fill = true_groups))+ geom_tile()+ scale_fill_manual(values = true_colors)+ theme_void()+ labs(x = "", y = "")+ theme(legend.position = "left") plt_grouping_contrast <- ggplot()+ geom_tile(data = groups_to_plt, aes_string(x="x", y="0.5", fill = true_groups))+ scale_fill_manual(values = true_colors)+ ggnewscale::new_scale("fill") + geom_tile(data = data.frame(x = seq_along(dend_cluster_only$labels$col), cluster_groups = as.character(as.numeric(as.factor(dend_cluster_only$labels$col))) ), aes_string(x="x", y= "-0.5", fill = "cluster_groups"), )+ scale_fill_manual(values = cluster_colors)+ theme_void()+ labs(x = "", y = "")+ theme(legend.position = "left") ## Add heatmap #### data_to_plt <- full_join( as.data.frame(dend$labels), rownames_to_column(var = "label", df)) data_to_plt <- data_to_plt %>% gather("key", "value", names(data_to_plt)[ !(names(data_to_plt) %in% c("x", "y", "label", "col", "cex", true_groups)) ]) plt_heatmap_raw <- data_to_plt %>% ggplot(aes(x, y = key, fill = value))+ geom_tile()+ scale_fill_viridis_c()+ labs(x = "", y = "")+ theme(panel.background = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_blank(), legend.position = "left") plt_heatmap_z <- data_to_plt %>% group_by(key) %>% mutate(mean = mean(value, na.rm = T), sd = sd(value, na.rm = T)) %>% mutate(value = ((value - mean)/sd)) %>% # Now Z scores ggplot(aes(x, y = key, fill = value))+ geom_tile()+ scale_fill_viridis_c()+ labs(x = "", y = "")+ theme(panel.background = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_blank(), legend.position = "left") ## Return plots, manually tweak layout #### return( list( pvclust_out = cluster, dendrogram_clusters = plt_dend_cluster_only, dendrogram_both = plt_dend, group_tile = plt_grouping, group_compare_tile = plt_grouping_contrast, heatmap_raw = plt_heatmap_raw, heatmap_z = plt_heatmap_z ) ) }
## mrna only ==== o_mrna <- mk_hclust_plts( df = rownames_to_column(M_winxiqr, "uid"), cluster_by = mrna_cols, uid_col = "uid", n_clusters = 3, true_groups = "Condition", true_colors = RColorBrewer::brewer.pal(3, "Set2"), #c("#fdd49e", "#fc8d59", "#b30000"), cluster_colors = RColorBrewer::brewer.pal(3, "Set1") ) (o_mrna$dendrogram_both+ scale_y_continuous(limits = c(-1.51, 3), labels = scales::comma) # theme(plot.margin=unit(c(1,1,1.5,1),"cm")) )/ (o_mrna$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") )+ patchwork::plot_layout(heights = c(2, .2)) (o_mrna$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") ) / (o_mrna$heatmap_z + theme(legend.position = "bottom")) + patchwork::plot_layout(heights = c(.2, 5)) (o_mrna$dendrogram_both+ scale_y_continuous(limits = c(-1.51, 1.51), labels = scales::comma)#c(-1.51, 3), labels = scales::comma) # theme(plot.margin=unit(c(1,1,1.5,1),"cm")) )/ (o_mrna$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") ) / (o_mrna$heatmap_z + theme(legend.position = "right", axis.text.y = element_text(face = "italic")) / (o_mrna$heatmap_raw+ylim()+theme(legend.position = "right", axis.text.y = element_text(face = "italic")))+ patchwork::plot_layout(heights = c(5, .5, 5, 5)) ## membrane only ==== o_mem <- mk_hclust_plts( df = rownames_to_column(M_winxiqr, "uid"), cluster_by = c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"), uid_col = "uid", n_clusters = 3, true_groups = "Condition", true_colors = RColorBrewer::brewer.pal(3, "Set2"), cluster_colors = RColorBrewer::brewer.pal(3, "Set1") ) (o_mem$dendrogram_both+ scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma) # theme(plot.margin=unit(c(1,1,1.5,1),"cm")) )/ (o_mem$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") )+ patchwork::plot_layout(heights = c(2, .2)) (o_mem$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") ) / (o_mem$heatmap_z + theme(legend.position = "bottom")) + patchwork::plot_layout(heights = c(.2, 1)) (o_mem$dendrogram_both+ scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma) # theme(plot.margin=unit(c(1,1,1.5,1),"cm")) )/ (o_mem$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") ) / (o_mem$heatmap_z + theme(legend.position = "bottom")) + patchwork::plot_layout(heights = c(2, .2, 1)) ## excitability only ==== o_exc <- mk_hclust_plts( df = rownames_to_column(M_winxiqr, "uid"), cluster_by = epsp_cols, uid_col = "uid", n_clusters = 3, true_groups = "Condition", true_colors = RColorBrewer::brewer.pal(3, "Set2"), cluster_colors = RColorBrewer::brewer.pal(3, "Set1") ) (o_exc$dendrogram_both+ scale_y_continuous(limits = c(-.15, .75), labels = scales::comma) # theme(plot.margin=unit(c(1,1,1.5,1),"cm")) )/ (o_exc$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") )+ patchwork::plot_layout(heights = c(2, .2)) (o_exc$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") ) / (o_exc$heatmap_z + theme(legend.position = "bottom")) + patchwork::plot_layout(heights = c(.2, 1)) (o_exc$dendrogram_both+ scale_y_continuous(limits = c(-.15, .75), labels = scales::comma) # theme(plot.margin=unit(c(1,1,1.5,1),"cm")) )/ (o_exc$group_compare_tile+ # lims(y = c(2, -2))+ # y axis can be flipped like so theme(legend.position = "") ) / (o_exc$heatmap_z + theme(legend.position = "bottom")) + patchwork::plot_layout(heights = c(2, .2, 1)) # (o_mrna$group_compare_tile+theme(legend.position = "")) / # (o_mrna$heatmap_z + theme(legend.position = "bottom"))+ # patchwork::plot_layout(heights = c(.2, 1)) | # # (o_exc$group_compare_tile+theme(legend.position = "")) / # (o_exc$heatmap_z + theme(legend.position = "bottom"))+ # patchwork::plot_layout(heights = c(.2, 1)) | # # (o_mem$group_compare_tile+theme(legend.position = "")) / # (o_mem$heatmap_z + theme(legend.position = "bottom"))+ # patchwork::plot_layout(heights = c(.2, 1)) ((o_mrna$dendrogram_both+ scale_y_continuous(limits = c(-.52, 3), labels = scales::comma))+ (o_mem$dendrogram_both+ scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma))+ (o_exc$dendrogram_both+ scale_y_continuous(limits = c(-.15, .75), labels = scales::comma)))/ ((o_mrna$group_compare_tile+theme(legend.position = ""))+ (o_mem$group_compare_tile+theme(legend.position = ""))+ (o_exc$group_compare_tile+theme(legend.position = "")))+ patchwork::plot_layout(heights = c(1, .1))
Evaluating clustering quality
# # if (!requireNamespace("BiocManager", quietly = TRUE)) # # install.packages("BiocManager") # # BiocManager::install("BiocGenerics") # library(BiocGenerics) # library(imputeMissings) # library(magrittr) # for %>% (this is part of tidyverse) # library(clues) # library(NMF) # # ## Evaluate clustering performance ========================================== # get_cluster_comparisons <- function(reference.clustering = mrna_raw$Cell, # generated.clustering) { # if (length(reference.clustering) != length(generated.clustering)) { # warning("Input vectors are not of the same length!") # } else { # # reference.clustering = mrna_raw$Cell # # generated.clustering = kmeans.m$cluster # output <- array(0, dim = 6) # reference.clustering <- as.numeric(reference.clustering) %>% as.factor() # generated.clustering <- as.numeric(generated.clustering) %>% as.factor() # output <- NMF::purity(reference.clustering, generated.clustering) # names(output) <- "Purity" # # Get a lot of concurrance measures # # https://davetang.org/muse/2017/09/21/adjusted-rand-index/ # output <- c( # output, # clues::adjustedRand( # BiocGenerics::as.vector(reference.clustering), # BiocGenerics::as.vector(generated.clustering) # ) # ) # return(output) # } # }
resampling based evaluation of jaccard
# temp.clust <- as.dendrogram(cluster$hclust) # assignment <- cutree(temp.clust, k = 3)[order.dendrogram(temp.clust)] # # # use names to get groupings # time_groups <- as.factor(unlist(transpose(stringr::str_split(names(assignment), pattern = "-"))[[1]])) # # library(purrr) # set.seed(9348957) # resample.indices <- map(1:10001, function(i){ # if (i == 1){ # indices <- get_cluster_comparisons(reference.clustering = time_groups, # assignment) # }else{ # indices <- get_cluster_comparisons(reference.clustering = time_groups, # sample(assignment, replace = F)) # } # }) # resample.indices <- do.call(rbind, resample.indices) # resample.indices <- resample.indices %>% as.data.frame() # # library(ggplot2) # resample.indices %>% # ggplot(aes(x = Jaccard))+ # geom_density()+ # geom_vline(xintercept = as.numeric(resample.indices[1, "Jaccard"]))+ # xlim(0,1)+ # theme_minimal()+ # labs(subtitle = paste0("ep = ", as.character(round(mean(resample.indices$Jaccard >= resample.indices[1, "Jaccard"]), digits = 4)))) # # empirical p-value from 10,000 random samplings # ggsave(plot = last_plot(), # filename = here("officer_output", "hclust_jaccard_dist.tiff"), # width = 11.5, height = 4.76)
# corr <- cor(M_winxiqr[, mrna_cols], use = "pairwise.complete.obs", method = "spearman") # p.mat <- cor_pmat(M_winxiqr[, mrna_cols], use = "pairwise.complete.obs", method = "spearman") # # ggcorrplot( # corr, # p.mat = p.mat, # hc.order = FALSE, # # method = "circle", # type = "lower", # insig = "blank", # colors = RColorBrewer::brewer.pal(9, "PuOr")[c(1, 5, 9)] # ) # Time = "0h" cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){ df = M_winxiqr[M_winxiqr$Condition == Time, c(mrna_cols, memb_cols, epsp_cols)] corr <- cor(df, use = "pairwise.complete.obs", method = "spearman") p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman") cor_df_bin <- corr cor_bins <- seq(-1, 1, length.out = 8) for (i in 1:(length(cor_bins)-1)){ # use the more extreme value to shade with cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]] } ggcorrplot( cor_df_bin, p.mat = p.mat, insig = "blank", type = "upper", outline.col = "white", colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)] )+ labs(subtitle = Time)+ theme(legend.position = ""#, # axis.text.x = element_text(angle = 90) ) }) # plot_grid(plotlist = cor_plt_list) (cor_plt_list[[1]]+theme( axis.text.x = element_text(angle = 90))) + (cor_plt_list[[2]]+theme( axis.text.x = element_text(angle = 90), # axis.text.x = element_blank(), axis.text.y = element_blank()))+ (cor_plt_list[[3]]+theme( axis.text.x = element_text(angle = 90), # axis.text.x = element_blank(), axis.text.y = element_blank()))
cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){ df = M_winxiqr[M_winxiqr$Condition == Time, c(mrna_cols)] corr <- cor(df, use = "pairwise.complete.obs", method = "spearman") p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman") cor_df_bin <- corr cor_bins <- seq(-1, 1, length.out = 8) for (i in 1:(length(cor_bins)-1)){ # use the more extreme value to shade with cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]] } ggcorrplot( cor_df_bin, p.mat = p.mat, insig = "blank", type = "upper", outline.col = "white", colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)] )+ labs(subtitle = Time)+ theme(legend.position = ""#, # axis.text.x = element_text(angle = 90) ) }) # plot_grid(plotlist = cor_plt_list) (cor_plt_list[[1]]+theme( axis.text.x = element_text(angle = 90))) + (cor_plt_list[[2]]+theme( axis.text.x = element_text(angle = 90), # axis.text.x = element_blank(), axis.text.y = element_blank()))+ (cor_plt_list[[3]]+theme( axis.text.x = element_text(angle = 90), # axis.text.x = element_blank(), axis.text.y = element_blank()))
temp_list <- map(unique(M_winxiqr$Condition), function(condition){ temp <- filter(M_winxiqr, Condition == condition) %>% select(-Cell, -Experiment) temp <- temp[, c(mrna_cols, memb_cols, epsp_cols )] %>% correlate() #%>% shave() temp$Condition <- condition temp <- gather(temp, "term2", "Cor", names(temp)[!(names(temp) %in% c("term", "Condition"))]) return(temp) }) # cor breaks # cor_br <- seq(-1, 1, length.out = 8) # # plt <- do.call(rbind, temp_list) %>% # filter(term %in% mrna_cols) %>% # filter(term2 %in% c(memb_cols, epsp_cols)) %>% # ggplot(aes(term, term2, fill = Cor))+ # geom_tile() # # # plt+scale_fill_stepsn(colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)] ) # # # plt+scale_fill_gradientn(colors=RColorBrewer::brewer.pal(n = 9, name = "PuOr"), # na.value = "transparent", # breaks=seq(-1, 1, length.out = 8), # # labels=c("Minimum",0,"Maximum"), # limits=c(-1,1)) library(corrr) library(ggtext) library(glue) # do.call(rbind, temp_list) corr_bt_mrna_phys <- map(1:3, function(i){ temp_list[[i]] %>% filter(term %in% mrna_cols) %>% filter(term2 %in% c(memb_cols, epsp_cols)) %>% mutate(term = glue(("<i>{term}</i>"))) %>% ggplot(aes(term, term2, fill = Cor))+ geom_tile()+ labs(x= "mRNA", y = "")+ theme(axis.text.x = element_markdown(angle = 45))+ scale_fill_stepsn( colors=RColorBrewer::brewer.pal(n = 8, name = "PuOr"), na.value = "transparent", breaks=round(seq(-1, 1, length.out = 9), digits = 2)[2:8], limits=c(-1,1) )+ coord_fixed() }) plot_grid(plotlist = corr_bt_mrna_phys) corr_bt_mrna_phys[[1]]+theme(legend.position = "") + corr_bt_mrna_phys[[2]]+theme(legend.position = "") + corr_bt_mrna_phys[[3]] library(GGally) ggpairs(select(filter(M_winxiqr),#, Condition == "0h"), all_of(c("Condition", mrna_cols, "r1", "Ihtk.0", "Ihtk.0.s", "Ia.0", "Ia.0.s" ))),#memb_cols, "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "AUC_Obs"))), mapping = ggplot2::aes(color = Condition), lower = list(continuous = wrap("smooth", se = F, alpha = 0.8, size=1))) # ggplot2::aes(colour=Condition, aes = 0.1))
#FIXME What library did I use for "BaselineTEA.png" ?
# FIXME
#TODO
tictoc::tic() temp_list <- map( c(mrna_cols, memb_cols, epsp_cols), # c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), function(temp_col){ print(temp_col) temp <- M_all[M_all$Source == "Daniel", c("Condition", "Experiment", "Cell", temp_col)] %>% distinct() # Identify outliers for each group temp_b <- temp[temp$Condition == "0h", ] temp_c <- temp[temp$Condition == "1h", ] temp_d <- temp[temp$Condition == "24h", ] temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ] temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ] temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ] temp <- rbind(temp_b, temp_c) %>% rbind(temp_d) # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ] temp <- temp[!is.na(temp[[temp_col]]), ] # make sure we don't have pseudoreplicates for network measurements if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){ temp <- temp %>% select(-Cell) %>% group_by(Condition, Experiment) %>% mutate_all(median, na.rm = T) %>% distinct() } if(nrow(temp) < 10){ return(list( dv = temp_col, p = NA, ep = NA, hsd_groups = data.frame(dv = c(NA, NA, NA), groups = c(NA, NA, NA), Condition = c(NA, NA, NA) ), fm = NA)) } else { temp_shuffle <- temp resample_array <- map(1:1000, function(i){ temp_shuffle$Condition <- sample(temp_shuffle$Condition, replace = F) fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp_shuffle) return(car::Anova(fm)[1,3]) }) %>% unlist() fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp) fm_hsd <- agricolae::HSD.test(fm, trt = "Condition") fm_hsd <- fm_hsd$group fm_hsd$Condition <- rownames(fm_hsd) fm_hsd_p <- agricolae::HSD.test(fm, trt = "Condition", group = F)$comparison fm_hsd_p[rownames(fm_hsd_p) == "0h - 1h", "pvalue"] return(list( dv = temp_col, p = car::Anova(fm)[1,4], ep = mean(resample_array >= car::Anova(fm)[1,3]), hsd_groups = fm_hsd, fm = fm, p0.1 = fm_hsd_p[rownames(fm_hsd_p) == "0h - 1h", "pvalue"], p0.24 = fm_hsd_p[rownames(fm_hsd_p) == "0h - 24h", "pvalue"], p1.24 = fm_hsd_p[rownames(fm_hsd_p) == "1h - 24h", "pvalue"] )) } }) tictoc::toc() temp_list <- temp_list %>% transpose() fm_overview_table <- data.frame(dv = unlist(temp_list[[1]]), p = unlist(temp_list[[2]]), ep = unlist(temp_list[[3]]), p0.1 = unlist(temp_list[[6]]), p0.24 = unlist(temp_list[[7]]), p1.24 = unlist(temp_list[[8]]) ) # work up post hoc stats temp_list_hsd <- map(seq_along(temp_list[[4]]), function(i){ if (!is.na(unique(temp_list[[4]][[i]]$groups))){ temp_list[[4]][[i]] %>% gather("dv", "Estimate", names(temp_list[[4]][[i]])[1]) %>% pivot_wider(names_from = "Condition", values_from = c("groups", "Estimate") ) %>% mutate_all(as.character) } }) fm_overview_hsd <- do.call(rbind, temp_list_hsd) fm_overview_table <- full_join(fm_overview_table, fm_overview_hsd) # for adding tukey hsd p vals into plots fm_overview <- fm_overview_table
# add groupings fm_overview_table <- fm_overview_table %>% mutate(Family = case_when( dv %in% c("vrest") ~ "Resting Voltage", dv %in% c("r11", "r1") ~ "Membrane Resistance", dv %in% ionic_cols ~ "Outward Currents", dv %in% epsp_cols ~ "Excitability", dv %in% c("r12", "rc", "ig", "cc", "ig.3_4", "ig.3_5", "ig.4_5") ~ "Coupling Measures" )) fm_overview_table <- fm_overview_table %>% mutate(Family = factor(fm_overview_table$Family, levels = c("Membrane Resistance", "Resting Voltage", "Coupling Measures", "Outward Currents", "Excitability"))) %>% arrange(Family) %>% mutate(Family = as.character(Family)) # temp <- rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName) fm_overview_table <- full_join(fm_overview_table, rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName, Family2 = Family)) fm_overview_table <- fm_overview_table %>% mutate(Family = ifelse(is.na(Family) & !is.na(Family2), Family2, Family)) %>% select(-Family2) %>% filter(!is.na(p)) # add additional cols fm_overview_table$fdr <- p.adjust(fm_overview_table$ep, method = "fdr") fm_overview_table <- fm_overview_table %>% mutate(Estimate_0h = as.numeric(Estimate_0h), Estimate_1h = as.numeric(Estimate_1h), Estimate_24h = as.numeric(Estimate_24h)) %>% mutate(p = round(p, digits = 3), ep = round(ep, digits = 3), fdr = round(fdr, digits = 3), Estimate_0h = round(Estimate_0h, digits = 3), Estimate_1h = round(Estimate_1h, digits = 3), Estimate_24h = round(Estimate_24h, digits = 3), p0.1 = round(p0.1, digits = 3), p0.24 = round(p0.24, digits = 3), p1.24 = round(p1.24, digits = 3) ) %>% mutate(stars = case_when(fdr > 0.10 ~ "", fdr < 0.10 & fdr > 0.05 ~ ".", fdr < 0.05 & fdr > 0.01 ~ "*", fdr < 0.01 & fdr > 0.001 ~ "**", fdr < 0.001 ~ "***" )) # library(officer) library(flextable) M_ft <-flextable(fm_overview_table, col_keys = c("Family", "dv", "p", "ep", "fdr", "stars", "Estimate_0h", "Estimate_1h", "Estimate_24h", "p0.1", "p0.24", "p1.24")) M_ft <- theme_vanilla(M_ft) M_ft <- merge_v(M_ft, j = c("Family") ) # M_ft <- color(M_ft, ~ fdr < 0.05, ~ stars, color = "red") M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ dv, bold = TRUE) M_ft <- bold(M_ft, ~ p <= 0.05, ~ p, bold = TRUE) M_ft <- bold(M_ft, ~ ep <= 0.05, ~ ep, bold = TRUE) M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ fdr, bold = TRUE) M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ stars, bold = TRUE) # get the groupings without showing the columns. color_a = "#ffeda0" color_ab= "#feb24c" color_b = "#f03b20" color_c = "#2b8cbe" M_ft <- bg(M_ft, ~ groups_0h == "a", ~ Estimate_0h, bg = color_a) M_ft <- bg(M_ft, ~ groups_0h == "ab",~ Estimate_0h, bg = color_ab) M_ft <- bg(M_ft, ~ groups_0h == "b", ~ Estimate_0h, bg = color_b) M_ft <- bg(M_ft, ~ groups_0h == "c", ~ Estimate_0h, bg = color_c) M_ft <- bg(M_ft, ~ groups_1h == "a", ~ Estimate_1h, bg = color_a) M_ft <- bg(M_ft, ~ groups_1h == "ab",~ Estimate_1h, bg = color_ab) M_ft <- bg(M_ft, ~ groups_1h == "b", ~ Estimate_1h, bg = color_b) M_ft <- bg(M_ft, ~ groups_1h == "c", ~ Estimate_1h, bg = color_c) M_ft <- bg(M_ft, ~ groups_24h == "a", ~ Estimate_24h, bg = color_a) M_ft <- bg(M_ft, ~ groups_24h == "ab",~ Estimate_24h, bg = color_ab) M_ft <- bg(M_ft, ~ groups_24h == "b", ~ Estimate_24h, bg = color_b) M_ft <- bg(M_ft, ~ groups_24h == "c", ~ Estimate_24h, bg = color_c) M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_0h, bg = "White") M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_1h, bg = "White") M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_24h, bg = "White") M_ft <- set_header_labels(M_ft, dv = "", stars = "", Estimate_0h = "0h", Estimate_1h = "1h", Estimate_24h = "24h", p0.1 = "0h vs 1h", p0.24 = "0h vs 24h", p1.24 = "1h vs 24h" ) M_ft <- autofit(M_ft) M_ft pptx_file <- "./officer_output/DanielAnova.pptx" save_as_pptx("my table" = M_ft, path = pptx_file) docx_file <- "./officer_output/DanielAnova.docx" save_as_docx("my table" = M_ft, path = docx_file)
# mrna_cols, memb_cols, epsp_cols plot_univariate_decorated_0 <- function(temp = plt_membrane, yvar = "Ihtk.0", new.y = "", new.title = ""){ # allow for customization of y axis if (typeof(new.title) == "character"){ if(typeof(new.y) == "character"){ if(new.y == ""){ new.title <- yvar } } else if (typeof(new.y) == "character" | typeof(new.y) == "expression"){ new.title <- new.title } else { warning("new.y not supported type") new.title <- yvar } } names(temp)[names(temp) == yvar] <- "yvar" temp_bands <- temp %>% group_by(Condition) %>% summarise( q10 = quantile(yvar, probs = 0.1, na.rm = T), q25 = quantile(yvar, probs = 0.25, na.rm = T), # q45 = quantile(yvar, probs = 0.45, na.rm = T), q50 = quantile(yvar, probs = 0.5, na.rm = T), # q55 = quantile(yvar, probs = 0.55, na.rm = T), q75 = quantile(yvar, probs = 0.75, na.rm = T), q90 = quantile(yvar, probs = 0.9, na.rm = T), ) %>% mutate(outlier_up = q50 + 1.5*(q75 - q25), outlier_down = q50 - 1.5*(q75 - q25)) temp <- full_join(temp, temp_bands) temp_plt <- ggplot(data = temp, aes(x = Condition, y = yvar))+ # geom_ribbon(aes(x = Condition, ymin = q10, ymax = q90, group = 1), color = "lightgray", fill = "cornflowerblue", alpha = 0.3)+ geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = 1), color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+ # geom_ribbon(aes(x = Condition, ymin = q45, ymax = q55, group = 1), color = "black", fill = "cornflowerblue", alpha = 0.3)+ geom_line(aes(x = Condition, y = q50, group = 1), linetype = "dashed", color = "blue")+ geom_line(aes(x = Condition, y = outlier_up, group = 1), linetype = "dashed", color = "black")+ geom_line(aes(x = Condition, y = outlier_down, group = 1), linetype = "dashed", color = "black")+ geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+ # geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 1)+ geom_point(alpha = 0.5)+ labs(title = new.title, y=new.y, x = "") return(temp_plt) }
plt_membrane <- M_all %>% select(Condition, Experiment, Cell, all_of(memb_cols)) %>% group_by(Condition, Experiment, Cell) %>% mutate_all(median, na.rm = T) %>% ungroup() %>% distinct() membrane_dvs_plt <- c("r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope", "vrest") new.y_vector <- c(expression(M~Omega), expression(M~Omega), "nA", expression(frac(nA, mV)), "nA", expression(frac(nA, mV)), "mV" ) new.title_vector <- c(expression(R[In]), expression(R[Membrane]), expression(I[HTK] ~ Intercept), expression(I[HTK] ~ Slope), expression(I[A] ~ Intercept), expression(I[A] ~ Slope), expression(V[Rest])) membrane_plts <- map(seq_along(membrane_dvs_plt), function(i){plot_univariate_decorated_0( temp = ungroup(plt_membrane), yvar = membrane_dvs_plt[i], new.y = new.y_vector[i], new.title = new.title_vector[i])}) # run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison for (i in seq_along(membrane_dvs_plt)){ # <--------------------------------------- ! temp_annotation <- fm_overview_table %>% filter(dv == membrane_dvs_plt[i]) # <-- ! if (temp_annotation$fdr <= 0.05){ # base y position for on max y and % of range current_yrange <- ggplot_build( membrane_plts[[i]] # <------------------------------------------------------ ! )$layout$panel_params[[1]]$y.range start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1] step_size <- (current_yrange[2] - current_yrange[1])*.08 # create the annotation data frame df <- data.frame( group1 = c("0h", "0h", "1h"), group2 = c("1h", "24h", "24h"), p.adj = c(temp_annotation$p0.1, temp_annotation$p0.24, temp_annotation$p1.24) ) %>% mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% mutate(p.adj = as.character(p.adj)) %>% mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj)) # only increase the y position if there was a significant difference # begin at 1 to get the right output steps_up <- cumsum(df$sig) steps_up[1] <- 1 df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up] membrane_plts[[i]] <- membrane_plts[[i]]+ # <------------------------------------- ! stat_pvalue_manual( data = df, hide.ns = T ) } } # (membrane_plts[[1]] / # membrane_plts[[2]]) | # (membrane_plts[[3]] / # membrane_plts[[4]]) | # (membrane_plts[[5]] / # membrane_plts[[6]]) | # (membrane_plts[[7]] / # patchwork::plot_spacer()) # add to make the scaling match other multiplots length(membrane_plts) for(i in 8:14){ membrane_plts[[i]] <- ggplot() } ggsave(here("officer_output", "membrane_plts.svg"), cowplot::plot_grid(plotlist = membrane_plts, labels = c(LETTERS[1:7], rep("", times = 7))), # width = 8.5, # width = 16, # height = 8, width = 16, height = 16, units = "in") # walk(seq_along(membrane_dvs_plt), function(i){ # ggsave(paste0(membrane_dvs_plt[i],".tiff"), # plot = membrane_plts[[i]], # path = here("officer_output"), # width = 11.5/4, height = 4.76) # })
# Columns: 8 # $ Cor <dbl> 0.5569679, 0.5569679, 0.5569679, 0.5569679, 0.556967~ # $ Min.V_Obs <dbl> -53.92456, -53.92456, -53.92456, -53.92456, -53.9245~ # $ Max.Amplitude_Obs <dbl> -16.6320804, -16.6320804, -16.6320804, -16.6320804, ~ # $ Max.Amplitude_Sim <dbl> -25.5070750, -25.5070750, -25.5070750, -25.5070750, ~ # $ AUC_Obs <dbl> 55976.54, 55976.54, 55976.54, 55976.54, 55976.54, 55~ # $ AUC_Sim <dbl> 32069.58, 32069.58, 32069.58, 32069.58, 32069.58, 32~ # $ Max.Amplitude_Delta <dbl> 8.981806, 8.981806, 8.981806, 8.981806, 8.981806, 8.~ # $ AUC_Delta <dbl> 18563.238, 18563.238, 18563.238, 18563.238, 18563.23~ plt_epsp <- M_all[, c("Condition", "Experiment", "Cell", epsp_cols)] %>% group_by(Condition, Experiment, Cell) %>% mutate_all(median, na.rm = T) %>% ungroup() %>% distinct() # $ Condition <chr> "1h", "1h", "1h", "24h", "1h", "24h", "24h", "24h", ~ # $ Experiment <chr> "190808a", "190808a", "190830", "190903", "190904", ~ # $ Cell <chr> "4", "5", "4", "3", "5", "4", "5", "4", "5", "5", "4~ # $ Cor <dbl> 0.5569679, 0.6112679, NA, 0.9774225, 0.8716307, 0.94~ # $ Min.V_Obs <dbl> -53.924562, -57.052614, NA, -61.050416, -51.895143, ~ # $ Max.Amplitude_Obs <dbl> -16.6320804, -24.1851812, NA, -0.9613037, -45.288086~ # $ Max.Amplitude_Sim <dbl> -25.5070750, -33.6612173, NA, -0.4841494, -37.848033~ # $ AUC_Obs <dbl> 55976.539, 35180.643, NA, 61242.214, 8488.885, 36081~ # $ AUC_Sim <dbl> 32069.584, 38518.177, NA, 59193.615, 3901.010, 29769~ # $ Max.Amplitude_Delta <dbl> 8.981806, 6.438339, NA, 0.308876, 2.380225, 28.70748~ # $ AUC_Delta <dbl> 18563.23796, -251.67701, NA, 2050.58898, 5702.37419,~ epsp_dvs_plt <- c( # "Cor", # "Min.V_Obs", # "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta", # "AUC_Obs", "AUC_Sim", "AUC_Delta" "Cor", "Cor.IQR", "Min.V_Obs", "Min.V_Obs.IQR", "Max.Amplitude_Obs", "Max.Amplitude_Obs.IQR", "AUC_Obs", "AUC_Obs.IQR", "Max.Amplitude_Delta", "Max.Amplitude_Delta.IQR", "AUC_Delta", "AUC_Delta.IQR", "Max.Amplitude_Sim", "AUC_Sim") new.y_vector <- c("R", "R", "mV", "mV", "mV", "mV", "mV/Sec", "mV/Sec", "mV", "mV", "mV/Sec", "mV/Sec", "mV", "mV/Sec") new.title_vector <- c("Correlation", "Correlation IQR", "Baseline", "Baseline IQR", "Obs. Amp.", "Obs. Amp. IQR", "Obs. AUC", "Obs. AUC IQR", "Diff. Amplitude", "Diff. Amp. IQR", "Diff. AUC", "Diff. AUC IQR", "Sim. Amp.", "Sim. AUC" ) epsp_plts <- map(seq_along(epsp_dvs_plt), function(i){plot_univariate_decorated_0( temp = ungroup(plt_epsp), yvar = epsp_dvs_plt[i], new.y = new.y_vector[i], new.title = new.title_vector[i] )}) # ((epsp_plts[[1]]+ylim(c(0, 1))) / # patchwork::plot_spacer()/ # patchwork::plot_spacer()) | # (epsp_plts[[3]] / # epsp_plts[[4]]/ # epsp_plts[[5]]) | # (epsp_plts[[6]] / # epsp_plts[[7]]/ # epsp_plts[[8]]) # run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison for (i in seq_along(epsp_dvs_plt)){ # <--------------------------------------- ! temp_annotation <- fm_overview_table %>% filter(dv == epsp_dvs_plt[i]) # <-- ! if (temp_annotation$fdr <= 0.05){ # base y position for on max y and % of range current_yrange <- ggplot_build( epsp_plts[[i]] # <------------------------------------------------------ ! )$layout$panel_params[[1]]$y.range start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1] step_size <- (current_yrange[2] - current_yrange[1])*.08 # create the annotation data frame df <- data.frame( group1 = c("0h", "0h", "1h"), group2 = c("1h", "24h", "24h"), p.adj = c(temp_annotation$p0.1, temp_annotation$p0.24, temp_annotation$p1.24) ) %>% mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% mutate(p.adj = as.character(p.adj)) %>% mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj)) # only increase the y position if there was a significant difference # begin at 1 to get the right output steps_up <- cumsum(df$sig) steps_up[1] <- 1 df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up] epsp_plts[[i]] <- epsp_plts[[i]]+ # <------------------------------------- ! stat_pvalue_manual( data = df, hide.ns = T ) } } # cowplot::plot_grid(plotlist = epsp_plts, labels = LETTERS) ggsave(here("officer_output", "epsp_plts.svg"), cowplot::plot_grid(plotlist = epsp_plts, labels = LETTERS), width = 16, height = 16, # width = 8.5, units = "in") # walk(seq_along(epsp_dvs_plt), function(i){ # ggsave(paste0(epsp_dvs_plt[i],".tiff"), # plot = epsp_plts[[i]], # path = here("officer_output"), # width = 11.5/4, height = 4.76) # })
univariate change over time -- bend quartile
plt_mrna <- M_all[, c("Condition", "Experiment", "Cell", mrna_cols)] %>% group_by(Condition, Experiment, Cell) %>% mutate_all(median, na.rm = T) %>% ungroup() %>% distinct() mrna_plts <- map(seq_along(mrna_cols), function(i){plot_univariate_decorated_0( temp = ungroup(plt_mrna), yvar = mrna_cols[i], new.y = "", new.title = mrna_cols[i] )+theme(plot.title = element_text(face="italic"))}) # na_y = c(0, 20000) # ca_y = c(0, 7000) # k_y = c(0, 5000) # inx_y <- c(0, 40000) # # # mrna_plts[[1]]+coord_cartesian(ylim = na_y)+ # # mrna_plts[[2]]+coord_cartesian(ylim = ca_y) + # mrna_plts[[3]]+coord_cartesian(ylim = ca_y) + # mrna_plts[[4]]+coord_cartesian(ylim = ca_y) + # patchwork::plot_spacer()+ # # mrna_plts[[5]]+coord_cartesian(ylim = k_y) + # mrna_plts[[6]]+coord_cartesian(ylim = k_y) + # mrna_plts[[7]]+coord_cartesian(ylim = k_y) + # mrna_plts[[8]]+coord_cartesian(ylim = k_y) + # mrna_plts[[9]]+coord_cartesian(ylim = k_y) + # # mrna_plts[[10]]+coord_cartesian(ylim = inx_y) + # mrna_plts[[11]]+coord_cartesian(ylim = inx_y) + # mrna_plts[[12]]+coord_cartesian(ylim = inx_y) + # patchwork::plot_spacer() + # patchwork::plot_spacer() + # # patchwork::plot_layout(ncol = 5, nrow = 3) #TODO probably a more elegant way to do this using the following functions # patchwork::get_dim() # patchwork::set_dim() # mrna_plts[[1]]+ set_dim(mrna_plts[[2]], get_dim(mrna_plts[[1]])) # get_max_dim(mrna_plts[[1]], mrna_plts[[2]]) # run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison for (i in seq_along(mrna_cols)){ # <--------------------------------------- ! temp_annotation <- fm_overview_table %>% filter(dv == mrna_cols[i]) # <-- ! if (temp_annotation$fdr <= 0.05){ # base y position for on max y and % of range current_yrange <- ggplot_build( mrna_plts[[i]] # <------------------------------------------------------ ! )$layout$panel_params[[1]]$y.range start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1] step_size <- (current_yrange[2] - current_yrange[1])*.08 # create the annotation data frame df <- data.frame( group1 = c("0h", "0h", "1h"), group2 = c("1h", "24h", "24h"), p.adj = c(temp_annotation$p0.1, temp_annotation$p0.24, temp_annotation$p1.24) ) %>% mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% mutate(p.adj = as.character(p.adj)) %>% mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj)) # only increase the y position if there was a significant difference # begin at 1 to get the right output steps_up <- cumsum(df$sig) steps_up[1] <- 1 df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up] mrna_plts[[i]] <- mrna_plts[[i]]+ # <------------------------------------- ! x2 stat_pvalue_manual( data = df, hide.ns = T ) } } ggsave(here("officer_output", "mrna_plts.svg"), cowplot::plot_grid(plotlist = mrna_plts, labels = LETTERS), width = 16, height = 12, # width = 8.5, units = "in") ggsave(here("officer_output", "mrna_plts.svg"), cowplot::plot_grid(plotlist = mrna_plts, labels = LETTERS, nrow = 4, ncol = 4), width = 16, height = 16, # width = 8.5, units = "in") # length(membrane_plts) # for(i in 8:14){ # membrane_plts[[i]] <- ggplot() # } # # ggsave(here("officer_output", "membrane_plts.svg"), # cowplot::plot_grid(plotlist = membrane_plts, labels = c(LETTERS[1:7], rep("", times = 7))), # # width = 8.5, # # # width = 16, # # height = 8, # # width = 16, # height = 16, # # units = "in") # walk(seq_along(mrna_cols), function(i){ # ggsave(paste0(mrna_cols[i],".tiff"), # plot = mrna_plts[[i]]+theme(plot.title = element_text(face="italic")), # path = here("officer_output"), # width = 11.5/4, height = 4.76) # })
# plt_mrna_long <- plt_mrna %>% # gather(mrna, count, mrna_cols) %>% # filter(!is.na(count)) # # # plt_mrna_long_bands <- plt_mrna_long %>% # group_by(Condition, mrna) %>% # summarise( # q10 = quantile(count, probs = 0.1, na.rm = T), # q25 = quantile(count, probs = 0.25, na.rm = T), # q50 = quantile(count, probs = 0.5, na.rm = T), # q75 = quantile(count, probs = 0.75, na.rm = T), # q90 = quantile(count, probs = 0.9, na.rm = T), # ) %>% # mutate(outlier_up = q50 + 1.5*(q75 - q25), # outlier_down = q50 - 1.5*(q75 - q25)) # # temp <- full_join(plt_mrna_long, plt_mrna_long_bands) # # # # # devtools::install_github("teunbrand/ggh4x") # library(ggh4x) # # filter(temp, mrna %in% c("nav")) %>% # ggplot(aes(x = Condition, y = count))+ # geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), # color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+ # geom_line(aes(x = Condition, y = q50, group = 1), # linetype = "dashed", color = "blue")+ # geom_line(aes(x = Condition, y = outlier_up, group = 1), # linetype = "dashed", color = "black")+ # geom_line(aes(x = Condition, y = outlier_down, group = 1), # linetype = "dashed", color = "black")+ # # geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+ # # geom_point(alpha = 0.5)+ # facet_wrap(~ mrna, strip.position = "bottom") # # # # filter(temp, mrna %in% c("shaker", "shab", "shal", "shaw1", "shaw2", "bkkca", "cav1", "cav2")) %>% # ggplot(aes(x = Condition, y = count))+ # geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), # color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+ # geom_line(aes(x = Condition, y = q50, group = 1), # linetype = "dashed", color = "blue")+ # geom_line(aes(x = Condition, y = outlier_up, group = 1), # linetype = "dashed", color = "black")+ # geom_line(aes(x = Condition, y = outlier_down, group = 1), # linetype = "dashed", color = "black")+ # # geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+ # # geom_point(alpha = 0.5)+ # facet_wrap(~ mrna, strip.position = "bottom") # # # # filter(temp, mrna %in% c("inx1", "inx2", "inx3")) %>% # ggplot(aes(x = Condition, y = count))+ # geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), # color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+ # geom_line(aes(x = Condition, y = q50, group = 1), # linetype = "dashed", color = "blue")+ # geom_line(aes(x = Condition, y = outlier_up, group = 1), # linetype = "dashed", color = "black")+ # geom_line(aes(x = Condition, y = outlier_down, group = 1), # linetype = "dashed", color = "black")+ # # geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+ # # geom_point(alpha = 0.5)+ # facet_wrap(~ mrna, strip.position = "bottom")
univariate change over time -- boxplots
# devtools::install_github("teunbrand/ggh4x") library(ggh4x) # FIXME plt_mrna_long <- plt_membrane %>% gather(mrna, count, mrna_cols) %>% filter(!is.na(count)) # filter(mrna %in% c("bkkca", "shal")) %>% return_nested_mrna_plt <- function(temp = plt_mrna_long){ ggplot(temp, aes(mrna, count))+ geom_boxplot(aes(fill = Condition))+ geom_point(position = position_jitter(width = 0.05), #size =3, alpha = 0.7)+ theme(axis.text.x=element_blank(), legend.position = "")+ # writes over mrna labels so they only show up in the facet labs(x = "")+ facet_nested(~ mrna + Condition, scales = "free_x", switch = "x") } # return_nested_mrna_plt(temp = filter(plt_mrna_long, # mrna %in% unlist(mRNAInfo[mRNAInfo$Class == "Channel", "RGeneName"]) # )) return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Voltage-dependent K+ Channel", "RGeneName"]) )) return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Other K+ Channel", "RGeneName"]) )) return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Ca2+ Channel", "RGeneName"]) )) # innexins # TODO flix axis to right, bind return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx1", "inx2", "inx3", "inx4"))) return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx5"))) return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx5")))+scale_y_log10()
What happens at the network level?
plt_tevc <- M_all_outliers %>% select(Condition, Experiment, ig.3_4, ig.3_5, ig.4_5) %>% distinct() %>% gather("ElectricalSynapse", "ig", c("ig.3_4", "ig.3_5", "ig.4_5")) %>% filter(!is.na(ig)) plt_1.1 <- plot_univariate_decorated_0( temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.3_4")), yvar = "ig", new.y = "nA/mV", new.title = expression(I[g]~LC[3] %<->% LC[4]))+ coord_cartesian(ylim = c(0, 0.1)) plt_1.2 <- plot_univariate_decorated_0( temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.3_5")), yvar = "ig", new.y = "nA/mV", new.title = expression(I[g]~LC[3] %<->% LC[5]))+ coord_cartesian(ylim = c(0, 0.1)) plt_1.3 <- plot_univariate_decorated_0( temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.4_5")), yvar = "ig", new.y = "nA/mV", new.title = expression(I[g]~LC[4] %<->% LC[5]))+ coord_cartesian(ylim = c(0, 0.5))+ scale_y_continuous(position = "right") library(patchwork) plt_1.1 + plt_1.2 + plt_1.3+ plot_layout(widths = c(1, 1, 1) ) ggsave(paste0("ig_lc34.tiff"), plot = plt_1.1, path = here("officer_output"), width = 11.5/4, height = 4.76) ggsave(paste0("ig_lc35.tiff"), plot = plt_1.2, path = here("officer_output"), width = 11.5/4, height = 4.76) ggsave(paste0("ig_lc45.tiff"), plot = plt_1.3, path = here("officer_output"), width = 11.5/4, height = 4.76) #### #### plt_tecc <- M_all_outliers %>% select(Condition, Experiment, cc.3_4, cc.3_5, cc.4_3, cc.4_5, cc.5_3, cc.5_4) %>% group_by(Condition, Experiment) %>% mutate_all(median, na.rm = T) %>% ungroup() %>% distinct() cowplot::plot_grid(plotlist = list( plot_univariate_decorated_0( temp = plt_tecc, yvar = "cc.3_4", new.y = "Coupling Coefficient", new.title = expression(LC[3] %->% LC[4]))+ coord_cartesian(ylim = c(0, 1)), plot_univariate_decorated_0( temp = plt_tecc, yvar = "cc.3_5", new.y = "Coupling Coefficient", new.title = expression(LC[3] %->% LC[5]))+ coord_cartesian(ylim = c(0, 1)), plot_univariate_decorated_0( temp = plt_tecc, yvar = "cc.4_3", new.y = "Coupling Coefficient", new.title = expression(LC[4] %->% LC[3]))+ coord_cartesian(ylim = c(0, 1)), plot_univariate_decorated_0( temp = plt_tecc, yvar = "cc.4_5", new.y = "Coupling Coefficient", new.title = expression(LC[4] %->% LC[5]))+ coord_cartesian(ylim = c(0, 1)), plot_univariate_decorated_0( temp = plt_tecc, yvar = "cc.5_3", new.y = "Coupling Coefficient", new.title = expression(LC[5] %->% LC[4]))+ coord_cartesian(ylim = c(0, 1)), plot_univariate_decorated_0( temp = plt_tecc, yvar = "cc.5_4", new.y = "Coupling Coefficient", new.title = expression(LC[5] %->% LC[4]))+ coord_cartesian(ylim = c(0, 1)) )) #### #### triangle_lower_df <- data.frame( id = c("b", "b", "b"), value = c("2", "2", "2"), x = c(0, 1, 1), y = c(0, 1, 0) ) triangle_upper_df <- data.frame( id = c("a", "a", "a"), value = c("1", "1", "1"), x = c(0, 1, 0), y = c(0, 1, 1) ) plt_2.1 <- plt_tecc %>% ggplot(aes(cc.3_4, cc.4_3))+ geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#e41a1c")+ geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#377eb8")+ geom_abline(slope = 1, intercept = 0, linetype = "dashed")+ geom_point()+ facet_grid(.~Condition) plt_2.2 <- plt_tecc %>% ggplot(aes(cc.3_5, cc.5_3))+ geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#e41a1c")+ geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#4daf4a")+ geom_point()+ facet_grid(.~Condition) plt_2.3 <- plt_tecc %>% ggplot(aes(cc.4_5, cc.5_4))+ geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#377eb8")+ geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#4daf4a")+ geom_point()+ facet_grid(.~Condition) plt_2.1 / plt_2.2 / plt_2.3
what happens at the level of output (excitability)?
library(brms) # fit1 <- brm(shal ~ Condition, # data = temp, # family = gaussian()) # # 'C:/rtools40/usr/mingw_/bin/g++' not found fit1 <- readRDS(here("data", "brms_models", paste0("shal", ".RDS"))) plot(fit1, pars = c("Condition")) # plot(conditional_effects(fit1, effects = "Condition")) # # temp_col <- "Ia.0" tictoc::tic() walk( c(mrna_cols, memb_cols, # "Ia.Slope" # "Ia.0.s", "Ia.Slope.s" # "Cor" epsp_cols # "Min.V_Obs", "Max.Amplitude_Obs", #"Max.Amplitude_Sim", # "AUC_Obs"#, "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta" ), # c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), function(temp_col){ if (!file.exists(here("data", "brms_models", paste0(temp_col, ".RDS")))){ print(temp_col) temp <- M_all[M_all$Source == "Daniel", c("Condition", "Experiment", "Cell", temp_col)] %>% distinct() # Identify outliers for each group temp_b <- temp[temp$Condition == "0h", ] temp_c <- temp[temp$Condition == "1h", ] temp_d <- temp[temp$Condition == "24h", ] temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ] temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ] temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ] temp <- rbind(temp_b, temp_c) %>% rbind(temp_d) # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ] temp <- temp[!is.na(temp[[temp_col]]), ] # make sure we don't have pseudoreplicates for network measurements if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){ temp <- temp %>% select(-Cell) %>% group_by(Condition, Experiment) %>% mutate_all(median, na.rm = T) %>% distinct() } if(nrow(temp) < 10){ return(list( dv = temp_col, p = NA, ep = NA, hsd_groups = data.frame(dv = c(NA, NA, NA), groups = c(NA, NA, NA), Condition = c(NA, NA, NA) ), fm = NA)) } else { fit1 <- brm(as.formula(paste0(temp_col, " ~ Condition")), data = temp, family = gaussian()) saveRDS(fit1, here("data", "brms_models", paste0(temp_col, ".RDS"))) # rm(list = "fit1") } } }) tictoc::toc()
run anovas
# temp <- M_all %>% # select(Condition, Experiment, Cell, Ihtk.0) %>% # group_by(Condition, Experiment, Cell) %>% # mutate(Ihtk.0 = median(Ihtk.0, na.rm = T)) %>% # distinct() %>% # ungroup() # # temp_bands <- temp %>% group_by(Condition) %>% summarise( # q10 = quantile(Ihtk.0, probs = 0.1, na.rm = T), # q25 = quantile(Ihtk.0, probs = 0.25, na.rm = T), # q45 = quantile(Ihtk.0, probs = 0.45, na.rm = T), # q50 = quantile(Ihtk.0, probs = 0.5, na.rm = T), # q55 = quantile(Ihtk.0, probs = 0.55, na.rm = T), # q75 = quantile(Ihtk.0, probs = 0.75, na.rm = T), # q90 = quantile(Ihtk.0, probs = 0.9, na.rm = T), # ) # # temp <- full_join(temp, temp_bands) # # ggplot(data = temp, aes(x = Condition, y = Ihtk.0))+ # geom_ribbon(aes(x = Condition, ymin = q10, ymax = q90, group = 1), color = "lightgray", fill = "cornflowerblue", alpha = 0.3)+ # geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = 1), color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+ # # geom_ribbon(aes(x = Condition, ymin = q45, ymax = q55, group = 1), color = "black", fill = "cornflowerblue", alpha = 0.3)+ # geom_line(aes(x = Condition, y = q50, group = 1), linetype = "dashed", color = "blue")+ # geom_point(aes(x = Condition, y = q50), color = "lightgray", size = 4)+ # geom_point(aes(x = Condition, y = q50), color = "black", size = 4, shape = 1)+ # geom_point(alpha = 0.5) # Make table ---- # c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols) # # temp_col = "Ihtk.0" #TODO figure out what to do with the epsp cols tictoc::tic() temp_list <- map( c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), function(temp_col){ print(temp_col) temp <- M_all[, c("Condition", "Experiment", "Cell", temp_col)] %>% distinct() # Identify outliers for each group temp_b <- temp[temp$Condition == "0h", ] temp_c <- temp[temp$Condition == "1h", ] temp_d <- temp[temp$Condition == "24h", ] temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ] temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ] temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ] temp <- rbind(temp_b, temp_c) %>% rbind(temp_d) # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ] temp <- temp[!is.na(temp[[temp_col]]), ] # make sure we don't have pseudoreplicates for network measurements if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){ temp <- temp %>% select(-Cell) %>% group_by(Condition, Experiment) %>% mutate_all(median, na.rm = T) %>% distinct() } if(nrow(temp) < 10){ return(list( dv = temp_col, p = NA, ep = NA, hsd_groups = data.frame(dv = c(NA, NA, NA), groups = c(NA, NA, NA), Condition = c(NA, NA, NA) ), fm = NA)) } else { temp_shuffle <- temp resample_array <- map(1:1000, function(i){ temp_shuffle$Condition <- sample(temp_shuffle$Condition, replace = F) fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp_shuffle) return(car::Anova(fm)[1,3]) }) %>% unlist() fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp) fm_hsd <- agricolae::HSD.test(fm, trt = "Condition") fm_hsd <- fm_hsd$group fm_hsd$Condition <- rownames(fm_hsd) return(list( dv = temp_col, p = car::Anova(fm)[1,4], ep = mean(resample_array >= car::Anova(fm)[1,3]), hsd_groups = fm_hsd, fm = fm)) } }) tictoc::toc() temp_list <- temp_list %>% transpose() fm_overview_table <- data.frame(dv = unlist(temp_list[[1]]), p = unlist(temp_list[[2]]), ep = unlist(temp_list[[3]])) # work up post hoc stats temp_list_hsd <- map(seq_along(temp_list[[4]]), function(i){ if (!is.na(unique(temp_list[[4]][[i]]$groups))){ temp_list[[4]][[i]] %>% gather("dv", "Estimate", names(temp_list[[4]][[i]])[1]) %>% pivot_wider(names_from = "Condition", values_from = c("groups", "Estimate") ) %>% mutate_all(as.character) } }) fm_overview_hsd <- do.call(rbind, temp_list_hsd)
Make table
fm_overview_table <- full_join(fm_overview_table, fm_overview_hsd) # add groupings fm_overview_table <- fm_overview_table %>% mutate(Family = case_when( dv %in% c("vrest") ~ "Resting Voltage", dv %in% c("r11", "r1") ~ "Membrane Resistance", dv %in% ionic_cols ~ "Outward Currents", dv %in% epsp_cols ~ "Excitability", dv %in% c("r12", "rc", "ig", "cc", "ig.3_4", "ig.3_5", "ig.4_5") ~ "Coupling Measures" )) fm_overview_table <- fm_overview_table %>% mutate(Family = factor(fm_overview_table$Family, levels = c("Membrane Resistance", "Resting Voltage", "Coupling Measures", "Outward Currents", "Excitability"))) %>% arrange(Family) %>% mutate(Family = as.character(Family)) # temp <- rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName) fm_overview_table <- full_join(fm_overview_table, rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName, Family2 = Family)) fm_overview_table <- fm_overview_table %>% mutate(Family = ifelse(is.na(Family) & !is.na(Family2), Family2, Family)) %>% select(-Family2) %>% filter(!is.na(p)) # add additional cols fm_overview_table$fdr <- p.adjust(fm_overview_table$ep, method = "fdr") fm_overview_table <- fm_overview_table %>% mutate(Estimate_0h = as.numeric(Estimate_0h), Estimate_1h = as.numeric(Estimate_1h), Estimate_24h = as.numeric(Estimate_24h)) %>% mutate(p = round(p, digits = 4), ep = round(ep, digits = 4), fdr = round(fdr, digits = 4), Estimate_0h = round(Estimate_0h, digits = 4), Estimate_1h = round(Estimate_1h, digits = 4), Estimate_24h = round(Estimate_24h, digits = 4)) %>% mutate(stars = case_when(fdr > 0.10 ~ "", fdr < 0.10 & fdr > 0.05 ~ ".", fdr < 0.05 & fdr > 0.01 ~ "*", fdr < 0.01 & fdr > 0.001 ~ "**", fdr < 0.001 ~ "***" )) # library(officer) library(flextable) M_ft <-flextable(fm_overview_table, col_keys = c("Family", "dv", "p", "ep", "fdr", "stars", "Estimate_0h", "Estimate_1h", "Estimate_24h")) M_ft <- theme_vanilla(M_ft) M_ft <- merge_v(M_ft, j = c("Family") ) # M_ft <- color(M_ft, ~ fdr < 0.05, ~ stars, color = "red") M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ dv, bold = TRUE) M_ft <- bold(M_ft, ~ p <= 0.05, ~ p, bold = TRUE) M_ft <- bold(M_ft, ~ ep <= 0.05, ~ ep, bold = TRUE) M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ fdr, bold = TRUE) M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ stars, bold = TRUE) # get the groupings without showing the columns. color_a = "#ffeda0" color_ab= "#feb24c" color_b = "#f03b20" color_c = "#2b8cbe" M_ft <- bg(M_ft, ~ groups_0h == "a", ~ Estimate_0h, bg = color_a) M_ft <- bg(M_ft, ~ groups_0h == "ab",~ Estimate_0h, bg = color_ab) M_ft <- bg(M_ft, ~ groups_0h == "b", ~ Estimate_0h, bg = color_b) M_ft <- bg(M_ft, ~ groups_0h == "c", ~ Estimate_0h, bg = color_c) M_ft <- bg(M_ft, ~ groups_1h == "a", ~ Estimate_1h, bg = color_a) M_ft <- bg(M_ft, ~ groups_1h == "ab",~ Estimate_1h, bg = color_ab) M_ft <- bg(M_ft, ~ groups_1h == "b", ~ Estimate_1h, bg = color_b) M_ft <- bg(M_ft, ~ groups_1h == "c", ~ Estimate_1h, bg = color_c) M_ft <- bg(M_ft, ~ groups_24h == "a", ~ Estimate_24h, bg = color_a) M_ft <- bg(M_ft, ~ groups_24h == "ab",~ Estimate_24h, bg = color_ab) M_ft <- bg(M_ft, ~ groups_24h == "b", ~ Estimate_24h, bg = color_b) M_ft <- bg(M_ft, ~ groups_24h == "c", ~ Estimate_24h, bg = color_c) M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_0h, bg = "White") M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_1h, bg = "White") M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_24h, bg = "White") M_ft <- set_header_labels(M_ft, dv = "", stars = "", Estimate_0h = "0h", Estimate_1h = "1h", Estimate_24h = "24h" ) M_ft <- autofit(M_ft) M_ft pptx_file <- "./officer_output/DanielAnova.pptx" save_as_pptx("my table" = M_ft, path = pptx_file) # fm_overview_table %>% # ggplot(aes(x = ep))+ # geom_histogram(binwidth = 0.025)+ # geom_vline(xintercept = 0.05)
df <- full_join(metadata[, c("Experiment", "FileName", "Condition")], epsp) df <- df %>% filter(!is.na(Channel)) df %>% unite(uid, Experiment, Cell) %>% filter(Key != "predicted") %>% ggplot(aes(x = uid, AUC, color = Type))+ geom_point(alpha = 0.5)+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.))+ facet_wrap(.~Condition) # it looks like we don't have full coverage of epsp_50 df %>% group_by(Experiment, Cell, Key, Type) %>% filter(Sweep == 1) %>% tally() %>% spread(Type, n) %>% View()
# FIXME Column `AUC` doesn't exist # Goal: extract Absolute measures and differential measures for # 1. Burst Amplitude (max mV) # 2. AUC # 3. Stimulus correlation (between) # read in trace brian2_trace_files <- list.files(here("data", "predicted_voltage")) brian2_trace_sets <- brian2_trace_files %>% str_remove(pattern = "-actual_v.rds") %>% str_remove(pattern = "-predict_v.rds") %>% unique() epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){ print(i) brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])] # retrieve data and merge into a single df temp <- map(brian2_read_in, function(read_file){ temp_predict <- readRDS(here("data", "predicted_voltage", read_file)) if(str_detect(read_file, pattern = "predict_v")){ #consolidate into one df for (j in seq(1, length(temp_predict))){ temp_predict[[j]]$Sweep <- j } temp_predict <- do.call(rbind, temp_predict) } return(temp_predict) }) names(temp) <- brian2_read_in temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]] temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble() temp_a$nrow <- seq(1, nrow(temp_a)) temp_p$nrow <- seq(1, nrow(temp_p)) temp_a$Sweep <- as.character(temp_a$Sweep) temp_p$Sweep <- as.character(temp_p$Sweep) temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow) # Show difference between expected/actual # i = 1 # [1] "190808a_0014-epsp-In4-actual_v.rds" "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome. # Here the end of sweep 4 deviates from baseline so the delta is off # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline. # To get around this in the short term we'll calulate the factors independently for expected/obs # downsample_data(temp, len = 10000) %>% # # temp %>% # mutate(predicted = predicted - offset) %>% # mutate(difference = In4-predicted) %>% # ggplot(aes(x = Time))+ # geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+ # geom_line(aes(y=In4), color = "black")+ # geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+ # # geom_line(aes(y=difference-offset), color = "purple")+ # facet_grid(Sweep~.) # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below rewrite_names <- F if ("In9" %in% names(temp)){ names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset") rewrite_names <- T } Use.Bin.Size <- 19687 temp_processed <- temp temp_processed$TimeBin <- rep(rep(seq(1, (nrow(temp)/4)/Use.Bin.Size), each = Use.Bin.Size), times = 4) # begin to work with data proper # Add optional ways to slice the data based on what we expect to occur # At the moment we leave this unused temp_processed <- temp_processed %>% mutate( Period = case_when( Time > 0.40 & Time < 4.87 ~ "A", Time > 4.87 & Time < 10.74 ~ "B", Time > 10.74 & Time < 16.74 ~ "C", Time > 16.74 & Time < 19.685 ~ "D" ), Period_eq = case_when( Time > 0.40 & Time < 0.40 + 2.945 ~ "A", Time > 4.87 & Time < 4.87 + 2.945 ~ "B", Time > 10.74 & Time < 10.74 + 2.945 ~ "C", Time > 16.74 & Time < 16.74 + 2.945 ~ "D" ), Burst = case_when( Time > 0.40 & Time < 2 ~ "A", Time > 4.87 & Time < 6.17 ~ "B", Time > 10.74 & Time < 12.54 ~ "C", Time > 16.74 & Time < 18.5 ~ "D" ), Burst_eq = case_when( Time > 0.40 & Time < 2 + 1.30 ~ "A", Time > 4.87 & Time < 6.17 + 1.30 ~ "B", Time > 10.74 & Time < 12.54 + 1.30 ~ "C", Time > 16.74 & Time < 18.5 + 1.30 ~ "D" ) ) %>% # correlation with expected group_by(Sweep, TimeBin) %>% mutate(Cor = cor(In4, predicted)) %>% ungroup() %>% gather(Key, mV, c("In4", "predicted")) %>% ## Metrics for whole sweep group_by(Key, Sweep) %>% # Min voltage mutate(Min.V = min(mV, na.rm = T)) %>% # Defining Baseline as average of the lowest 20% of points mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>% mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>% mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>% # Max voltage mutate(Max.Amplitude = max(mV, na.rm = T)) %>% # AUC # Using Min.V # mutate(AUC = sum(mV - Min.V, na.rm = T)/(max(temp$Time)/Use.Bin.Size)) %>% ungroup() temp_processed <- temp_processed[seq(1, nrow(temp_processed), by = Use.Bin.Size), ] if (rewrite_names){ temp_processed <- temp_processed %>% mutate(Key = ifelse(Key == "In4", "In9", Key)) } temp_processed <- temp_processed[, c("Sweep", "TimeBin", "Time", "Cor", "Key", "Min.V", #"Use.in.Base", "Max.Amplitude", "AUC")] %>% distinct() #%>% # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", # "Max.Amplitude", "AUC")) # add annotations to aid merging file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-") temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1]) temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf") temp_processed$Channel <- as.character(file_name[[1]][3]) return(temp_processed) }) epsp_summary <- do.call(rbind, epsp_summary_list) # epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")] epsp_summary %>% filter(FileName == "190808a_0014.abf", Sweep == 1) %>% ggplot(aes(x = Time, y = Cor))+ geom_point() ggplot(aes(x = Time, y = Cor))+ scattermore::geom_scattermost( as.data.frame(epsp_summary[epsp_summary$Sweep == 1, ]), # col=viridisLite::viridis(100, alpha=0.05)[1+99*d[,2]], pointsize=1, pixels=c(700,700)) + ggtitle("geom_scattermost")
# Goal: extract Absolute measures and differential measures for # 1. Burst Amplitude (max mV) # 2. AUC # 3. Stimulus correlation (between) # read in trace brian2_trace_files <- list.files(here("data", "predicted_voltage")) brian2_trace_sets <- brian2_trace_files %>% str_remove(pattern = "-actual_v.rds") %>% str_remove(pattern = "-predict_v.rds") %>% unique() epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){ print(i) brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])] # retrieve data and merge into a single df temp <- map(brian2_read_in, function(read_file){ temp_predict <- readRDS(here("data", "predicted_voltage", read_file)) if(str_detect(read_file, pattern = "predict_v")){ #consolidate into one df for (j in seq(1, length(temp_predict))){ temp_predict[[j]]$Sweep <- j } temp_predict <- do.call(rbind, temp_predict) } return(temp_predict) }) names(temp) <- brian2_read_in temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]] temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble() temp_a$nrow <- seq(1, nrow(temp_a)) temp_p$nrow <- seq(1, nrow(temp_p)) temp_a$Sweep <- as.character(temp_a$Sweep) temp_p$Sweep <- as.character(temp_p$Sweep) temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow) # Show difference between expected/actual # i = 1 # [1] "190808a_0014-epsp-In4-actual_v.rds" "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome. # Here the end of sweep 4 deviates from baseline so the delta is off # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline. # To get around this in the short term we'll calulate the factors independently for expected/obs # downsample_data(temp, len = 10000) %>% # # temp %>% # mutate(predicted = predicted - offset) %>% # mutate(difference = In4-predicted) %>% # ggplot(aes(x = Time))+ # geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+ # geom_line(aes(y=In4), color = "black")+ # geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+ # # geom_line(aes(y=difference-offset), color = "purple")+ # facet_grid(Sweep~.) # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below rewrite_names <- F if ("In9" %in% names(temp)){ names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset") rewrite_names <- T } # begin to work with data proper # Add optional ways to slice the data based on what we expect to occur # At the moment we leave this unused temp_processed <- temp %>% mutate( Segment = case_when( Time > 0.40 & Time < 2 ~ "A_B", Time > 2 & Time < 4.87 ~ "A_I", Time > 4.87 & Time < 6.17 ~ "B_B", Time > 6.17 & Time < 10.74 ~ "B_I", Time > 10.74 & Time < 12.54 ~ "C_B", Time > 12.54 & Time < 16.74 ~ "C_I", Time > 16.74 & Time < 18.5 ~ "D_B", Time > 18.5 & Time < 19.685 ~ "D_I" ) ) %>% # correlation with expected group_by(Sweep, Segment) %>% mutate(Cor = cor(In4, predicted)) %>% ungroup() %>% gather(Key, mV, c("In4", "predicted")) %>% ## Metrics for whole sweep group_by(Key, Sweep) %>% # Min voltage mutate(Min.V = min(mV, na.rm = T)) %>% # Defining Baseline as average of the lowest 20% of points mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>% mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>% mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>% # Max voltage mutate(Max.Amplitude = max(mV, na.rm = T)) %>% # AUC # Using Min.V # mutate(AUC = sum(mV - Min.V, na.rm = T)/19.68685) %>% ungroup() if (rewrite_names){ temp_processed <- temp_processed %>% mutate(Key = ifelse(Key == "In4", "In9", Key)) } temp_processed <- temp_processed[, c("Sweep", "Segment", "Cor", "Key", "Min.V", #"Use.in.Base", "Max.Amplitude")] %>% distinct() #%>% # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", # "Max.Amplitude", "AUC")) # add annotations to aid merging file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-") temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1]) temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf") temp_processed$Channel <- as.character(file_name[[1]][3]) return(temp_processed) }) epsp_summary <- do.call(rbind, epsp_summary_list) # epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")] epsp_summary <- left_join(epsp_summary, metadata) %>% select(-FileName, -Type) %>% mutate(Cell = case_when(Channel == "In4" ~ In4, Channel == "In9" ~ In9)) %>% mutate(Cell = as.character(Cell)) %>% mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs", Key %in% c("predicted") ~ "Sim")) %>% pivot_wider(names_from = "Key", values_from = c("Min.V", "Max.Amplitude")) %>% mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim) %>% group_by(Condition, Experiment, Cell, Segment, Sweep) %>% select(-Min.V_Sim) %>% mutate_at(c("Cor", "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta"), median, na.rm = T) %>% distinct() %>% ungroup() epsp_summary %>% ggplot(aes(x = Segment, y = Cor, group = Sweep, color = Sweep))+ geom_point(position = position_jitter(width = 0.1), alpha = 0.3)+ geom_smooth(se = F)+ facet_grid(.~Condition)+ ggsci::scale_color_aaas() epsp_summary %>% filter(!is.na(Segment)) %>% ggplot(aes(x = Cor, y = Sweep, fill = factor(stat(quantile))))+#, group = Sweep, color = Sweep))+ # ggridges::geom_density_ridges()+ ggridges::stat_density_ridges( geom = "density_ridges_gradient", calc_ecdf = TRUE, quantiles = 4, quantile_lines = TRUE ) + scale_fill_viridis_d(name = "Quartiles")+ coord_flip()+ facet_grid(Condition~Segment)+ theme(legend.position = "") ## Here's the stimulus used in the protocol ==== # sweep duration should be 19.687 seconds epsp_stim <- readABF_as_matrix(here("inst", "extdata", "epsp_stim_files", "08 current injection.abf"), channels = "Axo1I2") epsp_stim <- as_tibble(epsp_stim) %>% mutate(Time = Time - min(Time, na.rm = T)) %>% rename(Stim = Axo1I2) epsp_stim <- mutate(epsp_stim, Stim = Stim - min(Stim, na.rm = T), Stim = (Stim / 11.9034)-1) epsp_summary %>% filter(!is.na(Segment)) %>% mutate(x = case_when( Segment == "A_B" ~ 0.40, Segment == "A_I" ~ 2, Segment == "B_B" ~ 4.87, Segment == "B_I" ~ 6.17, Segment == "C_B" ~ 10.74, Segment == "C_I" ~ 12.54, Segment == "D_B" ~ 16.74, Segment == "D_I" ~ 18.5 ), xend = case_when( Segment == "A_B" ~ 2, Segment == "A_I" ~ 4.87, Segment == "B_B" ~ 6.17, Segment == "B_I" ~ 10.74, Segment == "C_B" ~ 12.54, Segment == "C_I" ~ 16.74, Segment == "D_B" ~ 18.5, Segment == "D_I" ~ 19.687 )) %>% ggplot()+ geom_segment(aes(x = x, y = Cor, xend = xend, yend = Cor, colour = Cor))+ facet_grid(Condition~Sweep)+ geom_line(data = epsp_stim, aes(Time, Stim))+ theme(legend.position = "")+ scale_color_viridis_c(direction = -1, option = "B") # option # A character string indicating the colormap option to use. Four options are available: "magma" (or "A"), "inferno" (or "B"), "plasma" (or "C"), "viridis" (or "D", the default option) and "cividis" (or "E"). epsp_stim %>% mutate(Stim = Stim - min(Stim, na.rm = T), Stim = (Stim / 11.9034)-1) %>% ggplot(aes(Time, Stim))+ geom_line() # # # shading_annotations <- data.frame( # starts = c(0.40, # 4.87, # 10.74, # 16.74), # next_start = c(4.87, # 10.74, # 16.74, # 19.685), # equal_len = c(0.40, # 4.87, # 10.74, # 16.74) + 2.945, # on_end = c(2, # 6.17, # 12.54, # 18.5), # equal_on = c(0.40, # 4.87, # 10.74, # 16.74) + 1.30 # ) # # annotation_df <- data.frame(x = c(0.5, 0.5, 2.1, 1.8), # y = seq(0.25, -1.25, length.out = 4), # text = c("Period", # "Minumum Period", # "Burst", # "Min Burst")) # # segmentation_demo_plt <- ggplot()+ # geom_vline(xintercept = c(shading_annotations$starts, shading_annotations$equal_len, shading_annotations$next_start, shading_annotations$equal_on), color = "gray")+ # geom_path(data = epsp_stim, aes(Time, Stim+1.5))+ # geom_rect(data = shading_annotations, aes(xmin = starts, xmax = next_start, ymin = 0, ymax = 0.5), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[1], color = RColorBrewer::brewer.pal(3, "Set1")[1])+ # geom_rect(data = shading_annotations, aes(xmin = starts, xmax = equal_len, ymin = -0.5, ymax = 0), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[1], color = RColorBrewer::brewer.pal(3, "Set1")[1])+ # geom_rect(data = shading_annotations, aes(xmin = starts, xmax = on_end, ymin = -1, ymax = -0.5), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[2], color = RColorBrewer::brewer.pal(3, "Set1")[2])+ # geom_rect(data = shading_annotations, aes(xmin = starts, xmax = equal_on, ymin = -1.5, ymax = -1), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[2], color = RColorBrewer::brewer.pal(3, "Set1")[2])+ # theme_classic()+ # # geom_vline(xintercept = 12.54)+ # geom_text(data = annotation_df[1:2, ], aes(x=x, y=y, label = text), hjust = "inward", color = RColorBrewer::brewer.pal(3, "Set1")[1])+ # geom_text(data = annotation_df[3:4, ], aes(x=x, y=y, label = text), hjust = "inward", color = RColorBrewer::brewer.pal(3, "Set1")[2])+ # ylim(-1.5, 9.5)+ # labs(title = "Ways to Segement EPSP Stim") # # # segmentation_demo_plt
# Goal: extract Absolute measures and differential measures for # 1. Burst Amplitude (max mV) # 2. AUC # 3. Stimulus correlation (between) # read in trace brian2_trace_files <- list.files(here("data", "predicted_voltage")) brian2_trace_sets <- brian2_trace_files %>% str_remove(pattern = "-actual_v.rds") %>% str_remove(pattern = "-predict_v.rds") %>% unique() epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){ print(i) brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])] # retrieve data and merge into a single df temp <- map(brian2_read_in, function(read_file){ temp_predict <- readRDS(here("data", "predicted_voltage", read_file)) if(str_detect(read_file, pattern = "predict_v")){ #consolidate into one df for (j in seq(1, length(temp_predict))){ temp_predict[[j]]$Sweep <- j } temp_predict <- do.call(rbind, temp_predict) } return(temp_predict) }) names(temp) <- brian2_read_in temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]] temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble() temp_a$nrow <- seq(1, nrow(temp_a)) temp_p$nrow <- seq(1, nrow(temp_p)) temp_a$Sweep <- as.character(temp_a$Sweep) temp_p$Sweep <- as.character(temp_p$Sweep) temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow) # Show difference between expected/actual # i = 1 # [1] "190808a_0014-epsp-In4-actual_v.rds" "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome. # Here the end of sweep 4 deviates from baseline so the delta is off # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline. # To get around this in the short term we'll calulate the factors independently for expected/obs # downsample_data(temp, len = 10000) %>% # # temp %>% # mutate(predicted = predicted - offset) %>% # mutate(difference = In4-predicted) %>% # ggplot(aes(x = Time))+ # geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+ # geom_line(aes(y=In4), color = "black")+ # geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+ # # geom_line(aes(y=difference-offset), color = "purple")+ # facet_grid(Sweep~.) # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below rewrite_names <- F if ("In9" %in% names(temp)){ names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset") rewrite_names <- T } # begin to work with data proper # Add optional ways to slice the data based on what we expect to occur # At the moment we leave this unused temp_processed <- temp %>% mutate( Period = case_when( Time > 0.40 & Time < 4.87 ~ "A", Time > 4.87 & Time < 10.74 ~ "B", Time > 10.74 & Time < 16.74 ~ "C", Time > 16.74 & Time < 19.685 ~ "D" ), Period_eq = case_when( Time > 0.40 & Time < 0.40 + 2.945 ~ "A", Time > 4.87 & Time < 4.87 + 2.945 ~ "B", Time > 10.74 & Time < 10.74 + 2.945 ~ "C", Time > 16.74 & Time < 16.74 + 2.945 ~ "D" ), Burst = case_when( Time > 0.40 & Time < 2 ~ "A", Time > 4.87 & Time < 6.17 ~ "B", Time > 10.74 & Time < 12.54 ~ "C", Time > 16.74 & Time < 18.5 ~ "D" ), Burst_eq = case_when( Time > 0.40 & Time < 2 + 1.30 ~ "A", Time > 4.87 & Time < 6.17 + 1.30 ~ "B", Time > 10.74 & Time < 12.54 + 1.30 ~ "C", Time > 16.74 & Time < 18.5 + 1.30 ~ "D" ) ) %>% # correlation with expected group_by(Sweep) %>% mutate(Cor = cor(In4, predicted)) %>% ungroup() %>% gather(Key, mV, c("In4", "predicted")) %>% ## Metrics for whole sweep group_by(Key, Sweep) %>% # Min voltage mutate(Min.V = min(mV, na.rm = T)) %>% # Defining Baseline as average of the lowest 20% of points mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>% mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>% mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>% # Max voltage mutate(Max.Amplitude = max(mV, na.rm = T)) %>% # AUC # Using Min.V mutate(AUC = sum(mV - Min.V, na.rm = T)/19.68685) %>% ungroup() if (rewrite_names){ temp_processed <- temp_processed %>% mutate(Key = ifelse(Key == "In4", "In9", Key)) } temp_processed <- temp_processed[, c("Sweep", "Cor", "Key", "Min.V", #"Use.in.Base", "Max.Amplitude", "AUC")] %>% distinct() #%>% # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", # "Max.Amplitude", "AUC")) # add annotations to aid merging file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-") temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1]) temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf") temp_processed$Channel <- as.character(file_name[[1]][3]) return(temp_processed) }) epsp_summary <- do.call(rbind, epsp_summary_list) epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")]
scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Baseline"), shal, Ia.0)), X = "shal", Y = "Ia.0") scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Compensated"), shal, Ia.0)), X = "shal", Y = "Ia.0") scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Delayed"), shal, Ia.0)), X = "shal", Y = "Ia.0")
# ref ---- # c( # ## Ion Channels ==== # #Kv # "Shab", "Shaker", "Shal", "Shaw1", "Shaw2", # "KCNQ1", "KCNQ2", # "KCNH1", "KCNH2", "KCNH3", # #K # "BKKCa", "SKKCa", # "KCNT1", "IRK", "KCNK1", "KCNK2", # #Ca # "CaV1", "CaV2", "CaV3", # #Na # "CbNaV", # "NALCN", # #Hyp/CyclNuc Activated/Gated # "IH", # ## CNGs # # #TRPs # "TRP-A1", "TRP-A-like", "TRP-M1", "TRP-M3", "TRP-M-like", # ##-V5, -V6, -Pyrexia # # # #Biogenic amine activated # "HisCL", # # ## Receptors Biogenic Amines ==== # #octopamine # # #dopamine # "DAR1A", "DAR2", "Dopa-1Br", # #serotonin # "HTR1A", "HTR2", "HTR7", "5HTr-1Br", # # #histamine # "His-1r", "His-2r", "His-3r", # #gaba # "GABAB-R1", # "LCCH3r", # "RDLr", # "mGABA2", "mGABA3", # # ## Receptors Glutamate/Acetylcholine ==== # #metabotropic glutamate receptors # "mGluR1", "mGluR2", "mGluR4", "mGluR5", "mGluR7", # #kainate-like receptors # "Kainate-1A", "Kainate-1B", "Kainate-2A", "Kainate-2B", "Kainate-2C", # #NMDA-like receptors # "NMDA-1A", "NMDA-1B", "NMDA-2A", "NMDA-2B", "NMDA-2-like", # #glutamate-gated chloride channel # "GluCl", # #acetylcholine receptors # "mACHrA", "mACHrB", # # # Crustacean cardioactive peptide receptor === # "CCAPr", # # #Acetylcholinesterase # "ACHE", # # Actylcholine catalyst # "ChAT", # # #vesicular ach transporter # "vAChT", # #vesicular glut transporter # "vGluT", # # ## Innexins ==== # "INX1", "INX2", "INX3", "INX4", "INX5" # )
load(here("data", "ionic.rds")) load(here("data", "tevc.rds")) load(here("data", "tecc.rds")) load(here("data", "mrna.rds")) mRNAInfo <- readxl::read_excel(here("inst", "extdata", "mRNAInfo.xlsx")) # tables from Northcutt 2016 and annotations
M <- left_join(ionic, mrna) M <- M[!is.na(M$x18s), ] MeasuredmRNA <- names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))] # temp.plts <- map(seq_along(MeasuredmRNA), function(i){ # ggplot(M, aes_string(MeasuredmRNA[i], y = "Ihtk.0", color = "Time"))+ # geom_smooth(method = "lm", se = F)+ # geom_point() # }) # # cowplot::plot_grid(plotlist = temp.plts) # # mrna
PercentNotNA <- map(MeasuredmRNA, function(i){ mean(!is.na(M[[i]])) }) %>% unlist() ThresholdPercentNotNA <- 0.6 MeasuredmRNA <- MeasuredmRNA[PercentNotNA >= ThresholdPercentNotNA] # use in map call MeasuredKCurrents <- c("Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope") # use in map call
Ia ~ shal
# TEXTING WITH DAVE scatter_w_wo_outliers(temp = filter(M, Time == "0h"), X = "shal", Y = "Ia.0") # broom::tidy(lm(Ia.0 ~ shal, temp)) # broom::tidy(lm(Ia.0 ~ shal, temp[temp$flag, ]))
library(brms) # Loading required package: StanHeaders # rstan (Version 2.19.3, GitRev: 2e1f913d3ca3) # For execution on a local, multicore CPU with excess RAM we recommend calling options(mc.cores = parallel::detectCores()) # To avoid recompilation of unchanged Stan programs, we recommend calling rstan_options(auto_write = TRUE) # For improved execution time, we recommend calling Sys.setenv(LOCAL_CPPFLAGS = '-march=corei7 -mtune=corei7') # although this causes Stan to throw an error on a few processors. temp <- temp[!is.na(temp$Ia.0), ] tic <- Sys.time() fm1 <- lm(Ia.0 ~ shal, temp) toc1 <- Sys.time() - tic tic <- Sys.time() fm2 <- brm(Ia.0 ~ shal, data = temp, family = gaussian()) toc2 <- Sys.time() - tic tic <- Sys.time() fm3 <- brm(Ia.0 ~ shal, data = temp, family = student()) toc3 <- Sys.time() - tic toc1 toc2 toc3 t1 <- broom::tidy(fm1) t2 <- broom::tidy(fm2) t3 <- broom::tidy(fm3) bayes_ex1 <- ggplot(temp, aes(x = shal, y = Ia.0))+ geom_point()+ geom_abline(slope = as.numeric(t1[2, "estimate"]), intercept = as.numeric(t1[1, "estimate"]), size = 1, color = "gray")+ geom_abline(slope = as.numeric(t2[2, "estimate"]), intercept = as.numeric(t2[1, "estimate"]), size = 1, color = "cornflowerblue")+ geom_abline(slope = as.numeric(t3[2, "estimate"]), intercept = as.numeric(t3[1, "estimate"]), size = 1, color = "firebrick")+ theme_bw()
Ia ~ shaker
scatter_w_wo_outliers(temp = filter(M, Time == "0h"), X = "shaker", Y = "Ia.0")
Model comparison shal best predicts Ia
temp <- filter(M, Time == "0h") # What's the best model? library(AICcmodavg) # no rm points aictab(cand.set = list( lm(Ia.0 ~ shal, temp), lm(Ia.0 ~ shaker, temp), lm(Ia.0 ~ shal+shaker, temp), lm(Ia.0 ~ shal*shaker, temp) )) # rm points temp_no_outliers <- temp %>% mutate(shal = ifelse(win_x_iqr(shal), shal, NA)) %>% mutate(shaker = ifelse(win_x_iqr(shaker), shaker, NA)) %>% mutate(Ia.0 = ifelse(win_x_iqr(Ia.0), Ia.0, NA)) aictab(cand.set = list( lm(Ia.0 ~ shal, temp_no_outliers), lm(Ia.0 ~ shaker, temp_no_outliers), lm(Ia.0 ~ shal+shaker, temp_no_outliers), lm(Ia.0 ~ shal*shaker, temp_no_outliers) ))
IHTK ~ bkkca
scatter_w_wo_outliers(temp = filter(M, Time == "0h"), X = "bkkca", Y = "Ihtk.0")
How might we exclude outliers? Cooks Distance? Iterated Cooks?
temp <- filter(M, Time == "Baseline") %>% mutate(flag = ifelse(bkkca < 3000, T, F)) fm <- lm(Ihtk.0 ~ bkkca, temp) temp$cooksd <- cooks.distance(fm) ggplot(temp, aes_string("bkkca", "Ihtk.0", color = "flag"))+ geom_smooth(method = lm, se = F, color = "gray")+ geom_smooth(data = temp[temp$flag == T, ], method = lm, se = F, fullrange = T)+ geom_point(aes(color = cooksd < (mean(cooks.distance(fm))*2.2) ))+ scale_color_manual(values = c("gray", "black"))+ theme_bw()+ theme(legend.position = "") # broom::tidy(lm(Ihtk.0 ~ bkkca, temp)) # broom::tidy(lm(Ihtk.0 ~ bkkca, temp[temp$flag, ])) broom::tidy(lm(Ihtk.0 ~ bkkca, temp[temp$cooksd < (mean(cooks.distance(fm))*4) , ])) # temp %>% filter(flag == F)
Checking the use of iterated Cook's distance Using cooks is going to be challenging. It'll be hard to choose a stopping criteria as
flag_cooks_outliers <- function( df = filter(M, Time == "Baseline"), mod = "Ihtk.0 ~ bkkca", multiplier = 4, center = "mean"){ fm <- lm(as.formula(mod), df) cd <- cooks.distance(fm) if (center == "mean"){ cd_outlier <- cd > mean(cd)*multiplier } else if (center == "median") { cd_outlier <- cd > median(cd)*multiplier } else { warning("center must be mean or median") } return(cd_outlier) } # run 10 iterations of cooks distance. When a cell is identified as an outlier, drop it and note when it was deamed an outlier temp <- M %>% dplyr::select(Ihtk.0, bkkca, Time) %>% filter(Time == "Baseline") temp$dropout <- NA for (i in 1:10){ current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ], mod = "Ihtk.0 ~ bkkca", multiplier = 4, center = "mean") temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i } ggplot(temp, aes(x = bkkca, y = Ihtk.0, color = as.factor(dropout)))+ geom_point() bkkca_cook_outliers <- temp$dropout temp %>% group_by(dropout) %>% tally() # # A tibble: 5 x 2 # dropout n # <int> <int> # 1 1 1 # 2 2 1 # 3 3 1 # 4 4 1 # 5 NA 12 temp <- M %>% filter(Time == "Baseline") temp$dropout <- NA for (i in 1:10){ current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ], mod = "Ia.0 ~ shal", multiplier = 4, center = "mean") temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i } temp %>% group_by(dropout) %>% tally() temp <- M %>% filter(Time == "Baseline") temp$dropout <- NA for (i in 1:10){ current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ], mod = "Ia.0 ~ shaker", multiplier = 4, center = "mean") temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i } temp %>% group_by(dropout) %>% tally()
contrast cooks with a fit t distribution.
# temp <- filter(M, Time == "Baseline") # # tic <- Sys.time() # fm0 <- lm(Ihtk.0 ~ bkkca, temp[temp$bkkca < 3000, ]) # toc1 <- Sys.time() - tic # # tic <- Sys.time() # fm1 <- lm(Ihtk.0 ~ bkkca, temp) # toc1 <- Sys.time() - tic # # tic <- Sys.time() # fm2 <- brm(Ihtk.0 ~ bkkca, data = temp, family = gaussian()) # toc2 <- Sys.time() - tic # # tic <- Sys.time() # fm3 <- brm(Ihtk.0 ~ bkkca, data = temp, family = student()) # toc3 <- Sys.time() - tic # # # # toc1 # toc2 # toc3 # # t0 <- broom::tidy(fm0) # t1 <- broom::tidy(fm1) # t2 <- broom::tidy(fm2) # t3 <- broom::tidy(fm3) # # # temp$dropout <- bkkca_cook_outliers # # temp[is.na(temp$dropout), "dropout"] <- 11 # # alpha_param <- 0.8 # # bayes_ex1 <- # ggplot(temp, aes(x = bkkca, y = Ihtk.0, color = as.factor(dropout)))+ # # geom_abline(slope = as.numeric(t0[2, "estimate"]), intercept = as.numeric(t0[1, "estimate"]), size = 1, alpha = alpha_param, color = "black")+ # geom_abline(slope = as.numeric(t1[2, "estimate"]), intercept = as.numeric(t1[1, "estimate"]), size = 1, alpha = alpha_param, color = "black")+ # geom_abline(slope = as.numeric(t2[2, "estimate"]), intercept = as.numeric(t2[1, "estimate"]), size = 1, alpha = alpha_param, color = "cornflowerblue")+ # geom_abline(slope = as.numeric(t3[2, "estimate"]), intercept = as.numeric(t3[1, "estimate"]), size = 1, alpha = alpha_param, color = "firebrick")+ # # geom_point(size = 2, color = "white")+ # geom_point(size = 2)+ # geom_point(size = 2, color = "black", shape = 1)+ # # theme_bw()+ # theme(legend.position = "bottom")+ # scale_color_brewer(type = "div", palette = "PuOr")
IHTK ~ skkca
scatter_w_wo_outliers(temp = filter(M, Time == "Baseline"), X = "skkca", Y = "Ihtk.0")
How do K channels predict IKv intercepts?
# TODO redo this using the info in mRNAInfo # TODO use next block as inspiration. # filter(M, Time == "Baseline") %>% # select(Ihtk.0, Ihtk.Slope, Ia.0, Ia.Slope, # #Kv # shab, shaker, shal, shaw1, shaw2, # kcnq1, kcnq2, # kcnh1, kcnh2, kcnh3, # #k # bkkca, skkca, # kcnt1, irk, kcnk1, kcnk2 # ) %>% # pivot_longer(cols = c( # #kv # "shab", "shaker", "shal", "shaw1", "shaw2", # "kcnq1", "kcnq2", # "kcnh1", "kcnh2", "kcnh3", # #k # "bkkca", "skkca", # "kcnt1", "irk", "kcnk1", "kcnk2" # ), names_to = "mRNA", values_to = "Count") %>% # pivot_longer(cols = c("Ihtk.0", # # "Ihtk.Slope", # "Ia.0"#, # # "Ia.Slope" # ), # names_to = "Key", values_to = "Value") %>% # ggplot(aes(x = Count, y = Value))+ # geom_smooth(method = lm, se = F)+ # geom_point()+ # # facet_wrap(Key ~ mRNA, nrow = 4, scales = "free") # facet_grid(Key ~ mRNA)+ # theme_bw() # # # PlotList <- # map(c( # #Kv # "Shab", "Shaker", "Shal", "Shaw1", "Shaw2", # "Kcnq1", "Kcnq2", # "Kcnh1", "Kcnh2", "Kcnh3", # #K # "BkkCa", "SkkCa", # "Kcnt1", "Irk", "Kcnk1", "Kcnk2" # ), function(X){ # map(c("Ihtk.0", # # "Ihtk.Slope", # "Ia.0"#, # # "Ia.Slope" # ), function(Y){ # ggplot(filter(M, Time == "Baseline"), aes_string(X, Y))+ # geom_smooth(method = lm, se = F)+ # geom_point()+ # theme_bw() # }) # }) # # PlotList <- transpose(PlotList) # # library(cowplot) # cowplot::plot_grid(plotlist = PlotList[[1]]) # # cowplot::plot_grid(plotlist = PlotList[[2]])
This falls under the auspices of EDA.
KCurrent = "Ihtk.0" mRNA = "BkkCa" # Make outlier free version of the baseline data M_baseline_outlier_free <- filter(M, Time == "Baseline") for (NAME in names(M)[!(names(M) %in% c("Condition" , "Cell", "Experiment", "Source", "Pharm", "Time", "Sample"))]){ M_baseline_outlier_free[[NAME]] <- ifelse(win_x_iqr(M_baseline_outlier_free[[NAME]]), M_baseline_outlier_free[[NAME]], NA) } # Which have relationships at baseline? fm.list <- map(MeasuredKCurrents, function(KCurrent){ map(MeasuredmRNA, function(mRNA){ lm(as.formula(paste0(KCurrent, " ~ ", mRNA)), data = M_baseline_outlier_free) }) }) p.list <- map(fm.list, function(ListLvl1){ map(ListLvl1, function(ListLvl2){ broom::tidy(ListLvl2)[2, "p.value"] # non-intercept p }) %>% unlist() }) p.df <- do.call(cbind, p.list) %>% as.data.frame() names(p.df) <- MeasuredKCurrents p.df$mRNA <- MeasuredmRNA # Uncorrected p.df.long <- p.df %>% gather(iK, pValue, all_of(MeasuredKCurrents)) # dplyr::filter(iK %in% c("Ia.0", "Ihtk.0")) %>% # mutate(pAdj = p.adjust(pValue, method = "fdr")) %>% highlight.text.temp.df <- p.df.long %>% mutate(highlight = ifelse(pValue <= 0.05, 1, 0)) %>% select(-pValue) %>% pivot_wider(names_from = iK, values_from = highlight) %>% mutate(highlight = ifelse(Ihtk.0 == 0, ifelse(Ihtk.Slope == 0, ifelse(Ia.0 == 0, ifelse(Ia.Slope == 0, 0, 1), 1), 1), 1) ) %>% mutate(highlight = ifelse(highlight == 1, "firebrick", "black")) %>% arrange(mRNA) plt.fm <- p.df.long %>% ggplot(aes(x = mRNA, color = pValue <= 0.05))+ geom_hline(yintercept = 0.95, color = "gray", linetype = "dashed")+ # geom_point()+ # geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2, color = "gray", alpha = 0.0001)+ geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2)+ # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pValue), size = 2)+ # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pAdj), size = 2)+ facet_grid(iK~.)+ scale_color_manual(values = c("black", "firebrick"))+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = highlight.text.temp.df[["highlight"]]), legend.position = "") # Corrected with fdr plt.fm.fdr <- p.df %>% gather(iK, pValue, all_of(MeasuredKCurrents)) %>% # dplyr::filter(iK %in% c("Ia.0", "Ihtk.0")) %>% mutate(pAdj = p.adjust(pValue, method = "fdr")) %>% ggplot(aes(x = mRNA, color = pAdj <= 0.05))+ geom_hline(yintercept = 0.95, color = "gray", linetype = "dashed")+ # geom_point()+ geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2, color = "gray", alpha = 0.0001)+ geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pAdj), size = 2, color = "black")+ # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pValue), size = 2)+ # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pAdj), size = 2)+ facet_grid(iK~.)+ scale_color_manual(values = c("black", "firebrick"))+ theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), legend.position = "")
library(factoextra) # library(missMDA) # for imputePCA library("FactoMineR") #PCA MNumericOnly <- M_baseline_outlier_free MNumericOnly <- MNumericOnly[, c(MeasuredKCurrents, MeasuredmRNA)] ResPCA <- PCA(MNumericOnly, scale.unit = T) get_eig(ResPCA) fviz_screeplot(ResPCA, addlabels = T) fviz_pca_biplot(ResPCA, repel = T) # Ihtk.Slope, Ihtk.0, Ia.0, Ia.Slope, and Kainate1A are all loading together (roughly) fviz_pca_var(ResPCA, col.var = "contrib", # gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE # Avoid text overlapping )
library(corrr) MCorrr <- MNumericOnly %>% correlate() temp <- MCorrr %>% as_tibble() %>% select(rowname, Ihtk.0, Ihtk.Slope, Ia.0, Ia.Slope) %>% gather(colname, Cor, c("Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope")) # temp$rowname <- as.factor(temp$rowname) #TODO fix ordering temp %>% ggplot(aes(rowname, Cor, color = Cor))+ geom_segment(aes(x = rowname, xend = rowname, y = 0, yend = Cor))+ geom_point()+ geom_point(shape = 1, color = "black")+ geom_hline(yintercept = 0)+ ylim(-1,1)+ facet_grid(colname~.)+ theme_bw()+ theme(axis.text.x = element_text(angle = 90, hjust = 1))+ scale_color_gradient2(low = RColorBrewer::brewer.pal(7, "PuOr")[1], mid = RColorBrewer::brewer.pal(7, "PuOr")[4], high = RColorBrewer::brewer.pal(7, "PuOr")[7], midpoint = 0, na.value = "#00000000") # # # # # rplot(MCorrr) # # # You have to zoom or the chords don't show up. ¯\_(ツ)_/¯ # # network_plot(MCorrr, min_cor = .7) # # library(ggcorrplot) # MCorr <- cor(MNumericOnly, use = "pairwise.complete.obs") pMat <- cor_pmat(MNumericOnly) # library(RColorBrewer) # ggcorrplot(MCorr, # colors = RColorBrewer::brewer.pal(7, "PuOr")[c(1, 4 ,7)]#, # # p.mat = pMat # # lab = T # ) # # # library(GGally) # # GGally::ggscatmat(MNumericOnly) # GGally::ggcorr(MCorr, # # nbreaks = 5, # low = RColorBrewer::brewer.pal(7, "PuOr")[1], # mid = RColorBrewer::brewer.pal(7, "PuOr")[4], # high = RColorBrewer::brewer.pal(7, "PuOr")[7]) library(corrplot) corrplot(MCorr, method = "square", type = "upper", tl.col = "black", order = "hclust", col = brewer.pal(n = 9, name = "PuOr"), p.mat = pMat, sig.level = 0.01, insig = "blank")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.