# below: yaml for html without distill
# output:
#   html_document:
#     smart: no
#     theme: flatly
#     toc: true
#     toc_float: true
knitr::opts_chunk$set(echo = FALSE) # run code, echo = False will only show output
# knitr::opts_chunk$set(include = FALSE) # show nothing

library(here)
library(readxl)

library(plotly) # loaded first so that it doesn't become the default filter
library(tidyverse)
library(car)
library(FSA) # for Dunn's Test

library(factoextra)
library(FactoMineR) #PCA
library(umap) # for UMAP
library(pvclust)
library(dendextend) # for color_labels
library(corrr)

library(ggthemes)
library(cowplot)
library(patchwork)
library(rgl) # for vis_pca_3D
library(ggnewscale) # for newscale so that mk_hclust
# library(scales) # for overiding scientific notation on dendrogram y axis 


library(ggpmisc) # for labeling through stat_poly_eq
theme_set(ggplot2::theme_minimal())
devtools::load_all()

Data preparation

Load data and create a uid

## Load in data ====
load(here("data", "ionic.rds"))
load(here("data", "tevc.rds"))
load(here("data", "tecc.rds"))
load(here("data", "mrna.rds"))
epsp <- read.csv(here("data", "epsp.csv")) %>% as_tibble()
metadata <- read_excel(here("inst", "extdata", "ManualDataEphys02.xlsx"),
                       sheet = "MetadataEphys")  
# tables from Northcutt 2016 and annotations
mRNAInfo <- readxl::read_excel(here("inst", "extdata", "mRNAInfo.xlsx")) 

Convert condition labels

# ionic$Condition
# # tevc
# # tecc
# mrna$Time
# # epsp
# metadata$Condition
# # mRNAInfo

ionic <- ionic %>% 
  mutate(Condition = case_when(Condition == "Baseline"    ~ "0h",
                               Condition == "Compensated" ~ "1h",
                               Condition == "Delayed" ~ "24h"
  ))

mrna <- mrna %>% 
  mutate(Time = case_when(Time == "Baseline"    ~ "0h",
                          Time == "Compensated" ~ "1h",
                          Time == "Delayed" ~ "24h"
  ))

metadata <- metadata %>% 
  mutate(Condition = case_when(Condition == "Baseline"    ~ "0h",
                               Condition == "Compensated" ~ "1h",
                               Condition == "Delayed" ~ "24h"
  ))
## Add Experiment/Cell to metadata ====
metadata <- metadata %>% 
  mutate(ABFType = case_when(Type %in% c("tevc") ~ "gjvc",
                             # Type %in% c("igj","igj_50", "gj") ~ "gjcc",
                             # Type %in% c("epsp", "epsp_50") ~ "epsp",
                             Type %in% c("fi_50", "fi") ~ "fi")) %>% 
  # make expected file names
  mutate(num_zeros = str_length(Recording)) %>% 
  mutate(num_zeros = ifelse(num_zeros == 1, "000", 
                            ifelse(num_zeros == 2, "00", 
                                   ifelse(num_zeros == 3, "0", NA)))) %>% 
  mutate(FileName = paste0(Experiment, "_", num_zeros, as.character(Recording), ".abf" )) %>% 
  select(-num_zeros)



#TODO there are only two Type == "gj". Replace these with igj|igj_50
#   Experiment  Page Recording Type    TEA Condition Include Notes   In4   In9 ABFType FileName       
#   <chr>      <dbl>     <dbl> <chr> <dbl> <chr>     <lgl>   <lgl> <dbl> <dbl> <chr>   <chr>          
# 1 190906         0        32 gj       24 Delayed   NA      NA        5    NA NA      190906_0032.abf
# 2 190906         0        50 gj       24 Delayed   NA      NA        4    NA NA      190906_0050.abf



### use a subset of metadata cols to add experiment/cell data to other dfs ####
small_metadata <- metadata %>% 
  select(Experiment, FileName, In4, In9) %>% 
  gather("Channel", "Cell", c("In4", "In9"))

small_metadata2 <- metadata %>% 
  select(Experiment, Type, FileName, In4, In9) %>% 
  gather("Channel", "Cell", c("In4", "In9"))

Current Clamp

## Add Experiment/Cell to tecc ====
tecc <- tecc %>% 
  mutate(PreSyn = case_when(
    (abs(R1S3Mean) > abs(R1S6Mean)) ~ Signal1, 
    (abs(R1S3Mean) < abs(R1S6Mean)) ~ Signal4
  ),
  vrest = case_when(
    (abs(R1S3Mean) > abs(R1S6Mean)) ~ R1S1Baseline, 
    (abs(R1S3Mean) < abs(R1S6Mean)) ~ R1S4Baseline
  )) %>% 
  mutate(PreSyn = case_when(
    PreSyn == "IN 9" ~ "In9",
    PreSyn == "IN 4" ~ "In4"
  )) %>% 
  select(FileName, PreSyn, vrest, #Signal1, Signal4, R1S1Baseline, R1S4Baseline, 
         cc, r11, r1#, r12, rc # gap junction measures we'll get from voltage clamp
  ) %>% 
  distinct() %>% 
  rename(Channel = PreSyn)


tecc <- left_join(tecc, spread(small_metadata, Channel, Cell)) %>% 
  mutate(Cell = case_when(Channel == "In9" ~ In9,
                          Channel == "In4" ~ In4),
         PrePost = case_when(Channel == "In9" ~ paste0("cc.", as.character(In9), "_", as.character(In4)),
                             Channel == "In4" ~ paste0("cc.", as.character(In4), "_", as.character(In9)))
  ) %>% 
  filter(!is.na(Cell) &
           !is.na(In4) &
           !is.na(In9)) %>% 
  filter(In4 %in% c(3, 4, 5) &
           In9 %in% c(3, 4, 5)) %>% 
  select(-In4, -In9) %>% 
  filter(!is.na(cc)) %>% 
  spread(PrePost, cc)

Voltage Clamp (Gap Junction Procedure)

## Add Experiment/Cell to tevc ====
tevc <-
  tevc %>% 
  select(FileName, PreSyn, MedianIg) %>% 
  distinct() %>% 
  mutate(PreSyn = case_when(
    PreSyn == "IN 9" ~ "In9",
    PreSyn == "IN 4" ~ "In4"
  )) %>% 
  rename(Channel = PreSyn,
         ig = MedianIg)


tevc <- left_join(tevc, spread(small_metadata, Channel, Cell)) %>% 
  mutate(Cell = case_when(Channel == "In9" ~ In9,
                          Channel == "In4" ~ In4),
         PrePost = case_when(Channel == "In9" ~ paste0("ig.", as.character(In9), "_", as.character(In4)),
                             Channel == "In4" ~ paste0("ig.", as.character(In4), "_", as.character(In9)))
  ) %>% 
  filter(!is.na(Cell) &
           !is.na(In4) &
           !is.na(In9)) %>% 
  filter(In4 %in% c(3, 4, 5) &
           In9 %in% c(3, 4, 5)) %>% 
  select(-In4, -In9) %>% 
  filter(!is.na(ig)) %>% 
  spread(PrePost, ig)

# is conductance symmetric?
# tevc_plt <- tevc %>% 
#   select(-FileName, -Channel, -Cell) %>%
#   group_by(Experiment) %>% 
#   mutate_all(median, na.rm = T) %>% 
#   distinct() 
# 
# tevc_plt %>% 
# ggplot(aes(x = ig.3_4, y = ig.4_3))+
#   geom_point()
# 
# tevc_plt %>% 
# ggplot(aes(x = ig.3_5, y = ig.5_3))+
#   geom_point()
# 
# tevc_plt %>% 
# ggplot(aes(x = ig.4_5, y = ig.5_4))+
#   geom_point()

tevc[is.na(tevc$ig.3_4), "ig.3_4"] <- tevc[is.na(tevc$ig.3_4), "ig.4_3"]
tevc[is.na(tevc$ig.3_5), "ig.3_5"] <- tevc[is.na(tevc$ig.3_5), "ig.5_3"]
tevc[is.na(tevc$ig.4_5), "ig.4_5"] <- tevc[is.na(tevc$ig.4_5), "ig.5_4"]

tevc <- tevc %>% select(-ig.4_3, -ig.5_3, -ig.5_4)

Add metadata to epsp

## Add Experment/Cell to epsp ====
epsp <- left_join(epsp, small_metadata2)

# TODO How do we best caputre variace? find range between max/min across all sweeps when calculating auc? 
# note that there are plenty of replicates here
# and that there are not epsp_50's for each experiment. These are likely sufficiently close to -50. To be on the safe side, we'll only consider epsp not epsp_50
epsp %>% 
  group_by(Experiment, Cell, Type) %>% 
  filter(Key != "predicted") %>% 
  # filter(Sweep == 1 & Key == "predicted") %>% 
  tally() %>% 
  spread(Type, n) %>%
  mutate(epsp = ifelse(is.na(epsp), NA, T),
         epsp_50 = ifelse(is.na(epsp_50), NA, T))

epsp %>% 
  group_by(Experiment, Cell, Key) %>% 
  filter(Type == "epsp") %>% 
  mutate(
    Cor.IQR = IQR(Cor, na.rm = T),
    Min.V.IQR = IQR(Min.V, na.rm = T),
    Max.Amplitude.IQR = IQR(Max.Amplitude, na.rm = T),
    AUC.IQR = IQR(AUC, na.rm = T)
  ) %>% 
  mutate(
    Cor = median(Cor, na.rm = T),
    Min.V = median(Min.V, na.rm = T),
    Max.Amplitude = median(Max.Amplitude, na.rm = T),
    AUC = median(AUC, na.rm = T)
  )

Update labeling on Brian's cells

# Relabel Brian's cells ----
##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "AP" &
               mrna$Time == "24h", "Cell"]%>% 
  unlist() %>% 
  str_remove(pattern = "#") %>% 
  str_split(pattern = " ") %>%
  transpose()

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "AP" &
       mrna$Time == "24h", "Experiment"] <- unlist(transpose(temp[[1]]))

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "AP" &
       mrna$Time == "24h", "Cell"] <- unlist(transpose(temp[[2]])) %>% str_remove(pattern = "LC")


##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "None" &
               mrna$Time == "0h", "Cell"] %>% 
  unlist() %>% 
  str_remove(pattern = "Control #")

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "0h", "Experiment"] <- temp

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "0h", "Cell"] <- NA


##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "None" &
               mrna$Time == "24h", "Cell"] %>% 
  unlist() %>% 
  str_split(pattern = " ") %>%
  transpose()

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "24h", "Experiment"] <- unlist(transpose(temp[[1]]))

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "24h", "Cell"] <- unlist(transpose(temp[[2]])) %>% str_remove(pattern = "LC")


##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "TEA" &
               mrna$Time == "1h", "Cell"] %>% 
  unlist() %>% 
  str_remove(pattern = "TEA #")

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "1h", "Experiment"] <- temp

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "1h", "Cell"] <- NA

##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "TEA" &
               mrna$Time == "24h", "Cell"] %>% 
  unlist() %>% 
  str_remove(pattern = "TEA24-")

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "24h", "Experiment"] <- temp

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "24h", "Cell"] <- NA

##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "TTX", "Cell"] %>% 
  unlist() %>% 
  str_split(pattern = "-") %>%
  transpose()

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TTX", "Experiment"] <- unlist(transpose(temp[[1]]))

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TTX", "Cell"] <- unlist(transpose(temp[[2]]))
## deduplicate mrna
mrna <- mrna %>%
  gather(key = "mrna",
         value = "count",
         names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))]) %>%
  filter(mrna != c("htr2b", "kainate2b", "nmda1a", "nmda2b", "mglur7", "trpm1", "machrb", "htr1b")) %>% # drop fully missing mrnas
  group_by(Source, Pharm, Time, Experiment, Cell, mrna) %>%
  mutate(count = median(count, na.rm = T)) %>%
  ungroup() %>%
  distinct() %>%
  spread(key = "mrna", value = "count")

Add whether network was active to metadata

# Was there activity on the EC? 
metadata <- full_join(metadata,
                      tibble::tribble(
                        ~Experiment, ~ExtracellularActive,
                        "190808a",                 TRUE,
                        "190830",                 TRUE,
                        "190903",                 TRUE,
                        "190904",                 TRUE,
                        "190905",                 TRUE,
                        "190906",                 TRUE,
                        "190907",                 TRUE,
                        "190915",                 TRUE,
                        "190916a",                 TRUE,
                        "190917a",                   NA, # File not found. Entry not found in notebook either.
                        "190917",                 TRUE,
                        "190918",                 TRUE,
                        "190924",                 TRUE,
                        "190924a",                 TRUE,
                        "191002",                 TRUE,
                        "191004",                 TRUE,
                        "190926",                 TRUE,
                        "190927",                FALSE,
                        "190927a",                 TRUE,
                        "190930",                FALSE,
                        "190930a",                FALSE,
                        "191001",                 TRUE
                      )
)

metadata %>% 
  select(Experiment, Condition, ExtracellularActive) %>% 
  filter(Condition %in% c("0h", "1h", "24h")) %>% 
  distinct() %>% 
  group_by(Condition, ExtracellularActive) %>% 
  tally() %>%
  filter(!is.na(ExtracellularActive)) %>% 
  pivot_wider(names_from = "ExtracellularActive", values_from = "n")

Create cross type data, sans Brian's cells

small_metadata <- metadata[, c("Experiment",  "FileName", "Condition")]

M_ionic<- ionic %>% mutate(Cell = as.character(Cell))  
#Ihtk.0 Ihtk.Slope  Ia.0 Ia.Slope
M_tecc <- left_join(tecc, small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell))
# QC drop all non positive resistances, all ccs outside 0-1

M_tecc <- M_tecc %>% mutate(
  # cc  = ifelse(cc > 0 & cc < 1, cc, NA),
  r11 = ifelse(r11 >0, r11, NA),
  r1  = ifelse(r1 >0, r1, NA),
  # r12  = ifelse(r12 >0, r12, NA),
  # rc  = ifelse(rc >0, rc, NA)

  cc.3_4  = ifelse(cc.3_4 > 0 & cc.3_4 < 1, cc.3_4, NA),
  cc.3_5  = ifelse(cc.3_5 > 0 & cc.3_5 < 1, cc.3_5, NA),
  cc.4_3  = ifelse(cc.4_3 > 0 & cc.4_3 < 1, cc.4_3, NA),

  cc.4_5  = ifelse(cc.4_5 > 0 & cc.4_5 < 1, cc.4_5, NA),
  cc.5_3  = ifelse(cc.5_3 > 0 & cc.5_3 < 1, cc.5_3, NA),
  cc.5_4  = ifelse(cc.5_4 > 0 & cc.5_4 < 1, cc.5_4, NA)
) #%>% 
# mutate(rc = rc^-1) %>% 
# rename(rc.ig = rc)

#vrest     cc   r11    r12    r1    rc 
M_tevc <- left_join(tevc, small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell)) 
#ig
# M_epsp <- left_join(epsp, small_metadata) %>% 
#   select(-FileName, -Type) %>% 
#   mutate(Cell = as.character(Cell))  %>% 
#   mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs",
#                          Key %in% c("predicted")  ~ "Sim")) %>% 
#   pivot_wider(names_from = "Key", values_from  = c("Min.V", "Max.Amplitude", "AUC")) %>% 
#   mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% 
#   mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim,
#          AUC_Delta = AUC_Obs - AUC_Sim) %>% 
#   group_by(Condition, Experiment, Cell) %>% 
#   select(-Min.V_Sim) %>% 
#   mutate_at(c("Cor", 
#               "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", 
#               "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta", 
#               "AUC_Obs", "AUC_Sim", "AUC_Delta"), median, na.rm = T) %>% 
#   distinct() %>% 
#   ungroup()

M_epsp <- left_join(epsp, small_metadata) %>% 
  filter(Type == "epsp") %>% 
  select(-FileName, -Type) %>% 
  mutate(Cell = as.character(Cell))  %>% 
  mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs",
                         Key %in% c("predicted")  ~ "Sim")) %>% 
  pivot_wider(names_from = "Key", values_from  = c("Min.V", "Max.Amplitude", "AUC")) %>% 
  mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% 
  mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim,
         AUC_Delta = AUC_Obs - AUC_Sim) %>% 
  group_by(Condition, Experiment, Cell) %>% 
  select(-Min.V_Sim) %>% 

  mutate(
    Cor.IQR = IQR(Cor, na.rm = T),
    Min.V_Obs.IQR = IQR(Min.V_Obs, na.rm = T),

    Max.Amplitude_Obs.IQR = IQR(Max.Amplitude_Obs, na.rm = T),
    Max.Amplitude_Sim.IQR = IQR(Max.Amplitude_Sim, na.rm = T),
    Max.Amplitude_Delta.IQR = IQR(Max.Amplitude_Delta, na.rm = T),

    AUC_Obs.IQR = IQR(AUC_Obs, na.rm = T),
    AUC_Sim.IQR = IQR(AUC_Sim, na.rm = T),
    AUC_Delta.IQR = IQR(AUC_Delta, na.rm = T)
  ) %>% 

  mutate_at(c("Cor", 
              "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", 
              "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta", 
              "AUC_Obs", "AUC_Sim", "AUC_Delta"), median, na.rm = T) %>% 

  select(-Sweep) %>% 

  distinct() %>% 
  ungroup()  









#Sweep   Cor Key        Min.V Max.Amplitude    AUC 
M_mrna <- left_join(mrna[mrna$Source == "Daniel", ], small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell))


#Set up data cols to help organize full dataset
ionic_cols <- c("Ihtk.0", "Ihtk.Slope", "Ihtk.0.s", "Ihtk.Slope.s", "Ia.0", "Ia.Slope", "Ia.0.s", "Ia.Slope.s")
tecc_cols <- c("vrest", "r11", "r1", "cc.3_4", "cc.3_5", "cc.4_3", "cc.4_5", "cc.5_3", "cc.5_4")#, "r12", "rc.ig") # ignore rc, ig is a more direct measure
tevc_cols <- c("ig.3_4", "ig.3_5", "ig.4_5")
epsp_cols <- c("Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", 
               "Max.Amplitude_Delta", "AUC_Delta")
mrna_cols <- names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))]


M_all <- full_join(M_ionic, M_tecc) %>% full_join(M_tevc) %>% full_join(M_epsp) %>% full_join(M_mrna)

reordered_names <- c(names(M_all)[!(names(M_all) %in% c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols))], 
                     c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols))


M_all <- M_all[, reordered_names] %>% filter(Condition %in% c("0h", "1h", "24h") & Cell %in% c(3, 4, 5)) %>% distinct()

# #TODO this needs some prep work. We'll want to capture the variance in these measures.
# # Not clear how to do that. 
# epsp %>% ggplot(aes(x = FileName, Max.Amplitude, color = interaction(Experiment, Cell)))+
#   geom_line()+
#   geom_point()+
#   facet_grid(.~Key)

#TODO move this to top
# QC data by completion rate, having variance, and median mrna abundance
M_all_outliers <- M_all
M_all_skim <- M_all %>% skimr::skim()
M_all_skim <- M_all_skim[M_all_skim$complete_rate >= 0.19 & 
                           M_all_skim$numeric.sd > 0, ]

M_all_skim <- M_all_skim[!(M_all_skim$skim_variable %in% mrna_cols) |
                           ((M_all_skim$skim_variable %in% mrna_cols) & M_all_skim$numeric.p50 >= 50 ), ]

M_all <- M_all[, names(M_all) %in% c(
  "Condition", "Cell", "Experiment", "Channel", 
  "Sweep", "Source", "Pharm", "Time", "Sample", 
  M_all_skim$skim_variable[!is.na(M_all_skim$skim_variable)])
]

# update cols now that data is QCed
ionic_cols <- c("Ihtk.0", "Ihtk.Slope", "Ihtk.0.s", "Ihtk.Slope.s", "Ia.0", "Ia.Slope", "Ia.0.s", "Ia.Slope.s")[ionic_cols %in% names(M_all)]
tecc_cols <- c("vrest", "r11", "r1", "cc.3_4", "cc.3_5", "cc.4_3", "cc.4_5", "cc.5_3", "cc.5_4")[tecc_cols %in% names(M_all)]
#, "r12", "rc.ig") # ignore rc, ig is a more direct measure
tevc_cols <- c("ig.3_4", "ig.3_5", "ig.4_5")[tevc_cols %in% names(M_all)]
epsp_cols <- c("Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", 
               "Max.Amplitude_Delta", "AUC_Delta")[epsp_cols %in% names(M_all)]
mrna_cols <- mrna_cols[mrna_cols %in% names(M_all)]

"Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"

About the data

What conditions exist in both datasets?

temp <- mrna %>% select(Pharm, Time, Source) %>% distinct()

temp %>% group_by(Pharm, Source) %>%  mutate(exists = "T") %>% spread(Time, exists)

mrna %>% 
  filter(
    (Source == "Daniel") |
    (Source == "Brian" & Pharm == "TEA") |
    (Source == "Brian" & Pharm == "None" & Time == "0h") 
    ) %>% 
  distinct() %>% 
  group_by(Pharm, Time, Source) %>% 
  tally()

How many networks had any extracellular activity by their colleciton point?

metadata %>% 
  select(Experiment, Condition, ExtracellularActive) %>% 
  filter(Condition %in% c("0h", "1h", "24h")) %>% 
  distinct() %>% 
  group_by(Condition, ExtracellularActive) %>% 
  tally() %>%
  filter(!is.na(ExtracellularActive)) %>% 
  pivot_wider(names_from = "ExtracellularActive", values_from = "n")

How many individuals were there in each group?

M_all %>% select(Condition, Experiment, Cell) %>% distinct() %>% group_by(Condition) %>% tally() 

M_all %>% select(Condition, Experiment) %>% distinct() %>% group_by(Condition) %>% tally() 

set up DV groupings

mrna_cols <- c(
  "nav", "cav1", "cav2", "bkkca", 
  "shaker", "shal", "shab", "shaw1", "shaw2",
  "inx1", "inx2", "inx3" #"inx4", "inx5", #<- inx5 is likely not expressed
)

memb_cols <- c("vrest", "r11", "r1", 
               "Ihtk.0", "Ihtk.Slope", 
               "Ihtk.0.s", "Ihtk.Slope.s", 
               "Ia.0", "Ia.Slope", 
               "Ia.0.s", "Ia.Slope.s")

epsp_cols <- c("Cor", "Min.V_Obs", 
               "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
               "AUC_Obs", "AUC_Sim", 
               "Max.Amplitude_Delta", "AUC_Delta",

               "Cor.IQR", "Min.V_Obs.IQR", 
               "Max.Amplitude_Obs.IQR", #"Max.Amplitude_Sim.IQR", 
               "AUC_Obs.IQR", #"AUC_Sim.IQR", 
               "Max.Amplitude_Delta.IQR", "AUC_Delta.IQR")

1

What's happening in Brian's Data?

Prep data -- cull outliers and median interpolate missing values

# select the mRNAs present in both Brian's data and mine. 
mrna_b <-select(mrna, 
                Source, Pharm, Time, Experiment, Cell, 
                nav, cav1, cav2, bkkca, 
                shaker, shal, shab, shaw1, shaw2,
                inx1, inx2, inx3 #inx4, inx5, #<- inx5 is likely not expressed
) %>% 
  filter(Source == "Brian")
# mutate(Pharm = ifelse(Source == "Daniel" & Time == "0h", "None", Pharm)) 

mrna_b %>% skimr::skim()

mrna_b %>% 
  group_by(Time, Pharm, Source) %>% 
  tally()

mrna_b$uid <- as.character(seq(1, nrow(mrna_b)))
mrna_b <- mrna_b %>% 
  select(-Experiment, -Cell) %>% 
  unite(Group, Source, Pharm, Time)

mrna_b <- mrna_b[, c("Group", "uid",
                     names(mrna_b)[!(names(mrna_b) %in% c("Group", "uid"))])
]


# Remove Outliers 
mrna_b_winxiqr_list <- map(unique(mrna_b$Group), function(group){
  temp <- mrna_b[mrna_b$Group == group, ]
  dvs <- names(mrna_b)[!(names(mrna_b) %in% c("Group", "uid"))]

  for (dv_col  in dvs){
    temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA)
  }

  return(temp)
})

mrna_b_winxiqr <- do.call(rbind, mrna_b_winxiqr_list)

# Median interpolate missing
mrna_b_winxiqr_interp <- imputeMissings::impute(mrna_b_winxiqr, method = "median/mode")

treatment <- unique(mrna_b_winxiqr$Group)[1]
mrna1 <- "inx1"
mrna2 <- "inx2"

M <- mrna_b_winxiqr %>% filter(Group == treatment) #%>% select(uid, mrna1, mrna2)

## setup 
mrnas <- c("nav", "cav1", "cav2", "bkkca", "shaker", "shal", "shab", "shaw1", "shaw2", "inx1", "inx2", "inx3")

### Prep cor df
mrna_cors <- list()
for(i in seq_along(mrnas)){
  for(j in seq(i, length(mrnas))){
    if(i != j){
      mrna_cors[[(length(mrna_cors)+1)]] <- c(mrnas[i], mrnas[j])
    }
  }  
}
mrna_cors <- data.frame(do.call(rbind, mrna_cors))
names(mrna_cors) <- c("mrna1", "mrna2")
mrna_cors["Cor"] <- NA
mrna_cors["ep"] <- NA
### Fill cor df

for(i in seq(1, nrow(mrna_cors))){
  mrna1 <- mrna_cors[i, "mrna1"]
  mrna2 <- mrna_cors[i, "mrna2"]

  # is there a significant correlation?
  cor_obs <- cor(M[, c(mrna1)], M[, c(mrna2)], use = "pairwise.complete.obs", method = "pearson")
  cor_obs <- as.numeric(cor_obs)

  niter <- 1000
  cor_sim <- unlist(map(seq(1, niter), function(i){
    # return(cor(M[[mrna1]], sample(M[[mrna2]]) ) )
    cor_obs <- cor(M[, c(mrna1)], sample(unlist(M[, c(mrna2)])), use = "pairwise.complete.obs", method = "pearson")
    return(unlist(cor_obs))
    }))
  cor_ep <- mean(c(cor_obs, abs(cor_sim)) >= cor_obs)

  mrna_cors[i, "Cor"] <- cor_obs
  mrna_cors[i, "ep"]  <- cor_ep
}

# fix levels
mrna_cors$mrna1 <- factor(mrna_cors$mrna1, levels = mrnas)
mrna_cors$mrna2 <- factor(mrna_cors$mrna2, levels = mrnas)




# install.packages("GGally")
## Data prep
# mrna_cors %>% 
#   ggplot(aes(x = 0, y = 0))+
#   geom_tile(aes(fill = Cor) )+
#   geom_label(aes(label = Cor))+
#   facet_grid(mrna1 ~ mrna2)


# get the right color for the background
fill_val <- mrna_cors[(
  (mrna_cors$mrna1 == mrna1 & mrna_cors$mrna2 == mrna2) & 
  (mrna_cors$ep < 0.05)), "Cor"]


mk_cor_scatter <- function(M, mrna1, mrna2,
                           fill_val){

  if( length(fill_val) == 0 ){ # if there's a non-significant correlation, don't add fit.
    fill_val <- "#00000000"

    plt <- M %>% 
      ggplot(aes_string(x = mrna1, mrna2))+
      # stat_poly_line(formula = x ~ y, color = "black") +
      geom_point(alpha = 0.5)+
      # stat_poly_eq(aes(label = paste(after_stat(eq.label),
      #                                after_stat(rr.label), sep = "*\", \"*")))+
      theme_void()+
      theme(#plot.background = element_rect(fill = fill_val),
            axis.title.x = element_text(face = "italic"),
            axis.title.y = element_text(face = "italic"))


  } else {    # if there's a significant correlation, add the fit.
    vals <- seq(-1, to = 1, length.out = 7)
    fill_val <- RColorBrewer::brewer.pal(9, "PuOr")[seq(2, 8)][(fill_val < vals)][1]

    plt <- M %>% 
      ggplot(aes_string(x = mrna1, mrna2))+
      geom_rect(aes(xmin=min(M$mrna1, na.rm = T), xmax=max(M$mrna1, na.rm = T),
                    ymin=min(M$mrna2, na.rm = T), ymax=max(M$mrna2, na.rm = T)
                    ), fill=fill_val)+
      stat_poly_line(formula = x ~ y, color = "black") +
      geom_point(alpha = 0.5)+
      # stat_poly_eq(aes(label = paste(after_stat(eq.label),
      #                                after_stat(rr.label), sep = "*\", \"*")))+
      theme_void()+
      theme(plot.background = element_rect(fill = fill_val),
            axis.title.x = element_text(face = "italic"),
            axis.title.y = element_text(face = "italic"))
  }

  return(plt)
}

mk_cor_scatter(M = M, 
               mrna1 = mrna1, 
               mrna2 = mrna2,
               fill_val = fill_val)

# make all the plots
plt_list <- list()

for(i in seq(1, length(mrnas))){
  for(j in seq(i, length(mrnas))){
    if(i != j){

      mrna1 = mrnas[i]
      mrna2 = mrnas[j]

      fill_val <- mrna_cors[(
        (mrna_cors$mrna1 == mrna1 & mrna_cors$mrna2 == mrna2) & 
        (mrna_cors$ep < 0.05)), "Cor"]

      print(fill_val)

      plt_list[[(1+length(plt_list))]] <- mk_cor_scatter(
        M = M, 
        mrna1 = mrna1, 
        mrna2 = mrna2,
        fill_val = fill_val)


    }
  }  
}





# plt_list
library(GGally)
side_len <- (length(mrnas) -1)

pm <- ggmatrix(
  map(seq(1 , side_len**2), function(i){}),
  nrow = side_len, ncol = side_len,
  # xAxisLabels = c("A", "B", "C"),
  # yAxisLabels = c("D", "E"),
  title = "Matrix Title"
)

diagoffset <- 0
colcount <- 1
rowcount <- 1

for(i in seq_along(plt_list)){
  pm[rowcount, colcount] <- plt_list[[i]]

  if(colcount == side_len){
    diagoffset <- diagoffset + 1
    colcount <- 1 + diagoffset
    rowcount <- rowcount + 1
  } else {
    colcount <- colcount + 1
  }


}

# pm[1, 2] <- plt_list[[1]]
pm

# 
# 
# ggpairs(select(M,  -Group, -uid), 
#         upper = list(continuous = "cor"),
#   lower = list(na ="na"),
#   diag = list( na = "naDiag"))
plot_grid(plt_list)

library(patchwork)

# quick and dirty get all the plots ordered
patchwork_string <- c()

ith <- 1
for(i in seq(1, length(mrnas))){
  for(j in seq(1, length(mrnas))){
    if(i == j){

    } else if(j > i){
      patchwork_string <- c(patchwork_string, paste0("plt_list[[", as.character(ith), "]]"))
      ith <- ith+1
    } else {
      patchwork_string <- c(patchwork_string, "plot_spacer()")

    }
  }
}
patchwork_string <- paste(patchwork_string, collapse = "+")

# patchwork_string+patchwork::plot_layout(ncol = length(mrnas))

plt_list[[1]]+plt_list[[2]]+plt_list[[3]]+plt_list[[4]]+plt_list[[5]]+plt_list[[6]]+plt_list[[7]]+plt_list[[8]]+plt_list[[9]]+plt_list[[10]]+plt_list[[11]]+plot_spacer()+plt_list[[12]]+plt_list[[13]]+plt_list[[14]]+plt_list[[15]]+plt_list[[16]]+plt_list[[17]]+plt_list[[18]]+plt_list[[19]]+plt_list[[20]]+plt_list[[21]]+plot_spacer()+plot_spacer()+plt_list[[22]]+plt_list[[23]]+plt_list[[24]]+plt_list[[25]]+plt_list[[26]]+plt_list[[27]]+plt_list[[28]]+plt_list[[29]]+plt_list[[30]]+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[31]]+plt_list[[32]]+plt_list[[33]]+plt_list[[34]]+plt_list[[35]]+plt_list[[36]]+plt_list[[37]]+plt_list[[38]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[39]]+plt_list[[40]]+plt_list[[41]]+plt_list[[42]]+plt_list[[43]]+plt_list[[44]]+plt_list[[45]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[46]]+plt_list[[47]]+plt_list[[48]]+plt_list[[49]]+plt_list[[50]]+plt_list[[51]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[52]]+plt_list[[53]]+plt_list[[54]]+plt_list[[55]]+plt_list[[56]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[57]]+plt_list[[58]]+plt_list[[59]]+plt_list[[60]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[61]]+plt_list[[62]]+plt_list[[63]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[64]]+plt_list[[65]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[66]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+patchwork::plot_layout(ncol = length(mrnas))


# as_tbl_graph(mrna_cors) %>% 
#   ggraph(layout = 'linear', circular = TRUE) + 
#   geom_edge_arc(aes(colour = ep)) + 
#   coord_fixed()
library(ggraph)
library(igraph)

temp <- mutate(mrna_cors, 
               from = as.numeric(as.factor(mrna1)), 
               to   = as.numeric(as.factor(mrna2))
               )
# graph <- graph_from_data_frame(temp[, c("from", "to")])

# graph <- temp %>% 
#     graph_from_data_frame(directed = FALSE)
# 
# # Not specifying the layout - defaults to "auto"
# ggraph(graph) + 
#     # geom_edge_link(aes(colour = factor(Cor))) + 
#     geom_edge_link(aes(edge_alpha = 1-ep)) + 
#     geom_node_point()


# = = = = 

# Create a tidy data frame of correlations
tidy_cors <- M[, mrnas] %>% 
  correlate() %>% 
  stretch()

tidy_cors["ep"] <- NA

for(i in seq(1, nrow(mrna_cors))){
  mrna1 <- mrna_cors[i, "mrna1"]
  mrna2 <- mrna_cors[i, "mrna2"]

  tidy_cors[((tidy_cors$x == mrna1) & (tidy_cors$y == mrna2)) | (
    (tidy_cors$x == mrna2) & (tidy_cors$y == mrna1)
  ) , "ep"] <- mrna_cors[i, "ep"]

}


# eps1 <- full_join(
#   tidy_cors, 
#   rename(select(mrna_cors, -Cor), x = mrna1, y = mrna2)
#   ) %>% 
#   filter(!is.na(ep))
#   
# eps2 <- full_join(
#   tidy_cors,
#     rename(select(mrna_cors, -Cor), x = mrna2, y = mrna1)
#           ) 
#   
# full_join(
#   tidy_cors,
#   eps1) %>% full_join(eps2)

# Convert correlations stronger than some value
# to an undirected graph object
# graph_cors <- tidy_cors %>% 
#   filter(abs(r) > 0.7) %>% 
#   graph_from_data_frame(directed = FALSE)
# 
# # Plot
# ggraph(graph_cors) +
#   geom_edge_link() +
#   geom_node_point() +
#   geom_node_text(aes(label = name), repel = TRUE) +
#   theme_graph()


# = = = = 
#ref https://www.data-imaginist.com/2017/ggraph-introduction-layouts/
graph_cors <- tidy_cors %>% 
  # mutate(bool = case_when(ep < 0.05 ~ TRUE,
  #                         ep >= 0.05 ~ FALSE)) %>% 
  # filter(abs(ep) < 0.05) %>% 
  graph_from_data_frame(directed = FALSE)


graph_cors_sig <- tidy_cors %>% 
  mutate(r = case_when(ep < 0.05 ~ r,
                       ep >= 0.05~ 0)) %>%

  graph_from_data_frame(directed = FALSE)

custom_network_plot <- 
ggraph(graph_cors_sig, layout = 'linear', circular = TRUE) +
  geom_edge_link(aes(edge_alpha = 1/ep, edge_width = 1/ep, color = r)) +
  guides(edge_alpha = "none", edge_width = "none") +
  scale_edge_colour_gradientn(limits = c(-1, 1), colors = c("#B35806", "White","#542788")) +
  geom_node_point(color = "gray", size = 5) +
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_graph() +
  labs(title = "")

Correlations

Correlation heatmaps

All correlations in Brian's data

library(ggcorrplot)

cor_plt_list <- list()

for (temp.group in c("Brian_All", unique(mrna_b_winxiqr$Group))){
  print(temp.group)
  if (temp.group == "Brian_All"){
    cor_df <- select(mrna_b_winxiqr, -Group, -uid)
  } else {
    cor_df <- select(filter(mrna_b_winxiqr, Group == temp.group), -Group, -uid)
  }

  # p.mat <- cor_pmat(cor_df)
  cor_df_bin <- cor(cor_df, use = "pairwise.complete.obs")

  # bin the correlations so there are fewer colors used in the figure
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the average bin value
    # cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- (cor_bins[i] + cor_bins[i+1])
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }

  cor_plt_list[[(length(cor_plt_list)+1)]] <- ggcorrplot(
    cor_df_bin, 
    # p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = temp.group)+
    theme(legend.position = "")

}

cor_plt_list[[3]] + cor_plt_list[[5]] + plot_spacer() + plot_spacer() +
  cor_plt_list[[4]] + cor_plt_list[[6]] + cor_plt_list[[2]]+ cor_plt_list[[7]] + 
  plot_layout(ncol = 4)


# cor_plt_list[[1]] +
cor_plt_list[[3]] + cor_plt_list[[4]] +
  cor_plt_list[[5]] + cor_plt_list[[6]] +
  # plot_spacer() +
  plot_spacer() + cor_plt_list[[2]] +
  plot_spacer() + cor_plt_list[[7]] + plot_layout(ncol = 4) #plot_layout(ncol = 5)

Correlation multiplot

cor_df_list <- map(c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"), function(temp.group){
  cor_df <- select(filter(mrna_b_winxiqr, Group == temp.group), -Group, -uid)
  cor_df <- cor(cor_df, use = "pairwise.complete.obs")
  cor_df_bin <- cor_df
  # bin the correlations so there are fewer colors used in the figure
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }

  return(list(cor = cor_df,
              cor_bin = cor_df_bin))
})




temp <- 
map(seq_along(cor_df_list), function(i){
  temp <- cor_df_list[[i]][[1]]
  temp <- as.data.frame(temp) 
  temp %>% 
    rownames_to_column() %>% 
    gather("colname", "Cor", names(temp)) %>% 
    mutate(Group = c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h")[i])
})

temp <- do.call(rbind, temp)


temp_fill <- map(seq_along(cor_df_list), function(i){
  temp <- cor_df_list[[i]][[2]]
  temp <- as.data.frame(temp) 
  temp %>% 
    rownames_to_column() %>% 
    gather("colname", "Cor", names(temp)) %>% 
    mutate(Group = c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h")[i])
})


temp_fill <- do.call(rbind, temp_fill) %>% rename(CorFill = Cor)

cor_col_plt <- 
full_join(temp, temp_fill) %>% 
  # filter(rowname == "bkkca", colname == "cav1") %>%
  mutate(x =  
         case_when(Group == "Brian_None_0h" ~ "0 Hours",
                       Group == "Brian_TEA_1h" ~ "1 Hour",
                       Group == "Brian_TEA_24h" ~ "24 Hours")
         ) %>% 
  mutate(Cor = ifelse(rowname == colname, NA, Cor)) %>% 
  ggplot(aes(x = x, y = Cor, fill = Cor))+
  geom_col()+
  geom_hline(yintercept = 0)+
  facet_grid(rowname ~ colname)+
  scale_fill_distiller(palette = "PuOr", direction = 1)+
  coord_cartesian(ylim = c(-1,1))+
  labs(x = "")+
  scale_y_continuous(breaks = c(-0.5, 0.5))+
  theme(legend.position = "", 
        # panel.grid.major.y = element_line() 
        # axis.ticks.length.y.left = element_line(), 
        axis.text.x = element_text(angle = 90))



cor_bin_fill_ref_plot <- 
ggplot(data.frame(Cor = seq(-1, 1, length.out = 8)),
       aes(x = Cor, y = 0, fill = Cor))+
  geom_tile(aes(height = 2/8))+
  geom_text(aes(label = round(Cor, digits = 1), size = 1))+
  scale_fill_distiller(palette = "PuOr", direction = 1)+
  theme_void()+
  theme(legend.position = "")+
  coord_fixed()+
    plot_spacer()

correlogram_set <- (cor_plt_list[[3]]+labs(subtitle= "0 Hours") + 
  cor_plt_list[[5]]+labs(subtitle= "1 Hours")  +
  cor_plt_list[[6]]+labs(subtitle= "24 Hours"))



figure_w <- c(1)
figure_h <- c(1/3,1,5)


 # (cor_bin_fill_ref_plot + plot_spacer()+ plot_spacer() ) /
cor_bin_fill_ref_plot /
correlogram_set /
cor_col_plt +  
  plot_layout(widths = figure_w, heights = figure_h)

# (cor_plt_list[[3]]+labs(subtitle= "0 Hours") + 
#   cor_plt_list[[5]]+labs(subtitle= "1 Hours")  +
#   cor_plt_list[[6]]+labs(subtitle= "24 Hours") +
#   cor_bin_fill_ref_plot + plot_spacer() + plot_spacer() +
#   plot_spacer() + plot_spacer()+ plot_spacer()) |
#   cor_col_plt+
#   plot_layout(widths = figure_w, heights = figure_h)


# cm_multiplier = 15
# ggsave("BrianCorMulti.svg", 
#        width = sum(figure_w)*cm_multiplier,
#        height = sum(figure_h)*cm_multiplier,
#          
#        units = "cm",
#        limitsize = F)

Corrplots with non-sig dropped

cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){
  df = mrna_b_winxiqr %>% 
    mutate(Condition = Group) %>% 
    mutate(Condition = case_when(Condition == "Brian_None_0h" ~ "0h",
                                 Condition == "Brian_TEA_1h"  ~ "1h",
                                 Condition == "Brian_TEA_24h" ~ "24h"))

  df = df[df$Condition == Time, c(mrna_cols)]

  corr <- cor(df, use = "pairwise.complete.obs", method = "spearman")
  p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman")

  cor_df_bin <- corr
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }


  ggcorrplot(
    cor_df_bin, 
    p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = Time)+
    theme(legend.position = ""#,
          # axis.text.x = element_text(angle = 90)
          )
})




# plot_grid(plotlist = cor_plt_list)

(cor_plt_list[[1]]+theme(
    axis.text.x = element_text(angle = 90, vjust = 0))) +
(cor_plt_list[[2]]+theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))+
(cor_plt_list[[3]]+theme(
  axis.text.x = element_text(angle = 90, vjust = 0),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))

Correlations ECDFs

cor_list_df <- map(unique(mrna_b_winxiqr$Group), function(group){
  # temp <- cor(select(mrna_b_winxiqr[mrna_b_winxiqr$Group == group, ], -Group, -uid), use = "pairwise.complete.obs") %>% as.data.frame() 
  temp <- corrr::correlate(select(mrna_b_winxiqr[mrna_b_winxiqr$Group == group, ], -Group, -uid), use = "pairwise.complete.obs") %>%

    corrr::shave() %>% 
    as.data.frame()

  temp$row <- rownames(temp)
  temp <- gather(temp, "col", "Corr", names(temp)[!(names(temp) %in% c("term", "row"))])
  temp$Group <- group

  return(temp)  
})
cor_list_df <- do.call(rbind, cor_list_df)

# cor_list_df$Group %>% unique()

ecdf_corr_list <- map(
  list(
    list("Brian_None_0h", "Brian_TEA_1h"),
    list("Brian_TEA_1h", "Brian_TEA_24h"),
    list("Brian_None_0h", "Brian_TEA_24h"),

    list("Brian_None_0h", "Brian_None_24h"),
    list("Brian_None_0h", "Brian_TEA_24h"),
    list("Brian_None_0h", "Brian_TEA_1h"),
    list("Brian_None_24h", "Brian_AP_24h"),
    list("Brian_None_24h", "Brian_TEA_24h"),
    list("Brian_None_24h", "Brian_TTX_24h")
  ),

  function(group_comparison){
    plot_ecdf_ks(
      df = cor_list_df[cor_list_df$Group %in% group_comparison, ],
      data.col = "Corr",
      group.col = "Group",
      group1 = group_comparison[1],
      group2 = group_comparison[2],
      colors = c("black", "red"))
  })

# cowplot::plot_grid(plotlist = ecdf_corr_list)

ecdf_corr_list[[1]] + labs(subtitle = "", x = "") + 
  ecdf_corr_list[[2]] + labs(subtitle = "", x = "") + theme(legend.position = "top")+
  ecdf_corr_list[[3]] + labs(subtitle = "", x = "") 


# ggsave(plot = last_plot(),         
#        filename = here("officer_output", "ContrastSource_ecdfs.tiff"),        
#        width = 11.5, height = 4.76)


 # list("Brian_None_0h", "Brian_TEA_1h"),
 #    list("Brian_TEA_1h", "Brian_TEA_24h"),
 #    list("Brian_None_0h", "Brian_TEA_24h"),

plot_ecdf_ks(
  df = cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_1h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_1h",
  colors = c("black", "orange"))+ 
  labs(title = "0 Hours vs 1 Hour",
       subtitle = "p = 0.44", x = "")+


plot_ecdf_ks(
  df = cor_list_df[cor_list_df$Group %in% list("Brian_TEA_1h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_TEA_1h",
  group2 = "Brian_TEA_24h",
  colors = c("orange", "red"))+ 
  labs(title = "1 Hour vs 24 Hours",
       subtitle = "p < 0.01", x = "")+


plot_ecdf_ks(
  df = cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_24h",
  colors = c("black", "red"))+ 
  labs(title = "0 Hours vs 24 Hours",
    subtitle = "p < 0.01", x = "")



## what's happening in the high correlations at zero?
temp <- cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"), ] %>% 
  spread(Group, Corr) %>% 
  filter(Brian_None_0h >=0.66) %>% 
  gather(Group, Corr, c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"))




plot_ecdf_ks(
  df = temp[temp$Group %in% list("Brian_None_0h", "Brian_TEA_1h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_1h",
  colors = c("black", "orange"))+

plot_ecdf_ks(
  df = temp[temp$Group %in% list("Brian_TEA_1h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_TEA_1h",
  group2 = "Brian_TEA_24h",
  colors = c("orange", "red"))+

plot_ecdf_ks(
  df = temp[temp$Group %in% list("Brian_None_0h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_24h",
  colors = c("black", "red"))

Changes in mean mRNA levels

# refresh NO median interpolation
mrna_b_winxiqr <- do.call(rbind, mrna_b_winxiqr_list)

mrna_b_winxiqr_to_test <- mrna_b_winxiqr %>% 
  separate(Group, c("Source", "Pharm", "Time")) %>% 
  select(-Source) %>% 
  distinct()

library(broom)

How does time alter control cells?

cav2, shab, and shaker change over time.

# desired tests
# time in control
temp <- mrna_b_winxiqr_to_test %>% filter(Pharm %in% c("None"))
temp %>% group_by(Time) %>% tally()

temp_ys <- names(select(temp, -Time, -Pharm, -uid))

temp_fms <- map(temp_ys, function(temp_y){
  fm <- lm(as.formula(paste0(temp_y," ~ Time")), data = temp)
  res <- broom::tidy(fm)
  res$y <- temp_y
  return(res)
})

temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)")
temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% 
  mutate(p.adj = p.adjust(p.value, method = "holm")) %>% 
  mutate(stars = case_when(p.adj < 0.001 ~ "***",
                           p.adj >= 0.001 & p.adj < 0.01 ~ "**",
                           p.adj >= 0.01 & p.adj < 0.05 ~ "*",
                           p.adj >= 0.05 & p.adj < 0.1 ~ ".",
                           p.adj >= 0.1 ~ " "
  ))

temp_p
temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% 
  # group_by(Time) %>% 
  ggplot(aes(x= Time, y= Count, fill = mRNA))+
  geom_boxplot()+
  geom_point()+
  ggthemes::theme_clean()+
  scale_fill_brewer()+
  facet_grid(.~mRNA)

How does time alter TEA cells?

bkkca expression, inx1 expression, and maybe inx3 expression

# time in TEA
temp <- mrna_b_winxiqr_to_test %>% filter(Pharm %in% c("TEA"))
temp %>% group_by(Time) %>% tally()

temp_ys <- names(select(temp, -Time, -Pharm, -uid))

temp_fms <- map(temp_ys, function(temp_y){
  fm <- lm(as.formula(paste0(temp_y," ~ Time")), data = temp)
  res <- broom::tidy(fm)
  res$y <- temp_y
  return(res)
})

temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)")
temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% 
  mutate(p.adj = p.adjust(p.value, method = "holm")) %>% 
  mutate(stars = case_when(p.adj < 0.001 ~ "***",
                           p.adj >= 0.001 & p.adj < 0.01 ~ "**",
                           p.adj >= 0.01 & p.adj < 0.05 ~ "*",
                           p.adj >= 0.05 & p.adj < 0.1 ~ ".",
                           p.adj >= 0.1 ~ " "
  ))

temp_p
temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% 
  # group_by(Time) %>% 
  ggplot(aes(x= Time, y= Count, fill = mRNA))+
  geom_boxplot()+
  geom_point()+
  ggthemes::theme_clean()+
  scale_fill_brewer()+
  facet_grid(.~mRNA)

How do blockers alter cells after all have been incubated?

# steady state differences?
# ap vs TEA vs ttx vs control
temp <- mrna_b_winxiqr_to_test %>% filter(Time %in% c("24h"))
temp %>% group_by(Pharm) %>% tally()

temp_fms <- map(temp_ys, function(temp_y){
  fm <- lm(as.formula(paste0(temp_y," ~ Pharm")), data = temp)
  res <- broom::tidy(fm)
  res$y <- temp_y
  return(res)
})

temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)")
temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% 
  mutate(p.adj = p.adjust(p.value, method = "holm")) %>% 
  mutate(stars = case_when(p.adj < 0.001 ~ "***",
                           p.adj >= 0.001 & p.adj < 0.01 ~ "**",
                           p.adj >= 0.01 & p.adj < 0.05 ~ "*",
                           p.adj >= 0.05 & p.adj < 0.1 ~ ".",
                           p.adj >= 0.1 ~ " "
  ))

temp_p
temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% 
  mutate(Pharm = factor(Pharm, levels = c("None", "TEA", "AP", "TTX"))) %>% 
  ggplot(aes(x= Pharm, y= Count, fill = mRNA))+
  geom_boxplot()+
  geom_point()+
  ggthemes::theme_clean()+
  scale_fill_brewer()+
  facet_wrap(.~mRNA, scales = "free_y")
library(ggpubr)

plot_grid(plotlist = 

map(seq_along(unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))), function(i){
  temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))[i] ) %>% 
  mutate(Pharm = factor(Pharm, levels = c("None", "TEA", "AP", "TTX"))) %>% 
  ggboxplot(x = "Pharm", 
            y = "Count", 
            fill = "Pharm",
            width = 0.2,
            palette = RColorBrewer::brewer.pal(4, "Set1")
  )+
  geom_point(position = position_jitter(width = 0.05))+
  stat_compare_means(comparisons = list(
    c("None", "TEA"),
    c("None", "AP"),
    c("None", "TTX"),
    c("TEA", "AP")
  ), 
  label = "p.signif")+ # Add significance levels
  stat_compare_means(label.y = 0)+
  theme(legend.position = "",
        axis.title.y = element_text(face = "italic"))+
    labs(x = "", 
         y = as.character(unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))[i]))
})
)

2

Transition -- Do some of these changes replicate?

We've seen that at least some mRNAs change, presumably due to time and time incubated in the treatment.

Can we replicate these effects? When we focus on TEA do we get changes in the same transcripts?

Data preparation

# select the mRNAs present in both Brian's data and mine. 
mrna_rep <-select(mrna, 
                  Source, Pharm, Time, Experiment, Cell, 
                  bkkca, cav1, cav2, 
                  inx1, inx2, inx3, inx4, #inx5, #<- inx5 is likely not expressed
                  nav, 
                  shab, shaker, shal, shaw1, shaw2) %>% 
  filter(
    (Source == "Brian" & Pharm %in% c("None", "TEA")) |
      Source == "Daniel"
  ) %>% 
  filter(!(Source == "Brian" & Pharm %in% c("None") & Time == "24h")) %>% 
  mutate(Pharm = ifelse(Source == "Daniel" & Time == "0h", "None", Pharm)) %>% 
  select(-Pharm) %>% 
  distinct()

mrna_rep %>% skimr::skim()

mrna_rep %>% 
  group_by(Time, Source) %>% 
  tally() %>% 
  spread("Time", "n")


mrna_rep$uid <- as.character(seq(1, nrow(mrna_rep)))
mrna_rep <- mrna_rep %>% 
  select(-Experiment, -Cell) %>% 
  unite(Group, Source, Time)

mrna_rep <- mrna_rep[, c("Group", "uid",
                         names(mrna_rep)[!(names(mrna_rep) %in% c("Group", "uid"))])
]


# Remove Outliers 
mrna_rep_winxiqr_list <- map(unique(mrna_rep$Group), function(group){
  temp <- mrna_rep[mrna_rep$Group == group, ]
  dvs <- names(mrna_rep)[!(names(mrna_rep) %in% c("Group", "uid"))]

  for (dv_col  in dvs){
    temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA)
  }

  return(temp)
})

mrna_rep_winxiqr <- do.call(rbind, mrna_rep_winxiqr_list)

mrna_rep_winxiqr <- mrna_rep_winxiqr %>% 
  separate("Group", into = c("Source", "Time"), sep = "_")

Figure 1

plt_df <- data.frame( cor = round(cor_df[rw, cl], digits = 2), rad = abs(round(cor_df[rw, cl], digits = 2)), cor_col = cor_color_df[rw, cl] )

# print(
#   plt_df
# )

plt <- ggplot(plt_df)+
  geom_circle(aes(x0 = 0, y0 = 0, r = rad), color = plt_df$cor_col, fill = plt_df$cor_col)+
  geom_text(aes(x = 0, y = 1.25, label = cor))+
  theme_void()+
  lims(y = c(-1.5, 1.5), x = c(-1.5, 1.5))+
  coord_fixed()

Compare Correlations

## Get correlations ====
cor_groupings <- expand.grid(
  Source = c("Brian", "Daniel"),
  Time = c("0h", "1h", "24h")
)

cor_list <- map(seq(1, nrow(cor_groupings)), function(i){
  cor_df <- filter(mrna_rep_winxiqr, 
                   Source == cor_groupings[i, "Source"],
                   Time == cor_groupings[i, "Time"]) %>% 
    select(all_of(mrna_cols))

  cor_df <- cor(cor_df, use = "pairwise.complete.obs", method = "pearson")

  # Shave to diagonal
  for (j in seq(1, nrow(cor_df))){
    cor_df[j, 1:j] <- NA
  }

  cor_df <- as.data.frame(cor_df) %>% mutate(x = rownames(cor_df))

  cor_df <- pivot_longer(cor_df, 
                         cols = names(select(cor_df, -x)),
                         names_to = "y", 
                         values_to = "Corr") %>% 
    mutate(
      Source = cor_groupings[i, "Source"],
      Time = cor_groupings[i, "Time"]  
    ) %>% 
    select(Source, Time, x, y, Corr)

  return(cor_df)
})

cor_comp_df <- do.call(rbind, cor_list) %>% 
  filter(!is.na(Corr))

# add numeric version of the factor to use geom circle
cor_comp_df <- cor_comp_df %>% 
  mutate(x.num = as.numeric(factor(x, levels = mrna_cols)),
         y.num = as.numeric(factor(y, levels = mrna_cols)))



## Plotting ====
### Correlogram ####
library(ggforce) # for geom_circle
# Corrgram, but in ggplot
compare_corgrams_plt <- cor_comp_df %>% 
  mutate(Source = factor(Source, 
                         levels = c("Brian", "Daniel"), 
                         labels = c("Pilot", "Replicate"))) %>% 
  # df$supp <- factor(df$supp, levels = c("OJ", "VC"),
  #                 labels = c("Orange Juice", "Vitamin C")
  #                 )
  ggplot()+
  geom_circle(aes(x0 = x.num, y0 = y.num, r = abs(Corr)/2, fill = Corr))+
  scale_fill_distiller(palette = "PuOr", direction = 1, limits=c(-1,1))+
  scale_x_continuous(
    name = "",
    position = "top",
    breaks = 1:length(mrna_cols),
    labels = mrna_cols)+
  scale_y_continuous(
    name = "",
    breaks = 1:length(mrna_cols),
    labels = mrna_cols)+
  facet_grid(Source~Time)+
  theme(
    panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                    colour = NA),
    axis.text.x = element_text(angle = 45,
                               hjust = -0., 
                               vjust = .5, 
                               face = "italic"),
    axis.text.y = element_text(face = "italic"),
    legend.position = "right",
    strip.placement = "outside")+
  coord_fixed()


### Compare correlations across time ####

# make dataset amenable to faceting
cor_comp_df_for_points <- cor_comp_df %>% 
  spread(Time, Corr)

temp_1 <-
cor_comp_df_for_points %>%
  select(Source, x, y, x.num, y.num, `0h`, `24h`) %>%
  rename(x_time = `0h`,
         y_time = `24h`) %>%
  mutate(facet_x = "0h vs 24h")

temp_2 <-
cor_comp_df_for_points %>%
  select(Source, x, y, x.num, y.num, `0h`, `1h`) %>%
  rename(x_time = `0h`,
         y_time = `1h`) %>%
  mutate(facet_x = "0h vs 1h")

temp_3 <-
cor_comp_df_for_points %>%
  select(Source, x, y, x.num, y.num, `1h`, `24h`) %>%
  rename(x_time = `1h`,
         y_time = `24h`) %>%
  mutate(facet_x = "1h vs 24h")

cor_comp_df_for_points <- rbind(rbind(temp_1, temp_2), temp_3)
cor_comp_df_for_points <- cor_comp_df_for_points %>% 
  mutate(facet_x = factor(facet_x, 
                          levels = c(
                            "0h vs 24h",
                            "0h vs 1h",
                            "1h vs 24h"
                          )))

# plot
library(ggsci) # for scale_color_aaas
library(ggpubr) # for stat_cor and stat_regline

compare_cor_scatter_plt <- cor_comp_df_for_points %>% 
  mutate(Source = factor(Source, 
                         levels = c("Brian", "Daniel"), 
                         labels = c("Pilot", "Replicate"))) %>% 
  ggplot(aes(x = x_time, y = y_time, color = Source))+
  geom_abline(intercept = 0, slope = 1)+
  geom_hline(yintercept = 0, color = "darkgray")+
  geom_vline(xintercept = 0, color = "darkgray")+
  geom_smooth(method = "lm", se = F, fullrange = T)+
  geom_point(alpha = 0.4)+
  scale_color_aaas()+
  labs(title = "", x = "", y = "")+
  xlim( c(-1, 1))+
  ylim( c(-1, 1))+
  facet_grid(Source~facet_x)+
  stat_cor(label.x = -.61, label.y = -.8, size = 2.5) +
  stat_regline_equation(label.x = -.61, label.y = -1, size = 2.5)+
  theme(legend.position = "")+
  coord_fixed()


### Compare ECDFs across time ####

# current_Source = "Brian"
# time1 = "0h"
# time2 = "1h"

ecdf_list <- map(list(
  list(current_Source = "Brian", time1 = "0h", time2 = "24h"),
  list(current_Source = "Brian", time1 = "0h", time2 = "1h"),
  list(current_Source = "Brian", time1 = "1h", time2 = "24h"),

  list(current_Source = "Daniel", time1 = "0h", time2 = "24h"),
  list(current_Source = "Daniel", time1 = "0h", time2 = "1h"),
  list(current_Source = "Daniel", time1 = "1h", time2 = "24h")
  ), function(input_list){


    # input_list = list(current_Source = "Brian", time1 = "0h", time2 = "24h")

           current_Source = input_list$current_Source
           time1 = input_list$time1
           time2 = input_list$time2



           data1 <- filter(cor_comp_df, 
                           Source == current_Source & 
                             Time == time1
           ) %>% 
             select(Corr) %>% 
             unlist()

           data2 <- filter(cor_comp_df, 
                           Source == current_Source & 
                             Time == time2
           ) %>% 
             select(Corr) %>% 
             unlist()


           ecdf1 <- ecdf(data1)
           ecdf2 <- ecdf(data2)

           # used to get the most extreme difference between the two samples
           MostExtremeDiff <- seq(min(data1, data2, na.rm = T), max(data1, data2, na.rm = T), length.out = length(data1))
           x0 <- MostExtremeDiff[which(abs(ecdf1(MostExtremeDiff) - ecdf2(MostExtremeDiff)) ==
                                         max(abs(ecdf1(MostExtremeDiff) - ecdf2(MostExtremeDiff))))]
           y0 <- ecdf1(x0)
           y1 <- ecdf2(x0)

           # Run two sided KS test on data
           test.res <- ks.test(data1, data2)

           # Make output df
           graph.df <- 
             data.frame(data1, data2) %>% 
             gather(Condition, Value, 1:2) %>% 
             mutate(Condition = case_when(Condition == "data1" ~ time1, 
                                          Condition == "data2" ~ time2),
                    x0_extreme = x0[1],
                    y0_extreme = y0[1],
                    y1_extreme = y1[1],
                    Source = current_Source,
                    Contrast = paste0(time1 , " vs ", time2),
                    test_stat = test.res$statistic,
                    test_p = test.res$p.value
             )

           return(graph.df)
         })

ecdf_list_df <- do.call(rbind, ecdf_list)

ecdf_list_df <- ecdf_list_df %>% mutate(Contrast = factor(Contrast, levels = c("0h vs 24h", "0h vs 1h", "1h vs 24h")))

compare_cor_ecdf_plt <-
ecdf_list_df %>% 
  mutate(Source = factor(Source, 
                         levels = c("Brian", "Daniel"), 
                         labels = c("Pilot", "Replicate"))) %>% 
  ggplot()+
    geom_segment(aes(x = x0_extreme, y = y0_extreme, xend = x0_extreme, yend = y1_extreme),
                 # linetype = "dashed", 
                 color = "darkgray", size = 1)+
    geom_point(aes(x = x0_extreme , y= y0_extreme), color="darkgray", size=2) +
    geom_point(aes(x = x0_extreme , y= y1_extreme), color="darkgray", size=2) +
    stat_ecdf(aes(x = Value, group = Condition, color = Condition))+
  geom_label(aes(x = -0.75, y = 0.75, 
                label = paste0(
                  "KS = ", as.character(round(test_stat, 3)),
                  "\np = ", as.character(round(test_p, 3))
                  )), 
            size = 2)+
    labs(x = "Sample",
         y = "ECDF"#,
         # subtitle = paste("K-S Test", as.character(group1), "vs", as.character(group2),
         #               "\np-value:", as.character(round(test.res$p.value, digits = 4)))
         )+
  scale_color_brewer(palette = "Set2" )+
  xlim( c(-1, 1))+
  ylim( c(0, 1))+
    theme_minimal()+
    theme(legend.position = "right")+
  facet_grid(Source~Contrast)+
  coord_fixed()

    # scale_color_manual(values = colors)#+
  # theme(text=element_text(family="Calibri Light", size=14))


### Corr Table ####
cor_comp_df %>% 
  spread(Source, Corr) %>% 
  filter(Brian >= 0.7 & Daniel >= 0.7) %>%
  gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # spread(Time, Corr) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  select(-x.num, -y.num) %>% 
  select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h)





library(flextable)
# make display table
cor_comp_ft <- cor_comp_df %>% 
  mutate(Corr = round(Corr, digits = 3)) %>% 
  spread(Source, Corr) %>% 
  # mutate(Group = case_when( (Brian > 0.7) &  (Daniel > 0.7) ~ "Both", 
  #                           (Brian > 0.7) & !(Daniel > 0.7) ~ "Pilot", 
  #                          !(Brian > 0.7) &  (Daniel > 0.7) ~ "Replicate",
  #                          !(Brian > 0.7) & !(Daniel > 0.7) ~ "Neither"
  #                          )) %>% 
    gather(Source, Corr, c("Brian", "Daniel")) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = c("Corr")) %>% #, "Group")) %>% 
  select(-x.num, -y.num) %>% 
  select(x, 
         y,
         Brian_0h, 
         Daniel_0h, 
         Brian_1h,          
         Daniel_1h, 
         Brian_24h,          
         Daniel_24h) %>% 
  flextable()


cor_comp_ft


cor_comp_ft <- cor_comp_ft %>% theme_vanilla()

cor_comp_ft <- italic(cor_comp_ft, ~ y == y,  ~ y, italic = TRUE)
cor_comp_ft <- italic(cor_comp_ft, ~ x == x,  ~ x, italic = TRUE)


cor_comp_ft <- bold(cor_comp_ft, ~ Brian_0h >= 0.7, ~ Brian_0h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Brian_1h >= 0.7, ~ Brian_1h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Brian_24h >= 0.7, ~ Brian_24h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_0h >= 0.7, ~ Daniel_0h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_1h >= 0.7, ~ Daniel_1h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_24h >= 0.7, ~ Daniel_24h, bold = TRUE)


color_sig <- "#a1d99b"
cor_comp_ft <- bg(cor_comp_ft, ~ Brian_0h >= 0.7, ~ Brian_0h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Brian_1h >= 0.7, ~ Brian_1h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Brian_24h >= 0.7, ~ Brian_24h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_0h >= 0.7, ~ Daniel_0h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_1h >= 0.7, ~ Daniel_1h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_24h >= 0.7, ~ Daniel_24h, bg = color_sig)


cor_comp_ft <- set_header_labels(cor_comp_ft, 
                                 values = list(
                                   x = "", y = "",
                                   Brian_0h = "Pilot 0h", 
                                   Brian_1h = "Pilot 1h", 
                                   Brian_24h = "Pilot 24h", 
                                   Daniel_0h = "Replicate 0h", 
                                   Daniel_1h = "Replicate 1h", 
                                   Daniel_24h = "Replicate 24h"
                                 )

)

cor_comp_ft <- autofit(cor_comp_ft)
cor_comp_ft

pptx_file <- "./output/officer_output/CorComparison.pptx"
save_as_pptx("my table" = cor_comp_ft, path = pptx_file)

docx_file <- "./output/officer_output/CorComparison.docx"
save_as_docx("my table" = cor_comp_ft, path = docx_file)







# make counts tables
partial_count_table <- cor_comp_df %>% 
  mutate(Corr = ifelse(Corr > 0.7, T, F)) %>% 
  spread(Source, Corr) %>% 
  mutate(Group = case_when(Brian & Daniel ~ "Both", 
                           Brian & !Daniel ~ "Pilot", 
                           !Brian & Daniel ~ "Replicate",
                           !Brian & !Daniel ~ "Neither"
                           )) %>% 

  mutate(Group = factor(Group, 
                        levels = c("Both", "Pilot", "Replicate", "Neither")))

cor_change_count_ft <- partial_count_table %>% 
    select(Time, x, y, Group) %>% 
  group_by(Time, Group) %>% 
  tally() %>% 
  spread(Time, n) %>% 
  flextable()

cor_change_count_ft
cor_change_count_ft <- cor_change_count_ft %>% theme_vanilla()


cor_change_count_ft <- set_header_labels(cor_change_count_ft, 
                                 values = list(
                                   Group = "R >= 0.7"
                                 )

)

cor_change_count_ft <- autofit(cor_change_count_ft)
cor_change_count_ft

pptx_file <- "./output/officer_output/CorComparisonCount.pptx"
save_as_pptx("my table" = cor_change_count_ft, path = pptx_file)

docx_file <- "./output/officer_output/CorComparisonCount.docx"
save_as_docx("my table" = cor_change_count_ft, path = docx_file)







partial_count_table %>% 
    select(Time, x, y, Group) %>% 
  group_by(Time, Group) %>% 
  spread(Time, Group)



partial_count_table %>% 
    select(Time, x, y, Group) %>% 
  unite(cor, x, y, sep = " vs ") %>% 
  ggplot(aes(x=Time, y=cor, fill = Group))+
  geom_raster()+
  scale_fill_manual(values = c("#631879", "#EE0000", "#3B4992", "White"))+
  labs(y = "")+
  theme(axis.text.y = element_text(face = "italic"))













cor_comp_df %>% 
  spread(Source, Corr) %>% 
  filter(Brian >= 0.7 ) %>% ## | Daniel >= 0.7) %>%
  gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # spread(Time, Corr) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  select(-x.num, -y.num) %>% 
  select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h)


cor_comp_df %>% 
  spread(Source, Corr) %>% 
  filter(Daniel >= 0.7 ) %>% ## | Daniel >= 0.7) %>%
  gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # spread(Time, Corr) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  select(-x.num, -y.num) %>% 
  select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h)




cor_comp_df %>% 
  ggplot(aes(x = Corr, color = Time, fill = Time))+
  geom_density(alpha = 0.3)+
  facet_grid(Source~.)







# compare_cor_table <-

  # cor_comp_df %>% 
  # spread(Source, Corr) %>% 
  # # filter(Brian >= 0.7 | Daniel >= 0.7) %>% 
  # gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # # spread(Time, Corr) %>% 
  # pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  # select(-x.num, -y.num) %>% 
  # select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h) %>% 
  # mutate(Diff_0h = Brian_0h - Daniel_0h, 
  #        Diff_1h = Brian_1h - Daniel_1h, 
  #        Diff_24h = Brian_24h - Daniel_24h) %>% 
  # ggplot()+
  # geom_point(aes(x = Brian_0h, y = Daniel_0h), color = "black")




library(gridExtra)

ggsave(here("output", "officer_output", "compare_corgrams_plt.tiff"),
       compare_corgrams_plt, 
       width = 8.5, units = "in")

ggsave(here("output", "officer_output", "compare_cor_scatter_plt.tiff"),
       compare_cor_scatter_plt, 
       width = 8.5, units = "in")

ggsave(here("output", "officer_output", "compare_cor_ecdf_plt.tiff"),
       compare_cor_ecdf_plt, 
       width = 8.5, units = "in")

Make a just in case set of tables

unique_comparisons <- list()
for (i in seq_along(mrna_cols)){
  for (j in i:length(mrna_cols) ){
    unique_comparisons[[length(unique_comparisons)+1]] <- 
      data.frame(x = mrna_cols[i],
           y = mrna_cols[j])
  }  
}
unique_comparisons <- do.call(rbind, unique_comparisons)
unique_comparisons <- unique_comparisons %>% filter(x != y)

unique_comparisons$slope.p <- NA



# Ancova
pb = txtProgressBar(min = 0, max = nrow(unique_comparisons), 
                    initial = 0, style = 3, char = '█') 
for (i in 1:nrow(unique_comparisons)){
  setTxtProgressBar(pb,i)

  temp <- filter(mrna_rep_winxiqr, 
                 Source == "Daniel") %>% 
    select(unique_comparisons[i, "x"],
           unique_comparisons[i, "y"],
           "Time")

  fm <- lm(
    as.formula(paste(unique_comparisons[i, "y"], 
                     "~", 
                     unique_comparisons[i, "x"], 
                     "* Time")
    ), 
    data = temp
  )

  unique_comparisons[i, "slope.p"] <- broom::tidy(car::Anova(fm))[3, "p.value"]
}


# Pearson
unique_comparisons <- 
  rbind(mutate(unique_comparisons, Time = "0h"), 
      mutate(unique_comparisons, Time = "1h")) %>% 
  rbind(., mutate(unique_comparisons, Time = "24h"))

unique_comparisons$cor <- NA
unique_comparisons$t <- NA
unique_comparisons$p.value <- NA
unique_comparisons$df <- NA

pb = txtProgressBar(min = 0, max = nrow(unique_comparisons), 
                    initial = 0, style = 3, char = '█') 
for (i in 1:nrow(unique_comparisons)){
  setTxtProgressBar(pb,i)
  temp <- filter(mrna_rep_winxiqr, 
                 Source == "Daniel",
                 Time == unique_comparisons[i, "Time"]) %>% 
    select(unique_comparisons[i, "x"],
           unique_comparisons[i, "y"])


  # Cors
  res <- cor.test(x = temp[[unique_comparisons[i, "x"]]],
                  y = temp[[unique_comparisons[i, "y"]]],
                  method = "pearson", 
                  use = "pairwise.complete.obs")
  res <- broom::tidy(res)[, c("estimate", "statistic", "p.value", "parameter")]
  names(res) <- c("cor", "t",  "p.value", "df")

  unique_comparisons[i, c("cor", "t",  "cor.p", "df")] <- res
}


# full ref



# useful ref
cor_use_ft <- unique_comparisons %>% 
  select(x, y, Time, slope.p, cor, cor.p) %>%

  #fdr adj slope p
  mutate(slope.p = p.adjust(slope.p, method = "fdr")) %>% 
  pivot_wider(names_from = "Time", values_from = c("cor.p", "cor")) %>% 
  mutate(
    cor.p_0h = p.adjust(cor.p_0h, method = "fdr"),
    cor.p_1h = p.adjust(cor.p_1h, method = "fdr"),
    cor.p_24h = p.adjust(cor.p_24h, method = "fdr")
  ) %>% 
  mutate_at(.vars = c(
    "slope.p", 
    "cor.p_0h", "cor.p_1h", "cor.p_24h", 
    "cor_0h", "cor_1h", "cor_24h"), 
    .funs = round, digits = 3) %>% 
  flextable()



cor_use_ft <- cor_use_ft %>% theme_vanilla()

cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ x, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ y, bold = TRUE)

cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ slope.p, bold = TRUE)

cor_use_ft <- bold(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor.p_0h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor_0h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor.p_1h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor_1h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor.p_24h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor_24h, bold = TRUE)


color_sig <- "#a1d99b"
cor_use_ft <- bg(cor_use_ft, ~ slope.p <= 0.05, ~ slope.p, bg = color_sig)

cor_use_ft <- bg(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor.p_0h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor_0h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor.p_1h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor_1h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor.p_24h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor_24h, bg = color_sig)



cor_use_ft <- set_header_labels(cor_use_ft, 
                                values = list(
                                  x = "", y = "",
                          slope.p = "ANCOVA p",
                          cor.p_0h = "p 0h",
                          cor.p_1h = "p 1h",
                          cor.p_24h = "p 24h",
                          cor_0h = "R 0h",
                          cor_1h = "R 1h",
                          cor_24h = "R 24h"
                                )

)

cor_use_ft <- autofit(cor_use_ft)
cor_use_ft





pptx_file <- "./output/officer_output/DanielAncova.pptx"
save_as_pptx("my table" = cor_use_ft, path = pptx_file)

docx_file <- "./output/officer_output/DanielAncova.docx"
save_as_docx("my table" = cor_use_ft, path = docx_file)
df_for_cor_of_cors <- do.call(rbind, cor_list) %>% filter(!is.na(Corr))%>% 
  spread(Time, Corr)


# df_for_cor_of_cors  %>% 
#   ggplot(aes(x = `0h`, y=`1h`, group = Source, color = Source, fill = Source))+
#   geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+
#   geom_smooth(method = "lm", fullrange = T)+
#   geom_point()+
#   geom_point(shape = 1, color = "black")
#   # geom_point(aes(x = `0h`, y=`24h`), color = "red")
# 
# df_for_cor_of_cors  %>% 
#   ggplot(aes(x = `0h`, y=`24h`, group = Source, color = Source, fill = Source))+
#   geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+
#   geom_smooth(method = "lm", fullrange = T)+
#   geom_point()+
#   geom_point(shape = 1, color = "black")


df_for_cor_of_cors %>% 
  gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
  spread("Source", "Corr") %>% 
  ggplot(aes(x = Brian, y = Daniel, group = Time, color = Time, fill = Time))+
  geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+
  geom_smooth(method = "lm", fullrange = T)+
  geom_point()+
  geom_point(shape = 1, color = "black")+
  facet_grid(.~Time)+
  coord_fixed()


# correlation between us 
df_for_cor_of_cors %>% 
  gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
  spread("Source", "Corr") %>% 
  group_by(Time) %>% 
  summarise(CorrCorr = cor(Brian, Daniel, method = "spearman")) %>% 
  ggplot(aes(x = Time, y = CorrCorr, color = CorrCorr))+
  geom_hline(yintercept = 0)+
  geom_segment(aes(xend = Time, yend = 0), size= 1)+
  geom_point(size= 2)+
  geom_point(size= 2, shape = 1, color = "black")+
  scale_color_gradient2(
    low = RColorBrewer::brewer.pal(9, "PuOr")[1],
    high = RColorBrewer::brewer.pal(9, "PuOr")[9])+
  lims(y = c(-1, 1))+
  theme(legend.position = "")


set.seed(8978979)
resample_cor_of_cor <- map(seq(1, 1001), function(i){
  if (i == 1){
    res <- df_for_cor_of_cors %>% 
      ungroup() %>% 
      gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
      spread("Source", "Corr") %>% 
      group_by(Time) %>% 
      summarise(CorrCorr = cor(Brian, Daniel, method = "spearman"))      
  } else {
    res <- df_for_cor_of_cors %>%
      ungroup() %>% 
      gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
      mutate(Corr = sample(Corr)) %>% 
      spread("Source", "Corr") %>%
      group_by(Time) %>%
      summarise(CorrCorr = cor(Brian, Daniel, method = "spearman"))  
  }

  res$i <- i
  return(res)
})

resample_cor_of_cor <- do.call(rbind, resample_cor_of_cor)

resample_cor_of_cor %>% 
  mutate(obs = case_when(Time == "0h" ~  as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "0h"), CorrCorr)),
                         Time == "1h" ~  as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "1h"), CorrCorr)),
                         Time == "24h" ~ as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "24h"), CorrCorr)))) %>% 
  mutate(bool = CorrCorr >= obs) %>%
  group_by(Time) %>% 
  mutate(ep = mean(bool)) %>% 
  ggplot(aes(x = CorrCorr, color = Time, fill = Time))+
  geom_segment(aes(x = obs, xend = obs, y = 0, yend = 3))+
  geom_density(alpha = 0.2)+
  facet_grid(Time~.)

 #   Time  CorrCorr     i   obs bool        ep
 #   <chr>    <dbl> <int> <dbl> <lgl>    <dbl>
 # 1 0h     0.176       1 0.176 TRUE  0.0549  
 # 2 1h     0.380       1 0.380 TRUE  0.000999
 # 3 24h    0.128       1 0.128 TRUE  0.124  






  # spread(Time, CorrCorr) %>% 

Contrast correlation thresholds -- did they fall in similar ranges? Bin correlations and examine with a jaccard index

n_bins <- 5

library('clusteval') # for cluster_similarity(x, y, similarity="jaccard")


set.seed(9080)
cor_jaccard_list <- map(c("0h", "1h", "24h"), function(Time){
  temp <- cor_comp_df[cor_comp_df$Time == Time, ] %>% 
    mutate(Bins = cut(Corr, breaks = seq(-1, 1, length.out = n_bins))) %>% 
    select(-Corr) %>% 
    pivot_wider(names_from = "Source", values_from = "Bins")

  obs_jaccard <- cluster_similarity(temp$Brian, temp$Daniel, similarity="jaccard")  


  null_jaccard <- map(1:10000, function(i){
    cluster_similarity(sample(temp$Brian, replace = F), 
                       temp$Daniel, similarity="jaccard")
  }) %>% 
    unlist()


  temp <- with(density(null_jaccard), data.frame(x, y))
  temp <- temp %>% mutate(xmax = max(x),
                          obs = obs_jaccard,
                          ep = mean(null_jaccard >= obs_jaccard))

  return(temp)
})


plot_grid(plotlist = map(cor_jaccard_list, function(temp){
  ggplot(data = temp, aes(x = x, y = y))+
    geom_line()+
    geom_vline(xintercept = temp$obs[1])+
    geom_ribbon(data = temp[temp$x > temp$obs, ], 
                aes(xmin = obs, xmax = xmax, ymin = 0, ymax = y))+
    labs(subtitle = paste("    ep=", as.character(temp$ep[1])))
}), labels = c("0h", "1h", "24h"), nrow = 1)

Compare Directional Changes

# calc p values with kruskal-Wallis and follow it up with a Dunn's ####
mrna_rep_winxiqr <- mrna_rep_winxiqr %>% 
  pivot_longer(
    names_to = "mRNA",
    values_to = "Count",
    cols = names(select(mrna_rep_winxiqr, -Source, -Time, -uid)))

stats <- expand.grid(
  Source = c("Brian", "Daniel"),
  # Time = c("0h", "1h", "24h"),
  mRNA = unique(mrna_rep_winxiqr$mRNA),
  Main = NA,
  BvsC = NA,
  BvsD = NA,
  CvsD = NA
)

for (i in 1:nrow(stats)){
  t_Source <- stats[i, "Source"]
  t_mRNA <- stats[i, "mRNA"]

  temp <- mrna_rep_winxiqr %>% 
    filter(Source == t_Source & mRNA == t_mRNA & !is.na(Count)) %>% 
    mutate(Time = as.factor(Time)) # Avoid coering to factor in dunnTest()

  stats[i, "Main"] <- kruskal.test(Count ~ Time, temp)$p.value

  # i = 1
  dunn_res <- dunnTest(Count ~ Time, 
                       data = temp, 
                       method = "bonferroni") # bonferroni and hs produce the same result since it's within a single round of kruskal-wallis
  #FIXME -- Is bonferroni ideal here or would we be better off with sidak, holm, or hs?
  dunn_res <- dunn_res$res

  stats[i, "BvsC"] <- filter(dunn_res, Comparison == "0h - 1h")$P.adj
  stats[i, "BvsD"] <- filter(dunn_res, Comparison == "0h - 24h")$P.adj
  stats[i, "CvsD"] <- filter(dunn_res, Comparison == "1h - 24h")$P.adj
}

# get medians and prepare to plot ####
mrna_rep_medians <- mrna_rep_winxiqr %>% 
  group_by(Source, Time, mRNA) %>% 
  summarise(MedianCount = median(Count, na.rm =T)) %>% 
  arrange(Source, mRNA, Time)


mrna_rep_medians <- mrna_rep_medians %>% 
  mutate(
    MedianCount2 = MedianCount,
    id = case_when(Time == "0h" ~ 1,
                   Time == "1h" ~ 2,
                   Time == "24h" ~ 3)) %>% 
  pivot_wider(names_from = "Time", values_from = "MedianCount") %>% 
  rename(MedianCount = MedianCount2) %>% 
  group_by(Source, mRNA) %>% 
  mutate(`0h` = median(`0h`, na.rm = T),
         `1h` = median(`1h`, na.rm = T),
         `24h` = median(`24h`, na.rm = T)) %>% 
  ungroup() %>% 
  mutate(updown12 = ifelse(`0h` < `1h`, "up", "down"), 
         updown13 = ifelse(`0h` < `24h`, "up", "down"),
         updown23 = ifelse(`1h` < `24h`, "up", "down"))


# combine stats and prepared plotting df####
mrna_rep_medians <- full_join(mrna_rep_medians, stats) %>% 
  mutate(color1 = ifelse(Main < 0.05 & BvsC < 0.05, updown12, "none"),
         color2 = ifelse(Main < 0.05 & CvsD < 0.05, updown23, "none")) %>% 
  mutate(color1 = factor(color1, levels = c("down", "none", "up")),
         color2 = factor(color2, levels = c("down", "none", "up")))

# plot ####
temp <- mrna_rep_winxiqr %>% spread("Time", "Count")

ggplot(mrna_rep_medians)+
  geom_point(data = temp, aes(x = 0.5, y = `0h`), shape = "-")+
  geom_point(data = temp, aes(x = 1.5, y = `1h`), shape = "-")+
  geom_point(data = temp, aes(x = 2.5, y = `24h`), shape = "-")+

  geom_segment(aes(x = 0, xend = 1, y = `0h`, yend = `0h`), size = 1)+
  geom_segment(aes(x = 1, xend = 2, y = `1h`, yend = `1h`, 
                   color = color1), size = 1)+
  geom_segment(aes(x = 2, xend = 3, y = `24h`, yend = `24h`, 
                   color = color2), size = 1)+

  geom_segment(aes(x = 1, xend = 1, y = `0h`, yend = `1h`, 
                   color = color1), arrow = arrow(length = unit(0.03, "npc")), size = 1)+
  geom_segment(aes(x = 2, xend = 2, y = `1h`, yend = `24h`, 
                   color = color2), arrow = arrow(length = unit(0.03, "npc")), size = 1)+
  scale_color_manual(values = c("red", "gray", "green"))+
  theme_clean()+
  theme(legend.position = "")+
  facet_wrap(Source ~ mRNA, scales = "free_y", nrow = 2)

How similar are the results?

temp <- mrna_rep_medians %>% 
  select(Source, mRNA, starts_with("updown"), Main, BvsC, BvsD, CvsD) %>% 
  distinct() 

# Convert the results to catagories based on what our conclusion would be
# Is there a main effect? 
# Significantly different group? 
# What direction is it different in?
temp <- temp %>% 
  # If there is no main effect all comparisons are in the NA group
  mutate( 
    BvsC = ifelse(Main >= 0.05, NA, BvsC),
    BvsD = ifelse(Main >= 0.05, NA, BvsD),
    CvsD = ifelse(Main >= 0.05, NA, CvsD)
  ) %>% 
  # If there is no post hoc effect that comparison is in the NA group
  mutate( 
    BvsC = ifelse(BvsC >= 0.05, NA, BvsC),
    BvsD = ifelse(BvsD >= 0.05, NA, BvsD),
    CvsD = ifelse(CvsD >= 0.05, NA, CvsD)
  ) %>% 
  # Re-write lack of a change as 'none' instead of NA
  # if there _is_ a post hoc effect then assign that cell the sign of change
  mutate( 
    BvsC = ifelse(is.na(BvsC), 'none', updown12 ),
    BvsD = ifelse(is.na(BvsD), 'none', updown13 ),
    CvsD = ifelse(is.na(CvsD), 'none', updown23 )
  ) %>% 
  select(Source, mRNA, BvsC, BvsD, CvsD )



temp <- temp %>% 
  pivot_longer(cols = c("BvsC", "BvsD", "CvsD")) %>% 
  mutate(value = factor(value, levels = c("down", "none", "up"))) %>% 
  pivot_wider(names_from = "Source", values_from = "value")


temp %>% 
  pivot_longer(c("Brian", "Daniel"), 
               names_to = "Source", 
               values_to = "value") %>% 
  ggplot(aes(x = Source, y = value, color = Source, group = Source))+
  geom_hline(yintercept = c("none"))+
  geom_segment(aes(xend = Source, yend = "none"), size = 3, lineend = "butt")+
  facet_grid(name~mRNA)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        legend.position = "")



obs_jaccard <- cluster_similarity(temp$Brian, temp$Daniel, similarity="jaccard")  

null_jaccard <- map(1:10000, function(i){
  cluster_similarity(sample(temp$Brian, replace = F), 
                     temp$Daniel, similarity="jaccard")
}) %>% 
  unlist()


temp <- with(density(null_jaccard), data.frame(x, y))
temp <- temp %>% mutate(xmax = max(x),
                        obs = obs_jaccard,
                        ep = mean(null_jaccard >= obs_jaccard))

ggplot(data = temp, aes(x = x, y = y))+
  geom_line()+
  geom_vline(xintercept = temp$obs[1])+
  geom_ribbon(data = temp[temp$x > temp$obs, ], 
              aes(xmin = obs, xmax = xmax, ymin = 0, ymax = y))+
  labs(subtitle = paste("    ep=", as.character(temp$ep[1])))

3

Primary Questions

set up data

# Copied from above ----
M_winxiqr_list <- map(c("0h", "1h", "24h"), function(condition){
  temp <- M_all[M_all$Condition == condition, ]

  dvs <- names(M_all)[!(names(M_all) %in% c("Condition", "Cell", "Experiment", "Channel", "Sweep", "Source", "Pharm", "Time", "Sample"))]

  for (dv_col  in dvs){
    temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA)
  }

  return(temp)
})

M_winxiqr <- do.call(rbind, M_winxiqr_list)
M_winxiqr <- M_winxiqr %>% 
  # select(-Sweep) %>% 
  group_by(Condition, Cell, Experiment, Channel, Source, Pharm, Time, Sample) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct()

# Median imputation ----
M_winxiqr <- M_winxiqr %>% 
  select(-Channel, -Source, -Pharm, -Time, -Sample)

M_winxiqr <- M_winxiqr %>% 
  group_by(Condition, Experiment, Cell) %>%
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct()

library(imputeMissings)
M_winxiqr <- imputeMissings::impute(M_winxiqr, method = "median/mode")


for_label_Condition <- M_winxiqr$Condition
rownames(M_winxiqr) <- paste(M_winxiqr$Experiment, M_winxiqr$Cell, sep = "-")

Are the three time points separable in multidimensional space?

PCA

list_for_pca <- list(
  M_winxiqr[M_winxiqr$Condition == "0h", 
            names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]],
  M_winxiqr[M_winxiqr$Condition == "1h", 
            names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]],
  M_winxiqr[M_winxiqr$Condition == "24h", 
            names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]]
)

pca_summary_descriptions <- map(list_for_pca, function(df){
  mk_pca_multi_plt(input.df = df,
                   scree.max.y = 100)
})

pca_summary_descriptions <- map(list_for_pca, function(df){
  mk_pca_multi_plt(input.df = df,
                 scree.max.y = 100)
})


# {p4|p0}/{p1+p2+p3}

{pca_summary_descriptions[[1]]$scree | pca_summary_descriptions[[2]]$scree | pca_summary_descriptions[[3]]$scree}/
{pca_summary_descriptions[[1]]$biplt | pca_summary_descriptions[[2]]$biplt | pca_summary_descriptions[[3]]$biplt}
# {pca_summary_descriptions[[1]]$cont1 | pca_summary_descriptions[[2]]$cont1 | pca_summary_descriptions[[3]]$cont1}

ggsave(plot = last_plot(), 
       filename = here("output", "officer_output", "PCA_Contrast.tiff"),        
       width = 11.5, height = 4.76)

3d

## mrna only ====
vis_pca_3D_plotly(
  input.df = M_winxiqr[, names(M_winxiqr)[!(names(M_winxiqr) %in% c(
    "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]],
  color.by = unlist(select(M_winxiqr, Condition)),
  use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"),
  point.size = 3,
  point.opacity = 1,
  dropdowns = T,
  dropdown.width = 1.5,
  dropdown.opacity = 0.5,
  ellipse.opacity = 0#.02
)


## membrane only ====
vis_pca_3D_plotly(
  input.df = M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    # "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    # "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]],
  color.by = unlist(select(M_winxiqr, Condition)),
  use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"),
  point.size = 3,
  point.opacity = 1,
  dropdowns = T,
  dropdown.width = 1.5,
  dropdown.opacity = 0.5,
  ellipse.opacity = 0#.02
)


## excitability only ====
vis_pca_3D_plotly(
  input.df = M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    # "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"#, 
    # "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]],
  color.by = unlist(select(M_winxiqr, Condition)),
  use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"),
  point.size = 3,
  point.opacity = 1,
  dropdowns = T,
  dropdown.width = 1.5,
  dropdown.opacity = 0.5,
  ellipse.opacity = 0#.02
)

UMAP

set.seed(16069)
temp.umap = umap(select(M_winxiqr, 
                        -Condition, -Cell, -Experiment, 
                        -cc.3_4, -cc.3_5, -ig.3_4, -ig.3_5))

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  coord_fixed()






## mrna only ====
temp.umap = umap(
  M_winxiqr[, names(M_winxiqr)[!(names(M_winxiqr) %in% c(
    "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]]
  )

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

umap_mrna <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  theme(legend.position = "bottom")
  # coord_fixed()

## membrane only ====
temp.umap = umap(
  M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    # "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    # "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]]
  )

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

umap_membrane <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  theme(legend.position = "bottom")
  # coord_fixed()

## excitability only ====
temp.umap = umap(
  M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    # "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"#, 
    # "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]]
  )

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

umap_excite <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  theme(legend.position = "bottom")
  # coord_fixed()

umap_mrna + umap_membrane + umap_excite 

Clustering

mk_hclust_plts <- function(
  df = unite(M_winxiqr, uid, Experiment, Cell, sep = "-"),
  cluster_by = c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"),
  uid_col = "uid",
  n_clusters = 3,
  true_groups = "Condition",
  true_colors = RColorBrewer::brewer.pal(3, "Set2"),
  cluster_colors = RColorBrewer::brewer.pal(3, "Set1")
){
  df <- as.data.frame(df) 

  if (!exists("cluster_colors")){
    cluster_colors = rainbow(n_clusters)
  }
  ## prep
  # move uid to rowname
  row.names(df) <- df[[uid_col]]
  df_groups <- select(df, all_of(true_groups))
  df <- df[, cluster_by]

  ## Cluster
  cluster <- pvclust(t(df),
                     method.hclust = "ward.D2",
                     method.dist = "correlation",
                     use.cor = "pairwise.complete.obs")


  ## make dendrogram  ####
  dend <- cluster$hclust %>% 
    as.dendrogram() 

  # iteratively coloring the labels is a workaround to get the "true" groups shown
  dend_labs <- rownames_to_column(df_groups, var = "rownames")[, ]
  dend_labs <- full_join(data.frame(rownames = labels(dend)), dend_labs)
  for(i in seq_along(unique(df_groups[[true_groups]]))){
    true_group <- unique(df_groups[[true_groups]])[i]
    true_color <- true_colors[i]

    dend <- dend %>% 
      dendextend::color_labels(
        col = true_color, 
        labels = dend_labs[dend_labs[[true_groups]] == true_group, "rownames"]) 


  }

  dend <- dend %>% 
    set("branches_k_color", 
        k = n_clusters, 
        value = cluster_colors
    ) %>% 
    set("branches_lwd", 0.7) %>%
    set("labels_cex", 0.6) 

  dend_cluster_only <- dend %>% 
    set("labels_colors",
        k = n_clusters,
        value = cluster_colors) %>%
    as.ggdend()


  dend <- dend %>% 
    as.ggdend()



  plt_dend_cluster_only <- ggplot(dend_cluster_only)+
    theme(axis.ticks.y = element_line(),
          axis.text.y = element_text(),
          axis.line.y = element_line())


  plt_dend <- ggplot(dend)+
    theme(axis.ticks.y = element_line(),
          axis.text.y = element_text(),
          axis.line.y = element_line())  


  ## Add reality ribbon with or without clustering result ####
  groups_to_plt <- full_join(
    as.data.frame(dend$labels),
    rownames_to_column(var = "label", df_groups))

  plt_grouping <- groups_to_plt %>% 
    ggplot(aes_string(x="x", y="0", fill = true_groups))+
    geom_tile()+
    scale_fill_manual(values = true_colors)+
    theme_void()+
    labs(x = "", y = "")+
    theme(legend.position = "left")

  plt_grouping_contrast <- ggplot()+
    geom_tile(data = groups_to_plt, aes_string(x="x", y="0.5", fill = true_groups))+
    scale_fill_manual(values = true_colors)+

    ggnewscale::new_scale("fill") +
    geom_tile(data = data.frame(x = seq_along(dend_cluster_only$labels$col),
                                cluster_groups = as.character(as.numeric(as.factor(dend_cluster_only$labels$col)))
    ),
    aes_string(x="x", y= "-0.5", fill = "cluster_groups"),
    )+
    scale_fill_manual(values = cluster_colors)+

    theme_void()+
    labs(x = "", y = "")+
    theme(legend.position = "left")


  ## Add heatmap  ####
  data_to_plt <- full_join(
    as.data.frame(dend$labels),
    rownames_to_column(var = "label", df)) 

  data_to_plt <- 
    data_to_plt %>% 
    gather("key", "value", 
           names(data_to_plt)[
             !(names(data_to_plt) %in% c("x", "y", 
                                         "label", "col", "cex", 
                                         true_groups))
           ])

  plt_heatmap_raw <- data_to_plt %>% 
    ggplot(aes(x, 
               y = key, 
               fill = value))+
    geom_tile()+
    scale_fill_viridis_c()+
    labs(x = "", y = "")+
    theme(panel.background = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          legend.position = "left")


  plt_heatmap_z <- data_to_plt %>% 
    group_by(key) %>% 
    mutate(mean = mean(value, na.rm = T),
           sd = sd(value, na.rm = T)) %>% 
    mutate(value = ((value - mean)/sd)) %>% # Now Z scores
    ggplot(aes(x, 
               y = key, 
               fill = value))+
    geom_tile()+
    scale_fill_viridis_c()+
    labs(x = "", y = "")+
    theme(panel.background = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          legend.position = "left")


  ## Return plots, manually tweak layout  ####
  return(
    list(
      pvclust_out = cluster,
      dendrogram_clusters = plt_dend_cluster_only,
      dendrogram_both = plt_dend,
      group_tile = plt_grouping,
      group_compare_tile = plt_grouping_contrast,
      heatmap_raw = plt_heatmap_raw,
      heatmap_z = plt_heatmap_z
    )
  )
}
## mrna only ====
o_mrna <- 
  mk_hclust_plts(
    df = rownames_to_column(M_winxiqr, "uid"),
    cluster_by = mrna_cols,
    uid_col = "uid",
    n_clusters = 3,
    true_groups = "Condition",
    true_colors = RColorBrewer::brewer.pal(3, "Set2"), #c("#fdd49e", "#fc8d59", "#b30000"),
    cluster_colors = RColorBrewer::brewer.pal(3, "Set1") 
  )

(o_mrna$dendrogram_both+
  scale_y_continuous(limits = c(-1.51, 3), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mrna$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  )+ 
  patchwork::plot_layout(heights = c(2, .2))



(o_mrna$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mrna$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(.2, 5))



(o_mrna$dendrogram_both+
  scale_y_continuous(limits = c(-1.51, 1.51), labels = scales::comma)#c(-1.51, 3), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mrna$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mrna$heatmap_z + theme(legend.position = "right", axis.text.y = element_text(face = "italic")) /
  (o_mrna$heatmap_raw+ylim()+theme(legend.position = "right", axis.text.y = element_text(face = "italic")))+ 
  patchwork::plot_layout(heights = c(5, .5, 5, 5))




## membrane only ====
o_mem <- 
  mk_hclust_plts(
    df = rownames_to_column(M_winxiqr, "uid"),
    cluster_by = c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"),
    uid_col = "uid",
    n_clusters = 3,
    true_groups = "Condition",
    true_colors = RColorBrewer::brewer.pal(3, "Set2"),
    cluster_colors = RColorBrewer::brewer.pal(3, "Set1") 
  )


(o_mem$dendrogram_both+
  scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mem$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  )+ 
  patchwork::plot_layout(heights = c(2, .2))



(o_mem$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mem$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(.2, 1))




(o_mem$dendrogram_both+
  scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mem$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mem$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(2, .2, 1))


## excitability only ====
o_exc <- 
  mk_hclust_plts(
    df = rownames_to_column(M_winxiqr, "uid"),
    cluster_by = epsp_cols,
    uid_col = "uid",
    n_clusters = 3,
    true_groups = "Condition",
    true_colors = RColorBrewer::brewer.pal(3, "Set2"),
    cluster_colors = RColorBrewer::brewer.pal(3, "Set1") 
  )

(o_exc$dendrogram_both+
  scale_y_continuous(limits = c(-.15, .75), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_exc$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  )+ 
  patchwork::plot_layout(heights = c(2, .2))




(o_exc$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_exc$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(.2, 1))




(o_exc$dendrogram_both+
  scale_y_continuous(limits = c(-.15, .75), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_exc$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_exc$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(2, .2, 1))



# (o_mrna$group_compare_tile+theme(legend.position = "")) /
#   (o_mrna$heatmap_z + theme(legend.position = "bottom"))+
#   patchwork::plot_layout(heights = c(.2, 1)) |
# 
# (o_exc$group_compare_tile+theme(legend.position = "")) /
#   (o_exc$heatmap_z + theme(legend.position = "bottom"))+
#   patchwork::plot_layout(heights = c(.2, 1)) |
# 
# (o_mem$group_compare_tile+theme(legend.position = "")) /
#   (o_mem$heatmap_z + theme(legend.position = "bottom"))+
#   patchwork::plot_layout(heights = c(.2, 1))



((o_mrna$dendrogram_both+
  scale_y_continuous(limits = c(-.52, 3), labels = scales::comma))+
(o_mem$dendrogram_both+
  scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma))+
(o_exc$dendrogram_both+
  scale_y_continuous(limits = c(-.15, .75), labels = scales::comma)))/

  ((o_mrna$group_compare_tile+theme(legend.position = ""))+
(o_mem$group_compare_tile+theme(legend.position = ""))+
(o_exc$group_compare_tile+theme(legend.position = "")))+
  patchwork::plot_layout(heights = c(1, .1))

Evaluating clustering quality

# # if (!requireNamespace("BiocManager", quietly = TRUE))
# #     install.packages("BiocManager")
# # BiocManager::install("BiocGenerics")
# library(BiocGenerics)
# library(imputeMissings)
# library(magrittr) # for %>% (this is part of tidyverse)
# library(clues)
# library(NMF)
# 
# ## Evaluate clustering performance ==========================================
# get_cluster_comparisons <- function(reference.clustering = mrna_raw$Cell,
#                                     generated.clustering) {
#   if (length(reference.clustering) != length(generated.clustering)) {
#     warning("Input vectors are not of the same length!")
#   } else {
#     # reference.clustering = mrna_raw$Cell
#     # generated.clustering = kmeans.m$cluster
#     output <- array(0, dim = 6)
#     reference.clustering <- as.numeric(reference.clustering) %>% as.factor()
#     generated.clustering <- as.numeric(generated.clustering) %>% as.factor()
#     output <- NMF::purity(reference.clustering, generated.clustering)
#     names(output) <- "Purity"
#     # Get a lot of concurrance measures
#     # https://davetang.org/muse/2017/09/21/adjusted-rand-index/
#     output <- c(
#       output,
#       clues::adjustedRand(
#         BiocGenerics::as.vector(reference.clustering),
#         BiocGenerics::as.vector(generated.clustering)
#       )
#     )
#     return(output)
#   }
# }

resampling based evaluation of jaccard

# temp.clust <- as.dendrogram(cluster$hclust)
# assignment <- cutree(temp.clust, k = 3)[order.dendrogram(temp.clust)]
# 
# # use names to get groupings
# time_groups <- as.factor(unlist(transpose(stringr::str_split(names(assignment), pattern = "-"))[[1]]))
# 
# library(purrr)
# set.seed(9348957)
# resample.indices <- map(1:10001, function(i){
#   if (i == 1){
#     indices <- get_cluster_comparisons(reference.clustering = time_groups,
#                                        assignment)
#   }else{
#     indices <- get_cluster_comparisons(reference.clustering = time_groups,
#                                        sample(assignment, replace = F))
#   }
# })
# resample.indices <- do.call(rbind, resample.indices)
# resample.indices <- resample.indices %>% as.data.frame()
# 
# library(ggplot2)
# resample.indices %>% 
#   ggplot(aes(x = Jaccard))+
#   geom_density()+
#   geom_vline(xintercept = as.numeric(resample.indices[1, "Jaccard"]))+
#   xlim(0,1)+
#   theme_minimal()+
#   labs(subtitle = paste0("ep = ", as.character(round(mean(resample.indices$Jaccard >= resample.indices[1, "Jaccard"]), digits = 4))))
# # empirical p-value from 10,000 random samplings
# ggsave(plot = last_plot(), 
#        filename = here("officer_output", "hclust_jaccard_dist.tiff"),
#        width = 11.5, height = 4.76)

Correlations

Correlation Heatmaps

# corr <- cor(M_winxiqr[, mrna_cols], use = "pairwise.complete.obs", method = "spearman")
# p.mat <- cor_pmat(M_winxiqr[, mrna_cols], use = "pairwise.complete.obs", method = "spearman")
#  
# ggcorrplot(
#   corr,
#   p.mat = p.mat,
#   hc.order = FALSE,
#   # method = "circle",
#   type = "lower",
#   insig = "blank",
#   colors = RColorBrewer::brewer.pal(9, "PuOr")[c(1, 5, 9)]
# )

# Time = "0h"
cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){
  df = M_winxiqr[M_winxiqr$Condition == Time, c(mrna_cols, memb_cols, epsp_cols)]

  corr <- cor(df, use = "pairwise.complete.obs", method = "spearman")
  p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman")

  cor_df_bin <- corr
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }


  ggcorrplot(
    cor_df_bin, 
    p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = Time)+
    theme(legend.position = ""#,
          # axis.text.x = element_text(angle = 90)
          )
})

# plot_grid(plotlist = cor_plt_list)

(cor_plt_list[[1]]+theme(
    axis.text.x = element_text(angle = 90))) +
(cor_plt_list[[2]]+theme(
    axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))+
(cor_plt_list[[3]]+theme(
  axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))
cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){
  df = M_winxiqr[M_winxiqr$Condition == Time, c(mrna_cols)]

  corr <- cor(df, use = "pairwise.complete.obs", method = "spearman")
  p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman")

  cor_df_bin <- corr
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }


  ggcorrplot(
    cor_df_bin, 
    p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = Time)+
    theme(legend.position = ""#,
          # axis.text.x = element_text(angle = 90)
          )
})

# plot_grid(plotlist = cor_plt_list)

(cor_plt_list[[1]]+theme(
    axis.text.x = element_text(angle = 90))) +
(cor_plt_list[[2]]+theme(
    axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))+
(cor_plt_list[[3]]+theme(
  axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))
temp_list <- map(unique(M_winxiqr$Condition), function(condition){
  temp <- filter(M_winxiqr, Condition == condition) %>% 
    select(-Cell, -Experiment)

  temp <- temp[, c(mrna_cols, memb_cols, epsp_cols )] %>% correlate() #%>% shave()
  temp$Condition <- condition
  temp <- gather(temp, "term2", "Cor", names(temp)[!(names(temp) %in% c("term", "Condition"))])

  return(temp)
})


# cor breaks
# cor_br <- seq(-1, 1, length.out = 8)
# 
# plt <- do.call(rbind, temp_list) %>% 
#   filter(term %in% mrna_cols) %>% 
#   filter(term2 %in% c(memb_cols, epsp_cols)) %>% 
#   ggplot(aes(term, term2, fill = Cor))+
#   geom_tile()
# 
# 
# plt+scale_fill_stepsn(colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]  )
#   
# 
# plt+scale_fill_gradientn(colors=RColorBrewer::brewer.pal(n = 9, name = "PuOr"),
#                          na.value = "transparent",
#                          breaks=seq(-1, 1, length.out = 8),
#                          # labels=c("Minimum",0,"Maximum"),
#                          limits=c(-1,1))


library(corrr)
library(ggtext)
library(glue)
# do.call(rbind, temp_list) 
corr_bt_mrna_phys <- map(1:3, function(i){

temp_list[[i]] %>% 
  filter(term %in% mrna_cols) %>% 
  filter(term2 %in% c(memb_cols, epsp_cols)) %>% 
  mutate(term = glue(("<i>{term}</i>"))) %>% 
  ggplot(aes(term, term2, fill = Cor))+
  geom_tile()+
  labs(x= "mRNA", y = "")+
  theme(axis.text.x = element_markdown(angle = 45))+
  scale_fill_stepsn(
    colors=RColorBrewer::brewer.pal(n = 8, name = "PuOr"),
                         na.value = "transparent",
                         breaks=round(seq(-1, 1, length.out = 9), digits = 2)[2:8],
                         limits=c(-1,1)
  )+
  coord_fixed()

})

plot_grid(plotlist = corr_bt_mrna_phys)


corr_bt_mrna_phys[[1]]+theme(legend.position = "") + 
  corr_bt_mrna_phys[[2]]+theme(legend.position = "") + 
  corr_bt_mrna_phys[[3]]

library(GGally)
ggpairs(select(filter(M_winxiqr),#, Condition == "0h"), 
               all_of(c("Condition", 
                        mrna_cols, "r1", 
                        "Ihtk.0", "Ihtk.0.s", 
                        "Ia.0", "Ia.0.s"
                        ))),#memb_cols, "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "AUC_Obs"))),
        mapping = ggplot2::aes(color = Condition),
        lower = list(continuous = wrap("smooth", se = F, alpha = 0.8, size=1)))


        # ggplot2::aes(colour=Condition, aes = 0.1))
#FIXME What library did I use for "BaselineTEA.png" ? 

Correlation ECDFs

# FIXME

How do these relationships change for TEA over time?

#TODO

What univariate changes occur?

Produce univariate tables

Run anovas

tictoc::tic()
temp_list <- map(
  c(mrna_cols, memb_cols, epsp_cols),
  # c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), 
  function(temp_col){
    print(temp_col)

    temp <- M_all[M_all$Source == "Daniel", c("Condition", "Experiment", "Cell", temp_col)] %>% distinct()

    # Identify outliers for each group
    temp_b <- temp[temp$Condition == "0h", ]
    temp_c <- temp[temp$Condition == "1h", ]
    temp_d <- temp[temp$Condition == "24h", ]

    temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ]
    temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ]
    temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ]

    temp <- rbind(temp_b, temp_c) %>% rbind(temp_d)

    # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ]
    temp <- temp[!is.na(temp[[temp_col]]), ]

    # make sure we don't have pseudoreplicates for network measurements
    if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){
      temp <- temp %>% 
        select(-Cell) %>% 
        group_by(Condition, Experiment) %>% 
        mutate_all(median, na.rm = T) %>% 
        distinct()
    }


    if(nrow(temp) < 10){

      return(list(
        dv = temp_col,
        p = NA,
        ep = NA,
        hsd_groups = data.frame(dv = c(NA, NA, NA), 
                                groups = c(NA, NA, NA), 
                                Condition = c(NA, NA, NA)
        ),
        fm = NA))

    } else {


      temp_shuffle <- temp
      resample_array <- map(1:1000, function(i){
        temp_shuffle$Condition <- sample(temp_shuffle$Condition, replace = F)
        fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp_shuffle)      
        return(car::Anova(fm)[1,3])
      }) %>% unlist()


      fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp)

      fm_hsd <- agricolae::HSD.test(fm, trt = "Condition") 
      fm_hsd <- fm_hsd$group

      fm_hsd$Condition <- rownames(fm_hsd)

      fm_hsd_p <- agricolae::HSD.test(fm, trt = "Condition", 
                                      group = F)$comparison
      fm_hsd_p[rownames(fm_hsd_p) == "0h - 1h", "pvalue"]

      return(list(
        dv = temp_col,
        p = car::Anova(fm)[1,4],
        ep = mean(resample_array >= car::Anova(fm)[1,3]),
        hsd_groups = fm_hsd,
        fm = fm,
        p0.1 = fm_hsd_p[rownames(fm_hsd_p) == "0h - 1h", "pvalue"],
        p0.24 = fm_hsd_p[rownames(fm_hsd_p) == "0h - 24h", "pvalue"],
        p1.24 = fm_hsd_p[rownames(fm_hsd_p) == "1h - 24h", "pvalue"]        
        ))
    }
  })
tictoc::toc()


temp_list <- temp_list %>% transpose()
fm_overview_table <- data.frame(dv = unlist(temp_list[[1]]),
                                p  = unlist(temp_list[[2]]),
                                ep = unlist(temp_list[[3]]),

                                p0.1 =  unlist(temp_list[[6]]),
                                p0.24 = unlist(temp_list[[7]]),
                                p1.24 = unlist(temp_list[[8]])
                                )


# work up post hoc stats
temp_list_hsd <- map(seq_along(temp_list[[4]]), function(i){
  if (!is.na(unique(temp_list[[4]][[i]]$groups))){
    temp_list[[4]][[i]] %>% 
      gather("dv", "Estimate", names(temp_list[[4]][[i]])[1]) %>% 
      pivot_wider(names_from = "Condition", values_from = c("groups", "Estimate") ) %>% 
      mutate_all(as.character)
  }
})  

fm_overview_hsd <- do.call(rbind, temp_list_hsd)


fm_overview_table <- full_join(fm_overview_table, fm_overview_hsd)
# for adding tukey hsd p vals into plots
fm_overview <- fm_overview_table

Format table

# add groupings
fm_overview_table <- fm_overview_table %>% mutate(Family = case_when(
  dv %in% c("vrest") ~ "Resting Voltage",
  dv %in% c("r11", "r1") ~ "Membrane Resistance",
  dv %in% ionic_cols ~ "Outward Currents",
  dv %in% epsp_cols ~ "Excitability",
  dv %in% c("r12", "rc", "ig", "cc", "ig.3_4", "ig.3_5", "ig.4_5") ~ "Coupling Measures"
))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = factor(fm_overview_table$Family, levels = c("Membrane Resistance", "Resting Voltage", "Coupling Measures", "Outward Currents", "Excitability"))) %>% 
  arrange(Family) %>% 
  mutate(Family = as.character(Family))

# temp <- rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName)
fm_overview_table <- full_join(fm_overview_table, 
                               rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName, Family2 = Family))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = ifelse(is.na(Family) & !is.na(Family2), Family2, Family)) %>% 
  select(-Family2) %>% 
  filter(!is.na(p))


# add additional cols
fm_overview_table$fdr <- p.adjust(fm_overview_table$ep, method = "fdr")

fm_overview_table <- fm_overview_table %>% 
  mutate(Estimate_0h = as.numeric(Estimate_0h),
         Estimate_1h = as.numeric(Estimate_1h),
         Estimate_24h = as.numeric(Estimate_24h)) %>% 
  mutate(p = round(p, digits = 3),
         ep = round(ep, digits = 3),
         fdr = round(fdr, digits = 3),
         Estimate_0h = round(Estimate_0h, digits = 3),
         Estimate_1h = round(Estimate_1h, digits = 3),
         Estimate_24h = round(Estimate_24h, digits = 3),
         p0.1 = round(p0.1, digits = 3),
         p0.24 = round(p0.24, digits = 3),
         p1.24 = round(p1.24, digits = 3) ) %>% 
  mutate(stars = case_when(fdr > 0.10               ~ "",
                           fdr < 0.10 & fdr > 0.05  ~ ".",
                           fdr < 0.05 & fdr > 0.01  ~ "*",
                           fdr < 0.01 & fdr > 0.001 ~ "**",
                           fdr < 0.001              ~ "***"
  ))


# library(officer)
library(flextable)
M_ft <-flextable(fm_overview_table,
                 col_keys = c("Family", "dv", "p", "ep",
                              "fdr", 
                              "stars", 
                              "Estimate_0h", "Estimate_1h", "Estimate_24h",
                              "p0.1",  "p0.24",  "p1.24"))

M_ft <- theme_vanilla(M_ft)
M_ft <- merge_v(M_ft, j = c("Family") )
# M_ft <- color(M_ft, ~ fdr < 0.05, ~ stars, color = "red")
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ dv, bold = TRUE)
M_ft <- bold(M_ft, ~ p   <= 0.05, ~ p, bold = TRUE)
M_ft <- bold(M_ft, ~ ep  <= 0.05, ~ ep, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ fdr, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ stars, bold = TRUE)

# get the groupings without showing the columns. 

color_a = "#ffeda0"
color_ab= "#feb24c"
color_b = "#f03b20"
color_c = "#2b8cbe"

M_ft <- bg(M_ft, ~ groups_0h == "a", ~ Estimate_0h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_0h == "ab",~ Estimate_0h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_0h == "b", ~ Estimate_0h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_0h == "c", ~ Estimate_0h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_1h == "a", ~ Estimate_1h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_1h == "ab",~ Estimate_1h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_1h == "b", ~ Estimate_1h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_1h == "c", ~ Estimate_1h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_24h == "a", ~ Estimate_24h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_24h == "ab",~ Estimate_24h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_24h == "b", ~ Estimate_24h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_24h == "c", ~ Estimate_24h, bg = color_c)

M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_0h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_1h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_24h, bg = "White")


M_ft <- set_header_labels(M_ft, 
                          dv = "", stars = "",
                          Estimate_0h = "0h", Estimate_1h = "1h", Estimate_24h = "24h",
                          p0.1 = "0h vs 1h",  p0.24 = "0h vs 24h",  p1.24 = "1h vs 24h"
                          )

M_ft <- autofit(M_ft)
M_ft

pptx_file <- "./officer_output/DanielAnova.pptx"
save_as_pptx("my table" = M_ft, path = pptx_file)

docx_file <- "./officer_output/DanielAnova.docx"
save_as_docx("my table" = M_ft, path = docx_file)

produce univariate plots

# mrna_cols, memb_cols, epsp_cols

plot_univariate_decorated_0 <- function(temp = plt_membrane,
                                        yvar = "Ihtk.0",
                                        new.y = "",
                                        new.title = ""){
  # allow for customization of y axis
  if (typeof(new.title) == "character"){
    if(typeof(new.y) == "character"){
      if(new.y == ""){
        new.title <- yvar
      }
    } else if (typeof(new.y) == "character" | typeof(new.y) == "expression"){
      new.title <- new.title
    } else {
      warning("new.y not supported type")
      new.title <- yvar
    }
  }

  names(temp)[names(temp) == yvar] <- "yvar"

  temp_bands <- temp %>% 
    group_by(Condition) %>% 
    summarise(
      q10 = quantile(yvar, probs = 0.1, na.rm = T),
      q25 = quantile(yvar, probs = 0.25, na.rm = T),
      # q45 = quantile(yvar, probs = 0.45, na.rm = T),
      q50 = quantile(yvar, probs = 0.5, na.rm = T),
      # q55 = quantile(yvar, probs = 0.55, na.rm = T),
      q75 = quantile(yvar, probs = 0.75, na.rm = T),
      q90 = quantile(yvar, probs = 0.9, na.rm = T),
    ) %>% 
    mutate(outlier_up = q50 + 1.5*(q75 - q25),
           outlier_down = q50 - 1.5*(q75 - q25))

  temp <- full_join(temp, temp_bands)

  temp_plt <- ggplot(data = temp, aes(x = Condition, y = yvar))+
    # geom_ribbon(aes(x = Condition, ymin = q10, ymax = q90, group = 1), color = "lightgray", fill = "cornflowerblue", alpha = 0.3)+
    geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = 1), color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
    # geom_ribbon(aes(x = Condition, ymin = q45, ymax = q55, group = 1), color = "black", fill = "cornflowerblue", alpha = 0.3)+
    geom_line(aes(x = Condition, y = q50, group = 1), linetype = "dashed", color = "blue")+

    geom_line(aes(x = Condition, y = outlier_up, group = 1), linetype = "dashed", color = "black")+
    geom_line(aes(x = Condition, y = outlier_down, group = 1), linetype = "dashed", color = "black")+

    geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
    # geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 1)+
    geom_point(alpha = 0.5)+
    labs(title = new.title, y=new.y, x = "")

  return(temp_plt)
}

membrane changes

plt_membrane <- M_all %>% 
  select(Condition, Experiment, Cell, all_of(memb_cols)) %>% 
  group_by(Condition, Experiment, Cell) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct() 

membrane_dvs_plt <- c("r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope", "vrest")
new.y_vector <- c(expression(M~Omega), expression(M~Omega), "nA", expression(frac(nA, mV)), "nA", expression(frac(nA, mV)), "mV" )
new.title_vector     <- c(expression(R[In]), 
                          expression(R[Membrane]), 
                          expression(I[HTK] ~ Intercept), 
                          expression(I[HTK] ~ Slope), 
                          expression(I[A] ~ Intercept),
                          expression(I[A] ~ Slope),
                          expression(V[Rest]))
membrane_plts <- map(seq_along(membrane_dvs_plt), function(i){plot_univariate_decorated_0(
  temp = ungroup(plt_membrane), 
  yvar = membrane_dvs_plt[i],
  new.y = new.y_vector[i],
  new.title = new.title_vector[i])})






# run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison
for (i in seq_along(membrane_dvs_plt)){ # <--------------------------------------- !
  temp_annotation <- fm_overview_table %>% filter(dv == membrane_dvs_plt[i]) # <-- ! 

  if (temp_annotation$fdr <= 0.05){

    # base y position for on max y and % of range
    current_yrange <- ggplot_build(
      membrane_plts[[i]] # <------------------------------------------------------ !
    )$layout$panel_params[[1]]$y.range 

    start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1]
    step_size <- (current_yrange[2] - current_yrange[1])*.08

    # create the annotation data frame
    df <- data.frame(
          group1 = c("0h", "0h",  "1h"),
          group2 = c("1h", "24h", "24h"),
          p.adj = c(temp_annotation$p0.1,
                    temp_annotation$p0.24, 
                    temp_annotation$p1.24)
        ) %>% 
      mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% 
      mutate(p.adj = as.character(p.adj)) %>% 
      mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj))

    # only increase the y position if there was a significant difference
    # begin at 1 to get the right output
    steps_up <- cumsum(df$sig)
    steps_up[1] <- 1

    df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up]

    membrane_plts[[i]] <- membrane_plts[[i]]+ # <------------------------------------- !
      stat_pvalue_manual(
        data = df, 
        hide.ns = T
      )

  }
}


# (membrane_plts[[1]] / 
#     membrane_plts[[2]]) |
#   (membrane_plts[[3]] /
#      membrane_plts[[4]]) |
#   (membrane_plts[[5]] /
#      membrane_plts[[6]]) |
#   (membrane_plts[[7]] / 
#      patchwork::plot_spacer())

# add to make the scaling match other multiplots
length(membrane_plts)
for(i in 8:14){
  membrane_plts[[i]] <- ggplot()
}

ggsave(here("officer_output", "membrane_plts.svg"),
       cowplot::plot_grid(plotlist =  membrane_plts, labels = c(LETTERS[1:7], rep("", times = 7))), 
       # width = 8.5, 

       # width = 16, 
       # height = 8,

       width = 16, 
       height = 16,

       units = "in")

# walk(seq_along(membrane_dvs_plt), function(i){
#   ggsave(paste0(membrane_dvs_plt[i],".tiff"), 
#          plot = membrane_plts[[i]], 
#          path = here("officer_output"), 
#          width = 11.5/4, height = 4.76)  
# })

excitability

# Columns: 8
# $ Cor                 <dbl> 0.5569679, 0.5569679, 0.5569679, 0.5569679, 0.556967~
# $ Min.V_Obs           <dbl> -53.92456, -53.92456, -53.92456, -53.92456, -53.9245~
# $ Max.Amplitude_Obs   <dbl> -16.6320804, -16.6320804, -16.6320804, -16.6320804, ~
# $ Max.Amplitude_Sim   <dbl> -25.5070750, -25.5070750, -25.5070750, -25.5070750, ~
# $ AUC_Obs             <dbl> 55976.54, 55976.54, 55976.54, 55976.54, 55976.54, 55~
# $ AUC_Sim             <dbl> 32069.58, 32069.58, 32069.58, 32069.58, 32069.58, 32~
# $ Max.Amplitude_Delta <dbl> 8.981806, 8.981806, 8.981806, 8.981806, 8.981806, 8.~
# $ AUC_Delta           <dbl> 18563.238, 18563.238, 18563.238, 18563.238, 18563.23~

plt_epsp <- M_all[, c("Condition", "Experiment", "Cell", epsp_cols)] %>% 
  group_by(Condition, Experiment, Cell) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct() 

# $ Condition           <chr> "1h", "1h", "1h", "24h", "1h", "24h", "24h", "24h", ~
# $ Experiment          <chr> "190808a", "190808a", "190830", "190903", "190904", ~
# $ Cell                <chr> "4", "5", "4", "3", "5", "4", "5", "4", "5", "5", "4~
# $ Cor                 <dbl> 0.5569679, 0.6112679, NA, 0.9774225, 0.8716307, 0.94~
# $ Min.V_Obs           <dbl> -53.924562, -57.052614, NA, -61.050416, -51.895143, ~
# $ Max.Amplitude_Obs   <dbl> -16.6320804, -24.1851812, NA, -0.9613037, -45.288086~
# $ Max.Amplitude_Sim   <dbl> -25.5070750, -33.6612173, NA, -0.4841494, -37.848033~
# $ AUC_Obs             <dbl> 55976.539, 35180.643, NA, 61242.214, 8488.885, 36081~
# $ AUC_Sim             <dbl> 32069.584, 38518.177, NA, 59193.615, 3901.010, 29769~
# $ Max.Amplitude_Delta <dbl> 8.981806, 6.438339, NA, 0.308876, 2.380225, 28.70748~
# $ AUC_Delta           <dbl> 18563.23796, -251.67701, NA, 2050.58898, 5702.37419,~


epsp_dvs_plt <- c(
  # "Cor", 
  # "Min.V_Obs", 
  # "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta",
  # "AUC_Obs", "AUC_Sim", "AUC_Delta"

  "Cor", 
  "Cor.IQR", 

  "Min.V_Obs", 
  "Min.V_Obs.IQR", 

  "Max.Amplitude_Obs", 
  "Max.Amplitude_Obs.IQR", 

  "AUC_Obs",
  "AUC_Obs.IQR", 

  "Max.Amplitude_Delta", 
  "Max.Amplitude_Delta.IQR", 

  "AUC_Delta", 
  "AUC_Delta.IQR",

  "Max.Amplitude_Sim", 
  "AUC_Sim")

new.y_vector <- c("R", "R",

                  "mV", "mV", "mV", "mV", 

                  "mV/Sec", "mV/Sec",

                  "mV", "mV",

                  "mV/Sec", "mV/Sec",

                  "mV",
                  "mV/Sec")

new.title_vector <- c("Correlation", "Correlation IQR",

                      "Baseline", "Baseline IQR",

                      "Obs. Amp.", "Obs. Amp. IQR", 

                      "Obs. AUC", "Obs. AUC IQR",

                      "Diff. Amplitude", "Diff. Amp. IQR",

                      "Diff. AUC", "Diff. AUC IQR",

                      "Sim. Amp.", 
                      "Sim. AUC" )

epsp_plts <- map(seq_along(epsp_dvs_plt), function(i){plot_univariate_decorated_0(
  temp = ungroup(plt_epsp), 
  yvar = epsp_dvs_plt[i],
  new.y = new.y_vector[i],
  new.title = new.title_vector[i] )})


# ((epsp_plts[[1]]+ylim(c(0, 1))) / 
#     patchwork::plot_spacer()/ 
#     patchwork::plot_spacer()) |
#   (epsp_plts[[3]] /
#      epsp_plts[[4]]/
#      epsp_plts[[5]]) |
#   (epsp_plts[[6]] /
#      epsp_plts[[7]]/
#      epsp_plts[[8]]) 


# run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison
for (i in seq_along(epsp_dvs_plt)){ # <--------------------------------------- !
  temp_annotation <- fm_overview_table %>% filter(dv == epsp_dvs_plt[i]) # <-- ! 

  if (temp_annotation$fdr <= 0.05){

    # base y position for on max y and % of range
    current_yrange <- ggplot_build(
      epsp_plts[[i]] # <------------------------------------------------------ !
    )$layout$panel_params[[1]]$y.range 

    start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1]
    step_size <- (current_yrange[2] - current_yrange[1])*.08

    # create the annotation data frame
    df <- data.frame(
          group1 = c("0h", "0h",  "1h"),
          group2 = c("1h", "24h", "24h"),
          p.adj = c(temp_annotation$p0.1,
                    temp_annotation$p0.24, 
                    temp_annotation$p1.24)
        ) %>% 
      mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% 
      mutate(p.adj = as.character(p.adj)) %>% 
      mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj))

    # only increase the y position if there was a significant difference
    # begin at 1 to get the right output
    steps_up <- cumsum(df$sig)
    steps_up[1] <- 1

    df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up]

    epsp_plts[[i]] <- epsp_plts[[i]]+ # <------------------------------------- !
      stat_pvalue_manual(
        data = df, 
        hide.ns = T
      )

  }
}


# cowplot::plot_grid(plotlist =  epsp_plts, labels = LETTERS)

ggsave(here("officer_output", "epsp_plts.svg"),
       cowplot::plot_grid(plotlist =  epsp_plts, labels = LETTERS),
       width = 16, 
       height = 16,
       # width = 8.5, 
       units = "in")

# walk(seq_along(epsp_dvs_plt), function(i){
#   ggsave(paste0(epsp_dvs_plt[i],".tiff"), 
#          plot = epsp_plts[[i]], 
#          path = here("officer_output"), 
#          width = 11.5/4, height = 4.76)  
# })

mrna

univariate change over time -- bend quartile

plt_mrna <- M_all[, c("Condition", "Experiment", "Cell", mrna_cols)] %>%
  group_by(Condition, Experiment, Cell) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct()


mrna_plts <- map(seq_along(mrna_cols), function(i){plot_univariate_decorated_0(
  temp = ungroup(plt_mrna), 
  yvar = mrna_cols[i],
  new.y = "",
  new.title = mrna_cols[i] )+theme(plot.title = element_text(face="italic"))})




# na_y = c(0, 20000)
# ca_y = c(0, 7000)
# k_y  = c(0, 5000)
# inx_y <- c(0, 40000)
# 
# 
# mrna_plts[[1]]+coord_cartesian(ylim = na_y)+
#   
#   mrna_plts[[2]]+coord_cartesian(ylim = ca_y) +
#   mrna_plts[[3]]+coord_cartesian(ylim = ca_y) +
#   mrna_plts[[4]]+coord_cartesian(ylim = ca_y) +
#   patchwork::plot_spacer()+
#   
#   mrna_plts[[5]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[6]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[7]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[8]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[9]]+coord_cartesian(ylim = k_y) +
#   
#   mrna_plts[[10]]+coord_cartesian(ylim = inx_y) +
#   mrna_plts[[11]]+coord_cartesian(ylim = inx_y) +
#   mrna_plts[[12]]+coord_cartesian(ylim = inx_y) +
#   patchwork::plot_spacer() +
#   patchwork::plot_spacer() + 
#   
#   patchwork::plot_layout(ncol = 5, nrow = 3)


#TODO probably a more elegant way to do this using the following functions
# patchwork::get_dim()
# patchwork::set_dim()
# mrna_plts[[1]]+ set_dim(mrna_plts[[2]], get_dim(mrna_plts[[1]]))
# get_max_dim(mrna_plts[[1]], mrna_plts[[2]])




# run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison
for (i in seq_along(mrna_cols)){ # <--------------------------------------- !
  temp_annotation <- fm_overview_table %>% filter(dv == mrna_cols[i]) # <-- ! 

  if (temp_annotation$fdr <= 0.05){

    # base y position for on max y and % of range
    current_yrange <- ggplot_build(
      mrna_plts[[i]] # <------------------------------------------------------ !
    )$layout$panel_params[[1]]$y.range 

    start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1]
    step_size <- (current_yrange[2] - current_yrange[1])*.08

    # create the annotation data frame
    df <- data.frame(
          group1 = c("0h", "0h",  "1h"),
          group2 = c("1h", "24h", "24h"),
          p.adj = c(temp_annotation$p0.1,
                    temp_annotation$p0.24, 
                    temp_annotation$p1.24)
        ) %>% 
      mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% 
      mutate(p.adj = as.character(p.adj)) %>% 
      mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj))

    # only increase the y position if there was a significant difference
    # begin at 1 to get the right output
    steps_up <- cumsum(df$sig)
    steps_up[1] <- 1

    df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up]

    mrna_plts[[i]] <- mrna_plts[[i]]+ # <------------------------------------- ! x2
      stat_pvalue_manual(
        data = df, 
        hide.ns = T
      )

  }
}


ggsave(here("officer_output", "mrna_plts.svg"),
       cowplot::plot_grid(plotlist =  mrna_plts, labels = LETTERS),
       width = 16, 
       height = 12,
       # width = 8.5, 
       units = "in")

ggsave(here("officer_output", "mrna_plts.svg"),
       cowplot::plot_grid(plotlist =  mrna_plts, labels = LETTERS, nrow = 4, ncol = 4),
       width = 16, 
       height = 16,
       # width = 8.5, 
       units = "in")


# length(membrane_plts)
# for(i in 8:14){
#   membrane_plts[[i]] <- ggplot()
# }
# 
# ggsave(here("officer_output", "membrane_plts.svg"),
#        cowplot::plot_grid(plotlist =  membrane_plts, labels = c(LETTERS[1:7], rep("", times = 7))), 
#        # width = 8.5, 
# 
#        # width = 16, 
#        # height = 8,
#        
#        width = 16, 
#        height = 16,
#        
#        units = "in")


# walk(seq_along(mrna_cols), function(i){
#   ggsave(paste0(mrna_cols[i],".tiff"), 
#          plot = mrna_plts[[i]]+theme(plot.title = element_text(face="italic")), 
#          path = here("officer_output"), 
#          width = 11.5/4, height = 4.76)  
# })
# plt_mrna_long <- plt_mrna %>% 
#   gather(mrna, count, mrna_cols) %>% 
#   filter(!is.na(count))
# 
# 
# plt_mrna_long_bands <- plt_mrna_long %>% 
#     group_by(Condition, mrna) %>% 
#     summarise(
#       q10 = quantile(count, probs = 0.1, na.rm = T),
#       q25 = quantile(count, probs = 0.25, na.rm = T),
#       q50 = quantile(count, probs = 0.5, na.rm = T),
#       q75 = quantile(count, probs = 0.75, na.rm = T),
#       q90 = quantile(count, probs = 0.9, na.rm = T),
#     ) %>% 
#     mutate(outlier_up = q50 + 1.5*(q75 - q25),
#            outlier_down = q50 - 1.5*(q75 - q25))
# 
# temp <- full_join(plt_mrna_long, plt_mrna_long_bands)
# 
# 
# 
# # devtools::install_github("teunbrand/ggh4x")
# library(ggh4x)
# 
# filter(temp, mrna %in% c("nav")) %>% 
#   ggplot(aes(x = Condition, y = count))+
#     geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), 
#                 color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#     geom_line(aes(x = Condition, y = q50, group = 1), 
#               linetype = "dashed", color = "blue")+
#     geom_line(aes(x = Condition, y = outlier_up, group = 1), 
#               linetype = "dashed", color = "black")+
#     geom_line(aes(x = Condition, y = outlier_down, group = 1), 
#               linetype = "dashed", color = "black")+
#     
#     geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
# 
#     geom_point(alpha = 0.5)+
#   facet_wrap(~ mrna, strip.position = "bottom")
# 
# 
# 
# filter(temp, mrna %in% c("shaker", "shab", "shal", "shaw1", "shaw2", "bkkca", "cav1", "cav2")) %>% 
#   ggplot(aes(x = Condition, y = count))+
#     geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), 
#                 color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#     geom_line(aes(x = Condition, y = q50, group = 1), 
#               linetype = "dashed", color = "blue")+
#     geom_line(aes(x = Condition, y = outlier_up, group = 1), 
#               linetype = "dashed", color = "black")+
#     geom_line(aes(x = Condition, y = outlier_down, group = 1), 
#               linetype = "dashed", color = "black")+
#     
#     geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
# 
#     geom_point(alpha = 0.5)+
#   facet_wrap(~ mrna, strip.position = "bottom")
# 
# 
# 
# filter(temp, mrna %in% c("inx1", "inx2", "inx3")) %>% 
#   ggplot(aes(x = Condition, y = count))+
#     geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), 
#                 color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#     geom_line(aes(x = Condition, y = q50, group = 1), 
#               linetype = "dashed", color = "blue")+
#     geom_line(aes(x = Condition, y = outlier_up, group = 1), 
#               linetype = "dashed", color = "black")+
#     geom_line(aes(x = Condition, y = outlier_down, group = 1), 
#               linetype = "dashed", color = "black")+
#     
#     geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
# 
#     geom_point(alpha = 0.5)+
#   facet_wrap(~ mrna, strip.position = "bottom")

univariate change over time -- boxplots

# devtools::install_github("teunbrand/ggh4x")
library(ggh4x)
# FIXME 
plt_mrna_long <- plt_membrane %>%
  gather(mrna, count, mrna_cols) %>% 
  filter(!is.na(count))
  # filter(mrna %in% c("bkkca", "shal")) %>% 

return_nested_mrna_plt <- function(temp = plt_mrna_long){
  ggplot(temp, aes(mrna, count))+
    geom_boxplot(aes(fill = Condition))+
    geom_point(position = position_jitter(width = 0.05), #size =3,  
               alpha = 0.7)+  
    theme(axis.text.x=element_blank(),
          legend.position = "")+ # writes over mrna labels so they only show up in the facet
    labs(x = "")+
    facet_nested(~ mrna + Condition, scales = "free_x", switch = "x")
}


# return_nested_mrna_plt(temp = filter(plt_mrna_long, 
#                                      mrna %in% unlist(mRNAInfo[mRNAInfo$Class == "Channel", "RGeneName"])
#                                      ))


return_nested_mrna_plt(temp = filter(plt_mrna_long, 
                                     mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Voltage-dependent K+ Channel", "RGeneName"])
                                     ))

return_nested_mrna_plt(temp = filter(plt_mrna_long, 
                                     mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Other K+ Channel", "RGeneName"])
                                     ))

return_nested_mrna_plt(temp = filter(plt_mrna_long, 
                                     mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Ca2+ Channel", "RGeneName"])
                                     ))

# innexins
# TODO flix axis to right, bind
return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx1", "inx2", "inx3", "inx4")))
return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx5")))
return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx5")))+scale_y_log10()

What happens at the network level?

plt_tevc <- M_all_outliers %>%
  select(Condition, Experiment, ig.3_4, ig.3_5, ig.4_5) %>%
  distinct() %>%
  gather("ElectricalSynapse", "ig", c("ig.3_4", "ig.3_5", "ig.4_5")) %>%
  filter(!is.na(ig))

plt_1.1 <- plot_univariate_decorated_0(
  temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.3_4")),
  yvar = "ig",
  new.y = "nA/mV",
  new.title = expression(I[g]~LC[3] %<->% LC[4]))+
  coord_cartesian(ylim = c(0, 0.1))

plt_1.2 <- plot_univariate_decorated_0(
  temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.3_5")),
  yvar = "ig",
  new.y = "nA/mV",
  new.title = expression(I[g]~LC[3] %<->% LC[5]))+
  coord_cartesian(ylim = c(0, 0.1))

plt_1.3 <- plot_univariate_decorated_0(
  temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.4_5")),
  yvar = "ig",
  new.y = "nA/mV",
  new.title = expression(I[g]~LC[4] %<->% LC[5]))+
  coord_cartesian(ylim = c(0, 0.5))+
  scale_y_continuous(position = "right")


library(patchwork)
plt_1.1 + plt_1.2 + plt_1.3+ plot_layout(widths = c(1, 1, 1) )


ggsave(paste0("ig_lc34.tiff"),
         plot = plt_1.1,
         path = here("officer_output"),
         width = 11.5/4, height = 4.76)
ggsave(paste0("ig_lc35.tiff"),
         plot = plt_1.2,
         path = here("officer_output"),
         width = 11.5/4, height = 4.76)
ggsave(paste0("ig_lc45.tiff"),
         plot = plt_1.3,
         path = here("officer_output"),
         width = 11.5/4, height = 4.76)



#### ####

plt_tecc <- M_all_outliers %>%
  select(Condition, Experiment, cc.3_4, cc.3_5, cc.4_3, cc.4_5, cc.5_3, cc.5_4) %>%
  group_by(Condition, Experiment) %>%
  mutate_all(median, na.rm = T) %>%
  ungroup() %>%
  distinct()

cowplot::plot_grid(plotlist = list(
  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.3_4",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[3] %->% LC[4]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.3_5",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[3] %->% LC[5]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.4_3",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[4] %->% LC[3]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.4_5",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[4] %->% LC[5]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.5_3",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[5] %->% LC[4]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.5_4",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[5] %->% LC[4]))+
    coord_cartesian(ylim = c(0, 1))
))



#### ####

triangle_lower_df <- data.frame(
  id    = c("b", "b", "b"),
  value = c("2", "2", "2"),
  x     = c(0, 1, 1),
  y     = c(0, 1, 0)
)

triangle_upper_df <- data.frame(
  id    = c("a", "a", "a"),
  value = c("1", "1", "1"),
  x     = c(0, 1, 0),
  y     = c(0, 1, 1)
)


plt_2.1 <- plt_tecc %>%
  ggplot(aes(cc.3_4, cc.4_3))+
  geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#e41a1c")+
  geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#377eb8")+
  geom_abline(slope = 1, intercept = 0, linetype = "dashed")+
  geom_point()+
  facet_grid(.~Condition)

plt_2.2 <- plt_tecc %>%
  ggplot(aes(cc.3_5, cc.5_3))+
  geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#e41a1c")+
  geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#4daf4a")+
  geom_point()+
  facet_grid(.~Condition)

plt_2.3 <- plt_tecc %>%
  ggplot(aes(cc.4_5, cc.5_4))+
  geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#377eb8")+
  geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#4daf4a")+
  geom_point()+
  facet_grid(.~Condition)

plt_2.1 / plt_2.2 / plt_2.3

what happens at the level of output (excitability)?

for my curiosity:

library(brms)
# fit1 <- brm(shal ~ Condition,
#             data = temp,
#             family = gaussian())
# # 'C:/rtools40/usr/mingw_/bin/g++' not found


fit1 <- readRDS(here("data", "brms_models", paste0("shal", ".RDS")))

plot(fit1, pars = c("Condition"))
# 
plot(conditional_effects(fit1, effects = "Condition"))
# 
# temp_col <- "Ia.0"



tictoc::tic()
walk(
  c(mrna_cols, 
    memb_cols, 

    # "Ia.Slope"
    # "Ia.0.s", "Ia.Slope.s"

    # "Cor"
    epsp_cols
    # "Min.V_Obs", "Max.Amplitude_Obs", #"Max.Amplitude_Sim", 
    # "AUC_Obs"#, "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"
    ),
  # c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), 
  function(temp_col){


    if (!file.exists(here("data", "brms_models", paste0(temp_col, ".RDS")))){

      print(temp_col)

      temp <- M_all[M_all$Source == "Daniel", c("Condition", "Experiment", "Cell", temp_col)] %>% distinct()

      # Identify outliers for each group
      temp_b <- temp[temp$Condition == "0h", ]
      temp_c <- temp[temp$Condition == "1h", ]
      temp_d <- temp[temp$Condition == "24h", ]

      temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ]
      temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ]
      temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ]

      temp <- rbind(temp_b, temp_c) %>% rbind(temp_d)

      # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ]
      temp <- temp[!is.na(temp[[temp_col]]), ]

      # make sure we don't have pseudoreplicates for network measurements
      if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){
        temp <- temp %>% 
          select(-Cell) %>% 
          group_by(Condition, Experiment) %>% 
          mutate_all(median, na.rm = T) %>% 
          distinct()
      }


      if(nrow(temp) < 10){

        return(list(
          dv = temp_col,
          p = NA,
          ep = NA,
          hsd_groups = data.frame(dv = c(NA, NA, NA), 
                                  groups = c(NA, NA, NA), 
                                  Condition = c(NA, NA, NA)
          ),
          fm = NA))

      } else {

        fit1 <- brm(as.formula(paste0(temp_col, " ~ Condition")), 
                    data = temp, 
                    family = gaussian())

        saveRDS(fit1, here("data", "brms_models", paste0(temp_col, ".RDS")))

        # rm(list = "fit1")
      }


    }



  })
tictoc::toc()

mk table

run anovas

# temp <- M_all %>% 
#   select(Condition, Experiment, Cell,  Ihtk.0) %>% 
#   group_by(Condition, Experiment, Cell) %>% 
#   mutate(Ihtk.0 = median(Ihtk.0, na.rm = T)) %>% 
#   distinct() %>% 
#   ungroup()
# 
# temp_bands <- temp %>% group_by(Condition) %>% summarise(
#   q10 = quantile(Ihtk.0, probs = 0.1, na.rm = T),
#   q25 = quantile(Ihtk.0, probs = 0.25, na.rm = T),
#   q45 = quantile(Ihtk.0, probs = 0.45, na.rm = T),
#   q50 = quantile(Ihtk.0, probs = 0.5, na.rm = T),
#   q55 = quantile(Ihtk.0, probs = 0.55, na.rm = T),
#   q75 = quantile(Ihtk.0, probs = 0.75, na.rm = T),
#   q90 = quantile(Ihtk.0, probs = 0.9, na.rm = T),
#   )
# 
# temp <- full_join(temp, temp_bands)
# 
# ggplot(data = temp, aes(x = Condition, y = Ihtk.0))+
#   geom_ribbon(aes(x = Condition, ymin = q10, ymax = q90, group = 1), color = "lightgray", fill = "cornflowerblue", alpha = 0.3)+
#   geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = 1), color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#   # geom_ribbon(aes(x = Condition, ymin = q45, ymax = q55, group = 1), color = "black", fill = "cornflowerblue", alpha = 0.3)+
#   geom_line(aes(x = Condition, y = q50, group = 1), linetype = "dashed", color = "blue")+
#   geom_point(aes(x = Condition, y = q50), color = "lightgray", size = 4)+
#   geom_point(aes(x = Condition, y = q50), color = "black", size = 4, shape = 1)+
#   geom_point(alpha = 0.5)

# Make table ----
# c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols)
# 
# temp_col = "Ihtk.0"

#TODO figure out what to do with the epsp cols
tictoc::tic()
temp_list <- map(
  c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), function(temp_col){
    print(temp_col)

    temp <- M_all[, c("Condition", "Experiment", "Cell", temp_col)] %>% distinct()

    # Identify outliers for each group
    temp_b <- temp[temp$Condition == "0h", ]
    temp_c <- temp[temp$Condition == "1h", ]
    temp_d <- temp[temp$Condition == "24h", ]

    temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ]
    temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ]
    temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ]

    temp <- rbind(temp_b, temp_c) %>% rbind(temp_d)

    # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ]
    temp <- temp[!is.na(temp[[temp_col]]), ]

    # make sure we don't have pseudoreplicates for network measurements
    if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){
      temp <- temp %>% 
        select(-Cell) %>% 
        group_by(Condition, Experiment) %>% 
        mutate_all(median, na.rm = T) %>% 
        distinct()
    }


    if(nrow(temp) < 10){

      return(list(
        dv = temp_col,
        p = NA,
        ep = NA,
        hsd_groups = data.frame(dv = c(NA, NA, NA), 
                                groups = c(NA, NA, NA), 
                                Condition = c(NA, NA, NA)
        ),
        fm = NA))

    } else {


      temp_shuffle <- temp
      resample_array <- map(1:1000, function(i){
        temp_shuffle$Condition <- sample(temp_shuffle$Condition, replace = F)
        fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp_shuffle)      
        return(car::Anova(fm)[1,3])
      }) %>% unlist()


      fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp)

      fm_hsd <- agricolae::HSD.test(fm, trt = "Condition") 
      fm_hsd <- fm_hsd$group

      fm_hsd$Condition <- rownames(fm_hsd)



      return(list(
        dv = temp_col,
        p = car::Anova(fm)[1,4],
        ep = mean(resample_array >= car::Anova(fm)[1,3]),
        hsd_groups = fm_hsd,
        fm = fm))
    }
  })
tictoc::toc()


temp_list <- temp_list %>% transpose()
fm_overview_table <- data.frame(dv = unlist(temp_list[[1]]),
                                p  = unlist(temp_list[[2]]),
                                ep = unlist(temp_list[[3]]))


# work up post hoc stats
temp_list_hsd <- map(seq_along(temp_list[[4]]), function(i){
  if (!is.na(unique(temp_list[[4]][[i]]$groups))){
    temp_list[[4]][[i]] %>% 
      gather("dv", "Estimate", names(temp_list[[4]][[i]])[1]) %>% 
      pivot_wider(names_from = "Condition", values_from = c("groups", "Estimate") ) %>% 
      mutate_all(as.character)
  }
})  

fm_overview_hsd <- do.call(rbind, temp_list_hsd)

Make table

fm_overview_table <- full_join(fm_overview_table, fm_overview_hsd)


# add groupings
fm_overview_table <- fm_overview_table %>% mutate(Family = case_when(
  dv %in% c("vrest") ~ "Resting Voltage",
  dv %in% c("r11", "r1") ~ "Membrane Resistance",
  dv %in% ionic_cols ~ "Outward Currents",
  dv %in% epsp_cols ~ "Excitability",
  dv %in% c("r12", "rc", "ig", "cc", "ig.3_4", "ig.3_5", "ig.4_5") ~ "Coupling Measures"
))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = factor(fm_overview_table$Family, levels = c("Membrane Resistance", "Resting Voltage", "Coupling Measures", "Outward Currents", "Excitability"))) %>% 
  arrange(Family) %>% 
  mutate(Family = as.character(Family))

# temp <- rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName)
fm_overview_table <- full_join(fm_overview_table, 
                               rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName, Family2 = Family))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = ifelse(is.na(Family) & !is.na(Family2), Family2, Family)) %>% 
  select(-Family2) %>% 
  filter(!is.na(p))


# add additional cols
fm_overview_table$fdr <- p.adjust(fm_overview_table$ep, method = "fdr")

fm_overview_table <- fm_overview_table %>% 
  mutate(Estimate_0h = as.numeric(Estimate_0h),
         Estimate_1h = as.numeric(Estimate_1h),
         Estimate_24h = as.numeric(Estimate_24h)) %>% 
  mutate(p = round(p, digits = 4),
         ep = round(ep, digits = 4),
         fdr = round(fdr, digits = 4),
         Estimate_0h = round(Estimate_0h, digits = 4),
         Estimate_1h = round(Estimate_1h, digits = 4),
         Estimate_24h = round(Estimate_24h, digits = 4)) %>% 
  mutate(stars = case_when(fdr > 0.10               ~ "",
                           fdr < 0.10 & fdr > 0.05  ~ ".",
                           fdr < 0.05 & fdr > 0.01  ~ "*",
                           fdr < 0.01 & fdr > 0.001 ~ "**",
                           fdr < 0.001              ~ "***"
  ))


# library(officer)
library(flextable)
M_ft <-flextable(fm_overview_table,
                 col_keys = c("Family", "dv", "p", "ep",
                              "fdr", 
                              "stars", "Estimate_0h", "Estimate_1h", "Estimate_24h"))

M_ft <- theme_vanilla(M_ft)
M_ft <- merge_v(M_ft, j = c("Family") )
# M_ft <- color(M_ft, ~ fdr < 0.05, ~ stars, color = "red")
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ dv, bold = TRUE)
M_ft <- bold(M_ft, ~ p   <= 0.05, ~ p, bold = TRUE)
M_ft <- bold(M_ft, ~ ep  <= 0.05, ~ ep, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ fdr, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ stars, bold = TRUE)

# get the groupings without showing the columns. 

color_a = "#ffeda0"
color_ab= "#feb24c"
color_b = "#f03b20"
color_c = "#2b8cbe"

M_ft <- bg(M_ft, ~ groups_0h == "a", ~ Estimate_0h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_0h == "ab",~ Estimate_0h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_0h == "b", ~ Estimate_0h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_0h == "c", ~ Estimate_0h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_1h == "a", ~ Estimate_1h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_1h == "ab",~ Estimate_1h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_1h == "b", ~ Estimate_1h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_1h == "c", ~ Estimate_1h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_24h == "a", ~ Estimate_24h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_24h == "ab",~ Estimate_24h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_24h == "b", ~ Estimate_24h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_24h == "c", ~ Estimate_24h, bg = color_c)


M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_0h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_1h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_24h, bg = "White")


M_ft <- set_header_labels(M_ft, 
                          dv = "", stars = "",
                          Estimate_0h = "0h", Estimate_1h = "1h", Estimate_24h = "24h" )

M_ft <- autofit(M_ft)
M_ft

pptx_file <- "./officer_output/DanielAnova.pptx"
save_as_pptx("my table" = M_ft, path = pptx_file)




# fm_overview_table %>% 
#   ggplot(aes(x = ep))+
#   geom_histogram(binwidth = 0.025)+
#   geom_vline(xintercept = 0.05)

Revisitng excitability 21/03/17

df <- full_join(metadata[, c("Experiment",  "FileName", "Condition")],
          epsp)
df <- df %>% filter(!is.na(Channel))

df %>% 
  unite(uid, Experiment, Cell) %>% 
  filter(Key != "predicted") %>% 
  ggplot(aes(x = uid, AUC, color = Type))+
  geom_point(alpha = 0.5)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.))+
facet_wrap(.~Condition)


# it looks like we don't have full coverage of epsp_50
df %>% group_by(Experiment, Cell, Key, Type) %>% filter(Sweep == 1) %>% tally() %>% spread(Type, n) %>% View()

Another take on excitability:

binned Cor

# FIXME Column `AUC` doesn't exist
# Goal: extract Absolute measures and differential measures for 
# 1. Burst Amplitude (max mV)
# 2. AUC 
# 3. Stimulus correlation (between)


# read in trace
brian2_trace_files <- list.files(here("data", "predicted_voltage"))

brian2_trace_sets <- brian2_trace_files %>% 
  str_remove(pattern = "-actual_v.rds") %>% 
  str_remove(pattern = "-predict_v.rds") %>% 
  unique()

epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){
  print(i)
  brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])]


  # retrieve data and merge into a single df
  temp <- map(brian2_read_in, function(read_file){
    temp_predict <- readRDS(here("data", "predicted_voltage", read_file))

    if(str_detect(read_file, pattern = "predict_v")){
      #consolidate into one df
      for (j in seq(1, length(temp_predict))){
        temp_predict[[j]]$Sweep <- j
      }
      temp_predict <- do.call(rbind, temp_predict)
    }
    return(temp_predict)
  })

  names(temp) <- brian2_read_in

  temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]]
  temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble()
  temp_a$nrow <- seq(1, nrow(temp_a))
  temp_p$nrow <- seq(1, nrow(temp_p))
  temp_a$Sweep <- as.character(temp_a$Sweep)
  temp_p$Sweep <- as.character(temp_p$Sweep)

  temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow)

  # Show difference between expected/actual
  # i = 1
  # [1] "190808a_0014-epsp-In4-actual_v.rds"  "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome.
  # Here the end of sweep 4 deviates from baseline so the delta is off
  # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline.


  # To get around this in the short term we'll calulate the factors independently for expected/obs
  # downsample_data(temp, len = 10000) %>% 
  # # temp %>% 
  #   mutate(predicted = predicted - offset) %>% 
  #   mutate(difference = In4-predicted) %>% 
  #   ggplot(aes(x = Time))+
  #   geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+
  #   geom_line(aes(y=In4), color = "black")+
  #   geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+
  #   # geom_line(aes(y=difference-offset), color = "purple")+
  #   facet_grid(Sweep~.)

  # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below

  rewrite_names <- F
  if ("In9" %in% names(temp)){
    names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset")
    rewrite_names <- T
  }

  Use.Bin.Size <- 19687

  temp_processed <- temp
  temp_processed$TimeBin <- rep(rep(seq(1, (nrow(temp)/4)/Use.Bin.Size), each = Use.Bin.Size), times = 4)

  # begin to work with data proper
  # Add optional ways to slice the data based on what we expect to occur
  # At the moment we leave this unused
  temp_processed <- temp_processed %>% 
    mutate(
      Period = case_when(
        Time > 0.40  & Time <  4.87  ~ "A",
        Time > 4.87  & Time < 10.74  ~ "B",
        Time > 10.74 & Time < 16.74  ~ "C",
        Time > 16.74 & Time < 19.685 ~ "D"
      ),
      Period_eq = case_when(
        Time > 0.40  & Time <  0.40 + 2.945 ~ "A",
        Time > 4.87  & Time <  4.87 + 2.945 ~ "B",
        Time > 10.74 & Time < 10.74 + 2.945 ~ "C",
        Time > 16.74 & Time < 16.74 + 2.945 ~ "D"
      ),
      Burst = case_when(
        Time > 0.40  & Time <  2    ~ "A",
        Time > 4.87  & Time <  6.17 ~ "B",
        Time > 10.74 & Time < 12.54 ~ "C",
        Time > 16.74 & Time < 18.5  ~ "D"
      ),
      Burst_eq = case_when(
        Time > 0.40  & Time <  2    + 1.30 ~ "A",
        Time > 4.87  & Time <  6.17 + 1.30 ~ "B",
        Time > 10.74 & Time < 12.54 + 1.30 ~ "C",
        Time > 16.74 & Time < 18.5  + 1.30 ~ "D"
      )
    ) %>% 
    # correlation with expected
    group_by(Sweep, TimeBin) %>% 
    mutate(Cor = cor(In4, predicted)) %>% 
    ungroup() %>% 
    gather(Key, mV, c("In4", "predicted")) %>% 
    ## Metrics for whole sweep
    group_by(Key, Sweep) %>% 
    # Min voltage
    mutate(Min.V = min(mV, na.rm = T)) %>%
    # Defining Baseline as average of the lowest 20% of points
    mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>%
    mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>%
    mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>%
    # Max voltage
    mutate(Max.Amplitude = max(mV, na.rm = T)) %>%
    # AUC
    # Using Min.V
    # mutate(AUC = sum(mV - Min.V, na.rm = T)/(max(temp$Time)/Use.Bin.Size)) %>%
    ungroup() 

  temp_processed <- temp_processed[seq(1, nrow(temp_processed), by = Use.Bin.Size), ]



  if (rewrite_names){
    temp_processed <- temp_processed %>% 
      mutate(Key = ifelse(Key == "In4", "In9", Key))
  }


  temp_processed <- temp_processed[, c("Sweep", "TimeBin", "Time", "Cor", "Key", "Min.V", #"Use.in.Base", 
                                       "Max.Amplitude", "AUC")] %>% 
    distinct() #%>% 
    # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", 
    #                                                 "Max.Amplitude", "AUC")) 


  # add annotations to aid merging
  file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-")

  temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1])
  temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf")
  temp_processed$Channel <- as.character(file_name[[1]][3])

  return(temp_processed)
})


epsp_summary <- do.call(rbind, epsp_summary_list)
# epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")]

epsp_summary %>% filter(FileName == "190808a_0014.abf", Sweep == 1) %>% 
  ggplot(aes(x = Time, y = Cor))+
  geom_point()


ggplot(aes(x = Time, y = Cor))+
  scattermore::geom_scattermost(
    as.data.frame(epsp_summary[epsp_summary$Sweep == 1, ]),
    # col=viridisLite::viridis(100, alpha=0.05)[1+99*d[,2]],
    pointsize=1,
    pixels=c(700,700)) +
  ggtitle("geom_scattermost")

Binned by event

# Goal: extract Absolute measures and differential measures for 
# 1. Burst Amplitude (max mV)
# 2. AUC 
# 3. Stimulus correlation (between)


# read in trace
brian2_trace_files <- list.files(here("data", "predicted_voltage"))

brian2_trace_sets <- brian2_trace_files %>% 
  str_remove(pattern = "-actual_v.rds") %>% 
  str_remove(pattern = "-predict_v.rds") %>% 
  unique()

epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){
  print(i)
  brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])]


  # retrieve data and merge into a single df
  temp <- map(brian2_read_in, function(read_file){
    temp_predict <- readRDS(here("data", "predicted_voltage", read_file))

    if(str_detect(read_file, pattern = "predict_v")){
      #consolidate into one df
      for (j in seq(1, length(temp_predict))){
        temp_predict[[j]]$Sweep <- j
      }
      temp_predict <- do.call(rbind, temp_predict)
    }
    return(temp_predict)
  })

  names(temp) <- brian2_read_in

  temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]]
  temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble()
  temp_a$nrow <- seq(1, nrow(temp_a))
  temp_p$nrow <- seq(1, nrow(temp_p))
  temp_a$Sweep <- as.character(temp_a$Sweep)
  temp_p$Sweep <- as.character(temp_p$Sweep)

  temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow)

  # Show difference between expected/actual
  # i = 1
  # [1] "190808a_0014-epsp-In4-actual_v.rds"  "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome.
  # Here the end of sweep 4 deviates from baseline so the delta is off
  # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline.


  # To get around this in the short term we'll calulate the factors independently for expected/obs
  # downsample_data(temp, len = 10000) %>% 
  # # temp %>% 
  #   mutate(predicted = predicted - offset) %>% 
  #   mutate(difference = In4-predicted) %>% 
  #   ggplot(aes(x = Time))+
  #   geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+
  #   geom_line(aes(y=In4), color = "black")+
  #   geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+
  #   # geom_line(aes(y=difference-offset), color = "purple")+
  #   facet_grid(Sweep~.)

  # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below

  rewrite_names <- F
  if ("In9" %in% names(temp)){
    names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset")
    rewrite_names <- T
  }

  # begin to work with data proper
  # Add optional ways to slice the data based on what we expect to occur
  # At the moment we leave this unused
  temp_processed <- temp %>% 
    mutate(
      Segment = case_when(
        Time > 0.40  & Time < 2      ~ "A_B",
        Time > 2     & Time < 4.87   ~ "A_I",
        Time > 4.87  & Time < 6.17   ~ "B_B",
        Time > 6.17  & Time < 10.74  ~ "B_I",        
        Time > 10.74 & Time < 12.54  ~ "C_B",
        Time > 12.54 & Time < 16.74  ~ "C_I",
        Time > 16.74 & Time < 18.5   ~ "D_B",
        Time > 18.5  & Time < 19.685 ~ "D_I"
      )
    ) %>% 
    # correlation with expected
    group_by(Sweep, Segment) %>% 
    mutate(Cor = cor(In4, predicted)) %>% 
    ungroup() %>% 
    gather(Key, mV, c("In4", "predicted")) %>% 
    ## Metrics for whole sweep
    group_by(Key, Sweep) %>% 
    # Min voltage
    mutate(Min.V = min(mV, na.rm = T)) %>%
    # Defining Baseline as average of the lowest 20% of points
    mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>%
    mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>%
    mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>%
    # Max voltage
    mutate(Max.Amplitude = max(mV, na.rm = T)) %>%
    # AUC
    # Using Min.V
    # mutate(AUC = sum(mV - Min.V, na.rm = T)/19.68685) %>%
    ungroup() 


  if (rewrite_names){
    temp_processed <- temp_processed %>% 
      mutate(Key = ifelse(Key == "In4", "In9", Key))
  }


  temp_processed <- temp_processed[, c("Sweep", "Segment", "Cor", "Key", "Min.V", #"Use.in.Base", 
                                       "Max.Amplitude")] %>% 
    distinct() #%>% 
    # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", 
    #                                                 "Max.Amplitude", "AUC")) 


  # add annotations to aid merging
  file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-")

  temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1])
  temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf")
  temp_processed$Channel <- as.character(file_name[[1]][3])

  return(temp_processed)
})


epsp_summary <- do.call(rbind, epsp_summary_list)
# epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")]



epsp_summary <- left_join(epsp_summary, metadata) %>% 
  select(-FileName, -Type) %>% 
  mutate(Cell = case_when(Channel == "In4" ~ In4,
                          Channel == "In9" ~ In9)) %>% 
  mutate(Cell = as.character(Cell))  %>% 
  mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs",
                                  Key %in% c("predicted")  ~ "Sim")) %>% 
  pivot_wider(names_from = "Key", values_from  = c("Min.V", "Max.Amplitude")) %>% 
  mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% 
  mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim) %>% 
  group_by(Condition, Experiment, Cell, Segment, Sweep) %>% 
  select(-Min.V_Sim) %>% 
  mutate_at(c("Cor", 
              "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", 
              "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta"), median, na.rm = T) %>% 
  distinct() %>% 
  ungroup()


epsp_summary %>% 
  ggplot(aes(x = Segment, y = Cor, group = Sweep, color = Sweep))+
  geom_point(position = position_jitter(width = 0.1), alpha = 0.3)+
  geom_smooth(se = F)+
  facet_grid(.~Condition)+
  ggsci::scale_color_aaas()

epsp_summary %>% 
  filter(!is.na(Segment)) %>% 
  ggplot(aes(x = Cor, y = Sweep, fill = factor(stat(quantile))))+#, group = Sweep, color = Sweep))+
  # ggridges::geom_density_ridges()+
  ggridges::stat_density_ridges(
               geom = "density_ridges_gradient", calc_ecdf = TRUE,
               quantiles = 4, quantile_lines = TRUE
             ) +
               scale_fill_viridis_d(name = "Quartiles")+
  coord_flip()+
  facet_grid(Condition~Segment)+
  theme(legend.position = "")



## Here's the stimulus used in the protocol ====
# sweep duration should be 19.687 seconds
epsp_stim <- readABF_as_matrix(here("inst", "extdata", "epsp_stim_files", "08 current injection.abf"),
                  channels = "Axo1I2")
epsp_stim <- as_tibble(epsp_stim) %>%
  mutate(Time = Time - min(Time, na.rm = T)) %>%
  rename(Stim = Axo1I2)



epsp_stim <- mutate(epsp_stim,
Stim = Stim - min(Stim, na.rm = T),
         Stim = (Stim / 11.9034)-1) 


epsp_summary %>% 
  filter(!is.na(Segment)) %>% 
  mutate(x = case_when(
    Segment == "A_B" ~ 0.40,
    Segment == "A_I" ~ 2,
    Segment == "B_B" ~ 4.87,
    Segment == "B_I" ~ 6.17,
    Segment == "C_B" ~ 10.74,
    Segment == "C_I" ~ 12.54,
    Segment == "D_B" ~ 16.74,
    Segment == "D_I" ~ 18.5
  ),
  xend = case_when(
    Segment == "A_B" ~ 2,
    Segment == "A_I" ~ 4.87,
    Segment == "B_B" ~ 6.17,
    Segment == "B_I" ~ 10.74,
    Segment == "C_B" ~ 12.54,
    Segment == "C_I" ~ 16.74,
    Segment == "D_B" ~ 18.5,
    Segment == "D_I" ~ 19.687
  )) %>% 
  ggplot()+
  geom_segment(aes(x = x, y = Cor, xend = xend, yend = Cor, colour = Cor))+
  facet_grid(Condition~Sweep)+
  geom_line(data = epsp_stim, aes(Time, Stim))+
  theme(legend.position = "")+
  scale_color_viridis_c(direction = -1, option = "B")
#   option  
# A character string indicating the colormap option to use. Four options are available: "magma" (or "A"), "inferno" (or "B"), "plasma" (or "C"), "viridis" (or "D", the default option) and "cividis" (or "E").






epsp_stim %>% 
  mutate(Stim = Stim - min(Stim, na.rm = T),
         Stim = (Stim / 11.9034)-1) %>%
  ggplot(aes(Time, Stim))+
  geom_line()

# 
# 
# shading_annotations <- data.frame(
#   starts = c(0.40,
#              4.87,
#              10.74,
#              16.74),
#   next_start = c(4.87,
#                  10.74,
#                  16.74,
#                  19.685),
#   equal_len = c(0.40,
#                 4.87,
#                 10.74,
#                 16.74) + 2.945,
#   on_end = c(2,
#              6.17,
#              12.54, 
#              18.5),
#   equal_on = c(0.40,
#                4.87,
#                10.74,
#                16.74) + 1.30
# )
# 
# annotation_df <- data.frame(x = c(0.5, 0.5, 2.1, 1.8),
#                             y = seq(0.25, -1.25, length.out = 4),
#                             text = c("Period",
#                                      "Minumum Period",
#                                      "Burst", 
#                                      "Min Burst"))
# 
# segmentation_demo_plt <- ggplot()+
#   geom_vline(xintercept = c(shading_annotations$starts, shading_annotations$equal_len, shading_annotations$next_start, shading_annotations$equal_on), color = "gray")+
#   geom_path(data = epsp_stim, aes(Time, Stim+1.5))+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = next_start, ymin = 0, ymax = 0.5), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[1], color = RColorBrewer::brewer.pal(3, "Set1")[1])+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = equal_len, ymin = -0.5, ymax = 0), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[1], color = RColorBrewer::brewer.pal(3, "Set1")[1])+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = on_end, ymin = -1, ymax = -0.5), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[2], color = RColorBrewer::brewer.pal(3, "Set1")[2])+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = equal_on, ymin = -1.5, ymax = -1), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[2], color = RColorBrewer::brewer.pal(3, "Set1")[2])+
#   theme_classic()+
#   # geom_vline(xintercept = 12.54)+
#   geom_text(data = annotation_df[1:2, ], aes(x=x, y=y, label = text), hjust = "inward", color = RColorBrewer::brewer.pal(3, "Set1")[1])+
#   geom_text(data = annotation_df[3:4, ], aes(x=x, y=y, label = text), hjust = "inward", color = RColorBrewer::brewer.pal(3, "Set1")[2])+
#   ylim(-1.5, 9.5)+
#   labs(title = "Ways to Segement EPSP Stim")
# 
# 
# segmentation_demo_plt

Rolling Cor

# Goal: extract Absolute measures and differential measures for 
# 1. Burst Amplitude (max mV)
# 2. AUC 
# 3. Stimulus correlation (between)


# read in trace
brian2_trace_files <- list.files(here("data", "predicted_voltage"))

brian2_trace_sets <- brian2_trace_files %>% 
  str_remove(pattern = "-actual_v.rds") %>% 
  str_remove(pattern = "-predict_v.rds") %>% 
  unique()

epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){
  print(i)
  brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])]


  # retrieve data and merge into a single df
  temp <- map(brian2_read_in, function(read_file){
    temp_predict <- readRDS(here("data", "predicted_voltage", read_file))

    if(str_detect(read_file, pattern = "predict_v")){
      #consolidate into one df
      for (j in seq(1, length(temp_predict))){
        temp_predict[[j]]$Sweep <- j
      }
      temp_predict <- do.call(rbind, temp_predict)
    }
    return(temp_predict)
  })

  names(temp) <- brian2_read_in

  temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]]
  temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble()
  temp_a$nrow <- seq(1, nrow(temp_a))
  temp_p$nrow <- seq(1, nrow(temp_p))
  temp_a$Sweep <- as.character(temp_a$Sweep)
  temp_p$Sweep <- as.character(temp_p$Sweep)

  temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow)

  # Show difference between expected/actual
  # i = 1
  # [1] "190808a_0014-epsp-In4-actual_v.rds"  "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome.
  # Here the end of sweep 4 deviates from baseline so the delta is off
  # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline.


  # To get around this in the short term we'll calulate the factors independently for expected/obs
  # downsample_data(temp, len = 10000) %>% 
  # # temp %>% 
  #   mutate(predicted = predicted - offset) %>% 
  #   mutate(difference = In4-predicted) %>% 
  #   ggplot(aes(x = Time))+
  #   geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+
  #   geom_line(aes(y=In4), color = "black")+
  #   geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+
  #   # geom_line(aes(y=difference-offset), color = "purple")+
  #   facet_grid(Sweep~.)

  # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below

  rewrite_names <- F
  if ("In9" %in% names(temp)){
    names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset")
    rewrite_names <- T
  }

  # begin to work with data proper
  # Add optional ways to slice the data based on what we expect to occur
  # At the moment we leave this unused
  temp_processed <- temp %>% 
    mutate(
      Period = case_when(
        Time > 0.40  & Time <  4.87  ~ "A",
        Time > 4.87  & Time < 10.74  ~ "B",
        Time > 10.74 & Time < 16.74  ~ "C",
        Time > 16.74 & Time < 19.685 ~ "D"
      ),
      Period_eq = case_when(
        Time > 0.40  & Time <  0.40 + 2.945 ~ "A",
        Time > 4.87  & Time <  4.87 + 2.945 ~ "B",
        Time > 10.74 & Time < 10.74 + 2.945 ~ "C",
        Time > 16.74 & Time < 16.74 + 2.945 ~ "D"
      ),
      Burst = case_when(
        Time > 0.40  & Time <  2    ~ "A",
        Time > 4.87  & Time <  6.17 ~ "B",
        Time > 10.74 & Time < 12.54 ~ "C",
        Time > 16.74 & Time < 18.5  ~ "D"
      ),
      Burst_eq = case_when(
        Time > 0.40  & Time <  2    + 1.30 ~ "A",
        Time > 4.87  & Time <  6.17 + 1.30 ~ "B",
        Time > 10.74 & Time < 12.54 + 1.30 ~ "C",
        Time > 16.74 & Time < 18.5  + 1.30 ~ "D"
      )
    ) %>% 
    # correlation with expected
    group_by(Sweep) %>% 
    mutate(Cor = cor(In4, predicted)) %>% 
    ungroup() %>% 
    gather(Key, mV, c("In4", "predicted")) %>% 
    ## Metrics for whole sweep
    group_by(Key, Sweep) %>% 
    # Min voltage
    mutate(Min.V = min(mV, na.rm = T)) %>%
    # Defining Baseline as average of the lowest 20% of points
    mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>%
    mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>%
    mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>%
    # Max voltage
    mutate(Max.Amplitude = max(mV, na.rm = T)) %>%
    # AUC
    # Using Min.V
    mutate(AUC = sum(mV - Min.V, na.rm = T)/19.68685) %>%
    ungroup() 


  if (rewrite_names){
    temp_processed <- temp_processed %>% 
      mutate(Key = ifelse(Key == "In4", "In9", Key))
  }


  temp_processed <- temp_processed[, c("Sweep", "Cor", "Key", "Min.V", #"Use.in.Base", 
                                       "Max.Amplitude", "AUC")] %>% 
    distinct() #%>% 
    # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", 
    #                                                 "Max.Amplitude", "AUC")) 


  # add annotations to aid merging
  file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-")

  temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1])
  temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf")
  temp_processed$Channel <- as.character(file_name[[1]][3])

  return(temp_processed)
})


epsp_summary <- do.call(rbind, epsp_summary_list)
epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")]

Secondary Questions

scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Baseline"), shal, Ia.0)), 
                  X = "shal", 
                  Y = "Ia.0")

scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Compensated"), shal, Ia.0)), 
                  X = "shal", 
                  Y = "Ia.0")

scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Delayed"), shal, Ia.0)), 
                  X = "shal", 
                  Y = "Ia.0")
# ref ----
# c(
#   ## Ion Channels ====
#   #Kv
#   "Shab", "Shaker", "Shal", "Shaw1", "Shaw2",
#   "KCNQ1", "KCNQ2", 
#   "KCNH1", "KCNH2", "KCNH3",
#   #K
#   "BKKCa", "SKKCa", 
#   "KCNT1", "IRK", "KCNK1", "KCNK2", 
#   #Ca
#   "CaV1", "CaV2", "CaV3", 
#   #Na
#   "CbNaV", 
#   "NALCN", 
#   #Hyp/CyclNuc Activated/Gated
#   "IH", 
#   ## CNGs
#   
#   #TRPs
#   "TRP-A1", "TRP-A-like", "TRP-M1", "TRP-M3", "TRP-M-like", 
#   ##-V5, -V6, -Pyrexia
#   
#   
#   #Biogenic amine activated
#   "HisCL", 
#   
#   ## Receptors Biogenic Amines ====
#   #octopamine
#   
#   #dopamine
#   "DAR1A", "DAR2",  "Dopa-1Br",
#   #serotonin
#   "HTR1A", "HTR2", "HTR7", "5HTr-1Br",  
#   
#   #histamine
#   "His-1r", "His-2r", "His-3r", 
#   #gaba
#   "GABAB-R1",
#   "LCCH3r",
#   "RDLr", 
#   "mGABA2", "mGABA3", 
#   
#   ## Receptors Glutamate/Acetylcholine ====
#   #metabotropic glutamate receptors
#   "mGluR1", "mGluR2", "mGluR4", "mGluR5", "mGluR7",
#   #kainate-like receptors
#   "Kainate-1A", "Kainate-1B", "Kainate-2A", "Kainate-2B", "Kainate-2C",
#   #NMDA-like receptors
#   "NMDA-1A", "NMDA-1B", "NMDA-2A", "NMDA-2B", "NMDA-2-like", 
#   #glutamate-gated chloride channel
#   "GluCl",
#   #acetylcholine receptors
#   "mACHrA", "mACHrB", 
#   
#   # Crustacean cardioactive peptide receptor ===
#   "CCAPr", 
#   
#   #Acetylcholinesterase
#   "ACHE", 
#   # Actylcholine catalyst
#   "ChAT", 
#   
#   #vesicular ach transporter
#   "vAChT", 
#   #vesicular glut transporter
#   "vGluT", 
#   
#   ## Innexins ====
#   "INX1", "INX2", "INX3", "INX4", "INX5"
# )
load(here("data", "ionic.rds"))
load(here("data", "tevc.rds"))
load(here("data", "tecc.rds"))
load(here("data", "mrna.rds"))

mRNAInfo <- readxl::read_excel(here("inst", "extdata", "mRNAInfo.xlsx")) # tables from Northcutt 2016 and annotations

ionic ~ mrna?

Make a joint dataset

M <- left_join(ionic, mrna)

M <- M[!is.na(M$x18s), ]

MeasuredmRNA <- names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))]

# temp.plts <- map(seq_along(MeasuredmRNA), function(i){
#   ggplot(M, aes_string(MeasuredmRNA[i], y = "Ihtk.0", color = "Time"))+
#   geom_smooth(method = "lm", se = F)+
#   geom_point()
# })
# 
# cowplot::plot_grid(plotlist = temp.plts)
# 
# mrna

Clean up data cols

PercentNotNA <- map(MeasuredmRNA, function(i){
  mean(!is.na(M[[i]]))
}) %>% unlist()

ThresholdPercentNotNA <- 0.6
MeasuredmRNA <- MeasuredmRNA[PercentNotNA >= ThresholdPercentNotNA] # use in map call
MeasuredKCurrents <- c("Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope") # use in map call

How well does this work for currents we know something about?

Ia ~ shal

# TEXTING WITH DAVE
scatter_w_wo_outliers(temp = filter(M, Time == "0h"), 
                  X = "shal", 
                  Y = "Ia.0")

# broom::tidy(lm(Ia.0 ~ shal, temp))
# broom::tidy(lm(Ia.0 ~ shal, temp[temp$flag, ]))
library(brms)
# Loading required package: StanHeaders
# rstan (Version 2.19.3, GitRev: 2e1f913d3ca3)
# For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores())
# To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)
# For improved execution time, we recommend calling
Sys.setenv(LOCAL_CPPFLAGS = '-march=corei7 -mtune=corei7')
# although this causes Stan to throw an error on a few processors.


temp <- temp[!is.na(temp$Ia.0), ]

tic <- Sys.time()
fm1 <- lm(Ia.0 ~ shal, temp)
toc1 <- Sys.time() - tic

tic <- Sys.time()
fm2 <- brm(Ia.0 ~ shal, data  = temp, family = gaussian())
toc2 <- Sys.time() - tic

tic <- Sys.time()
fm3 <- brm(Ia.0 ~ shal, data  = temp, family = student())
toc3 <- Sys.time() - tic



toc1
toc2
toc3

t1 <- broom::tidy(fm1)
t2 <- broom::tidy(fm2)
t3 <- broom::tidy(fm3)



bayes_ex1 <- 
ggplot(temp, aes(x = shal, y = Ia.0))+
  geom_point()+
  geom_abline(slope = as.numeric(t1[2, "estimate"]), intercept = as.numeric(t1[1, "estimate"]), size = 1, color = "gray")+
  geom_abline(slope = as.numeric(t2[2, "estimate"]), intercept = as.numeric(t2[1, "estimate"]), size = 1, color = "cornflowerblue")+
  geom_abline(slope = as.numeric(t3[2, "estimate"]), intercept = as.numeric(t3[1, "estimate"]), size = 1, color = "firebrick")+
  theme_bw()

Ia ~ shaker

scatter_w_wo_outliers(temp = filter(M, Time == "0h"), 
                  X = "shaker", 
                  Y = "Ia.0")

Model comparison shal best predicts Ia

temp <- filter(M, Time == "0h") 
# What's the best model?
library(AICcmodavg)
# no rm points
aictab(cand.set = list(
  lm(Ia.0 ~ shal, temp),
  lm(Ia.0 ~ shaker, temp),
  lm(Ia.0 ~ shal+shaker, temp),
  lm(Ia.0 ~ shal*shaker, temp)
))

# rm points
temp_no_outliers <- temp %>% 
  mutate(shal = ifelse(win_x_iqr(shal), shal, NA)) %>% 
  mutate(shaker = ifelse(win_x_iqr(shaker), shaker, NA)) %>% 
  mutate(Ia.0 = ifelse(win_x_iqr(Ia.0), Ia.0, NA))


aictab(cand.set = list(
  lm(Ia.0 ~ shal, temp_no_outliers),
  lm(Ia.0 ~ shaker, temp_no_outliers),
  lm(Ia.0 ~ shal+shaker, temp_no_outliers),
  lm(Ia.0 ~ shal*shaker, temp_no_outliers)
))

IHTK ~ bkkca

scatter_w_wo_outliers(temp = filter(M, Time == "0h"), 
                  X = "bkkca", 
                  Y = "Ihtk.0")

How might we exclude outliers? Cooks Distance? Iterated Cooks?

temp <- filter(M, Time == "Baseline") %>%
  mutate(flag = ifelse(bkkca < 3000, T, F))

fm <- lm(Ihtk.0 ~ bkkca, temp)

temp$cooksd <- cooks.distance(fm)

ggplot(temp, aes_string("bkkca", "Ihtk.0", color = "flag"))+
  geom_smooth(method = lm, se = F, color = "gray")+
  geom_smooth(data = temp[temp$flag == T, ], method = lm, se = F, fullrange = T)+
  geom_point(aes(color = cooksd < (mean(cooks.distance(fm))*2.2) ))+
  scale_color_manual(values = c("gray", "black"))+
  theme_bw()+
  theme(legend.position = "")

# broom::tidy(lm(Ihtk.0 ~ bkkca, temp))
# broom::tidy(lm(Ihtk.0 ~ bkkca, temp[temp$flag, ]))
broom::tidy(lm(Ihtk.0 ~ bkkca, temp[temp$cooksd < (mean(cooks.distance(fm))*4) , ]))
# temp %>% filter(flag == F)

Checking the use of iterated Cook's distance Using cooks is going to be challenging. It'll be hard to choose a stopping criteria as

flag_cooks_outliers <- function(
  df = filter(M, Time == "Baseline"),
  mod = "Ihtk.0 ~ bkkca",
  multiplier = 4,
  center = "mean"){

  fm <- lm(as.formula(mod), df)

  cd <- cooks.distance(fm)

  if (center == "mean"){
    cd_outlier <- cd > mean(cd)*multiplier 
  } else if (center == "median") {
    cd_outlier <- cd > median(cd)*multiplier 
  } else {
    warning("center must be mean or median")
  }

  return(cd_outlier)
}

# run 10 iterations of cooks distance. When a cell is identified as an outlier, drop it and note when it was deamed an outlier

temp <- M %>% dplyr::select(Ihtk.0, bkkca, Time) %>% filter(Time == "Baseline")
temp$dropout <- NA

for (i in 1:10){
  current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ],
                    mod = "Ihtk.0 ~ bkkca",
                    multiplier = 4,
                    center = "mean")

  temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i

}

ggplot(temp, aes(x = bkkca, y = Ihtk.0, color = as.factor(dropout)))+
  geom_point()

bkkca_cook_outliers <- temp$dropout
temp %>% group_by(dropout) %>% tally()
# # A tibble: 5 x 2
#   dropout     n
#     <int> <int>
# 1       1     1
# 2       2     1
# 3       3     1
# 4       4     1
# 5      NA    12





temp <- M  %>% filter(Time == "Baseline")
temp$dropout <- NA
for (i in 1:10){
  current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ],
                    mod = "Ia.0 ~ shal",
                    multiplier = 4,
                    center = "mean")
  temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i
}

temp %>% group_by(dropout) %>% tally()


temp <- M  %>% filter(Time == "Baseline")
temp$dropout <- NA
for (i in 1:10){
  current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ],
                    mod = "Ia.0 ~ shaker",
                    multiplier = 4,
                    center = "mean")
  temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i
}

temp %>% group_by(dropout) %>% tally()

contrast cooks with a fit t distribution.

# temp <- filter(M, Time == "Baseline")
# 
# tic <- Sys.time()
# fm0 <- lm(Ihtk.0 ~ bkkca, temp[temp$bkkca < 3000, ])
# toc1 <- Sys.time() - tic
# 
# tic <- Sys.time()
# fm1 <- lm(Ihtk.0 ~ bkkca, temp)
# toc1 <- Sys.time() - tic
# 
# tic <- Sys.time()
# fm2 <- brm(Ihtk.0 ~ bkkca, data  = temp, family = gaussian())
# toc2 <- Sys.time() - tic
# 
# tic <- Sys.time()
# fm3 <- brm(Ihtk.0 ~ bkkca, data  = temp, family = student())
# toc3 <- Sys.time() - tic
# 
# 
# 
# toc1
# toc2
# toc3
# 
# t0 <- broom::tidy(fm0)
# t1 <- broom::tidy(fm1)
# t2 <- broom::tidy(fm2)
# t3 <- broom::tidy(fm3)
# 
# 
# temp$dropout <- bkkca_cook_outliers
# # temp[is.na(temp$dropout), "dropout"] <- 11
# 
# alpha_param <-  0.8
# # bayes_ex1 <- 
# ggplot(temp, aes(x = bkkca, y = Ihtk.0, color = as.factor(dropout)))+
#   
#   geom_abline(slope = as.numeric(t0[2, "estimate"]), intercept = as.numeric(t0[1, "estimate"]), size = 1, alpha = alpha_param, color = "black")+
#   geom_abline(slope = as.numeric(t1[2, "estimate"]), intercept = as.numeric(t1[1, "estimate"]), size = 1, alpha = alpha_param, color = "black")+
#   geom_abline(slope = as.numeric(t2[2, "estimate"]), intercept = as.numeric(t2[1, "estimate"]), size = 1, alpha = alpha_param, color = "cornflowerblue")+
#   geom_abline(slope = as.numeric(t3[2, "estimate"]), intercept = as.numeric(t3[1, "estimate"]), size = 1, alpha = alpha_param, color = "firebrick")+
#   
#   geom_point(size = 2, color = "white")+
#   geom_point(size = 2)+
#   geom_point(size = 2, color = "black", shape = 1)+
#   
#   theme_bw()+
#   theme(legend.position = "bottom")+
#   scale_color_brewer(type = "div", palette = "PuOr")

IHTK ~ skkca

scatter_w_wo_outliers(temp = filter(M, Time == "Baseline"), 
                  X = "skkca", 
                  Y = "Ihtk.0")

How do K channels predict IKv intercepts?

# TODO redo this using the info in mRNAInfo 
# TODO use next block as inspiration.

# filter(M, Time == "Baseline") %>% 
#   select(Ihtk.0, Ihtk.Slope, Ia.0, Ia.Slope, 
#          #Kv
#          shab, shaker, shal, shaw1, shaw2,
#          kcnq1, kcnq2,
#          kcnh1, kcnh2, kcnh3,
#          #k
#          bkkca, skkca,
#          kcnt1, irk, kcnk1, kcnk2
#   ) %>% 
#   pivot_longer(cols = c(
#     #kv
#     "shab", "shaker", "shal", "shaw1", "shaw2", 
#     "kcnq1", "kcnq2",
#     "kcnh1", "kcnh2", "kcnh3", 
#     #k
#     "bkkca", "skkca", 
#     "kcnt1", "irk", "kcnk1", "kcnk2"
#   ), names_to = "mRNA", values_to = "Count") %>% 
#   pivot_longer(cols = c("Ihtk.0", 
#                         # "Ihtk.Slope", 
#                         "Ia.0"#, 
#                         # "Ia.Slope"
#                         ),
#                names_to = "Key", values_to = "Value") %>% 
#   ggplot(aes(x = Count, y = Value))+
#   geom_smooth(method = lm, se = F)+
#   geom_point()+
#   # facet_wrap(Key ~ mRNA, nrow = 4, scales = "free")
#   facet_grid(Key ~ mRNA)+
#   theme_bw()
#   
# 
# PlotList <- 
# map(c(
#   #Kv
#   "Shab", "Shaker", "Shal", "Shaw1", "Shaw2", 
#   "Kcnq1", "Kcnq2",
#   "Kcnh1", "Kcnh2", "Kcnh3", 
#   #K
#   "BkkCa", "SkkCa", 
#   "Kcnt1", "Irk", "Kcnk1", "Kcnk2"
# ), function(X){
#   map(c("Ihtk.0", 
#         # "Ihtk.Slope", 
#         "Ia.0"#, 
#         # "Ia.Slope"
#   ), function(Y){
#     ggplot(filter(M, Time == "Baseline"), aes_string(X, Y))+
#       geom_smooth(method = lm, se = F)+
#       geom_point()+
#       theme_bw()
#   })
# })
#   
# PlotList <- transpose(PlotList)  
# 
# library(cowplot)
# cowplot::plot_grid(plotlist = PlotList[[1]])
# 
# cowplot::plot_grid(plotlist = PlotList[[2]])

Linear models -- what predicts ionics?

This falls under the auspices of EDA.

KCurrent = "Ihtk.0"
mRNA = "BkkCa"

# Make outlier free version of the baseline data

M_baseline_outlier_free <- filter(M, Time == "Baseline")

for (NAME in names(M)[!(names(M) %in% c("Condition" , "Cell", "Experiment", "Source", "Pharm", "Time", "Sample"))]){
  M_baseline_outlier_free[[NAME]] <- ifelse(win_x_iqr(M_baseline_outlier_free[[NAME]]), M_baseline_outlier_free[[NAME]], NA)
}



# Which have relationships at baseline?
fm.list <- map(MeasuredKCurrents, function(KCurrent){
  map(MeasuredmRNA, function(mRNA){
    lm(as.formula(paste0(KCurrent, " ~ ", mRNA)), data = M_baseline_outlier_free)
  })
})


p.list <- map(fm.list, function(ListLvl1){
  map(ListLvl1, function(ListLvl2){
    broom::tidy(ListLvl2)[2, "p.value"] # non-intercept p
  }) %>% unlist()
})

p.df <- do.call(cbind, p.list) %>% as.data.frame()

names(p.df) <- MeasuredKCurrents
p.df$mRNA <- MeasuredmRNA





# Uncorrected
p.df.long <- p.df %>% 
  gather(iK, pValue, all_of(MeasuredKCurrents))
  # dplyr::filter(iK %in% c("Ia.0", "Ihtk.0")) %>% 
  # mutate(pAdj = p.adjust(pValue, method = "fdr")) %>% 

highlight.text.temp.df <- p.df.long %>% 
  mutate(highlight = ifelse(pValue <= 0.05, 1, 0)) %>% 
  select(-pValue) %>% pivot_wider(names_from = iK, values_from = highlight) %>% 
  mutate(highlight = ifelse(Ihtk.0 == 0, 
                     ifelse(Ihtk.Slope == 0,
                     ifelse(Ia.0 == 0, 
                     ifelse(Ia.Slope == 0, 
                            0, 1), 1), 1), 1) ) %>% 
  mutate(highlight = ifelse(highlight == 1, "firebrick", "black")) %>% 
  arrange(mRNA)

plt.fm <-   
p.df.long %>% 
  ggplot(aes(x = mRNA, color = pValue <= 0.05))+
  geom_hline(yintercept = 0.95, color = "gray", linetype = "dashed")+
  # geom_point()+
  # geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2, color = "gray", alpha = 0.0001)+
  geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2)+

  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pValue), size = 2)+
  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pAdj), size = 2)+
  facet_grid(iK~.)+
  scale_color_manual(values = c("black", "firebrick"))+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = highlight.text.temp.df[["highlight"]]),
        legend.position = "")

# Corrected with fdr
plt.fm.fdr <- 
p.df %>% 
  gather(iK, pValue, all_of(MeasuredKCurrents)) %>% 
  # dplyr::filter(iK %in% c("Ia.0", "Ihtk.0")) %>% 
  mutate(pAdj = p.adjust(pValue, method = "fdr")) %>% 
  ggplot(aes(x = mRNA, color = pAdj <= 0.05))+
  geom_hline(yintercept = 0.95, color = "gray", linetype = "dashed")+
  # geom_point()+
  geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2, color = "gray", alpha = 0.0001)+
  geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pAdj), size = 2, color = "black")+

  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pValue), size = 2)+
  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pAdj), size = 2)+
  facet_grid(iK~.)+
  scale_color_manual(values = c("black", "firebrick"))+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        legend.position = "")

PCA what loads with ionics?

library(factoextra)
# library(missMDA) # for imputePCA 
library("FactoMineR") #PCA

MNumericOnly <- M_baseline_outlier_free
MNumericOnly <- MNumericOnly[, c(MeasuredKCurrents, MeasuredmRNA)]

ResPCA <- PCA(MNumericOnly, scale.unit = T)

get_eig(ResPCA)

fviz_screeplot(ResPCA, addlabels = T)

fviz_pca_biplot(ResPCA, repel = T)
# Ihtk.Slope, Ihtk.0, Ia.0, Ia.Slope, and Kainate1A are all loading together (roughly)

fviz_pca_var(ResPCA, 
             col.var = "contrib",
             # gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Avoid text overlapping
             )

What correlates with ionics?

library(corrr)

MCorrr <- MNumericOnly %>% 
  correlate()

temp <- MCorrr %>% 
  as_tibble() %>% 
  select(rowname, Ihtk.0, Ihtk.Slope, Ia.0, Ia.Slope) %>% 
  gather(colname, Cor, c("Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope")) 

# temp$rowname <- as.factor(temp$rowname) 
#TODO fix ordering


temp %>% 
  ggplot(aes(rowname, Cor, color = Cor))+
  geom_segment(aes(x = rowname, xend = rowname, y = 0, yend = Cor))+
  geom_point()+
  geom_point(shape = 1, color = "black")+
  geom_hline(yintercept = 0)+
  ylim(-1,1)+
  facet_grid(colname~.)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  scale_color_gradient2(low = RColorBrewer::brewer.pal(7, "PuOr")[1],
                         mid = RColorBrewer::brewer.pal(7, "PuOr")[4],
                         high = RColorBrewer::brewer.pal(7, "PuOr")[7],
                         midpoint = 0, 
                         na.value = "#00000000")



# 
# 
# 
# # rplot(MCorrr)
# 
# # You have to zoom or the chords don't show up. ¯\_(ツ)_/¯
# # network_plot(MCorrr, min_cor = .7)
# 
# 
library(ggcorrplot)
# 
MCorr <- cor(MNumericOnly, use = "pairwise.complete.obs")

pMat <- cor_pmat(MNumericOnly)
# 
library(RColorBrewer)
# ggcorrplot(MCorr, 
#            colors = RColorBrewer::brewer.pal(7, "PuOr")[c(1, 4 ,7)]#,
#            # p.mat = pMat
#            # lab = T
#            )
# 
# # library(GGally)
# # GGally::ggscatmat(MNumericOnly)
# GGally::ggcorr(MCorr,
#        # nbreaks = 5,
#        low = RColorBrewer::brewer.pal(7, "PuOr")[1],
#                          mid = RColorBrewer::brewer.pal(7, "PuOr")[4],
#                          high = RColorBrewer::brewer.pal(7, "PuOr")[7])


library(corrplot)
corrplot(MCorr, method = "square", type = "upper", tl.col = "black", order = "hclust", col = brewer.pal(n = 9, name = "PuOr"),
         p.mat = pMat,
         sig.level = 0.01, insig = "blank")


danielkick/mRNA24hLC documentation built on Feb. 6, 2024, 2:18 a.m.