In danielkick/mRNA24hLC:

# below: yaml for html without distill
# output:
#   html_document:
#     smart: no
#     theme: flatly
#     toc: true
#     toc_float: true

knitr::opts_chunk$set(echo = FALSE) # run code, echo = False will only show output
# knitr::opts_chunk$set(include = FALSE) # show nothing

library(here)
library(readxl)

library(plotly) # loaded first so that it doesn't become the default filter
library(tidyverse)
library(car)
library(FSA) # for Dunn's Test

library(factoextra)
library(FactoMineR) #PCA
library(umap) # for UMAP
library(pvclust)
library(dendextend) # for color_labels
library(corrr)

library(ggthemes)
library(cowplot)
library(patchwork)
library(rgl) # for vis_pca_3D
library(ggnewscale) # for newscale so that mk_hclust
# library(scales) # for overiding scientific notation on dendrogram y axis 


library(ggpmisc) # for labeling through stat_poly_eq
theme_set(ggplot2::theme_minimal())
devtools::load_all()

Data preparation

Load data and create a uid

## Load in data ====
load(here("data", "ionic.rds"))
load(here("data", "tevc.rds"))
load(here("data", "tecc.rds"))
load(here("data", "mrna.rds"))
epsp <- read.csv(here("data", "epsp.csv")) %>% as_tibble()
metadata <- read_excel(here("inst", "extdata", "ManualDataEphys02.xlsx"),
                       sheet = "MetadataEphys")  
# tables from Northcutt 2016 and annotations
mRNAInfo <- readxl::read_excel(here("inst", "extdata", "mRNAInfo.xlsx"))

Convert condition labels

# ionic$Condition
# # tevc
# # tecc
# mrna$Time
# # epsp
# metadata$Condition
# # mRNAInfo

ionic <- ionic %>% 
  mutate(Condition = case_when(Condition == "Baseline"    ~ "0h",
                               Condition == "Compensated" ~ "1h",
                               Condition == "Delayed" ~ "24h"
  ))

mrna <- mrna %>% 
  mutate(Time = case_when(Time == "Baseline"    ~ "0h",
                          Time == "Compensated" ~ "1h",
                          Time == "Delayed" ~ "24h"
  ))

metadata <- metadata %>% 
  mutate(Condition = case_when(Condition == "Baseline"    ~ "0h",
                               Condition == "Compensated" ~ "1h",
                               Condition == "Delayed" ~ "24h"
  ))

## Add Experiment/Cell to metadata ====
metadata <- metadata %>% 
  mutate(ABFType = case_when(Type %in% c("tevc") ~ "gjvc",
                             # Type %in% c("igj","igj_50", "gj") ~ "gjcc",
                             # Type %in% c("epsp", "epsp_50") ~ "epsp",
                             Type %in% c("fi_50", "fi") ~ "fi")) %>% 
  # make expected file names
  mutate(num_zeros = str_length(Recording)) %>% 
  mutate(num_zeros = ifelse(num_zeros == 1, "000", 
                            ifelse(num_zeros == 2, "00", 
                                   ifelse(num_zeros == 3, "0", NA)))) %>% 
  mutate(FileName = paste0(Experiment, "_", num_zeros, as.character(Recording), ".abf" )) %>% 
  select(-num_zeros)



#TODO there are only two Type == "gj". Replace these with igj|igj_50
#   Experiment  Page Recording Type    TEA Condition Include Notes   In4   In9 ABFType FileName       
#   <chr>      <dbl>     <dbl> <chr> <dbl> <chr>     <lgl>   <lgl> <dbl> <dbl> <chr>   <chr>          
# 1 190906         0        32 gj       24 Delayed   NA      NA        5    NA NA      190906_0032.abf
# 2 190906         0        50 gj       24 Delayed   NA      NA        4    NA NA      190906_0050.abf



### use a subset of metadata cols to add experiment/cell data to other dfs ####
small_metadata <- metadata %>% 
  select(Experiment, FileName, In4, In9) %>% 
  gather("Channel", "Cell", c("In4", "In9"))

small_metadata2 <- metadata %>% 
  select(Experiment, Type, FileName, In4, In9) %>% 
  gather("Channel", "Cell", c("In4", "In9"))

Current Clamp

## Add Experiment/Cell to tecc ====
tecc <- tecc %>% 
  mutate(PreSyn = case_when(
    (abs(R1S3Mean) > abs(R1S6Mean)) ~ Signal1, 
    (abs(R1S3Mean) < abs(R1S6Mean)) ~ Signal4
  ),
  vrest = case_when(
    (abs(R1S3Mean) > abs(R1S6Mean)) ~ R1S1Baseline, 
    (abs(R1S3Mean) < abs(R1S6Mean)) ~ R1S4Baseline
  )) %>% 
  mutate(PreSyn = case_when(
    PreSyn == "IN 9" ~ "In9",
    PreSyn == "IN 4" ~ "In4"
  )) %>% 
  select(FileName, PreSyn, vrest, #Signal1, Signal4, R1S1Baseline, R1S4Baseline, 
         cc, r11, r1#, r12, rc # gap junction measures we'll get from voltage clamp
  ) %>% 
  distinct() %>% 
  rename(Channel = PreSyn)


tecc <- left_join(tecc, spread(small_metadata, Channel, Cell)) %>% 
  mutate(Cell = case_when(Channel == "In9" ~ In9,
                          Channel == "In4" ~ In4),
         PrePost = case_when(Channel == "In9" ~ paste0("cc.", as.character(In9), "_", as.character(In4)),
                             Channel == "In4" ~ paste0("cc.", as.character(In4), "_", as.character(In9)))
  ) %>% 
  filter(!is.na(Cell) &
           !is.na(In4) &
           !is.na(In9)) %>% 
  filter(In4 %in% c(3, 4, 5) &
           In9 %in% c(3, 4, 5)) %>% 
  select(-In4, -In9) %>% 
  filter(!is.na(cc)) %>% 
  spread(PrePost, cc)

Voltage Clamp (Gap Junction Procedure)

## Add Experiment/Cell to tevc ====
tevc <-
  tevc %>% 
  select(FileName, PreSyn, MedianIg) %>% 
  distinct() %>% 
  mutate(PreSyn = case_when(
    PreSyn == "IN 9" ~ "In9",
    PreSyn == "IN 4" ~ "In4"
  )) %>% 
  rename(Channel = PreSyn,
         ig = MedianIg)


tevc <- left_join(tevc, spread(small_metadata, Channel, Cell)) %>% 
  mutate(Cell = case_when(Channel == "In9" ~ In9,
                          Channel == "In4" ~ In4),
         PrePost = case_when(Channel == "In9" ~ paste0("ig.", as.character(In9), "_", as.character(In4)),
                             Channel == "In4" ~ paste0("ig.", as.character(In4), "_", as.character(In9)))
  ) %>% 
  filter(!is.na(Cell) &
           !is.na(In4) &
           !is.na(In9)) %>% 
  filter(In4 %in% c(3, 4, 5) &
           In9 %in% c(3, 4, 5)) %>% 
  select(-In4, -In9) %>% 
  filter(!is.na(ig)) %>% 
  spread(PrePost, ig)

# is conductance symmetric?
# tevc_plt <- tevc %>% 
#   select(-FileName, -Channel, -Cell) %>%
#   group_by(Experiment) %>% 
#   mutate_all(median, na.rm = T) %>% 
#   distinct() 
# 
# tevc_plt %>% 
# ggplot(aes(x = ig.3_4, y = ig.4_3))+
#   geom_point()
# 
# tevc_plt %>% 
# ggplot(aes(x = ig.3_5, y = ig.5_3))+
#   geom_point()
# 
# tevc_plt %>% 
# ggplot(aes(x = ig.4_5, y = ig.5_4))+
#   geom_point()

tevc[is.na(tevc$ig.3_4), "ig.3_4"] <- tevc[is.na(tevc$ig.3_4), "ig.4_3"]
tevc[is.na(tevc$ig.3_5), "ig.3_5"] <- tevc[is.na(tevc$ig.3_5), "ig.5_3"]
tevc[is.na(tevc$ig.4_5), "ig.4_5"] <- tevc[is.na(tevc$ig.4_5), "ig.5_4"]

tevc <- tevc %>% select(-ig.4_3, -ig.5_3, -ig.5_4)

Add metadata to epsp

## Add Experment/Cell to epsp ====
epsp <- left_join(epsp, small_metadata2)

# TODO How do we best caputre variace? find range between max/min across all sweeps when calculating auc? 
# note that there are plenty of replicates here
# and that there are not epsp_50's for each experiment. These are likely sufficiently close to -50. To be on the safe side, we'll only consider epsp not epsp_50
epsp %>% 
  group_by(Experiment, Cell, Type) %>% 
  filter(Key != "predicted") %>% 
  # filter(Sweep == 1 & Key == "predicted") %>% 
  tally() %>% 
  spread(Type, n) %>%
  mutate(epsp = ifelse(is.na(epsp), NA, T),
         epsp_50 = ifelse(is.na(epsp_50), NA, T))

epsp %>% 
  group_by(Experiment, Cell, Key) %>% 
  filter(Type == "epsp") %>% 
  mutate(
    Cor.IQR = IQR(Cor, na.rm = T),
    Min.V.IQR = IQR(Min.V, na.rm = T),
    Max.Amplitude.IQR = IQR(Max.Amplitude, na.rm = T),
    AUC.IQR = IQR(AUC, na.rm = T)
  ) %>% 
  mutate(
    Cor = median(Cor, na.rm = T),
    Min.V = median(Min.V, na.rm = T),
    Max.Amplitude = median(Max.Amplitude, na.rm = T),
    AUC = median(AUC, na.rm = T)
  )

Update labeling on Brian's cells

# Relabel Brian's cells ----
##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "AP" &
               mrna$Time == "24h", "Cell"]%>% 
  unlist() %>% 
  str_remove(pattern = "#") %>% 
  str_split(pattern = " ") %>%
  transpose()

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "AP" &
       mrna$Time == "24h", "Experiment"] <- unlist(transpose(temp[[1]]))

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "AP" &
       mrna$Time == "24h", "Cell"] <- unlist(transpose(temp[[2]])) %>% str_remove(pattern = "LC")


##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "None" &
               mrna$Time == "0h", "Cell"] %>% 
  unlist() %>% 
  str_remove(pattern = "Control #")

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "0h", "Experiment"] <- temp

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "0h", "Cell"] <- NA


##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "None" &
               mrna$Time == "24h", "Cell"] %>% 
  unlist() %>% 
  str_split(pattern = " ") %>%
  transpose()

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "24h", "Experiment"] <- unlist(transpose(temp[[1]]))

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "None" &
       mrna$Time == "24h", "Cell"] <- unlist(transpose(temp[[2]])) %>% str_remove(pattern = "LC")


##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "TEA" &
               mrna$Time == "1h", "Cell"] %>% 
  unlist() %>% 
  str_remove(pattern = "TEA #")

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "1h", "Experiment"] <- temp

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "1h", "Cell"] <- NA

##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "TEA" &
               mrna$Time == "24h", "Cell"] %>% 
  unlist() %>% 
  str_remove(pattern = "TEA24-")

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "24h", "Experiment"] <- temp

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TEA" &
       mrna$Time == "24h", "Cell"] <- NA

##
temp <- mrna[mrna$Source == "Brian" &
               mrna$Pharm == "TTX", "Cell"] %>% 
  unlist() %>% 
  str_split(pattern = "-") %>%
  transpose()

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TTX", "Experiment"] <- unlist(transpose(temp[[1]]))

mrna[mrna$Source == "Brian" &
       mrna$Pharm == "TTX", "Cell"] <- unlist(transpose(temp[[2]]))
## deduplicate mrna
mrna <- mrna %>%
  gather(key = "mrna",
         value = "count",
         names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))]) %>%
  filter(mrna != c("htr2b", "kainate2b", "nmda1a", "nmda2b", "mglur7", "trpm1", "machrb", "htr1b")) %>% # drop fully missing mrnas
  group_by(Source, Pharm, Time, Experiment, Cell, mrna) %>%
  mutate(count = median(count, na.rm = T)) %>%
  ungroup() %>%
  distinct() %>%
  spread(key = "mrna", value = "count")

Add whether network was active to metadata

# Was there activity on the EC? 
metadata <- full_join(metadata,
                      tibble::tribble(
                        ~Experiment, ~ExtracellularActive,
                        "190808a",                 TRUE,
                        "190830",                 TRUE,
                        "190903",                 TRUE,
                        "190904",                 TRUE,
                        "190905",                 TRUE,
                        "190906",                 TRUE,
                        "190907",                 TRUE,
                        "190915",                 TRUE,
                        "190916a",                 TRUE,
                        "190917a",                   NA, # File not found. Entry not found in notebook either.
                        "190917",                 TRUE,
                        "190918",                 TRUE,
                        "190924",                 TRUE,
                        "190924a",                 TRUE,
                        "191002",                 TRUE,
                        "191004",                 TRUE,
                        "190926",                 TRUE,
                        "190927",                FALSE,
                        "190927a",                 TRUE,
                        "190930",                FALSE,
                        "190930a",                FALSE,
                        "191001",                 TRUE
                      )
)

metadata %>% 
  select(Experiment, Condition, ExtracellularActive) %>% 
  filter(Condition %in% c("0h", "1h", "24h")) %>% 
  distinct() %>% 
  group_by(Condition, ExtracellularActive) %>% 
  tally() %>%
  filter(!is.na(ExtracellularActive)) %>% 
  pivot_wider(names_from = "ExtracellularActive", values_from = "n")

Create cross type data, sans Brian's cells

small_metadata <- metadata[, c("Experiment",  "FileName", "Condition")]

M_ionic<- ionic %>% mutate(Cell = as.character(Cell))  
#Ihtk.0 Ihtk.Slope  Ia.0 Ia.Slope
M_tecc <- left_join(tecc, small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell))
# QC drop all non positive resistances, all ccs outside 0-1

M_tecc <- M_tecc %>% mutate(
  # cc  = ifelse(cc > 0 & cc < 1, cc, NA),
  r11 = ifelse(r11 >0, r11, NA),
  r1  = ifelse(r1 >0, r1, NA),
  # r12  = ifelse(r12 >0, r12, NA),
  # rc  = ifelse(rc >0, rc, NA)

  cc.3_4  = ifelse(cc.3_4 > 0 & cc.3_4 < 1, cc.3_4, NA),
  cc.3_5  = ifelse(cc.3_5 > 0 & cc.3_5 < 1, cc.3_5, NA),
  cc.4_3  = ifelse(cc.4_3 > 0 & cc.4_3 < 1, cc.4_3, NA),

  cc.4_5  = ifelse(cc.4_5 > 0 & cc.4_5 < 1, cc.4_5, NA),
  cc.5_3  = ifelse(cc.5_3 > 0 & cc.5_3 < 1, cc.5_3, NA),
  cc.5_4  = ifelse(cc.5_4 > 0 & cc.5_4 < 1, cc.5_4, NA)
) #%>% 
# mutate(rc = rc^-1) %>% 
# rename(rc.ig = rc)

#vrest     cc   r11    r12    r1    rc 
M_tevc <- left_join(tevc, small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell)) 
#ig
# M_epsp <- left_join(epsp, small_metadata) %>% 
#   select(-FileName, -Type) %>% 
#   mutate(Cell = as.character(Cell))  %>% 
#   mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs",
#                          Key %in% c("predicted")  ~ "Sim")) %>% 
#   pivot_wider(names_from = "Key", values_from  = c("Min.V", "Max.Amplitude", "AUC")) %>% 
#   mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% 
#   mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim,
#          AUC_Delta = AUC_Obs - AUC_Sim) %>% 
#   group_by(Condition, Experiment, Cell) %>% 
#   select(-Min.V_Sim) %>% 
#   mutate_at(c("Cor", 
#               "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", 
#               "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta", 
#               "AUC_Obs", "AUC_Sim", "AUC_Delta"), median, na.rm = T) %>% 
#   distinct() %>% 
#   ungroup()

M_epsp <- left_join(epsp, small_metadata) %>% 
  filter(Type == "epsp") %>% 
  select(-FileName, -Type) %>% 
  mutate(Cell = as.character(Cell))  %>% 
  mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs",
                         Key %in% c("predicted")  ~ "Sim")) %>% 
  pivot_wider(names_from = "Key", values_from  = c("Min.V", "Max.Amplitude", "AUC")) %>% 
  mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% 
  mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim,
         AUC_Delta = AUC_Obs - AUC_Sim) %>% 
  group_by(Condition, Experiment, Cell) %>% 
  select(-Min.V_Sim) %>% 

  mutate(
    Cor.IQR = IQR(Cor, na.rm = T),
    Min.V_Obs.IQR = IQR(Min.V_Obs, na.rm = T),

    Max.Amplitude_Obs.IQR = IQR(Max.Amplitude_Obs, na.rm = T),
    Max.Amplitude_Sim.IQR = IQR(Max.Amplitude_Sim, na.rm = T),
    Max.Amplitude_Delta.IQR = IQR(Max.Amplitude_Delta, na.rm = T),

    AUC_Obs.IQR = IQR(AUC_Obs, na.rm = T),
    AUC_Sim.IQR = IQR(AUC_Sim, na.rm = T),
    AUC_Delta.IQR = IQR(AUC_Delta, na.rm = T)
  ) %>% 

  mutate_at(c("Cor", 
              "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", 
              "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta", 
              "AUC_Obs", "AUC_Sim", "AUC_Delta"), median, na.rm = T) %>% 

  select(-Sweep) %>% 

  distinct() %>% 
  ungroup()  









#Sweep   Cor Key        Min.V Max.Amplitude    AUC 
M_mrna <- left_join(mrna[mrna$Source == "Daniel", ], small_metadata) %>% select(-FileName) %>% mutate(Cell = as.character(Cell))


#Set up data cols to help organize full dataset
ionic_cols <- c("Ihtk.0", "Ihtk.Slope", "Ihtk.0.s", "Ihtk.Slope.s", "Ia.0", "Ia.Slope", "Ia.0.s", "Ia.Slope.s")
tecc_cols <- c("vrest", "r11", "r1", "cc.3_4", "cc.3_5", "cc.4_3", "cc.4_5", "cc.5_3", "cc.5_4")#, "r12", "rc.ig") # ignore rc, ig is a more direct measure
tevc_cols <- c("ig.3_4", "ig.3_5", "ig.4_5")
epsp_cols <- c("Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", 
               "Max.Amplitude_Delta", "AUC_Delta")
mrna_cols <- names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))]


M_all <- full_join(M_ionic, M_tecc) %>% full_join(M_tevc) %>% full_join(M_epsp) %>% full_join(M_mrna)

reordered_names <- c(names(M_all)[!(names(M_all) %in% c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols))], 
                     c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols))


M_all <- M_all[, reordered_names] %>% filter(Condition %in% c("0h", "1h", "24h") & Cell %in% c(3, 4, 5)) %>% distinct()

# #TODO this needs some prep work. We'll want to capture the variance in these measures.
# # Not clear how to do that. 
# epsp %>% ggplot(aes(x = FileName, Max.Amplitude, color = interaction(Experiment, Cell)))+
#   geom_line()+
#   geom_point()+
#   facet_grid(.~Key)

#TODO move this to top
# QC data by completion rate, having variance, and median mrna abundance
M_all_outliers <- M_all
M_all_skim <- M_all %>% skimr::skim()
M_all_skim <- M_all_skim[M_all_skim$complete_rate >= 0.19 & 
                           M_all_skim$numeric.sd > 0, ]

M_all_skim <- M_all_skim[!(M_all_skim$skim_variable %in% mrna_cols) |
                           ((M_all_skim$skim_variable %in% mrna_cols) & M_all_skim$numeric.p50 >= 50 ), ]

M_all <- M_all[, names(M_all) %in% c(
  "Condition", "Cell", "Experiment", "Channel", 
  "Sweep", "Source", "Pharm", "Time", "Sample", 
  M_all_skim$skim_variable[!is.na(M_all_skim$skim_variable)])
]

# update cols now that data is QCed
ionic_cols <- c("Ihtk.0", "Ihtk.Slope", "Ihtk.0.s", "Ihtk.Slope.s", "Ia.0", "Ia.Slope", "Ia.0.s", "Ia.Slope.s")[ionic_cols %in% names(M_all)]
tecc_cols <- c("vrest", "r11", "r1", "cc.3_4", "cc.3_5", "cc.4_3", "cc.4_5", "cc.5_3", "cc.5_4")[tecc_cols %in% names(M_all)]
#, "r12", "rc.ig") # ignore rc, ig is a more direct measure
tevc_cols <- c("ig.3_4", "ig.3_5", "ig.4_5")[tevc_cols %in% names(M_all)]
epsp_cols <- c("Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", "AUC_Obs", "AUC_Sim", 
               "Max.Amplitude_Delta", "AUC_Delta")[epsp_cols %in% names(M_all)]
mrna_cols <- mrna_cols[mrna_cols %in% names(M_all)]

"Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"

About the data

What conditions exist in both datasets?

temp <- mrna %>% select(Pharm, Time, Source) %>% distinct()

temp %>% group_by(Pharm, Source) %>%  mutate(exists = "T") %>% spread(Time, exists)

mrna %>% 
  filter(
    (Source == "Daniel") |
    (Source == "Brian" & Pharm == "TEA") |
    (Source == "Brian" & Pharm == "None" & Time == "0h") 
    ) %>% 
  distinct() %>% 
  group_by(Pharm, Time, Source) %>% 
  tally()

How many networks had any extracellular activity by their colleciton point?

metadata %>% 
  select(Experiment, Condition, ExtracellularActive) %>% 
  filter(Condition %in% c("0h", "1h", "24h")) %>% 
  distinct() %>% 
  group_by(Condition, ExtracellularActive) %>% 
  tally() %>%
  filter(!is.na(ExtracellularActive)) %>% 
  pivot_wider(names_from = "ExtracellularActive", values_from = "n")

How many individuals were there in each group?

M_all %>% select(Condition, Experiment, Cell) %>% distinct() %>% group_by(Condition) %>% tally() 

M_all %>% select(Condition, Experiment) %>% distinct() %>% group_by(Condition) %>% tally()

set up DV groupings

mrna_cols <- c(
  "nav", "cav1", "cav2", "bkkca", 
  "shaker", "shal", "shab", "shaw1", "shaw2",
  "inx1", "inx2", "inx3" #"inx4", "inx5", #<- inx5 is likely not expressed
)

memb_cols <- c("vrest", "r11", "r1", 
               "Ihtk.0", "Ihtk.Slope", 
               "Ihtk.0.s", "Ihtk.Slope.s", 
               "Ia.0", "Ia.Slope", 
               "Ia.0.s", "Ia.Slope.s")

epsp_cols <- c("Cor", "Min.V_Obs", 
               "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
               "AUC_Obs", "AUC_Sim", 
               "Max.Amplitude_Delta", "AUC_Delta",

               "Cor.IQR", "Min.V_Obs.IQR", 
               "Max.Amplitude_Obs.IQR", #"Max.Amplitude_Sim.IQR", 
               "AUC_Obs.IQR", #"AUC_Sim.IQR", 
               "Max.Amplitude_Delta.IQR", "AUC_Delta.IQR")

1 What's happening in Brian's Data?

Prep data -- cull outliers and median interpolate missing values

# select the mRNAs present in both Brian's data and mine. 
mrna_b <-select(mrna, 
                Source, Pharm, Time, Experiment, Cell, 
                nav, cav1, cav2, bkkca, 
                shaker, shal, shab, shaw1, shaw2,
                inx1, inx2, inx3 #inx4, inx5, #<- inx5 is likely not expressed
) %>% 
  filter(Source == "Brian")
# mutate(Pharm = ifelse(Source == "Daniel" & Time == "0h", "None", Pharm)) 

mrna_b %>% skimr::skim()

mrna_b %>% 
  group_by(Time, Pharm, Source) %>% 
  tally()

mrna_b$uid <- as.character(seq(1, nrow(mrna_b)))
mrna_b <- mrna_b %>% 
  select(-Experiment, -Cell) %>% 
  unite(Group, Source, Pharm, Time)

mrna_b <- mrna_b[, c("Group", "uid",
                     names(mrna_b)[!(names(mrna_b) %in% c("Group", "uid"))])
]


# Remove Outliers 
mrna_b_winxiqr_list <- map(unique(mrna_b$Group), function(group){
  temp <- mrna_b[mrna_b$Group == group, ]
  dvs <- names(mrna_b)[!(names(mrna_b) %in% c("Group", "uid"))]

  for (dv_col  in dvs){
    temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA)
  }

  return(temp)
})

mrna_b_winxiqr <- do.call(rbind, mrna_b_winxiqr_list)

# Median interpolate missing
mrna_b_winxiqr_interp <- imputeMissings::impute(mrna_b_winxiqr, method = "median/mode")

treatment <- unique(mrna_b_winxiqr$Group)[1]
mrna1 <- "inx1"
mrna2 <- "inx2"

M <- mrna_b_winxiqr %>% filter(Group == treatment) #%>% select(uid, mrna1, mrna2)

## setup 
mrnas <- c("nav", "cav1", "cav2", "bkkca", "shaker", "shal", "shab", "shaw1", "shaw2", "inx1", "inx2", "inx3")

### Prep cor df
mrna_cors <- list()
for(i in seq_along(mrnas)){
  for(j in seq(i, length(mrnas))){
    if(i != j){
      mrna_cors[[(length(mrna_cors)+1)]] <- c(mrnas[i], mrnas[j])
    }
  }  
}
mrna_cors <- data.frame(do.call(rbind, mrna_cors))
names(mrna_cors) <- c("mrna1", "mrna2")
mrna_cors["Cor"] <- NA
mrna_cors["ep"] <- NA
### Fill cor df

for(i in seq(1, nrow(mrna_cors))){
  mrna1 <- mrna_cors[i, "mrna1"]
  mrna2 <- mrna_cors[i, "mrna2"]

  # is there a significant correlation?
  cor_obs <- cor(M[, c(mrna1)], M[, c(mrna2)], use = "pairwise.complete.obs", method = "pearson")
  cor_obs <- as.numeric(cor_obs)

  niter <- 1000
  cor_sim <- unlist(map(seq(1, niter), function(i){
    # return(cor(M[[mrna1]], sample(M[[mrna2]]) ) )
    cor_obs <- cor(M[, c(mrna1)], sample(unlist(M[, c(mrna2)])), use = "pairwise.complete.obs", method = "pearson")
    return(unlist(cor_obs))
    }))
  cor_ep <- mean(c(cor_obs, abs(cor_sim)) >= cor_obs)

  mrna_cors[i, "Cor"] <- cor_obs
  mrna_cors[i, "ep"]  <- cor_ep
}

# fix levels
mrna_cors$mrna1 <- factor(mrna_cors$mrna1, levels = mrnas)
mrna_cors$mrna2 <- factor(mrna_cors$mrna2, levels = mrnas)




# install.packages("GGally")

## Data prep
# mrna_cors %>% 
#   ggplot(aes(x = 0, y = 0))+
#   geom_tile(aes(fill = Cor) )+
#   geom_label(aes(label = Cor))+
#   facet_grid(mrna1 ~ mrna2)


# get the right color for the background
fill_val <- mrna_cors[(
  (mrna_cors$mrna1 == mrna1 & mrna_cors$mrna2 == mrna2) & 
  (mrna_cors$ep < 0.05)), "Cor"]


mk_cor_scatter <- function(M, mrna1, mrna2,
                           fill_val){

  if( length(fill_val) == 0 ){ # if there's a non-significant correlation, don't add fit.
    fill_val <- "#00000000"

    plt <- M %>% 
      ggplot(aes_string(x = mrna1, mrna2))+
      # stat_poly_line(formula = x ~ y, color = "black") +
      geom_point(alpha = 0.5)+
      # stat_poly_eq(aes(label = paste(after_stat(eq.label),
      #                                after_stat(rr.label), sep = "*\", \"*")))+
      theme_void()+
      theme(#plot.background = element_rect(fill = fill_val),
            axis.title.x = element_text(face = "italic"),
            axis.title.y = element_text(face = "italic"))


  } else {    # if there's a significant correlation, add the fit.
    vals <- seq(-1, to = 1, length.out = 7)
    fill_val <- RColorBrewer::brewer.pal(9, "PuOr")[seq(2, 8)][(fill_val < vals)][1]

    plt <- M %>% 
      ggplot(aes_string(x = mrna1, mrna2))+
      geom_rect(aes(xmin=min(M$mrna1, na.rm = T), xmax=max(M$mrna1, na.rm = T),
                    ymin=min(M$mrna2, na.rm = T), ymax=max(M$mrna2, na.rm = T)
                    ), fill=fill_val)+
      stat_poly_line(formula = x ~ y, color = "black") +
      geom_point(alpha = 0.5)+
      # stat_poly_eq(aes(label = paste(after_stat(eq.label),
      #                                after_stat(rr.label), sep = "*\", \"*")))+
      theme_void()+
      theme(plot.background = element_rect(fill = fill_val),
            axis.title.x = element_text(face = "italic"),
            axis.title.y = element_text(face = "italic"))
  }

  return(plt)
}

mk_cor_scatter(M = M, 
               mrna1 = mrna1, 
               mrna2 = mrna2,
               fill_val = fill_val)

# make all the plots
plt_list <- list()

for(i in seq(1, length(mrnas))){
  for(j in seq(i, length(mrnas))){
    if(i != j){

      mrna1 = mrnas[i]
      mrna2 = mrnas[j]

      fill_val <- mrna_cors[(
        (mrna_cors$mrna1 == mrna1 & mrna_cors$mrna2 == mrna2) & 
        (mrna_cors$ep < 0.05)), "Cor"]

      print(fill_val)

      plt_list[[(1+length(plt_list))]] <- mk_cor_scatter(
        M = M, 
        mrna1 = mrna1, 
        mrna2 = mrna2,
        fill_val = fill_val)


    }
  }  
}





# plt_list
library(GGally)
side_len <- (length(mrnas) -1)

pm <- ggmatrix(
  map(seq(1 , side_len**2), function(i){}),
  nrow = side_len, ncol = side_len,
  # xAxisLabels = c("A", "B", "C"),
  # yAxisLabels = c("D", "E"),
  title = "Matrix Title"
)

diagoffset <- 0
colcount <- 1
rowcount <- 1

for(i in seq_along(plt_list)){
  pm[rowcount, colcount] <- plt_list[[i]]

  if(colcount == side_len){
    diagoffset <- diagoffset + 1
    colcount <- 1 + diagoffset
    rowcount <- rowcount + 1
  } else {
    colcount <- colcount + 1
  }


}

# pm[1, 2] <- plt_list[[1]]
pm

# 
# 
# ggpairs(select(M,  -Group, -uid), 
#         upper = list(continuous = "cor"),
#   lower = list(na ="na"),
#   diag = list( na = "naDiag"))

plot_grid(plt_list)

library(patchwork)

# quick and dirty get all the plots ordered
patchwork_string <- c()

ith <- 1
for(i in seq(1, length(mrnas))){
  for(j in seq(1, length(mrnas))){
    if(i == j){

    } else if(j > i){
      patchwork_string <- c(patchwork_string, paste0("plt_list[[", as.character(ith), "]]"))
      ith <- ith+1
    } else {
      patchwork_string <- c(patchwork_string, "plot_spacer()")

    }
  }
}
patchwork_string <- paste(patchwork_string, collapse = "+")

# patchwork_string+patchwork::plot_layout(ncol = length(mrnas))

plt_list[[1]]+plt_list[[2]]+plt_list[[3]]+plt_list[[4]]+plt_list[[5]]+plt_list[[6]]+plt_list[[7]]+plt_list[[8]]+plt_list[[9]]+plt_list[[10]]+plt_list[[11]]+plot_spacer()+plt_list[[12]]+plt_list[[13]]+plt_list[[14]]+plt_list[[15]]+plt_list[[16]]+plt_list[[17]]+plt_list[[18]]+plt_list[[19]]+plt_list[[20]]+plt_list[[21]]+plot_spacer()+plot_spacer()+plt_list[[22]]+plt_list[[23]]+plt_list[[24]]+plt_list[[25]]+plt_list[[26]]+plt_list[[27]]+plt_list[[28]]+plt_list[[29]]+plt_list[[30]]+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[31]]+plt_list[[32]]+plt_list[[33]]+plt_list[[34]]+plt_list[[35]]+plt_list[[36]]+plt_list[[37]]+plt_list[[38]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[39]]+plt_list[[40]]+plt_list[[41]]+plt_list[[42]]+plt_list[[43]]+plt_list[[44]]+plt_list[[45]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[46]]+plt_list[[47]]+plt_list[[48]]+plt_list[[49]]+plt_list[[50]]+plt_list[[51]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[52]]+plt_list[[53]]+plt_list[[54]]+plt_list[[55]]+plt_list[[56]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[57]]+plt_list[[58]]+plt_list[[59]]+plt_list[[60]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[61]]+plt_list[[62]]+plt_list[[63]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[64]]+plt_list[[65]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plt_list[[66]]+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+plot_spacer()+patchwork::plot_layout(ncol = length(mrnas))


# as_tbl_graph(mrna_cors) %>% 
#   ggraph(layout = 'linear', circular = TRUE) + 
#   geom_edge_arc(aes(colour = ep)) + 
#   coord_fixed()

library(ggraph)
library(igraph)

temp <- mutate(mrna_cors, 
               from = as.numeric(as.factor(mrna1)), 
               to   = as.numeric(as.factor(mrna2))
               )
# graph <- graph_from_data_frame(temp[, c("from", "to")])

# graph <- temp %>% 
#     graph_from_data_frame(directed = FALSE)
# 
# # Not specifying the layout - defaults to "auto"
# ggraph(graph) + 
#     # geom_edge_link(aes(colour = factor(Cor))) + 
#     geom_edge_link(aes(edge_alpha = 1-ep)) + 
#     geom_node_point()


# = = = = 

# Create a tidy data frame of correlations
tidy_cors <- M[, mrnas] %>% 
  correlate() %>% 
  stretch()

tidy_cors["ep"] <- NA

for(i in seq(1, nrow(mrna_cors))){
  mrna1 <- mrna_cors[i, "mrna1"]
  mrna2 <- mrna_cors[i, "mrna2"]

  tidy_cors[((tidy_cors$x == mrna1) & (tidy_cors$y == mrna2)) | (
    (tidy_cors$x == mrna2) & (tidy_cors$y == mrna1)
  ) , "ep"] <- mrna_cors[i, "ep"]

}


# eps1 <- full_join(
#   tidy_cors, 
#   rename(select(mrna_cors, -Cor), x = mrna1, y = mrna2)
#   ) %>% 
#   filter(!is.na(ep))
#   
# eps2 <- full_join(
#   tidy_cors,
#     rename(select(mrna_cors, -Cor), x = mrna2, y = mrna1)
#           ) 
#   
# full_join(
#   tidy_cors,
#   eps1) %>% full_join(eps2)

# Convert correlations stronger than some value
# to an undirected graph object
# graph_cors <- tidy_cors %>% 
#   filter(abs(r) > 0.7) %>% 
#   graph_from_data_frame(directed = FALSE)
# 
# # Plot
# ggraph(graph_cors) +
#   geom_edge_link() +
#   geom_node_point() +
#   geom_node_text(aes(label = name), repel = TRUE) +
#   theme_graph()


# = = = = 
#ref https://www.data-imaginist.com/2017/ggraph-introduction-layouts/
graph_cors <- tidy_cors %>% 
  # mutate(bool = case_when(ep < 0.05 ~ TRUE,
  #                         ep >= 0.05 ~ FALSE)) %>% 
  # filter(abs(ep) < 0.05) %>% 
  graph_from_data_frame(directed = FALSE)


graph_cors_sig <- tidy_cors %>% 
  mutate(r = case_when(ep < 0.05 ~ r,
                       ep >= 0.05~ 0)) %>%

  graph_from_data_frame(directed = FALSE)

custom_network_plot <- 
ggraph(graph_cors_sig, layout = 'linear', circular = TRUE) +
  geom_edge_link(aes(edge_alpha = 1/ep, edge_width = 1/ep, color = r)) +
  guides(edge_alpha = "none", edge_width = "none") +
  scale_edge_colour_gradientn(limits = c(-1, 1), colors = c("#B35806", "White","#542788")) +
  geom_node_point(color = "gray", size = 5) +
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_graph() +
  labs(title = "")

Correlations

Correlation heatmaps

All correlations in Brian's data

library(ggcorrplot)

cor_plt_list <- list()

for (temp.group in c("Brian_All", unique(mrna_b_winxiqr$Group))){
  print(temp.group)
  if (temp.group == "Brian_All"){
    cor_df <- select(mrna_b_winxiqr, -Group, -uid)
  } else {
    cor_df <- select(filter(mrna_b_winxiqr, Group == temp.group), -Group, -uid)
  }

  # p.mat <- cor_pmat(cor_df)
  cor_df_bin <- cor(cor_df, use = "pairwise.complete.obs")

  # bin the correlations so there are fewer colors used in the figure
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the average bin value
    # cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- (cor_bins[i] + cor_bins[i+1])
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }

  cor_plt_list[[(length(cor_plt_list)+1)]] <- ggcorrplot(
    cor_df_bin, 
    # p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = temp.group)+
    theme(legend.position = "")

}

cor_plt_list[[3]] + cor_plt_list[[5]] + plot_spacer() + plot_spacer() +
  cor_plt_list[[4]] + cor_plt_list[[6]] + cor_plt_list[[2]]+ cor_plt_list[[7]] + 
  plot_layout(ncol = 4)


# cor_plt_list[[1]] +
cor_plt_list[[3]] + cor_plt_list[[4]] +
  cor_plt_list[[5]] + cor_plt_list[[6]] +
  # plot_spacer() +
  plot_spacer() + cor_plt_list[[2]] +
  plot_spacer() + cor_plt_list[[7]] + plot_layout(ncol = 4) #plot_layout(ncol = 5)

Correlation multiplot

cor_df_list <- map(c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"), function(temp.group){
  cor_df <- select(filter(mrna_b_winxiqr, Group == temp.group), -Group, -uid)
  cor_df <- cor(cor_df, use = "pairwise.complete.obs")
  cor_df_bin <- cor_df
  # bin the correlations so there are fewer colors used in the figure
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }

  return(list(cor = cor_df,
              cor_bin = cor_df_bin))
})




temp <- 
map(seq_along(cor_df_list), function(i){
  temp <- cor_df_list[[i]][[1]]
  temp <- as.data.frame(temp) 
  temp %>% 
    rownames_to_column() %>% 
    gather("colname", "Cor", names(temp)) %>% 
    mutate(Group = c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h")[i])
})

temp <- do.call(rbind, temp)


temp_fill <- map(seq_along(cor_df_list), function(i){
  temp <- cor_df_list[[i]][[2]]
  temp <- as.data.frame(temp) 
  temp %>% 
    rownames_to_column() %>% 
    gather("colname", "Cor", names(temp)) %>% 
    mutate(Group = c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h")[i])
})


temp_fill <- do.call(rbind, temp_fill) %>% rename(CorFill = Cor)

cor_col_plt <- 
full_join(temp, temp_fill) %>% 
  # filter(rowname == "bkkca", colname == "cav1") %>%
  mutate(x =  
         case_when(Group == "Brian_None_0h" ~ "0 Hours",
                       Group == "Brian_TEA_1h" ~ "1 Hour",
                       Group == "Brian_TEA_24h" ~ "24 Hours")
         ) %>% 
  mutate(Cor = ifelse(rowname == colname, NA, Cor)) %>% 
  ggplot(aes(x = x, y = Cor, fill = Cor))+
  geom_col()+
  geom_hline(yintercept = 0)+
  facet_grid(rowname ~ colname)+
  scale_fill_distiller(palette = "PuOr", direction = 1)+
  coord_cartesian(ylim = c(-1,1))+
  labs(x = "")+
  scale_y_continuous(breaks = c(-0.5, 0.5))+
  theme(legend.position = "", 
        # panel.grid.major.y = element_line() 
        # axis.ticks.length.y.left = element_line(), 
        axis.text.x = element_text(angle = 90))



cor_bin_fill_ref_plot <- 
ggplot(data.frame(Cor = seq(-1, 1, length.out = 8)),
       aes(x = Cor, y = 0, fill = Cor))+
  geom_tile(aes(height = 2/8))+
  geom_text(aes(label = round(Cor, digits = 1), size = 1))+
  scale_fill_distiller(palette = "PuOr", direction = 1)+
  theme_void()+
  theme(legend.position = "")+
  coord_fixed()+
    plot_spacer()

correlogram_set <- (cor_plt_list[[3]]+labs(subtitle= "0 Hours") + 
  cor_plt_list[[5]]+labs(subtitle= "1 Hours")  +
  cor_plt_list[[6]]+labs(subtitle= "24 Hours"))



figure_w <- c(1)
figure_h <- c(1/3,1,5)


 # (cor_bin_fill_ref_plot + plot_spacer()+ plot_spacer() ) /
cor_bin_fill_ref_plot /
correlogram_set /
cor_col_plt +  
  plot_layout(widths = figure_w, heights = figure_h)

# (cor_plt_list[[3]]+labs(subtitle= "0 Hours") + 
#   cor_plt_list[[5]]+labs(subtitle= "1 Hours")  +
#   cor_plt_list[[6]]+labs(subtitle= "24 Hours") +
#   cor_bin_fill_ref_plot + plot_spacer() + plot_spacer() +
#   plot_spacer() + plot_spacer()+ plot_spacer()) |
#   cor_col_plt+
#   plot_layout(widths = figure_w, heights = figure_h)


# cm_multiplier = 15
# ggsave("BrianCorMulti.svg", 
#        width = sum(figure_w)*cm_multiplier,
#        height = sum(figure_h)*cm_multiplier,
#          
#        units = "cm",
#        limitsize = F)

Corrplots with non-sig dropped

cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){
  df = mrna_b_winxiqr %>% 
    mutate(Condition = Group) %>% 
    mutate(Condition = case_when(Condition == "Brian_None_0h" ~ "0h",
                                 Condition == "Brian_TEA_1h"  ~ "1h",
                                 Condition == "Brian_TEA_24h" ~ "24h"))

  df = df[df$Condition == Time, c(mrna_cols)]

  corr <- cor(df, use = "pairwise.complete.obs", method = "spearman")
  p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman")

  cor_df_bin <- corr
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }


  ggcorrplot(
    cor_df_bin, 
    p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = Time)+
    theme(legend.position = ""#,
          # axis.text.x = element_text(angle = 90)
          )
})




# plot_grid(plotlist = cor_plt_list)

(cor_plt_list[[1]]+theme(
    axis.text.x = element_text(angle = 90, vjust = 0))) +
(cor_plt_list[[2]]+theme(
    axis.text.x = element_text(angle = 90, vjust = 0),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))+
(cor_plt_list[[3]]+theme(
  axis.text.x = element_text(angle = 90, vjust = 0),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))

Correlations ECDFs

cor_list_df <- map(unique(mrna_b_winxiqr$Group), function(group){
  # temp <- cor(select(mrna_b_winxiqr[mrna_b_winxiqr$Group == group, ], -Group, -uid), use = "pairwise.complete.obs") %>% as.data.frame() 
  temp <- corrr::correlate(select(mrna_b_winxiqr[mrna_b_winxiqr$Group == group, ], -Group, -uid), use = "pairwise.complete.obs") %>%

    corrr::shave() %>% 
    as.data.frame()

  temp$row <- rownames(temp)
  temp <- gather(temp, "col", "Corr", names(temp)[!(names(temp) %in% c("term", "row"))])
  temp$Group <- group

  return(temp)  
})
cor_list_df <- do.call(rbind, cor_list_df)

# cor_list_df$Group %>% unique()

ecdf_corr_list <- map(
  list(
    list("Brian_None_0h", "Brian_TEA_1h"),
    list("Brian_TEA_1h", "Brian_TEA_24h"),
    list("Brian_None_0h", "Brian_TEA_24h"),

    list("Brian_None_0h", "Brian_None_24h"),
    list("Brian_None_0h", "Brian_TEA_24h"),
    list("Brian_None_0h", "Brian_TEA_1h"),
    list("Brian_None_24h", "Brian_AP_24h"),
    list("Brian_None_24h", "Brian_TEA_24h"),
    list("Brian_None_24h", "Brian_TTX_24h")
  ),

  function(group_comparison){
    plot_ecdf_ks(
      df = cor_list_df[cor_list_df$Group %in% group_comparison, ],
      data.col = "Corr",
      group.col = "Group",
      group1 = group_comparison[1],
      group2 = group_comparison[2],
      colors = c("black", "red"))
  })

# cowplot::plot_grid(plotlist = ecdf_corr_list)

ecdf_corr_list[[1]] + labs(subtitle = "", x = "") + 
  ecdf_corr_list[[2]] + labs(subtitle = "", x = "") + theme(legend.position = "top")+
  ecdf_corr_list[[3]] + labs(subtitle = "", x = "") 


# ggsave(plot = last_plot(),         
#        filename = here("officer_output", "ContrastSource_ecdfs.tiff"),        
#        width = 11.5, height = 4.76)


 # list("Brian_None_0h", "Brian_TEA_1h"),
 #    list("Brian_TEA_1h", "Brian_TEA_24h"),
 #    list("Brian_None_0h", "Brian_TEA_24h"),

plot_ecdf_ks(
  df = cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_1h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_1h",
  colors = c("black", "orange"))+ 
  labs(title = "0 Hours vs 1 Hour",
       subtitle = "p = 0.44", x = "")+


plot_ecdf_ks(
  df = cor_list_df[cor_list_df$Group %in% list("Brian_TEA_1h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_TEA_1h",
  group2 = "Brian_TEA_24h",
  colors = c("orange", "red"))+ 
  labs(title = "1 Hour vs 24 Hours",
       subtitle = "p < 0.01", x = "")+


plot_ecdf_ks(
  df = cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_24h",
  colors = c("black", "red"))+ 
  labs(title = "0 Hours vs 24 Hours",
    subtitle = "p < 0.01", x = "")



## what's happening in the high correlations at zero?
temp <- cor_list_df[cor_list_df$Group %in% list("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"), ] %>% 
  spread(Group, Corr) %>% 
  filter(Brian_None_0h >=0.66) %>% 
  gather(Group, Corr, c("Brian_None_0h", "Brian_TEA_1h", "Brian_TEA_24h"))




plot_ecdf_ks(
  df = temp[temp$Group %in% list("Brian_None_0h", "Brian_TEA_1h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_1h",
  colors = c("black", "orange"))+

plot_ecdf_ks(
  df = temp[temp$Group %in% list("Brian_TEA_1h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_TEA_1h",
  group2 = "Brian_TEA_24h",
  colors = c("orange", "red"))+

plot_ecdf_ks(
  df = temp[temp$Group %in% list("Brian_None_0h", "Brian_TEA_24h"), ],
  data.col = "Corr",
  group.col = "Group",
  group1 = "Brian_None_0h",
  group2 = "Brian_TEA_24h",
  colors = c("black", "red"))

Changes in mean mRNA levels

# refresh NO median interpolation
mrna_b_winxiqr <- do.call(rbind, mrna_b_winxiqr_list)

mrna_b_winxiqr_to_test <- mrna_b_winxiqr %>% 
  separate(Group, c("Source", "Pharm", "Time")) %>% 
  select(-Source) %>% 
  distinct()

library(broom)

How does time alter control cells?

cav2, shab, and shaker change over time.

# desired tests
# time in control
temp <- mrna_b_winxiqr_to_test %>% filter(Pharm %in% c("None"))
temp %>% group_by(Time) %>% tally()

temp_ys <- names(select(temp, -Time, -Pharm, -uid))

temp_fms <- map(temp_ys, function(temp_y){
  fm <- lm(as.formula(paste0(temp_y," ~ Time")), data = temp)
  res <- broom::tidy(fm)
  res$y <- temp_y
  return(res)
})

temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)")
temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% 
  mutate(p.adj = p.adjust(p.value, method = "holm")) %>% 
  mutate(stars = case_when(p.adj < 0.001 ~ "***",
                           p.adj >= 0.001 & p.adj < 0.01 ~ "**",
                           p.adj >= 0.01 & p.adj < 0.05 ~ "*",
                           p.adj >= 0.05 & p.adj < 0.1 ~ ".",
                           p.adj >= 0.1 ~ " "
  ))

temp_p

temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% 
  # group_by(Time) %>% 
  ggplot(aes(x= Time, y= Count, fill = mRNA))+
  geom_boxplot()+
  geom_point()+
  ggthemes::theme_clean()+
  scale_fill_brewer()+
  facet_grid(.~mRNA)

How does time alter TEA cells?

bkkca expression, inx1 expression, and maybe inx3 expression

# time in TEA
temp <- mrna_b_winxiqr_to_test %>% filter(Pharm %in% c("TEA"))
temp %>% group_by(Time) %>% tally()

temp_ys <- names(select(temp, -Time, -Pharm, -uid))

temp_fms <- map(temp_ys, function(temp_y){
  fm <- lm(as.formula(paste0(temp_y," ~ Time")), data = temp)
  res <- broom::tidy(fm)
  res$y <- temp_y
  return(res)
})

temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)")
temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% 
  mutate(p.adj = p.adjust(p.value, method = "holm")) %>% 
  mutate(stars = case_when(p.adj < 0.001 ~ "***",
                           p.adj >= 0.001 & p.adj < 0.01 ~ "**",
                           p.adj >= 0.01 & p.adj < 0.05 ~ "*",
                           p.adj >= 0.05 & p.adj < 0.1 ~ ".",
                           p.adj >= 0.1 ~ " "
  ))

temp_p

temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% 
  # group_by(Time) %>% 
  ggplot(aes(x= Time, y= Count, fill = mRNA))+
  geom_boxplot()+
  geom_point()+
  ggthemes::theme_clean()+
  scale_fill_brewer()+
  facet_grid(.~mRNA)

How do blockers alter cells after all have been incubated?

# steady state differences?
# ap vs TEA vs ttx vs control
temp <- mrna_b_winxiqr_to_test %>% filter(Time %in% c("24h"))
temp %>% group_by(Pharm) %>% tally()

temp_fms <- map(temp_ys, function(temp_y){
  fm <- lm(as.formula(paste0(temp_y," ~ Pharm")), data = temp)
  res <- broom::tidy(fm)
  res$y <- temp_y
  return(res)
})

temp_p <- do.call(rbind, temp_fms) %>% filter(term != "(Intercept)")
temp_p <- temp_p %>% select(y, term, estimate, std.error, statistic, p.value) %>% 
  mutate(p.adj = p.adjust(p.value, method = "holm")) %>% 
  mutate(stars = case_when(p.adj < 0.001 ~ "***",
                           p.adj >= 0.001 & p.adj < 0.01 ~ "**",
                           p.adj >= 0.01 & p.adj < 0.05 ~ "*",
                           p.adj >= 0.05 & p.adj < 0.1 ~ ".",
                           p.adj >= 0.1 ~ " "
  ))

temp_p

temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))) %>% 
  mutate(Pharm = factor(Pharm, levels = c("None", "TEA", "AP", "TTX"))) %>% 
  ggplot(aes(x= Pharm, y= Count, fill = mRNA))+
  geom_boxplot()+
  geom_point()+
  ggthemes::theme_clean()+
  scale_fill_brewer()+
  facet_wrap(.~mRNA, scales = "free_y")

library(ggpubr)

plot_grid(plotlist = 

map(seq_along(unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))), function(i){
  temp %>% 
  pivot_longer(temp_ys, names_to = "mRNA", values_to = "Count") %>% 
  filter(mRNA %in% unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))[i] ) %>% 
  mutate(Pharm = factor(Pharm, levels = c("None", "TEA", "AP", "TTX"))) %>% 
  ggboxplot(x = "Pharm", 
            y = "Count", 
            fill = "Pharm",
            width = 0.2,
            palette = RColorBrewer::brewer.pal(4, "Set1")
  )+
  geom_point(position = position_jitter(width = 0.05))+
  stat_compare_means(comparisons = list(
    c("None", "TEA"),
    c("None", "AP"),
    c("None", "TTX"),
    c("TEA", "AP")
  ), 
  label = "p.signif")+ # Add significance levels
  stat_compare_means(label.y = 0)+
  theme(legend.position = "",
        axis.title.y = element_text(face = "italic"))+
    labs(x = "", 
         y = as.character(unlist(distinct(select(filter(temp_p, p.adj < 0.05), y)))[i]))
})
)

2 Transition -- Do some of these changes replicate?

We've seen that at least some mRNAs change, presumably due to time and time incubated in the treatment.

Can we replicate these effects? When we focus on TEA do we get changes in the same transcripts?

Data preparation

# select the mRNAs present in both Brian's data and mine. 
mrna_rep <-select(mrna, 
                  Source, Pharm, Time, Experiment, Cell, 
                  bkkca, cav1, cav2, 
                  inx1, inx2, inx3, inx4, #inx5, #<- inx5 is likely not expressed
                  nav, 
                  shab, shaker, shal, shaw1, shaw2) %>% 
  filter(
    (Source == "Brian" & Pharm %in% c("None", "TEA")) |
      Source == "Daniel"
  ) %>% 
  filter(!(Source == "Brian" & Pharm %in% c("None") & Time == "24h")) %>% 
  mutate(Pharm = ifelse(Source == "Daniel" & Time == "0h", "None", Pharm)) %>% 
  select(-Pharm) %>% 
  distinct()

mrna_rep %>% skimr::skim()

mrna_rep %>% 
  group_by(Time, Source) %>% 
  tally() %>% 
  spread("Time", "n")


mrna_rep$uid <- as.character(seq(1, nrow(mrna_rep)))
mrna_rep <- mrna_rep %>% 
  select(-Experiment, -Cell) %>% 
  unite(Group, Source, Time)

mrna_rep <- mrna_rep[, c("Group", "uid",
                         names(mrna_rep)[!(names(mrna_rep) %in% c("Group", "uid"))])
]


# Remove Outliers 
mrna_rep_winxiqr_list <- map(unique(mrna_rep$Group), function(group){
  temp <- mrna_rep[mrna_rep$Group == group, ]
  dvs <- names(mrna_rep)[!(names(mrna_rep) %in% c("Group", "uid"))]

  for (dv_col  in dvs){
    temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA)
  }

  return(temp)
})

mrna_rep_winxiqr <- do.call(rbind, mrna_rep_winxiqr_list)

mrna_rep_winxiqr <- mrna_rep_winxiqr %>% 
  separate("Group", into = c("Source", "Time"), sep = "_")

Figure 1

plt_df <- data.frame( cor = round(cor_df[rw, cl], digits = 2), rad = abs(round(cor_df[rw, cl], digits = 2)), cor_col = cor_color_df[rw, cl] )

# print(
#   plt_df
# )

plt <- ggplot(plt_df)+
  geom_circle(aes(x0 = 0, y0 = 0, r = rad), color = plt_df$cor_col, fill = plt_df$cor_col)+
  geom_text(aes(x = 0, y = 1.25, label = cor))+
  theme_void()+
  lims(y = c(-1.5, 1.5), x = c(-1.5, 1.5))+
  coord_fixed()

Compare Correlations

## Get correlations ====
cor_groupings <- expand.grid(
  Source = c("Brian", "Daniel"),
  Time = c("0h", "1h", "24h")
)

cor_list <- map(seq(1, nrow(cor_groupings)), function(i){
  cor_df <- filter(mrna_rep_winxiqr, 
                   Source == cor_groupings[i, "Source"],
                   Time == cor_groupings[i, "Time"]) %>% 
    select(all_of(mrna_cols))

  cor_df <- cor(cor_df, use = "pairwise.complete.obs", method = "pearson")

  # Shave to diagonal
  for (j in seq(1, nrow(cor_df))){
    cor_df[j, 1:j] <- NA
  }

  cor_df <- as.data.frame(cor_df) %>% mutate(x = rownames(cor_df))

  cor_df <- pivot_longer(cor_df, 
                         cols = names(select(cor_df, -x)),
                         names_to = "y", 
                         values_to = "Corr") %>% 
    mutate(
      Source = cor_groupings[i, "Source"],
      Time = cor_groupings[i, "Time"]  
    ) %>% 
    select(Source, Time, x, y, Corr)

  return(cor_df)
})

cor_comp_df <- do.call(rbind, cor_list) %>% 
  filter(!is.na(Corr))

# add numeric version of the factor to use geom circle
cor_comp_df <- cor_comp_df %>% 
  mutate(x.num = as.numeric(factor(x, levels = mrna_cols)),
         y.num = as.numeric(factor(y, levels = mrna_cols)))



## Plotting ====
### Correlogram ####
library(ggforce) # for geom_circle
# Corrgram, but in ggplot
compare_corgrams_plt <- cor_comp_df %>% 
  mutate(Source = factor(Source, 
                         levels = c("Brian", "Daniel"), 
                         labels = c("Pilot", "Replicate"))) %>% 
  # df$supp <- factor(df$supp, levels = c("OJ", "VC"),
  #                 labels = c("Orange Juice", "Vitamin C")
  #                 )
  ggplot()+
  geom_circle(aes(x0 = x.num, y0 = y.num, r = abs(Corr)/2, fill = Corr))+
  scale_fill_distiller(palette = "PuOr", direction = 1, limits=c(-1,1))+
  scale_x_continuous(
    name = "",
    position = "top",
    breaks = 1:length(mrna_cols),
    labels = mrna_cols)+
  scale_y_continuous(
    name = "",
    breaks = 1:length(mrna_cols),
    labels = mrna_cols)+
  facet_grid(Source~Time)+
  theme(
    panel.grid.major = element_line(size = 0.5, linetype = 'solid',
                                    colour = NA),
    axis.text.x = element_text(angle = 45,
                               hjust = -0., 
                               vjust = .5, 
                               face = "italic"),
    axis.text.y = element_text(face = "italic"),
    legend.position = "right",
    strip.placement = "outside")+
  coord_fixed()


### Compare correlations across time ####

# make dataset amenable to faceting
cor_comp_df_for_points <- cor_comp_df %>% 
  spread(Time, Corr)

temp_1 <-
cor_comp_df_for_points %>%
  select(Source, x, y, x.num, y.num, `0h`, `24h`) %>%
  rename(x_time = `0h`,
         y_time = `24h`) %>%
  mutate(facet_x = "0h vs 24h")

temp_2 <-
cor_comp_df_for_points %>%
  select(Source, x, y, x.num, y.num, `0h`, `1h`) %>%
  rename(x_time = `0h`,
         y_time = `1h`) %>%
  mutate(facet_x = "0h vs 1h")

temp_3 <-
cor_comp_df_for_points %>%
  select(Source, x, y, x.num, y.num, `1h`, `24h`) %>%
  rename(x_time = `1h`,
         y_time = `24h`) %>%
  mutate(facet_x = "1h vs 24h")

cor_comp_df_for_points <- rbind(rbind(temp_1, temp_2), temp_3)
cor_comp_df_for_points <- cor_comp_df_for_points %>% 
  mutate(facet_x = factor(facet_x, 
                          levels = c(
                            "0h vs 24h",
                            "0h vs 1h",
                            "1h vs 24h"
                          )))

# plot
library(ggsci) # for scale_color_aaas
library(ggpubr) # for stat_cor and stat_regline

compare_cor_scatter_plt <- cor_comp_df_for_points %>% 
  mutate(Source = factor(Source, 
                         levels = c("Brian", "Daniel"), 
                         labels = c("Pilot", "Replicate"))) %>% 
  ggplot(aes(x = x_time, y = y_time, color = Source))+
  geom_abline(intercept = 0, slope = 1)+
  geom_hline(yintercept = 0, color = "darkgray")+
  geom_vline(xintercept = 0, color = "darkgray")+
  geom_smooth(method = "lm", se = F, fullrange = T)+
  geom_point(alpha = 0.4)+
  scale_color_aaas()+
  labs(title = "", x = "", y = "")+
  xlim( c(-1, 1))+
  ylim( c(-1, 1))+
  facet_grid(Source~facet_x)+
  stat_cor(label.x = -.61, label.y = -.8, size = 2.5) +
  stat_regline_equation(label.x = -.61, label.y = -1, size = 2.5)+
  theme(legend.position = "")+
  coord_fixed()


### Compare ECDFs across time ####

# current_Source = "Brian"
# time1 = "0h"
# time2 = "1h"

ecdf_list <- map(list(
  list(current_Source = "Brian", time1 = "0h", time2 = "24h"),
  list(current_Source = "Brian", time1 = "0h", time2 = "1h"),
  list(current_Source = "Brian", time1 = "1h", time2 = "24h"),

  list(current_Source = "Daniel", time1 = "0h", time2 = "24h"),
  list(current_Source = "Daniel", time1 = "0h", time2 = "1h"),
  list(current_Source = "Daniel", time1 = "1h", time2 = "24h")
  ), function(input_list){


    # input_list = list(current_Source = "Brian", time1 = "0h", time2 = "24h")

           current_Source = input_list$current_Source
           time1 = input_list$time1
           time2 = input_list$time2



           data1 <- filter(cor_comp_df, 
                           Source == current_Source & 
                             Time == time1
           ) %>% 
             select(Corr) %>% 
             unlist()

           data2 <- filter(cor_comp_df, 
                           Source == current_Source & 
                             Time == time2
           ) %>% 
             select(Corr) %>% 
             unlist()


           ecdf1 <- ecdf(data1)
           ecdf2 <- ecdf(data2)

           # used to get the most extreme difference between the two samples
           MostExtremeDiff <- seq(min(data1, data2, na.rm = T), max(data1, data2, na.rm = T), length.out = length(data1))
           x0 <- MostExtremeDiff[which(abs(ecdf1(MostExtremeDiff) - ecdf2(MostExtremeDiff)) ==
                                         max(abs(ecdf1(MostExtremeDiff) - ecdf2(MostExtremeDiff))))]
           y0 <- ecdf1(x0)
           y1 <- ecdf2(x0)

           # Run two sided KS test on data
           test.res <- ks.test(data1, data2)

           # Make output df
           graph.df <- 
             data.frame(data1, data2) %>% 
             gather(Condition, Value, 1:2) %>% 
             mutate(Condition = case_when(Condition == "data1" ~ time1, 
                                          Condition == "data2" ~ time2),
                    x0_extreme = x0[1],
                    y0_extreme = y0[1],
                    y1_extreme = y1[1],
                    Source = current_Source,
                    Contrast = paste0(time1 , " vs ", time2),
                    test_stat = test.res$statistic,
                    test_p = test.res$p.value
             )

           return(graph.df)
         })

ecdf_list_df <- do.call(rbind, ecdf_list)

ecdf_list_df <- ecdf_list_df %>% mutate(Contrast = factor(Contrast, levels = c("0h vs 24h", "0h vs 1h", "1h vs 24h")))

compare_cor_ecdf_plt <-
ecdf_list_df %>% 
  mutate(Source = factor(Source, 
                         levels = c("Brian", "Daniel"), 
                         labels = c("Pilot", "Replicate"))) %>% 
  ggplot()+
    geom_segment(aes(x = x0_extreme, y = y0_extreme, xend = x0_extreme, yend = y1_extreme),
                 # linetype = "dashed", 
                 color = "darkgray", size = 1)+
    geom_point(aes(x = x0_extreme , y= y0_extreme), color="darkgray", size=2) +
    geom_point(aes(x = x0_extreme , y= y1_extreme), color="darkgray", size=2) +
    stat_ecdf(aes(x = Value, group = Condition, color = Condition))+
  geom_label(aes(x = -0.75, y = 0.75, 
                label = paste0(
                  "KS = ", as.character(round(test_stat, 3)),
                  "\np = ", as.character(round(test_p, 3))
                  )), 
            size = 2)+
    labs(x = "Sample",
         y = "ECDF"#,
         # subtitle = paste("K-S Test", as.character(group1), "vs", as.character(group2),
         #               "\np-value:", as.character(round(test.res$p.value, digits = 4)))
         )+
  scale_color_brewer(palette = "Set2" )+
  xlim( c(-1, 1))+
  ylim( c(0, 1))+
    theme_minimal()+
    theme(legend.position = "right")+
  facet_grid(Source~Contrast)+
  coord_fixed()

    # scale_color_manual(values = colors)#+
  # theme(text=element_text(family="Calibri Light", size=14))


### Corr Table ####
cor_comp_df %>% 
  spread(Source, Corr) %>% 
  filter(Brian >= 0.7 & Daniel >= 0.7) %>%
  gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # spread(Time, Corr) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  select(-x.num, -y.num) %>% 
  select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h)





library(flextable)
# make display table
cor_comp_ft <- cor_comp_df %>% 
  mutate(Corr = round(Corr, digits = 3)) %>% 
  spread(Source, Corr) %>% 
  # mutate(Group = case_when( (Brian > 0.7) &  (Daniel > 0.7) ~ "Both", 
  #                           (Brian > 0.7) & !(Daniel > 0.7) ~ "Pilot", 
  #                          !(Brian > 0.7) &  (Daniel > 0.7) ~ "Replicate",
  #                          !(Brian > 0.7) & !(Daniel > 0.7) ~ "Neither"
  #                          )) %>% 
    gather(Source, Corr, c("Brian", "Daniel")) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = c("Corr")) %>% #, "Group")) %>% 
  select(-x.num, -y.num) %>% 
  select(x, 
         y,
         Brian_0h, 
         Daniel_0h, 
         Brian_1h,          
         Daniel_1h, 
         Brian_24h,          
         Daniel_24h) %>% 
  flextable()


cor_comp_ft


cor_comp_ft <- cor_comp_ft %>% theme_vanilla()

cor_comp_ft <- italic(cor_comp_ft, ~ y == y,  ~ y, italic = TRUE)
cor_comp_ft <- italic(cor_comp_ft, ~ x == x,  ~ x, italic = TRUE)


cor_comp_ft <- bold(cor_comp_ft, ~ Brian_0h >= 0.7, ~ Brian_0h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Brian_1h >= 0.7, ~ Brian_1h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Brian_24h >= 0.7, ~ Brian_24h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_0h >= 0.7, ~ Daniel_0h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_1h >= 0.7, ~ Daniel_1h, bold = TRUE)
cor_comp_ft <- bold(cor_comp_ft, ~ Daniel_24h >= 0.7, ~ Daniel_24h, bold = TRUE)


color_sig <- "#a1d99b"
cor_comp_ft <- bg(cor_comp_ft, ~ Brian_0h >= 0.7, ~ Brian_0h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Brian_1h >= 0.7, ~ Brian_1h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Brian_24h >= 0.7, ~ Brian_24h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_0h >= 0.7, ~ Daniel_0h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_1h >= 0.7, ~ Daniel_1h, bg = color_sig)
cor_comp_ft <- bg(cor_comp_ft, ~ Daniel_24h >= 0.7, ~ Daniel_24h, bg = color_sig)


cor_comp_ft <- set_header_labels(cor_comp_ft, 
                                 values = list(
                                   x = "", y = "",
                                   Brian_0h = "Pilot 0h", 
                                   Brian_1h = "Pilot 1h", 
                                   Brian_24h = "Pilot 24h", 
                                   Daniel_0h = "Replicate 0h", 
                                   Daniel_1h = "Replicate 1h", 
                                   Daniel_24h = "Replicate 24h"
                                 )

)

cor_comp_ft <- autofit(cor_comp_ft)
cor_comp_ft

pptx_file <- "./output/officer_output/CorComparison.pptx"
save_as_pptx("my table" = cor_comp_ft, path = pptx_file)

docx_file <- "./output/officer_output/CorComparison.docx"
save_as_docx("my table" = cor_comp_ft, path = docx_file)







# make counts tables
partial_count_table <- cor_comp_df %>% 
  mutate(Corr = ifelse(Corr > 0.7, T, F)) %>% 
  spread(Source, Corr) %>% 
  mutate(Group = case_when(Brian & Daniel ~ "Both", 
                           Brian & !Daniel ~ "Pilot", 
                           !Brian & Daniel ~ "Replicate",
                           !Brian & !Daniel ~ "Neither"
                           )) %>% 

  mutate(Group = factor(Group, 
                        levels = c("Both", "Pilot", "Replicate", "Neither")))

cor_change_count_ft <- partial_count_table %>% 
    select(Time, x, y, Group) %>% 
  group_by(Time, Group) %>% 
  tally() %>% 
  spread(Time, n) %>% 
  flextable()

cor_change_count_ft
cor_change_count_ft <- cor_change_count_ft %>% theme_vanilla()


cor_change_count_ft <- set_header_labels(cor_change_count_ft, 
                                 values = list(
                                   Group = "R >= 0.7"
                                 )

)

cor_change_count_ft <- autofit(cor_change_count_ft)
cor_change_count_ft

pptx_file <- "./output/officer_output/CorComparisonCount.pptx"
save_as_pptx("my table" = cor_change_count_ft, path = pptx_file)

docx_file <- "./output/officer_output/CorComparisonCount.docx"
save_as_docx("my table" = cor_change_count_ft, path = docx_file)







partial_count_table %>% 
    select(Time, x, y, Group) %>% 
  group_by(Time, Group) %>% 
  spread(Time, Group)



partial_count_table %>% 
    select(Time, x, y, Group) %>% 
  unite(cor, x, y, sep = " vs ") %>% 
  ggplot(aes(x=Time, y=cor, fill = Group))+
  geom_raster()+
  scale_fill_manual(values = c("#631879", "#EE0000", "#3B4992", "White"))+
  labs(y = "")+
  theme(axis.text.y = element_text(face = "italic"))













cor_comp_df %>% 
  spread(Source, Corr) %>% 
  filter(Brian >= 0.7 ) %>% ## | Daniel >= 0.7) %>%
  gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # spread(Time, Corr) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  select(-x.num, -y.num) %>% 
  select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h)


cor_comp_df %>% 
  spread(Source, Corr) %>% 
  filter(Daniel >= 0.7 ) %>% ## | Daniel >= 0.7) %>%
  gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # spread(Time, Corr) %>% 
  pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  select(-x.num, -y.num) %>% 
  select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h)




cor_comp_df %>% 
  ggplot(aes(x = Corr, color = Time, fill = Time))+
  geom_density(alpha = 0.3)+
  facet_grid(Source~.)







# compare_cor_table <-

  # cor_comp_df %>% 
  # spread(Source, Corr) %>% 
  # # filter(Brian >= 0.7 | Daniel >= 0.7) %>% 
  # gather(Source, Corr, c("Brian", "Daniel")) %>% 
  # # spread(Time, Corr) %>% 
  # pivot_wider(names_from = c("Source", "Time"), values_from = "Corr") %>% 
  # select(-x.num, -y.num) %>% 
  # select(x, y, Brian_0h, Daniel_0h, Brian_1h, Daniel_1h, Brian_24h, Daniel_24h) %>% 
  # mutate(Diff_0h = Brian_0h - Daniel_0h, 
  #        Diff_1h = Brian_1h - Daniel_1h, 
  #        Diff_24h = Brian_24h - Daniel_24h) %>% 
  # ggplot()+
  # geom_point(aes(x = Brian_0h, y = Daniel_0h), color = "black")




library(gridExtra)

ggsave(here("output", "officer_output", "compare_corgrams_plt.tiff"),
       compare_corgrams_plt, 
       width = 8.5, units = "in")

ggsave(here("output", "officer_output", "compare_cor_scatter_plt.tiff"),
       compare_cor_scatter_plt, 
       width = 8.5, units = "in")

ggsave(here("output", "officer_output", "compare_cor_ecdf_plt.tiff"),
       compare_cor_ecdf_plt, 
       width = 8.5, units = "in")

Make a just in case set of tables

unique_comparisons <- list()
for (i in seq_along(mrna_cols)){
  for (j in i:length(mrna_cols) ){
    unique_comparisons[[length(unique_comparisons)+1]] <- 
      data.frame(x = mrna_cols[i],
           y = mrna_cols[j])
  }  
}
unique_comparisons <- do.call(rbind, unique_comparisons)
unique_comparisons <- unique_comparisons %>% filter(x != y)

unique_comparisons$slope.p <- NA



# Ancova
pb = txtProgressBar(min = 0, max = nrow(unique_comparisons), 
                    initial = 0, style = 3, char = '█') 
for (i in 1:nrow(unique_comparisons)){
  setTxtProgressBar(pb,i)

  temp <- filter(mrna_rep_winxiqr, 
                 Source == "Daniel") %>% 
    select(unique_comparisons[i, "x"],
           unique_comparisons[i, "y"],
           "Time")

  fm <- lm(
    as.formula(paste(unique_comparisons[i, "y"], 
                     "~", 
                     unique_comparisons[i, "x"], 
                     "* Time")
    ), 
    data = temp
  )

  unique_comparisons[i, "slope.p"] <- broom::tidy(car::Anova(fm))[3, "p.value"]
}


# Pearson
unique_comparisons <- 
  rbind(mutate(unique_comparisons, Time = "0h"), 
      mutate(unique_comparisons, Time = "1h")) %>% 
  rbind(., mutate(unique_comparisons, Time = "24h"))

unique_comparisons$cor <- NA
unique_comparisons$t <- NA
unique_comparisons$p.value <- NA
unique_comparisons$df <- NA

pb = txtProgressBar(min = 0, max = nrow(unique_comparisons), 
                    initial = 0, style = 3, char = '█') 
for (i in 1:nrow(unique_comparisons)){
  setTxtProgressBar(pb,i)
  temp <- filter(mrna_rep_winxiqr, 
                 Source == "Daniel",
                 Time == unique_comparisons[i, "Time"]) %>% 
    select(unique_comparisons[i, "x"],
           unique_comparisons[i, "y"])


  # Cors
  res <- cor.test(x = temp[[unique_comparisons[i, "x"]]],
                  y = temp[[unique_comparisons[i, "y"]]],
                  method = "pearson", 
                  use = "pairwise.complete.obs")
  res <- broom::tidy(res)[, c("estimate", "statistic", "p.value", "parameter")]
  names(res) <- c("cor", "t",  "p.value", "df")

  unique_comparisons[i, c("cor", "t",  "cor.p", "df")] <- res
}


# full ref



# useful ref
cor_use_ft <- unique_comparisons %>% 
  select(x, y, Time, slope.p, cor, cor.p) %>%

  #fdr adj slope p
  mutate(slope.p = p.adjust(slope.p, method = "fdr")) %>% 
  pivot_wider(names_from = "Time", values_from = c("cor.p", "cor")) %>% 
  mutate(
    cor.p_0h = p.adjust(cor.p_0h, method = "fdr"),
    cor.p_1h = p.adjust(cor.p_1h, method = "fdr"),
    cor.p_24h = p.adjust(cor.p_24h, method = "fdr")
  ) %>% 
  mutate_at(.vars = c(
    "slope.p", 
    "cor.p_0h", "cor.p_1h", "cor.p_24h", 
    "cor_0h", "cor_1h", "cor_24h"), 
    .funs = round, digits = 3) %>% 
  flextable()



cor_use_ft <- cor_use_ft %>% theme_vanilla()

cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ x, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ y, bold = TRUE)

cor_use_ft <- bold(cor_use_ft, ~ slope.p <= 0.05, ~ slope.p, bold = TRUE)

cor_use_ft <- bold(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor.p_0h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor_0h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor.p_1h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor_1h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor.p_24h, bold = TRUE)
cor_use_ft <- bold(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor_24h, bold = TRUE)


color_sig <- "#a1d99b"
cor_use_ft <- bg(cor_use_ft, ~ slope.p <= 0.05, ~ slope.p, bg = color_sig)

cor_use_ft <- bg(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor.p_0h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_0h <= 0.05, ~ cor_0h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor.p_1h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_1h <= 0.05, ~ cor_1h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor.p_24h, bg = color_sig)
cor_use_ft <- bg(cor_use_ft, ~ cor.p_24h <= 0.05, ~ cor_24h, bg = color_sig)



cor_use_ft <- set_header_labels(cor_use_ft, 
                                values = list(
                                  x = "", y = "",
                          slope.p = "ANCOVA p",
                          cor.p_0h = "p 0h",
                          cor.p_1h = "p 1h",
                          cor.p_24h = "p 24h",
                          cor_0h = "R 0h",
                          cor_1h = "R 1h",
                          cor_24h = "R 24h"
                                )

)

cor_use_ft <- autofit(cor_use_ft)
cor_use_ft





pptx_file <- "./output/officer_output/DanielAncova.pptx"
save_as_pptx("my table" = cor_use_ft, path = pptx_file)

docx_file <- "./output/officer_output/DanielAncova.docx"
save_as_docx("my table" = cor_use_ft, path = docx_file)

df_for_cor_of_cors <- do.call(rbind, cor_list) %>% filter(!is.na(Corr))%>% 
  spread(Time, Corr)


# df_for_cor_of_cors  %>% 
#   ggplot(aes(x = `0h`, y=`1h`, group = Source, color = Source, fill = Source))+
#   geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+
#   geom_smooth(method = "lm", fullrange = T)+
#   geom_point()+
#   geom_point(shape = 1, color = "black")
#   # geom_point(aes(x = `0h`, y=`24h`), color = "red")
# 
# df_for_cor_of_cors  %>% 
#   ggplot(aes(x = `0h`, y=`24h`, group = Source, color = Source, fill = Source))+
#   geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+
#   geom_smooth(method = "lm", fullrange = T)+
#   geom_point()+
#   geom_point(shape = 1, color = "black")


df_for_cor_of_cors %>% 
  gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
  spread("Source", "Corr") %>% 
  ggplot(aes(x = Brian, y = Daniel, group = Time, color = Time, fill = Time))+
  geom_segment(aes(x = -1, y = -1, xend = 1, yend = 1), color = "black", size = 1, linetype = "dashed")+
  geom_smooth(method = "lm", fullrange = T)+
  geom_point()+
  geom_point(shape = 1, color = "black")+
  facet_grid(.~Time)+
  coord_fixed()


# correlation between us 
df_for_cor_of_cors %>% 
  gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
  spread("Source", "Corr") %>% 
  group_by(Time) %>% 
  summarise(CorrCorr = cor(Brian, Daniel, method = "spearman")) %>% 
  ggplot(aes(x = Time, y = CorrCorr, color = CorrCorr))+
  geom_hline(yintercept = 0)+
  geom_segment(aes(xend = Time, yend = 0), size= 1)+
  geom_point(size= 2)+
  geom_point(size= 2, shape = 1, color = "black")+
  scale_color_gradient2(
    low = RColorBrewer::brewer.pal(9, "PuOr")[1],
    high = RColorBrewer::brewer.pal(9, "PuOr")[9])+
  lims(y = c(-1, 1))+
  theme(legend.position = "")


set.seed(8978979)
resample_cor_of_cor <- map(seq(1, 1001), function(i){
  if (i == 1){
    res <- df_for_cor_of_cors %>% 
      ungroup() %>% 
      gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
      spread("Source", "Corr") %>% 
      group_by(Time) %>% 
      summarise(CorrCorr = cor(Brian, Daniel, method = "spearman"))      
  } else {
    res <- df_for_cor_of_cors %>%
      ungroup() %>% 
      gather("Time", "Corr", c("0h", "1h", "24h")) %>% 
      mutate(Corr = sample(Corr)) %>% 
      spread("Source", "Corr") %>%
      group_by(Time) %>%
      summarise(CorrCorr = cor(Brian, Daniel, method = "spearman"))  
  }

  res$i <- i
  return(res)
})

resample_cor_of_cor <- do.call(rbind, resample_cor_of_cor)

resample_cor_of_cor %>% 
  mutate(obs = case_when(Time == "0h" ~  as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "0h"), CorrCorr)),
                         Time == "1h" ~  as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "1h"), CorrCorr)),
                         Time == "24h" ~ as.numeric(select(filter(resample_cor_of_cor, i == 1 & Time == "24h"), CorrCorr)))) %>% 
  mutate(bool = CorrCorr >= obs) %>%
  group_by(Time) %>% 
  mutate(ep = mean(bool)) %>% 
  ggplot(aes(x = CorrCorr, color = Time, fill = Time))+
  geom_segment(aes(x = obs, xend = obs, y = 0, yend = 3))+
  geom_density(alpha = 0.2)+
  facet_grid(Time~.)

 #   Time  CorrCorr     i   obs bool        ep
 #   <chr>    <dbl> <int> <dbl> <lgl>    <dbl>
 # 1 0h     0.176       1 0.176 TRUE  0.0549  
 # 2 1h     0.380       1 0.380 TRUE  0.000999
 # 3 24h    0.128       1 0.128 TRUE  0.124  






  # spread(Time, CorrCorr) %>%

Contrast correlation thresholds -- did they fall in similar ranges? Bin correlations and examine with a jaccard index

n_bins <- 5

library('clusteval') # for cluster_similarity(x, y, similarity="jaccard")


set.seed(9080)
cor_jaccard_list <- map(c("0h", "1h", "24h"), function(Time){
  temp <- cor_comp_df[cor_comp_df$Time == Time, ] %>% 
    mutate(Bins = cut(Corr, breaks = seq(-1, 1, length.out = n_bins))) %>% 
    select(-Corr) %>% 
    pivot_wider(names_from = "Source", values_from = "Bins")

  obs_jaccard <- cluster_similarity(temp$Brian, temp$Daniel, similarity="jaccard")  


  null_jaccard <- map(1:10000, function(i){
    cluster_similarity(sample(temp$Brian, replace = F), 
                       temp$Daniel, similarity="jaccard")
  }) %>% 
    unlist()


  temp <- with(density(null_jaccard), data.frame(x, y))
  temp <- temp %>% mutate(xmax = max(x),
                          obs = obs_jaccard,
                          ep = mean(null_jaccard >= obs_jaccard))

  return(temp)
})


plot_grid(plotlist = map(cor_jaccard_list, function(temp){
  ggplot(data = temp, aes(x = x, y = y))+
    geom_line()+
    geom_vline(xintercept = temp$obs[1])+
    geom_ribbon(data = temp[temp$x > temp$obs, ], 
                aes(xmin = obs, xmax = xmax, ymin = 0, ymax = y))+
    labs(subtitle = paste("    ep=", as.character(temp$ep[1])))
}), labels = c("0h", "1h", "24h"), nrow = 1)

Compare Directional Changes

# calc p values with kruskal-Wallis and follow it up with a Dunn's ####
mrna_rep_winxiqr <- mrna_rep_winxiqr %>% 
  pivot_longer(
    names_to = "mRNA",
    values_to = "Count",
    cols = names(select(mrna_rep_winxiqr, -Source, -Time, -uid)))

stats <- expand.grid(
  Source = c("Brian", "Daniel"),
  # Time = c("0h", "1h", "24h"),
  mRNA = unique(mrna_rep_winxiqr$mRNA),
  Main = NA,
  BvsC = NA,
  BvsD = NA,
  CvsD = NA
)

for (i in 1:nrow(stats)){
  t_Source <- stats[i, "Source"]
  t_mRNA <- stats[i, "mRNA"]

  temp <- mrna_rep_winxiqr %>% 
    filter(Source == t_Source & mRNA == t_mRNA & !is.na(Count)) %>% 
    mutate(Time = as.factor(Time)) # Avoid coering to factor in dunnTest()

  stats[i, "Main"] <- kruskal.test(Count ~ Time, temp)$p.value

  # i = 1
  dunn_res <- dunnTest(Count ~ Time, 
                       data = temp, 
                       method = "bonferroni") # bonferroni and hs produce the same result since it's within a single round of kruskal-wallis
  #FIXME -- Is bonferroni ideal here or would we be better off with sidak, holm, or hs?
  dunn_res <- dunn_res$res

  stats[i, "BvsC"] <- filter(dunn_res, Comparison == "0h - 1h")$P.adj
  stats[i, "BvsD"] <- filter(dunn_res, Comparison == "0h - 24h")$P.adj
  stats[i, "CvsD"] <- filter(dunn_res, Comparison == "1h - 24h")$P.adj
}

# get medians and prepare to plot ####
mrna_rep_medians <- mrna_rep_winxiqr %>% 
  group_by(Source, Time, mRNA) %>% 
  summarise(MedianCount = median(Count, na.rm =T)) %>% 
  arrange(Source, mRNA, Time)


mrna_rep_medians <- mrna_rep_medians %>% 
  mutate(
    MedianCount2 = MedianCount,
    id = case_when(Time == "0h" ~ 1,
                   Time == "1h" ~ 2,
                   Time == "24h" ~ 3)) %>% 
  pivot_wider(names_from = "Time", values_from = "MedianCount") %>% 
  rename(MedianCount = MedianCount2) %>% 
  group_by(Source, mRNA) %>% 
  mutate(`0h` = median(`0h`, na.rm = T),
         `1h` = median(`1h`, na.rm = T),
         `24h` = median(`24h`, na.rm = T)) %>% 
  ungroup() %>% 
  mutate(updown12 = ifelse(`0h` < `1h`, "up", "down"), 
         updown13 = ifelse(`0h` < `24h`, "up", "down"),
         updown23 = ifelse(`1h` < `24h`, "up", "down"))


# combine stats and prepared plotting df####
mrna_rep_medians <- full_join(mrna_rep_medians, stats) %>% 
  mutate(color1 = ifelse(Main < 0.05 & BvsC < 0.05, updown12, "none"),
         color2 = ifelse(Main < 0.05 & CvsD < 0.05, updown23, "none")) %>% 
  mutate(color1 = factor(color1, levels = c("down", "none", "up")),
         color2 = factor(color2, levels = c("down", "none", "up")))

# plot ####
temp <- mrna_rep_winxiqr %>% spread("Time", "Count")

ggplot(mrna_rep_medians)+
  geom_point(data = temp, aes(x = 0.5, y = `0h`), shape = "-")+
  geom_point(data = temp, aes(x = 1.5, y = `1h`), shape = "-")+
  geom_point(data = temp, aes(x = 2.5, y = `24h`), shape = "-")+

  geom_segment(aes(x = 0, xend = 1, y = `0h`, yend = `0h`), size = 1)+
  geom_segment(aes(x = 1, xend = 2, y = `1h`, yend = `1h`, 
                   color = color1), size = 1)+
  geom_segment(aes(x = 2, xend = 3, y = `24h`, yend = `24h`, 
                   color = color2), size = 1)+

  geom_segment(aes(x = 1, xend = 1, y = `0h`, yend = `1h`, 
                   color = color1), arrow = arrow(length = unit(0.03, "npc")), size = 1)+
  geom_segment(aes(x = 2, xend = 2, y = `1h`, yend = `24h`, 
                   color = color2), arrow = arrow(length = unit(0.03, "npc")), size = 1)+
  scale_color_manual(values = c("red", "gray", "green"))+
  theme_clean()+
  theme(legend.position = "")+
  facet_wrap(Source ~ mRNA, scales = "free_y", nrow = 2)

How similar are the results?

temp <- mrna_rep_medians %>% 
  select(Source, mRNA, starts_with("updown"), Main, BvsC, BvsD, CvsD) %>% 
  distinct() 

# Convert the results to catagories based on what our conclusion would be
# Is there a main effect? 
# Significantly different group? 
# What direction is it different in?
temp <- temp %>% 
  # If there is no main effect all comparisons are in the NA group
  mutate( 
    BvsC = ifelse(Main >= 0.05, NA, BvsC),
    BvsD = ifelse(Main >= 0.05, NA, BvsD),
    CvsD = ifelse(Main >= 0.05, NA, CvsD)
  ) %>% 
  # If there is no post hoc effect that comparison is in the NA group
  mutate( 
    BvsC = ifelse(BvsC >= 0.05, NA, BvsC),
    BvsD = ifelse(BvsD >= 0.05, NA, BvsD),
    CvsD = ifelse(CvsD >= 0.05, NA, CvsD)
  ) %>% 
  # Re-write lack of a change as 'none' instead of NA
  # if there _is_ a post hoc effect then assign that cell the sign of change
  mutate( 
    BvsC = ifelse(is.na(BvsC), 'none', updown12 ),
    BvsD = ifelse(is.na(BvsD), 'none', updown13 ),
    CvsD = ifelse(is.na(CvsD), 'none', updown23 )
  ) %>% 
  select(Source, mRNA, BvsC, BvsD, CvsD )



temp <- temp %>% 
  pivot_longer(cols = c("BvsC", "BvsD", "CvsD")) %>% 
  mutate(value = factor(value, levels = c("down", "none", "up"))) %>% 
  pivot_wider(names_from = "Source", values_from = "value")


temp %>% 
  pivot_longer(c("Brian", "Daniel"), 
               names_to = "Source", 
               values_to = "value") %>% 
  ggplot(aes(x = Source, y = value, color = Source, group = Source))+
  geom_hline(yintercept = c("none"))+
  geom_segment(aes(xend = Source, yend = "none"), size = 3, lineend = "butt")+
  facet_grid(name~mRNA)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        legend.position = "")



obs_jaccard <- cluster_similarity(temp$Brian, temp$Daniel, similarity="jaccard")  

null_jaccard <- map(1:10000, function(i){
  cluster_similarity(sample(temp$Brian, replace = F), 
                     temp$Daniel, similarity="jaccard")
}) %>% 
  unlist()


temp <- with(density(null_jaccard), data.frame(x, y))
temp <- temp %>% mutate(xmax = max(x),
                        obs = obs_jaccard,
                        ep = mean(null_jaccard >= obs_jaccard))

ggplot(data = temp, aes(x = x, y = y))+
  geom_line()+
  geom_vline(xintercept = temp$obs[1])+
  geom_ribbon(data = temp[temp$x > temp$obs, ], 
              aes(xmin = obs, xmax = xmax, ymin = 0, ymax = y))+
  labs(subtitle = paste("    ep=", as.character(temp$ep[1])))

3 Primary Questions

set up data

# Copied from above ----
M_winxiqr_list <- map(c("0h", "1h", "24h"), function(condition){
  temp <- M_all[M_all$Condition == condition, ]

  dvs <- names(M_all)[!(names(M_all) %in% c("Condition", "Cell", "Experiment", "Channel", "Sweep", "Source", "Pharm", "Time", "Sample"))]

  for (dv_col  in dvs){
    temp[[dv_col]] <- ifelse(win_x_iqr(temp[[dv_col]]), temp[[dv_col]], NA)
  }

  return(temp)
})

M_winxiqr <- do.call(rbind, M_winxiqr_list)
M_winxiqr <- M_winxiqr %>% 
  # select(-Sweep) %>% 
  group_by(Condition, Cell, Experiment, Channel, Source, Pharm, Time, Sample) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct()

# Median imputation ----
M_winxiqr <- M_winxiqr %>% 
  select(-Channel, -Source, -Pharm, -Time, -Sample)

M_winxiqr <- M_winxiqr %>% 
  group_by(Condition, Experiment, Cell) %>%
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct()

library(imputeMissings)
M_winxiqr <- imputeMissings::impute(M_winxiqr, method = "median/mode")


for_label_Condition <- M_winxiqr$Condition
rownames(M_winxiqr) <- paste(M_winxiqr$Experiment, M_winxiqr$Cell, sep = "-")

Are the three time points separable in multidimensional space?

PCA

list_for_pca <- list(
  M_winxiqr[M_winxiqr$Condition == "0h", 
            names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]],
  M_winxiqr[M_winxiqr$Condition == "1h", 
            names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]],
  M_winxiqr[M_winxiqr$Condition == "24h", 
            names(M_winxiqr)[!(names(M_winxiqr) %in% c("Condition", "Cell", "Experiment"))]]
)

pca_summary_descriptions <- map(list_for_pca, function(df){
  mk_pca_multi_plt(input.df = df,
                   scree.max.y = 100)
})

pca_summary_descriptions <- map(list_for_pca, function(df){
  mk_pca_multi_plt(input.df = df,
                 scree.max.y = 100)
})


# {p4|p0}/{p1+p2+p3}

{pca_summary_descriptions[[1]]$scree | pca_summary_descriptions[[2]]$scree | pca_summary_descriptions[[3]]$scree}/
{pca_summary_descriptions[[1]]$biplt | pca_summary_descriptions[[2]]$biplt | pca_summary_descriptions[[3]]$biplt}
# {pca_summary_descriptions[[1]]$cont1 | pca_summary_descriptions[[2]]$cont1 | pca_summary_descriptions[[3]]$cont1}

ggsave(plot = last_plot(), 
       filename = here("output", "officer_output", "PCA_Contrast.tiff"),        
       width = 11.5, height = 4.76)

## mrna only ====
vis_pca_3D_plotly(
  input.df = M_winxiqr[, names(M_winxiqr)[!(names(M_winxiqr) %in% c(
    "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]],
  color.by = unlist(select(M_winxiqr, Condition)),
  use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"),
  point.size = 3,
  point.opacity = 1,
  dropdowns = T,
  dropdown.width = 1.5,
  dropdown.opacity = 0.5,
  ellipse.opacity = 0#.02
)


## membrane only ====
vis_pca_3D_plotly(
  input.df = M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    # "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    # "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]],
  color.by = unlist(select(M_winxiqr, Condition)),
  use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"),
  point.size = 3,
  point.opacity = 1,
  dropdowns = T,
  dropdown.width = 1.5,
  dropdown.opacity = 0.5,
  ellipse.opacity = 0#.02
)


## excitability only ====
vis_pca_3D_plotly(
  input.df = M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    # "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"#, 
    # "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]],
  color.by = unlist(select(M_winxiqr, Condition)),
  use.colors = RColorBrewer::brewer.pal(3, "Set2"),#c("#cc4c02", "#e31a1c", "#800026"),
  point.size = 3,
  point.opacity = 1,
  dropdowns = T,
  dropdown.width = 1.5,
  dropdown.opacity = 0.5,
  ellipse.opacity = 0#.02
)

UMAP

set.seed(16069)
temp.umap = umap(select(M_winxiqr, 
                        -Condition, -Cell, -Experiment, 
                        -cc.3_4, -cc.3_5, -ig.3_4, -ig.3_5))

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  coord_fixed()






## mrna only ====
temp.umap = umap(
  M_winxiqr[, names(M_winxiqr)[!(names(M_winxiqr) %in% c(
    "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]]
  )

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

umap_mrna <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  theme(legend.position = "bottom")
  # coord_fixed()

## membrane only ====
temp.umap = umap(
  M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    # "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    # "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta", 
    "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]]
  )

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

umap_membrane <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  theme(legend.position = "bottom")
  # coord_fixed()

## excitability only ====
temp.umap = umap(
  M_winxiqr[, names(M_winxiqr)[(names(M_winxiqr) %in% c(
    # "Condition", "Cell", "Experiment",
    # "vrest", "r11", "r1", 
    # "cc.3_4", "cc.3_5", "ig.3_4", "ig.3_5", 
    "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "Max.Amplitude_Sim", 
    "AUC_Obs", "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"#, 
    # "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"
  ))]]
  )

temp <- temp.umap$layout %>% as.tibble()
col_by_df <- M_winxiqr[, "Condition"] 

# plot with convex hulls
df <- temp
df$Group <- col_by_df

find_hull <- function(df) df[chull(df$V1, df$V2), ]
hulls <- plyr::ddply(df, "Group", find_hull)

umap_excite <- ggplot(df, aes(V1, V2, color = Group, fill = Group))+
  geom_polygon(data = hulls, alpha = 0.1) +
  geom_point()+
  scale_color_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  scale_fill_manual(values = RColorBrewer::brewer.pal(3, "Set2"))+
  theme(legend.position = "bottom")
  # coord_fixed()

umap_mrna + umap_membrane + umap_excite

Clustering

mk_hclust_plts <- function(
  df = unite(M_winxiqr, uid, Experiment, Cell, sep = "-"),
  cluster_by = c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"),
  uid_col = "uid",
  n_clusters = 3,
  true_groups = "Condition",
  true_colors = RColorBrewer::brewer.pal(3, "Set2"),
  cluster_colors = RColorBrewer::brewer.pal(3, "Set1")
){
  df <- as.data.frame(df) 

  if (!exists("cluster_colors")){
    cluster_colors = rainbow(n_clusters)
  }
  ## prep
  # move uid to rowname
  row.names(df) <- df[[uid_col]]
  df_groups <- select(df, all_of(true_groups))
  df <- df[, cluster_by]

  ## Cluster
  cluster <- pvclust(t(df),
                     method.hclust = "ward.D2",
                     method.dist = "correlation",
                     use.cor = "pairwise.complete.obs")


  ## make dendrogram  ####
  dend <- cluster$hclust %>% 
    as.dendrogram() 

  # iteratively coloring the labels is a workaround to get the "true" groups shown
  dend_labs <- rownames_to_column(df_groups, var = "rownames")[, ]
  dend_labs <- full_join(data.frame(rownames = labels(dend)), dend_labs)
  for(i in seq_along(unique(df_groups[[true_groups]]))){
    true_group <- unique(df_groups[[true_groups]])[i]
    true_color <- true_colors[i]

    dend <- dend %>% 
      dendextend::color_labels(
        col = true_color, 
        labels = dend_labs[dend_labs[[true_groups]] == true_group, "rownames"]) 


  }

  dend <- dend %>% 
    set("branches_k_color", 
        k = n_clusters, 
        value = cluster_colors
    ) %>% 
    set("branches_lwd", 0.7) %>%
    set("labels_cex", 0.6) 

  dend_cluster_only <- dend %>% 
    set("labels_colors",
        k = n_clusters,
        value = cluster_colors) %>%
    as.ggdend()


  dend <- dend %>% 
    as.ggdend()



  plt_dend_cluster_only <- ggplot(dend_cluster_only)+
    theme(axis.ticks.y = element_line(),
          axis.text.y = element_text(),
          axis.line.y = element_line())


  plt_dend <- ggplot(dend)+
    theme(axis.ticks.y = element_line(),
          axis.text.y = element_text(),
          axis.line.y = element_line())  


  ## Add reality ribbon with or without clustering result ####
  groups_to_plt <- full_join(
    as.data.frame(dend$labels),
    rownames_to_column(var = "label", df_groups))

  plt_grouping <- groups_to_plt %>% 
    ggplot(aes_string(x="x", y="0", fill = true_groups))+
    geom_tile()+
    scale_fill_manual(values = true_colors)+
    theme_void()+
    labs(x = "", y = "")+
    theme(legend.position = "left")

  plt_grouping_contrast <- ggplot()+
    geom_tile(data = groups_to_plt, aes_string(x="x", y="0.5", fill = true_groups))+
    scale_fill_manual(values = true_colors)+

    ggnewscale::new_scale("fill") +
    geom_tile(data = data.frame(x = seq_along(dend_cluster_only$labels$col),
                                cluster_groups = as.character(as.numeric(as.factor(dend_cluster_only$labels$col)))
    ),
    aes_string(x="x", y= "-0.5", fill = "cluster_groups"),
    )+
    scale_fill_manual(values = cluster_colors)+

    theme_void()+
    labs(x = "", y = "")+
    theme(legend.position = "left")


  ## Add heatmap  ####
  data_to_plt <- full_join(
    as.data.frame(dend$labels),
    rownames_to_column(var = "label", df)) 

  data_to_plt <- 
    data_to_plt %>% 
    gather("key", "value", 
           names(data_to_plt)[
             !(names(data_to_plt) %in% c("x", "y", 
                                         "label", "col", "cex", 
                                         true_groups))
           ])

  plt_heatmap_raw <- data_to_plt %>% 
    ggplot(aes(x, 
               y = key, 
               fill = value))+
    geom_tile()+
    scale_fill_viridis_c()+
    labs(x = "", y = "")+
    theme(panel.background = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          legend.position = "left")


  plt_heatmap_z <- data_to_plt %>% 
    group_by(key) %>% 
    mutate(mean = mean(value, na.rm = T),
           sd = sd(value, na.rm = T)) %>% 
    mutate(value = ((value - mean)/sd)) %>% # Now Z scores
    ggplot(aes(x, 
               y = key, 
               fill = value))+
    geom_tile()+
    scale_fill_viridis_c()+
    labs(x = "", y = "")+
    theme(panel.background = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.x = element_blank(),
          legend.position = "left")


  ## Return plots, manually tweak layout  ####
  return(
    list(
      pvclust_out = cluster,
      dendrogram_clusters = plt_dend_cluster_only,
      dendrogram_both = plt_dend,
      group_tile = plt_grouping,
      group_compare_tile = plt_grouping_contrast,
      heatmap_raw = plt_heatmap_raw,
      heatmap_z = plt_heatmap_z
    )
  )
}

## mrna only ====
o_mrna <- 
  mk_hclust_plts(
    df = rownames_to_column(M_winxiqr, "uid"),
    cluster_by = mrna_cols,
    uid_col = "uid",
    n_clusters = 3,
    true_groups = "Condition",
    true_colors = RColorBrewer::brewer.pal(3, "Set2"), #c("#fdd49e", "#fc8d59", "#b30000"),
    cluster_colors = RColorBrewer::brewer.pal(3, "Set1") 
  )

(o_mrna$dendrogram_both+
  scale_y_continuous(limits = c(-1.51, 3), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mrna$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  )+ 
  patchwork::plot_layout(heights = c(2, .2))



(o_mrna$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mrna$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(.2, 5))



(o_mrna$dendrogram_both+
  scale_y_continuous(limits = c(-1.51, 1.51), labels = scales::comma)#c(-1.51, 3), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mrna$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mrna$heatmap_z + theme(legend.position = "right", axis.text.y = element_text(face = "italic")) /
  (o_mrna$heatmap_raw+ylim()+theme(legend.position = "right", axis.text.y = element_text(face = "italic")))+ 
  patchwork::plot_layout(heights = c(5, .5, 5, 5))




## membrane only ====
o_mem <- 
  mk_hclust_plts(
    df = rownames_to_column(M_winxiqr, "uid"),
    cluster_by = c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"),
    uid_col = "uid",
    n_clusters = 3,
    true_groups = "Condition",
    true_colors = RColorBrewer::brewer.pal(3, "Set2"),
    cluster_colors = RColorBrewer::brewer.pal(3, "Set1") 
  )


(o_mem$dendrogram_both+
  scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mem$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  )+ 
  patchwork::plot_layout(heights = c(2, .2))



(o_mem$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mem$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(.2, 1))




(o_mem$dendrogram_both+
  scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_mem$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_mem$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(2, .2, 1))


## excitability only ====
o_exc <- 
  mk_hclust_plts(
    df = rownames_to_column(M_winxiqr, "uid"),
    cluster_by = epsp_cols,
    uid_col = "uid",
    n_clusters = 3,
    true_groups = "Condition",
    true_colors = RColorBrewer::brewer.pal(3, "Set2"),
    cluster_colors = RColorBrewer::brewer.pal(3, "Set1") 
  )

(o_exc$dendrogram_both+
  scale_y_continuous(limits = c(-.15, .75), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_exc$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  )+ 
  patchwork::plot_layout(heights = c(2, .2))




(o_exc$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_exc$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(.2, 1))




(o_exc$dendrogram_both+
  scale_y_continuous(limits = c(-.15, .75), labels = scales::comma)
  # theme(plot.margin=unit(c(1,1,1.5,1),"cm"))
)/ 
  (o_exc$group_compare_tile+
     # lims(y =  c(2, -2))+ # y axis can be flipped like so
     theme(legend.position = "")
  ) / 
  (o_exc$heatmap_z + theme(legend.position = "bottom")) + 
  patchwork::plot_layout(heights = c(2, .2, 1))



# (o_mrna$group_compare_tile+theme(legend.position = "")) /
#   (o_mrna$heatmap_z + theme(legend.position = "bottom"))+
#   patchwork::plot_layout(heights = c(.2, 1)) |
# 
# (o_exc$group_compare_tile+theme(legend.position = "")) /
#   (o_exc$heatmap_z + theme(legend.position = "bottom"))+
#   patchwork::plot_layout(heights = c(.2, 1)) |
# 
# (o_mem$group_compare_tile+theme(legend.position = "")) /
#   (o_mem$heatmap_z + theme(legend.position = "bottom"))+
#   patchwork::plot_layout(heights = c(.2, 1))



((o_mrna$dendrogram_both+
  scale_y_continuous(limits = c(-.52, 3), labels = scales::comma))+
(o_mem$dendrogram_both+
  scale_y_continuous(limits = c(-.251, 1.25), labels = scales::comma))+
(o_exc$dendrogram_both+
  scale_y_continuous(limits = c(-.15, .75), labels = scales::comma)))/

  ((o_mrna$group_compare_tile+theme(legend.position = ""))+
(o_mem$group_compare_tile+theme(legend.position = ""))+
(o_exc$group_compare_tile+theme(legend.position = "")))+
  patchwork::plot_layout(heights = c(1, .1))

Evaluating clustering quality

# # if (!requireNamespace("BiocManager", quietly = TRUE))
# #     install.packages("BiocManager")
# # BiocManager::install("BiocGenerics")
# library(BiocGenerics)
# library(imputeMissings)
# library(magrittr) # for %>% (this is part of tidyverse)
# library(clues)
# library(NMF)
# 
# ## Evaluate clustering performance ==========================================
# get_cluster_comparisons <- function(reference.clustering = mrna_raw$Cell,
#                                     generated.clustering) {
#   if (length(reference.clustering) != length(generated.clustering)) {
#     warning("Input vectors are not of the same length!")
#   } else {
#     # reference.clustering = mrna_raw$Cell
#     # generated.clustering = kmeans.m$cluster
#     output <- array(0, dim = 6)
#     reference.clustering <- as.numeric(reference.clustering) %>% as.factor()
#     generated.clustering <- as.numeric(generated.clustering) %>% as.factor()
#     output <- NMF::purity(reference.clustering, generated.clustering)
#     names(output) <- "Purity"
#     # Get a lot of concurrance measures
#     # https://davetang.org/muse/2017/09/21/adjusted-rand-index/
#     output <- c(
#       output,
#       clues::adjustedRand(
#         BiocGenerics::as.vector(reference.clustering),
#         BiocGenerics::as.vector(generated.clustering)
#       )
#     )
#     return(output)
#   }
# }

resampling based evaluation of jaccard

# temp.clust <- as.dendrogram(cluster$hclust)
# assignment <- cutree(temp.clust, k = 3)[order.dendrogram(temp.clust)]
# 
# # use names to get groupings
# time_groups <- as.factor(unlist(transpose(stringr::str_split(names(assignment), pattern = "-"))[[1]]))
# 
# library(purrr)
# set.seed(9348957)
# resample.indices <- map(1:10001, function(i){
#   if (i == 1){
#     indices <- get_cluster_comparisons(reference.clustering = time_groups,
#                                        assignment)
#   }else{
#     indices <- get_cluster_comparisons(reference.clustering = time_groups,
#                                        sample(assignment, replace = F))
#   }
# })
# resample.indices <- do.call(rbind, resample.indices)
# resample.indices <- resample.indices %>% as.data.frame()
# 
# library(ggplot2)
# resample.indices %>% 
#   ggplot(aes(x = Jaccard))+
#   geom_density()+
#   geom_vline(xintercept = as.numeric(resample.indices[1, "Jaccard"]))+
#   xlim(0,1)+
#   theme_minimal()+
#   labs(subtitle = paste0("ep = ", as.character(round(mean(resample.indices$Jaccard >= resample.indices[1, "Jaccard"]), digits = 4))))
# # empirical p-value from 10,000 random samplings
# ggsave(plot = last_plot(), 
#        filename = here("officer_output", "hclust_jaccard_dist.tiff"),
#        width = 11.5, height = 4.76)

Correlations

Correlation Heatmaps

# corr <- cor(M_winxiqr[, mrna_cols], use = "pairwise.complete.obs", method = "spearman")
# p.mat <- cor_pmat(M_winxiqr[, mrna_cols], use = "pairwise.complete.obs", method = "spearman")
#  
# ggcorrplot(
#   corr,
#   p.mat = p.mat,
#   hc.order = FALSE,
#   # method = "circle",
#   type = "lower",
#   insig = "blank",
#   colors = RColorBrewer::brewer.pal(9, "PuOr")[c(1, 5, 9)]
# )

# Time = "0h"
cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){
  df = M_winxiqr[M_winxiqr$Condition == Time, c(mrna_cols, memb_cols, epsp_cols)]

  corr <- cor(df, use = "pairwise.complete.obs", method = "spearman")
  p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman")

  cor_df_bin <- corr
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }


  ggcorrplot(
    cor_df_bin, 
    p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = Time)+
    theme(legend.position = ""#,
          # axis.text.x = element_text(angle = 90)
          )
})

# plot_grid(plotlist = cor_plt_list)

(cor_plt_list[[1]]+theme(
    axis.text.x = element_text(angle = 90))) +
(cor_plt_list[[2]]+theme(
    axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))+
(cor_plt_list[[3]]+theme(
  axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))

cor_plt_list <- map(c("0h", "1h", "24h"), function(Time){
  df = M_winxiqr[M_winxiqr$Condition == Time, c(mrna_cols)]

  corr <- cor(df, use = "pairwise.complete.obs", method = "spearman")
  p.mat <- cor_pmat(df, use = "pairwise.complete.obs", method = "spearman")

  cor_df_bin <- corr
  cor_bins <- seq(-1, 1, length.out = 8)
  for (i in 1:(length(cor_bins)-1)){
    # use the more extreme value to shade with
    cor_df_bin[cor_df_bin > cor_bins[i] & cor_df_bin < cor_bins[i+1]] <- cor_bins[c(i, (i+1))[which.max(abs(cor_bins[i:(i+1)]))]]
  }


  ggcorrplot(
    cor_df_bin, 
    p.mat = p.mat,
    insig = "blank",
    type = "upper",
    outline.col = "white",
    colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]
  )+
    labs(subtitle = Time)+
    theme(legend.position = ""#,
          # axis.text.x = element_text(angle = 90)
          )
})

# plot_grid(plotlist = cor_plt_list)

(cor_plt_list[[1]]+theme(
    axis.text.x = element_text(angle = 90))) +
(cor_plt_list[[2]]+theme(
    axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))+
(cor_plt_list[[3]]+theme(
  axis.text.x = element_text(angle = 90),
  # axis.text.x = element_blank(),
  axis.text.y = element_blank()))

temp_list <- map(unique(M_winxiqr$Condition), function(condition){
  temp <- filter(M_winxiqr, Condition == condition) %>% 
    select(-Cell, -Experiment)

  temp <- temp[, c(mrna_cols, memb_cols, epsp_cols )] %>% correlate() #%>% shave()
  temp$Condition <- condition
  temp <- gather(temp, "term2", "Cor", names(temp)[!(names(temp) %in% c("term", "Condition"))])

  return(temp)
})


# cor breaks
# cor_br <- seq(-1, 1, length.out = 8)
# 
# plt <- do.call(rbind, temp_list) %>% 
#   filter(term %in% mrna_cols) %>% 
#   filter(term2 %in% c(memb_cols, epsp_cols)) %>% 
#   ggplot(aes(term, term2, fill = Cor))+
#   geom_tile()
# 
# 
# plt+scale_fill_stepsn(colors = RColorBrewer::brewer.pal(n = 9, name = "PuOr")[c(1,5,9)]  )
#   
# 
# plt+scale_fill_gradientn(colors=RColorBrewer::brewer.pal(n = 9, name = "PuOr"),
#                          na.value = "transparent",
#                          breaks=seq(-1, 1, length.out = 8),
#                          # labels=c("Minimum",0,"Maximum"),
#                          limits=c(-1,1))


library(corrr)
library(ggtext)
library(glue)
# do.call(rbind, temp_list) 
corr_bt_mrna_phys <- map(1:3, function(i){

temp_list[[i]] %>% 
  filter(term %in% mrna_cols) %>% 
  filter(term2 %in% c(memb_cols, epsp_cols)) %>% 
  mutate(term = glue(("<i>{term}</i>"))) %>% 
  ggplot(aes(term, term2, fill = Cor))+
  geom_tile()+
  labs(x= "mRNA", y = "")+
  theme(axis.text.x = element_markdown(angle = 45))+
  scale_fill_stepsn(
    colors=RColorBrewer::brewer.pal(n = 8, name = "PuOr"),
                         na.value = "transparent",
                         breaks=round(seq(-1, 1, length.out = 9), digits = 2)[2:8],
                         limits=c(-1,1)
  )+
  coord_fixed()

})

plot_grid(plotlist = corr_bt_mrna_phys)


corr_bt_mrna_phys[[1]]+theme(legend.position = "") + 
  corr_bt_mrna_phys[[2]]+theme(legend.position = "") + 
  corr_bt_mrna_phys[[3]]

library(GGally)
ggpairs(select(filter(M_winxiqr),#, Condition == "0h"), 
               all_of(c("Condition", 
                        mrna_cols, "r1", 
                        "Ihtk.0", "Ihtk.0.s", 
                        "Ia.0", "Ia.0.s"
                        ))),#memb_cols, "Cor", "Min.V_Obs", "Max.Amplitude_Obs", "AUC_Obs"))),
        mapping = ggplot2::aes(color = Condition),
        lower = list(continuous = wrap("smooth", se = F, alpha = 0.8, size=1)))


        # ggplot2::aes(colour=Condition, aes = 0.1))

#FIXME What library did I use for "BaselineTEA.png" ?

Correlation ECDFs

# FIXME

How do these relationships change for TEA over time?

#TODO

What univariate changes occur?

Produce univariate tables

Run anovas

tictoc::tic()
temp_list <- map(
  c(mrna_cols, memb_cols, epsp_cols),
  # c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), 
  function(temp_col){
    print(temp_col)

    temp <- M_all[M_all$Source == "Daniel", c("Condition", "Experiment", "Cell", temp_col)] %>% distinct()

    # Identify outliers for each group
    temp_b <- temp[temp$Condition == "0h", ]
    temp_c <- temp[temp$Condition == "1h", ]
    temp_d <- temp[temp$Condition == "24h", ]

    temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ]
    temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ]
    temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ]

    temp <- rbind(temp_b, temp_c) %>% rbind(temp_d)

    # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ]
    temp <- temp[!is.na(temp[[temp_col]]), ]

    # make sure we don't have pseudoreplicates for network measurements
    if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){
      temp <- temp %>% 
        select(-Cell) %>% 
        group_by(Condition, Experiment) %>% 
        mutate_all(median, na.rm = T) %>% 
        distinct()
    }


    if(nrow(temp) < 10){

      return(list(
        dv = temp_col,
        p = NA,
        ep = NA,
        hsd_groups = data.frame(dv = c(NA, NA, NA), 
                                groups = c(NA, NA, NA), 
                                Condition = c(NA, NA, NA)
        ),
        fm = NA))

    } else {


      temp_shuffle <- temp
      resample_array <- map(1:1000, function(i){
        temp_shuffle$Condition <- sample(temp_shuffle$Condition, replace = F)
        fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp_shuffle)      
        return(car::Anova(fm)[1,3])
      }) %>% unlist()


      fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp)

      fm_hsd <- agricolae::HSD.test(fm, trt = "Condition") 
      fm_hsd <- fm_hsd$group

      fm_hsd$Condition <- rownames(fm_hsd)

      fm_hsd_p <- agricolae::HSD.test(fm, trt = "Condition", 
                                      group = F)$comparison
      fm_hsd_p[rownames(fm_hsd_p) == "0h - 1h", "pvalue"]

      return(list(
        dv = temp_col,
        p = car::Anova(fm)[1,4],
        ep = mean(resample_array >= car::Anova(fm)[1,3]),
        hsd_groups = fm_hsd,
        fm = fm,
        p0.1 = fm_hsd_p[rownames(fm_hsd_p) == "0h - 1h", "pvalue"],
        p0.24 = fm_hsd_p[rownames(fm_hsd_p) == "0h - 24h", "pvalue"],
        p1.24 = fm_hsd_p[rownames(fm_hsd_p) == "1h - 24h", "pvalue"]        
        ))
    }
  })
tictoc::toc()


temp_list <- temp_list %>% transpose()
fm_overview_table <- data.frame(dv = unlist(temp_list[[1]]),
                                p  = unlist(temp_list[[2]]),
                                ep = unlist(temp_list[[3]]),

                                p0.1 =  unlist(temp_list[[6]]),
                                p0.24 = unlist(temp_list[[7]]),
                                p1.24 = unlist(temp_list[[8]])
                                )


# work up post hoc stats
temp_list_hsd <- map(seq_along(temp_list[[4]]), function(i){
  if (!is.na(unique(temp_list[[4]][[i]]$groups))){
    temp_list[[4]][[i]] %>% 
      gather("dv", "Estimate", names(temp_list[[4]][[i]])[1]) %>% 
      pivot_wider(names_from = "Condition", values_from = c("groups", "Estimate") ) %>% 
      mutate_all(as.character)
  }
})  

fm_overview_hsd <- do.call(rbind, temp_list_hsd)


fm_overview_table <- full_join(fm_overview_table, fm_overview_hsd)
# for adding tukey hsd p vals into plots
fm_overview <- fm_overview_table

Format table

# add groupings
fm_overview_table <- fm_overview_table %>% mutate(Family = case_when(
  dv %in% c("vrest") ~ "Resting Voltage",
  dv %in% c("r11", "r1") ~ "Membrane Resistance",
  dv %in% ionic_cols ~ "Outward Currents",
  dv %in% epsp_cols ~ "Excitability",
  dv %in% c("r12", "rc", "ig", "cc", "ig.3_4", "ig.3_5", "ig.4_5") ~ "Coupling Measures"
))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = factor(fm_overview_table$Family, levels = c("Membrane Resistance", "Resting Voltage", "Coupling Measures", "Outward Currents", "Excitability"))) %>% 
  arrange(Family) %>% 
  mutate(Family = as.character(Family))

# temp <- rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName)
fm_overview_table <- full_join(fm_overview_table, 
                               rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName, Family2 = Family))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = ifelse(is.na(Family) & !is.na(Family2), Family2, Family)) %>% 
  select(-Family2) %>% 
  filter(!is.na(p))


# add additional cols
fm_overview_table$fdr <- p.adjust(fm_overview_table$ep, method = "fdr")

fm_overview_table <- fm_overview_table %>% 
  mutate(Estimate_0h = as.numeric(Estimate_0h),
         Estimate_1h = as.numeric(Estimate_1h),
         Estimate_24h = as.numeric(Estimate_24h)) %>% 
  mutate(p = round(p, digits = 3),
         ep = round(ep, digits = 3),
         fdr = round(fdr, digits = 3),
         Estimate_0h = round(Estimate_0h, digits = 3),
         Estimate_1h = round(Estimate_1h, digits = 3),
         Estimate_24h = round(Estimate_24h, digits = 3),
         p0.1 = round(p0.1, digits = 3),
         p0.24 = round(p0.24, digits = 3),
         p1.24 = round(p1.24, digits = 3) ) %>% 
  mutate(stars = case_when(fdr > 0.10               ~ "",
                           fdr < 0.10 & fdr > 0.05  ~ ".",
                           fdr < 0.05 & fdr > 0.01  ~ "*",
                           fdr < 0.01 & fdr > 0.001 ~ "**",
                           fdr < 0.001              ~ "***"
  ))


# library(officer)
library(flextable)
M_ft <-flextable(fm_overview_table,
                 col_keys = c("Family", "dv", "p", "ep",
                              "fdr", 
                              "stars", 
                              "Estimate_0h", "Estimate_1h", "Estimate_24h",
                              "p0.1",  "p0.24",  "p1.24"))

M_ft <- theme_vanilla(M_ft)
M_ft <- merge_v(M_ft, j = c("Family") )
# M_ft <- color(M_ft, ~ fdr < 0.05, ~ stars, color = "red")
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ dv, bold = TRUE)
M_ft <- bold(M_ft, ~ p   <= 0.05, ~ p, bold = TRUE)
M_ft <- bold(M_ft, ~ ep  <= 0.05, ~ ep, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ fdr, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ stars, bold = TRUE)

# get the groupings without showing the columns. 

color_a = "#ffeda0"
color_ab= "#feb24c"
color_b = "#f03b20"
color_c = "#2b8cbe"

M_ft <- bg(M_ft, ~ groups_0h == "a", ~ Estimate_0h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_0h == "ab",~ Estimate_0h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_0h == "b", ~ Estimate_0h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_0h == "c", ~ Estimate_0h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_1h == "a", ~ Estimate_1h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_1h == "ab",~ Estimate_1h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_1h == "b", ~ Estimate_1h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_1h == "c", ~ Estimate_1h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_24h == "a", ~ Estimate_24h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_24h == "ab",~ Estimate_24h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_24h == "b", ~ Estimate_24h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_24h == "c", ~ Estimate_24h, bg = color_c)

M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_0h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_1h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_24h, bg = "White")


M_ft <- set_header_labels(M_ft, 
                          dv = "", stars = "",
                          Estimate_0h = "0h", Estimate_1h = "1h", Estimate_24h = "24h",
                          p0.1 = "0h vs 1h",  p0.24 = "0h vs 24h",  p1.24 = "1h vs 24h"
                          )

M_ft <- autofit(M_ft)
M_ft

pptx_file <- "./officer_output/DanielAnova.pptx"
save_as_pptx("my table" = M_ft, path = pptx_file)

docx_file <- "./officer_output/DanielAnova.docx"
save_as_docx("my table" = M_ft, path = docx_file)

produce univariate plots

# mrna_cols, memb_cols, epsp_cols

plot_univariate_decorated_0 <- function(temp = plt_membrane,
                                        yvar = "Ihtk.0",
                                        new.y = "",
                                        new.title = ""){
  # allow for customization of y axis
  if (typeof(new.title) == "character"){
    if(typeof(new.y) == "character"){
      if(new.y == ""){
        new.title <- yvar
      }
    } else if (typeof(new.y) == "character" | typeof(new.y) == "expression"){
      new.title <- new.title
    } else {
      warning("new.y not supported type")
      new.title <- yvar
    }
  }

  names(temp)[names(temp) == yvar] <- "yvar"

  temp_bands <- temp %>% 
    group_by(Condition) %>% 
    summarise(
      q10 = quantile(yvar, probs = 0.1, na.rm = T),
      q25 = quantile(yvar, probs = 0.25, na.rm = T),
      # q45 = quantile(yvar, probs = 0.45, na.rm = T),
      q50 = quantile(yvar, probs = 0.5, na.rm = T),
      # q55 = quantile(yvar, probs = 0.55, na.rm = T),
      q75 = quantile(yvar, probs = 0.75, na.rm = T),
      q90 = quantile(yvar, probs = 0.9, na.rm = T),
    ) %>% 
    mutate(outlier_up = q50 + 1.5*(q75 - q25),
           outlier_down = q50 - 1.5*(q75 - q25))

  temp <- full_join(temp, temp_bands)

  temp_plt <- ggplot(data = temp, aes(x = Condition, y = yvar))+
    # geom_ribbon(aes(x = Condition, ymin = q10, ymax = q90, group = 1), color = "lightgray", fill = "cornflowerblue", alpha = 0.3)+
    geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = 1), color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
    # geom_ribbon(aes(x = Condition, ymin = q45, ymax = q55, group = 1), color = "black", fill = "cornflowerblue", alpha = 0.3)+
    geom_line(aes(x = Condition, y = q50, group = 1), linetype = "dashed", color = "blue")+

    geom_line(aes(x = Condition, y = outlier_up, group = 1), linetype = "dashed", color = "black")+
    geom_line(aes(x = Condition, y = outlier_down, group = 1), linetype = "dashed", color = "black")+

    geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
    # geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 1)+
    geom_point(alpha = 0.5)+
    labs(title = new.title, y=new.y, x = "")

  return(temp_plt)
}

membrane changes

plt_membrane <- M_all %>% 
  select(Condition, Experiment, Cell, all_of(memb_cols)) %>% 
  group_by(Condition, Experiment, Cell) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct() 

membrane_dvs_plt <- c("r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope", "vrest")
new.y_vector <- c(expression(M~Omega), expression(M~Omega), "nA", expression(frac(nA, mV)), "nA", expression(frac(nA, mV)), "mV" )
new.title_vector     <- c(expression(R[In]), 
                          expression(R[Membrane]), 
                          expression(I[HTK] ~ Intercept), 
                          expression(I[HTK] ~ Slope), 
                          expression(I[A] ~ Intercept),
                          expression(I[A] ~ Slope),
                          expression(V[Rest]))
membrane_plts <- map(seq_along(membrane_dvs_plt), function(i){plot_univariate_decorated_0(
  temp = ungroup(plt_membrane), 
  yvar = membrane_dvs_plt[i],
  new.y = new.y_vector[i],
  new.title = new.title_vector[i])})






# run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison
for (i in seq_along(membrane_dvs_plt)){ # <--------------------------------------- !
  temp_annotation <- fm_overview_table %>% filter(dv == membrane_dvs_plt[i]) # <-- ! 

  if (temp_annotation$fdr <= 0.05){

    # base y position for on max y and % of range
    current_yrange <- ggplot_build(
      membrane_plts[[i]] # <------------------------------------------------------ !
    )$layout$panel_params[[1]]$y.range 

    start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1]
    step_size <- (current_yrange[2] - current_yrange[1])*.08

    # create the annotation data frame
    df <- data.frame(
          group1 = c("0h", "0h",  "1h"),
          group2 = c("1h", "24h", "24h"),
          p.adj = c(temp_annotation$p0.1,
                    temp_annotation$p0.24, 
                    temp_annotation$p1.24)
        ) %>% 
      mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% 
      mutate(p.adj = as.character(p.adj)) %>% 
      mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj))

    # only increase the y position if there was a significant difference
    # begin at 1 to get the right output
    steps_up <- cumsum(df$sig)
    steps_up[1] <- 1

    df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up]

    membrane_plts[[i]] <- membrane_plts[[i]]+ # <------------------------------------- !
      stat_pvalue_manual(
        data = df, 
        hide.ns = T
      )

  }
}


# (membrane_plts[[1]] / 
#     membrane_plts[[2]]) |
#   (membrane_plts[[3]] /
#      membrane_plts[[4]]) |
#   (membrane_plts[[5]] /
#      membrane_plts[[6]]) |
#   (membrane_plts[[7]] / 
#      patchwork::plot_spacer())

# add to make the scaling match other multiplots
length(membrane_plts)
for(i in 8:14){
  membrane_plts[[i]] <- ggplot()
}

ggsave(here("officer_output", "membrane_plts.svg"),
       cowplot::plot_grid(plotlist =  membrane_plts, labels = c(LETTERS[1:7], rep("", times = 7))), 
       # width = 8.5, 

       # width = 16, 
       # height = 8,

       width = 16, 
       height = 16,

       units = "in")

# walk(seq_along(membrane_dvs_plt), function(i){
#   ggsave(paste0(membrane_dvs_plt[i],".tiff"), 
#          plot = membrane_plts[[i]], 
#          path = here("officer_output"), 
#          width = 11.5/4, height = 4.76)  
# })

excitability

# Columns: 8
# $ Cor                 <dbl> 0.5569679, 0.5569679, 0.5569679, 0.5569679, 0.556967~
# $ Min.V_Obs           <dbl> -53.92456, -53.92456, -53.92456, -53.92456, -53.9245~
# $ Max.Amplitude_Obs   <dbl> -16.6320804, -16.6320804, -16.6320804, -16.6320804, ~
# $ Max.Amplitude_Sim   <dbl> -25.5070750, -25.5070750, -25.5070750, -25.5070750, ~
# $ AUC_Obs             <dbl> 55976.54, 55976.54, 55976.54, 55976.54, 55976.54, 55~
# $ AUC_Sim             <dbl> 32069.58, 32069.58, 32069.58, 32069.58, 32069.58, 32~
# $ Max.Amplitude_Delta <dbl> 8.981806, 8.981806, 8.981806, 8.981806, 8.981806, 8.~
# $ AUC_Delta           <dbl> 18563.238, 18563.238, 18563.238, 18563.238, 18563.23~

plt_epsp <- M_all[, c("Condition", "Experiment", "Cell", epsp_cols)] %>% 
  group_by(Condition, Experiment, Cell) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct() 

# $ Condition           <chr> "1h", "1h", "1h", "24h", "1h", "24h", "24h", "24h", ~
# $ Experiment          <chr> "190808a", "190808a", "190830", "190903", "190904", ~
# $ Cell                <chr> "4", "5", "4", "3", "5", "4", "5", "4", "5", "5", "4~
# $ Cor                 <dbl> 0.5569679, 0.6112679, NA, 0.9774225, 0.8716307, 0.94~
# $ Min.V_Obs           <dbl> -53.924562, -57.052614, NA, -61.050416, -51.895143, ~
# $ Max.Amplitude_Obs   <dbl> -16.6320804, -24.1851812, NA, -0.9613037, -45.288086~
# $ Max.Amplitude_Sim   <dbl> -25.5070750, -33.6612173, NA, -0.4841494, -37.848033~
# $ AUC_Obs             <dbl> 55976.539, 35180.643, NA, 61242.214, 8488.885, 36081~
# $ AUC_Sim             <dbl> 32069.584, 38518.177, NA, 59193.615, 3901.010, 29769~
# $ Max.Amplitude_Delta <dbl> 8.981806, 6.438339, NA, 0.308876, 2.380225, 28.70748~
# $ AUC_Delta           <dbl> 18563.23796, -251.67701, NA, 2050.58898, 5702.37419,~


epsp_dvs_plt <- c(
  # "Cor", 
  # "Min.V_Obs", 
  # "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta",
  # "AUC_Obs", "AUC_Sim", "AUC_Delta"

  "Cor", 
  "Cor.IQR", 

  "Min.V_Obs", 
  "Min.V_Obs.IQR", 

  "Max.Amplitude_Obs", 
  "Max.Amplitude_Obs.IQR", 

  "AUC_Obs",
  "AUC_Obs.IQR", 

  "Max.Amplitude_Delta", 
  "Max.Amplitude_Delta.IQR", 

  "AUC_Delta", 
  "AUC_Delta.IQR",

  "Max.Amplitude_Sim", 
  "AUC_Sim")

new.y_vector <- c("R", "R",

                  "mV", "mV", "mV", "mV", 

                  "mV/Sec", "mV/Sec",

                  "mV", "mV",

                  "mV/Sec", "mV/Sec",

                  "mV",
                  "mV/Sec")

new.title_vector <- c("Correlation", "Correlation IQR",

                      "Baseline", "Baseline IQR",

                      "Obs. Amp.", "Obs. Amp. IQR", 

                      "Obs. AUC", "Obs. AUC IQR",

                      "Diff. Amplitude", "Diff. Amp. IQR",

                      "Diff. AUC", "Diff. AUC IQR",

                      "Sim. Amp.", 
                      "Sim. AUC" )

epsp_plts <- map(seq_along(epsp_dvs_plt), function(i){plot_univariate_decorated_0(
  temp = ungroup(plt_epsp), 
  yvar = epsp_dvs_plt[i],
  new.y = new.y_vector[i],
  new.title = new.title_vector[i] )})


# ((epsp_plts[[1]]+ylim(c(0, 1))) / 
#     patchwork::plot_spacer()/ 
#     patchwork::plot_spacer()) |
#   (epsp_plts[[3]] /
#      epsp_plts[[4]]/
#      epsp_plts[[5]]) |
#   (epsp_plts[[6]] /
#      epsp_plts[[7]]/
#      epsp_plts[[8]]) 


# run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison
for (i in seq_along(epsp_dvs_plt)){ # <--------------------------------------- !
  temp_annotation <- fm_overview_table %>% filter(dv == epsp_dvs_plt[i]) # <-- ! 

  if (temp_annotation$fdr <= 0.05){

    # base y position for on max y and % of range
    current_yrange <- ggplot_build(
      epsp_plts[[i]] # <------------------------------------------------------ !
    )$layout$panel_params[[1]]$y.range 

    start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1]
    step_size <- (current_yrange[2] - current_yrange[1])*.08

    # create the annotation data frame
    df <- data.frame(
          group1 = c("0h", "0h",  "1h"),
          group2 = c("1h", "24h", "24h"),
          p.adj = c(temp_annotation$p0.1,
                    temp_annotation$p0.24, 
                    temp_annotation$p1.24)
        ) %>% 
      mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% 
      mutate(p.adj = as.character(p.adj)) %>% 
      mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj))

    # only increase the y position if there was a significant difference
    # begin at 1 to get the right output
    steps_up <- cumsum(df$sig)
    steps_up[1] <- 1

    df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up]

    epsp_plts[[i]] <- epsp_plts[[i]]+ # <------------------------------------- !
      stat_pvalue_manual(
        data = df, 
        hide.ns = T
      )

  }
}


# cowplot::plot_grid(plotlist =  epsp_plts, labels = LETTERS)

ggsave(here("officer_output", "epsp_plts.svg"),
       cowplot::plot_grid(plotlist =  epsp_plts, labels = LETTERS),
       width = 16, 
       height = 16,
       # width = 8.5, 
       units = "in")

# walk(seq_along(epsp_dvs_plt), function(i){
#   ggsave(paste0(epsp_dvs_plt[i],".tiff"), 
#          plot = epsp_plts[[i]], 
#          path = here("officer_output"), 
#          width = 11.5/4, height = 4.76)  
# })

mrna

univariate change over time -- bend quartile

plt_mrna <- M_all[, c("Condition", "Experiment", "Cell", mrna_cols)] %>%
  group_by(Condition, Experiment, Cell) %>% 
  mutate_all(median, na.rm = T) %>% 
  ungroup() %>% 
  distinct()


mrna_plts <- map(seq_along(mrna_cols), function(i){plot_univariate_decorated_0(
  temp = ungroup(plt_mrna), 
  yvar = mrna_cols[i],
  new.y = "",
  new.title = mrna_cols[i] )+theme(plot.title = element_text(face="italic"))})




# na_y = c(0, 20000)
# ca_y = c(0, 7000)
# k_y  = c(0, 5000)
# inx_y <- c(0, 40000)
# 
# 
# mrna_plts[[1]]+coord_cartesian(ylim = na_y)+
#   
#   mrna_plts[[2]]+coord_cartesian(ylim = ca_y) +
#   mrna_plts[[3]]+coord_cartesian(ylim = ca_y) +
#   mrna_plts[[4]]+coord_cartesian(ylim = ca_y) +
#   patchwork::plot_spacer()+
#   
#   mrna_plts[[5]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[6]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[7]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[8]]+coord_cartesian(ylim = k_y) +
#   mrna_plts[[9]]+coord_cartesian(ylim = k_y) +
#   
#   mrna_plts[[10]]+coord_cartesian(ylim = inx_y) +
#   mrna_plts[[11]]+coord_cartesian(ylim = inx_y) +
#   mrna_plts[[12]]+coord_cartesian(ylim = inx_y) +
#   patchwork::plot_spacer() +
#   patchwork::plot_spacer() + 
#   
#   patchwork::plot_layout(ncol = 5, nrow = 3)


#TODO probably a more elegant way to do this using the following functions
# patchwork::get_dim()
# patchwork::set_dim()
# mrna_plts[[1]]+ set_dim(mrna_plts[[2]], get_dim(mrna_plts[[1]]))
# get_max_dim(mrna_plts[[1]], mrna_plts[[2]])




# run through dependent variables, if there was a significant effect, get the HSD p values and add them for each significant comparison
for (i in seq_along(mrna_cols)){ # <--------------------------------------- !
  temp_annotation <- fm_overview_table %>% filter(dv == mrna_cols[i]) # <-- ! 

  if (temp_annotation$fdr <= 0.05){

    # base y position for on max y and % of range
    current_yrange <- ggplot_build(
      mrna_plts[[i]] # <------------------------------------------------------ !
    )$layout$panel_params[[1]]$y.range 

    start_y <- .76*(current_yrange[2] - current_yrange[1]) + current_yrange[1]
    step_size <- (current_yrange[2] - current_yrange[1])*.08

    # create the annotation data frame
    df <- data.frame(
          group1 = c("0h", "0h",  "1h"),
          group2 = c("1h", "24h", "24h"),
          p.adj = c(temp_annotation$p0.1,
                    temp_annotation$p0.24, 
                    temp_annotation$p1.24)
        ) %>% 
      mutate(sig = ifelse(p.adj <= 0.05, 1, 0)) %>% 
      mutate(p.adj = as.character(p.adj)) %>% 
      mutate(p.adj = ifelse(p.adj == "0", "< 0.001", p.adj))

    # only increase the y position if there was a significant difference
    # begin at 1 to get the right output
    steps_up <- cumsum(df$sig)
    steps_up[1] <- 1

    df$y.position <- seq(start_y, by = step_size, length.out = 3)[steps_up]

    mrna_plts[[i]] <- mrna_plts[[i]]+ # <------------------------------------- ! x2
      stat_pvalue_manual(
        data = df, 
        hide.ns = T
      )

  }
}


ggsave(here("officer_output", "mrna_plts.svg"),
       cowplot::plot_grid(plotlist =  mrna_plts, labels = LETTERS),
       width = 16, 
       height = 12,
       # width = 8.5, 
       units = "in")

ggsave(here("officer_output", "mrna_plts.svg"),
       cowplot::plot_grid(plotlist =  mrna_plts, labels = LETTERS, nrow = 4, ncol = 4),
       width = 16, 
       height = 16,
       # width = 8.5, 
       units = "in")


# length(membrane_plts)
# for(i in 8:14){
#   membrane_plts[[i]] <- ggplot()
# }
# 
# ggsave(here("officer_output", "membrane_plts.svg"),
#        cowplot::plot_grid(plotlist =  membrane_plts, labels = c(LETTERS[1:7], rep("", times = 7))), 
#        # width = 8.5, 
# 
#        # width = 16, 
#        # height = 8,
#        
#        width = 16, 
#        height = 16,
#        
#        units = "in")


# walk(seq_along(mrna_cols), function(i){
#   ggsave(paste0(mrna_cols[i],".tiff"), 
#          plot = mrna_plts[[i]]+theme(plot.title = element_text(face="italic")), 
#          path = here("officer_output"), 
#          width = 11.5/4, height = 4.76)  
# })

# plt_mrna_long <- plt_mrna %>% 
#   gather(mrna, count, mrna_cols) %>% 
#   filter(!is.na(count))
# 
# 
# plt_mrna_long_bands <- plt_mrna_long %>% 
#     group_by(Condition, mrna) %>% 
#     summarise(
#       q10 = quantile(count, probs = 0.1, na.rm = T),
#       q25 = quantile(count, probs = 0.25, na.rm = T),
#       q50 = quantile(count, probs = 0.5, na.rm = T),
#       q75 = quantile(count, probs = 0.75, na.rm = T),
#       q90 = quantile(count, probs = 0.9, na.rm = T),
#     ) %>% 
#     mutate(outlier_up = q50 + 1.5*(q75 - q25),
#            outlier_down = q50 - 1.5*(q75 - q25))
# 
# temp <- full_join(plt_mrna_long, plt_mrna_long_bands)
# 
# 
# 
# # devtools::install_github("teunbrand/ggh4x")
# library(ggh4x)
# 
# filter(temp, mrna %in% c("nav")) %>% 
#   ggplot(aes(x = Condition, y = count))+
#     geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), 
#                 color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#     geom_line(aes(x = Condition, y = q50, group = 1), 
#               linetype = "dashed", color = "blue")+
#     geom_line(aes(x = Condition, y = outlier_up, group = 1), 
#               linetype = "dashed", color = "black")+
#     geom_line(aes(x = Condition, y = outlier_down, group = 1), 
#               linetype = "dashed", color = "black")+
#     
#     geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
# 
#     geom_point(alpha = 0.5)+
#   facet_wrap(~ mrna, strip.position = "bottom")
# 
# 
# 
# filter(temp, mrna %in% c("shaker", "shab", "shal", "shaw1", "shaw2", "bkkca", "cav1", "cav2")) %>% 
#   ggplot(aes(x = Condition, y = count))+
#     geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), 
#                 color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#     geom_line(aes(x = Condition, y = q50, group = 1), 
#               linetype = "dashed", color = "blue")+
#     geom_line(aes(x = Condition, y = outlier_up, group = 1), 
#               linetype = "dashed", color = "black")+
#     geom_line(aes(x = Condition, y = outlier_down, group = 1), 
#               linetype = "dashed", color = "black")+
#     
#     geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
# 
#     geom_point(alpha = 0.5)+
#   facet_wrap(~ mrna, strip.position = "bottom")
# 
# 
# 
# filter(temp, mrna %in% c("inx1", "inx2", "inx3")) %>% 
#   ggplot(aes(x = Condition, y = count))+
#     geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = mrna), 
#                 color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#     geom_line(aes(x = Condition, y = q50, group = 1), 
#               linetype = "dashed", color = "blue")+
#     geom_line(aes(x = Condition, y = outlier_up, group = 1), 
#               linetype = "dashed", color = "black")+
#     geom_line(aes(x = Condition, y = outlier_down, group = 1), 
#               linetype = "dashed", color = "black")+
#     
#     geom_point(aes(x = Condition, y = q50), color = "blue", size = 4, shape = 19, alpha = 0.02)+
# 
#     geom_point(alpha = 0.5)+
#   facet_wrap(~ mrna, strip.position = "bottom")

univariate change over time -- boxplots

# devtools::install_github("teunbrand/ggh4x")
library(ggh4x)
# FIXME 
plt_mrna_long <- plt_membrane %>%
  gather(mrna, count, mrna_cols) %>% 
  filter(!is.na(count))
  # filter(mrna %in% c("bkkca", "shal")) %>% 

return_nested_mrna_plt <- function(temp = plt_mrna_long){
  ggplot(temp, aes(mrna, count))+
    geom_boxplot(aes(fill = Condition))+
    geom_point(position = position_jitter(width = 0.05), #size =3,  
               alpha = 0.7)+  
    theme(axis.text.x=element_blank(),
          legend.position = "")+ # writes over mrna labels so they only show up in the facet
    labs(x = "")+
    facet_nested(~ mrna + Condition, scales = "free_x", switch = "x")
}


# return_nested_mrna_plt(temp = filter(plt_mrna_long, 
#                                      mrna %in% unlist(mRNAInfo[mRNAInfo$Class == "Channel", "RGeneName"])
#                                      ))


return_nested_mrna_plt(temp = filter(plt_mrna_long, 
                                     mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Voltage-dependent K+ Channel", "RGeneName"])
                                     ))

return_nested_mrna_plt(temp = filter(plt_mrna_long, 
                                     mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Other K+ Channel", "RGeneName"])
                                     ))

return_nested_mrna_plt(temp = filter(plt_mrna_long, 
                                     mrna %in% unlist(mRNAInfo[mRNAInfo$Family == "Ca2+ Channel", "RGeneName"])
                                     ))

# innexins
# TODO flix axis to right, bind
return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx1", "inx2", "inx3", "inx4")))
return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx5")))
return_nested_mrna_plt(temp = filter(plt_mrna_long, mrna %in% c("inx5")))+scale_y_log10()

What happens at the network level?

plt_tevc <- M_all_outliers %>%
  select(Condition, Experiment, ig.3_4, ig.3_5, ig.4_5) %>%
  distinct() %>%
  gather("ElectricalSynapse", "ig", c("ig.3_4", "ig.3_5", "ig.4_5")) %>%
  filter(!is.na(ig))

plt_1.1 <- plot_univariate_decorated_0(
  temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.3_4")),
  yvar = "ig",
  new.y = "nA/mV",
  new.title = expression(I[g]~LC[3] %<->% LC[4]))+
  coord_cartesian(ylim = c(0, 0.1))

plt_1.2 <- plot_univariate_decorated_0(
  temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.3_5")),
  yvar = "ig",
  new.y = "nA/mV",
  new.title = expression(I[g]~LC[3] %<->% LC[5]))+
  coord_cartesian(ylim = c(0, 0.1))

plt_1.3 <- plot_univariate_decorated_0(
  temp = ungroup(filter(plt_tevc, ElectricalSynapse == "ig.4_5")),
  yvar = "ig",
  new.y = "nA/mV",
  new.title = expression(I[g]~LC[4] %<->% LC[5]))+
  coord_cartesian(ylim = c(0, 0.5))+
  scale_y_continuous(position = "right")


library(patchwork)
plt_1.1 + plt_1.2 + plt_1.3+ plot_layout(widths = c(1, 1, 1) )


ggsave(paste0("ig_lc34.tiff"),
         plot = plt_1.1,
         path = here("officer_output"),
         width = 11.5/4, height = 4.76)
ggsave(paste0("ig_lc35.tiff"),
         plot = plt_1.2,
         path = here("officer_output"),
         width = 11.5/4, height = 4.76)
ggsave(paste0("ig_lc45.tiff"),
         plot = plt_1.3,
         path = here("officer_output"),
         width = 11.5/4, height = 4.76)



#### ####

plt_tecc <- M_all_outliers %>%
  select(Condition, Experiment, cc.3_4, cc.3_5, cc.4_3, cc.4_5, cc.5_3, cc.5_4) %>%
  group_by(Condition, Experiment) %>%
  mutate_all(median, na.rm = T) %>%
  ungroup() %>%
  distinct()

cowplot::plot_grid(plotlist = list(
  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.3_4",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[3] %->% LC[4]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.3_5",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[3] %->% LC[5]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.4_3",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[4] %->% LC[3]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.4_5",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[4] %->% LC[5]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.5_3",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[5] %->% LC[4]))+
    coord_cartesian(ylim = c(0, 1)),

  plot_univariate_decorated_0(
    temp = plt_tecc,
    yvar = "cc.5_4",
    new.y = "Coupling Coefficient",
    new.title = expression(LC[5] %->% LC[4]))+
    coord_cartesian(ylim = c(0, 1))
))



#### ####

triangle_lower_df <- data.frame(
  id    = c("b", "b", "b"),
  value = c("2", "2", "2"),
  x     = c(0, 1, 1),
  y     = c(0, 1, 0)
)

triangle_upper_df <- data.frame(
  id    = c("a", "a", "a"),
  value = c("1", "1", "1"),
  x     = c(0, 1, 0),
  y     = c(0, 1, 1)
)


plt_2.1 <- plt_tecc %>%
  ggplot(aes(cc.3_4, cc.4_3))+
  geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#e41a1c")+
  geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#377eb8")+
  geom_abline(slope = 1, intercept = 0, linetype = "dashed")+
  geom_point()+
  facet_grid(.~Condition)

plt_2.2 <- plt_tecc %>%
  ggplot(aes(cc.3_5, cc.5_3))+
  geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#e41a1c")+
  geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#4daf4a")+
  geom_point()+
  facet_grid(.~Condition)

plt_2.3 <- plt_tecc %>%
  ggplot(aes(cc.4_5, cc.5_4))+
  geom_polygon(data = triangle_upper_df, aes(x = x, y = y, group = id), fill = "#377eb8")+
  geom_polygon(data = triangle_lower_df, aes(x = x, y = y, group = id), fill = "#4daf4a")+
  geom_point()+
  facet_grid(.~Condition)

plt_2.1 / plt_2.2 / plt_2.3

what happens at the level of output (excitability)?

for my curiosity:

library(brms)
# fit1 <- brm(shal ~ Condition,
#             data = temp,
#             family = gaussian())
# # 'C:/rtools40/usr/mingw_/bin/g++' not found


fit1 <- readRDS(here("data", "brms_models", paste0("shal", ".RDS")))

plot(fit1, pars = c("Condition"))
# 
plot(conditional_effects(fit1, effects = "Condition"))
# 
# temp_col <- "Ia.0"



tictoc::tic()
walk(
  c(mrna_cols, 
    memb_cols, 

    # "Ia.Slope"
    # "Ia.0.s", "Ia.Slope.s"

    # "Cor"
    epsp_cols
    # "Min.V_Obs", "Max.Amplitude_Obs", #"Max.Amplitude_Sim", 
    # "AUC_Obs"#, "AUC_Sim", "Max.Amplitude_Delta", "AUC_Delta"
    ),
  # c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), 
  function(temp_col){


    if (!file.exists(here("data", "brms_models", paste0(temp_col, ".RDS")))){

      print(temp_col)

      temp <- M_all[M_all$Source == "Daniel", c("Condition", "Experiment", "Cell", temp_col)] %>% distinct()

      # Identify outliers for each group
      temp_b <- temp[temp$Condition == "0h", ]
      temp_c <- temp[temp$Condition == "1h", ]
      temp_d <- temp[temp$Condition == "24h", ]

      temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ]
      temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ]
      temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ]

      temp <- rbind(temp_b, temp_c) %>% rbind(temp_d)

      # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ]
      temp <- temp[!is.na(temp[[temp_col]]), ]

      # make sure we don't have pseudoreplicates for network measurements
      if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){
        temp <- temp %>% 
          select(-Cell) %>% 
          group_by(Condition, Experiment) %>% 
          mutate_all(median, na.rm = T) %>% 
          distinct()
      }


      if(nrow(temp) < 10){

        return(list(
          dv = temp_col,
          p = NA,
          ep = NA,
          hsd_groups = data.frame(dv = c(NA, NA, NA), 
                                  groups = c(NA, NA, NA), 
                                  Condition = c(NA, NA, NA)
          ),
          fm = NA))

      } else {

        fit1 <- brm(as.formula(paste0(temp_col, " ~ Condition")), 
                    data = temp, 
                    family = gaussian())

        saveRDS(fit1, here("data", "brms_models", paste0(temp_col, ".RDS")))

        # rm(list = "fit1")
      }


    }



  })
tictoc::toc()

mk table

run anovas

# temp <- M_all %>% 
#   select(Condition, Experiment, Cell,  Ihtk.0) %>% 
#   group_by(Condition, Experiment, Cell) %>% 
#   mutate(Ihtk.0 = median(Ihtk.0, na.rm = T)) %>% 
#   distinct() %>% 
#   ungroup()
# 
# temp_bands <- temp %>% group_by(Condition) %>% summarise(
#   q10 = quantile(Ihtk.0, probs = 0.1, na.rm = T),
#   q25 = quantile(Ihtk.0, probs = 0.25, na.rm = T),
#   q45 = quantile(Ihtk.0, probs = 0.45, na.rm = T),
#   q50 = quantile(Ihtk.0, probs = 0.5, na.rm = T),
#   q55 = quantile(Ihtk.0, probs = 0.55, na.rm = T),
#   q75 = quantile(Ihtk.0, probs = 0.75, na.rm = T),
#   q90 = quantile(Ihtk.0, probs = 0.9, na.rm = T),
#   )
# 
# temp <- full_join(temp, temp_bands)
# 
# ggplot(data = temp, aes(x = Condition, y = Ihtk.0))+
#   geom_ribbon(aes(x = Condition, ymin = q10, ymax = q90, group = 1), color = "lightgray", fill = "cornflowerblue", alpha = 0.3)+
#   geom_ribbon(aes(x = Condition, ymin = q25, ymax = q75, group = 1), color = "darkgray", fill = "cornflowerblue", alpha = 0.3)+
#   # geom_ribbon(aes(x = Condition, ymin = q45, ymax = q55, group = 1), color = "black", fill = "cornflowerblue", alpha = 0.3)+
#   geom_line(aes(x = Condition, y = q50, group = 1), linetype = "dashed", color = "blue")+
#   geom_point(aes(x = Condition, y = q50), color = "lightgray", size = 4)+
#   geom_point(aes(x = Condition, y = q50), color = "black", size = 4, shape = 1)+
#   geom_point(alpha = 0.5)

# Make table ----
# c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols)
# 
# temp_col = "Ihtk.0"

#TODO figure out what to do with the epsp cols
tictoc::tic()
temp_list <- map(
  c(tecc_cols, tevc_cols, epsp_cols, ionic_cols, mrna_cols), function(temp_col){
    print(temp_col)

    temp <- M_all[, c("Condition", "Experiment", "Cell", temp_col)] %>% distinct()

    # Identify outliers for each group
    temp_b <- temp[temp$Condition == "0h", ]
    temp_c <- temp[temp$Condition == "1h", ]
    temp_d <- temp[temp$Condition == "24h", ]

    temp_b <- temp_b[win_x_iqr(temp_b[[temp_col]], multiplier = 1.5), ]
    temp_c <- temp_c[win_x_iqr(temp_c[[temp_col]], multiplier = 1.5), ]
    temp_d <- temp_d[win_x_iqr(temp_d[[temp_col]], multiplier = 1.5), ]

    temp <- rbind(temp_b, temp_c) %>% rbind(temp_d)

    # temp <- temp[win_x_iqr(temp[[temp_col]], multiplier = 1.5), ]
    temp <- temp[!is.na(temp[[temp_col]]), ]

    # make sure we don't have pseudoreplicates for network measurements
    if (temp_col %in% c("ig.3_4", "ig.3_5", "ig.4_5")){
      temp <- temp %>% 
        select(-Cell) %>% 
        group_by(Condition, Experiment) %>% 
        mutate_all(median, na.rm = T) %>% 
        distinct()
    }


    if(nrow(temp) < 10){

      return(list(
        dv = temp_col,
        p = NA,
        ep = NA,
        hsd_groups = data.frame(dv = c(NA, NA, NA), 
                                groups = c(NA, NA, NA), 
                                Condition = c(NA, NA, NA)
        ),
        fm = NA))

    } else {


      temp_shuffle <- temp
      resample_array <- map(1:1000, function(i){
        temp_shuffle$Condition <- sample(temp_shuffle$Condition, replace = F)
        fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp_shuffle)      
        return(car::Anova(fm)[1,3])
      }) %>% unlist()


      fm <- lm(as.formula(paste0(temp_col, " ~ Condition")), data = temp)

      fm_hsd <- agricolae::HSD.test(fm, trt = "Condition") 
      fm_hsd <- fm_hsd$group

      fm_hsd$Condition <- rownames(fm_hsd)



      return(list(
        dv = temp_col,
        p = car::Anova(fm)[1,4],
        ep = mean(resample_array >= car::Anova(fm)[1,3]),
        hsd_groups = fm_hsd,
        fm = fm))
    }
  })
tictoc::toc()


temp_list <- temp_list %>% transpose()
fm_overview_table <- data.frame(dv = unlist(temp_list[[1]]),
                                p  = unlist(temp_list[[2]]),
                                ep = unlist(temp_list[[3]]))


# work up post hoc stats
temp_list_hsd <- map(seq_along(temp_list[[4]]), function(i){
  if (!is.na(unique(temp_list[[4]][[i]]$groups))){
    temp_list[[4]][[i]] %>% 
      gather("dv", "Estimate", names(temp_list[[4]][[i]])[1]) %>% 
      pivot_wider(names_from = "Condition", values_from = c("groups", "Estimate") ) %>% 
      mutate_all(as.character)
  }
})  

fm_overview_hsd <- do.call(rbind, temp_list_hsd)

Make table

fm_overview_table <- full_join(fm_overview_table, fm_overview_hsd)


# add groupings
fm_overview_table <- fm_overview_table %>% mutate(Family = case_when(
  dv %in% c("vrest") ~ "Resting Voltage",
  dv %in% c("r11", "r1") ~ "Membrane Resistance",
  dv %in% ionic_cols ~ "Outward Currents",
  dv %in% epsp_cols ~ "Excitability",
  dv %in% c("r12", "rc", "ig", "cc", "ig.3_4", "ig.3_5", "ig.4_5") ~ "Coupling Measures"
))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = factor(fm_overview_table$Family, levels = c("Membrane Resistance", "Resting Voltage", "Coupling Measures", "Outward Currents", "Excitability"))) %>% 
  arrange(Family) %>% 
  mutate(Family = as.character(Family))

# temp <- rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName)
fm_overview_table <- full_join(fm_overview_table, 
                               rename(mRNAInfo[, c("Family", "RGeneName")], dv = RGeneName, Family2 = Family))

fm_overview_table <- fm_overview_table %>% 
  mutate(Family = ifelse(is.na(Family) & !is.na(Family2), Family2, Family)) %>% 
  select(-Family2) %>% 
  filter(!is.na(p))


# add additional cols
fm_overview_table$fdr <- p.adjust(fm_overview_table$ep, method = "fdr")

fm_overview_table <- fm_overview_table %>% 
  mutate(Estimate_0h = as.numeric(Estimate_0h),
         Estimate_1h = as.numeric(Estimate_1h),
         Estimate_24h = as.numeric(Estimate_24h)) %>% 
  mutate(p = round(p, digits = 4),
         ep = round(ep, digits = 4),
         fdr = round(fdr, digits = 4),
         Estimate_0h = round(Estimate_0h, digits = 4),
         Estimate_1h = round(Estimate_1h, digits = 4),
         Estimate_24h = round(Estimate_24h, digits = 4)) %>% 
  mutate(stars = case_when(fdr > 0.10               ~ "",
                           fdr < 0.10 & fdr > 0.05  ~ ".",
                           fdr < 0.05 & fdr > 0.01  ~ "*",
                           fdr < 0.01 & fdr > 0.001 ~ "**",
                           fdr < 0.001              ~ "***"
  ))


# library(officer)
library(flextable)
M_ft <-flextable(fm_overview_table,
                 col_keys = c("Family", "dv", "p", "ep",
                              "fdr", 
                              "stars", "Estimate_0h", "Estimate_1h", "Estimate_24h"))

M_ft <- theme_vanilla(M_ft)
M_ft <- merge_v(M_ft, j = c("Family") )
# M_ft <- color(M_ft, ~ fdr < 0.05, ~ stars, color = "red")
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ dv, bold = TRUE)
M_ft <- bold(M_ft, ~ p   <= 0.05, ~ p, bold = TRUE)
M_ft <- bold(M_ft, ~ ep  <= 0.05, ~ ep, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ fdr, bold = TRUE)
M_ft <- bold(M_ft, ~ fdr <= 0.05, ~ stars, bold = TRUE)

# get the groupings without showing the columns. 

color_a = "#ffeda0"
color_ab= "#feb24c"
color_b = "#f03b20"
color_c = "#2b8cbe"

M_ft <- bg(M_ft, ~ groups_0h == "a", ~ Estimate_0h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_0h == "ab",~ Estimate_0h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_0h == "b", ~ Estimate_0h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_0h == "c", ~ Estimate_0h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_1h == "a", ~ Estimate_1h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_1h == "ab",~ Estimate_1h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_1h == "b", ~ Estimate_1h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_1h == "c", ~ Estimate_1h, bg = color_c)

M_ft <- bg(M_ft, ~ groups_24h == "a", ~ Estimate_24h, bg = color_a)
M_ft <- bg(M_ft, ~ groups_24h == "ab",~ Estimate_24h, bg = color_ab)
M_ft <- bg(M_ft, ~ groups_24h == "b", ~ Estimate_24h, bg = color_b)
M_ft <- bg(M_ft, ~ groups_24h == "c", ~ Estimate_24h, bg = color_c)


M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_0h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_1h, bg = "White")
M_ft <- bg(M_ft, ~ fdr > 0.05, ~ Estimate_24h, bg = "White")


M_ft <- set_header_labels(M_ft, 
                          dv = "", stars = "",
                          Estimate_0h = "0h", Estimate_1h = "1h", Estimate_24h = "24h" )

M_ft <- autofit(M_ft)
M_ft

pptx_file <- "./officer_output/DanielAnova.pptx"
save_as_pptx("my table" = M_ft, path = pptx_file)




# fm_overview_table %>% 
#   ggplot(aes(x = ep))+
#   geom_histogram(binwidth = 0.025)+
#   geom_vline(xintercept = 0.05)

Revisitng excitability 21/03/17

df <- full_join(metadata[, c("Experiment",  "FileName", "Condition")],
          epsp)
df <- df %>% filter(!is.na(Channel))

df %>% 
  unite(uid, Experiment, Cell) %>% 
  filter(Key != "predicted") %>% 
  ggplot(aes(x = uid, AUC, color = Type))+
  geom_point(alpha = 0.5)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = -0.))+
facet_wrap(.~Condition)


# it looks like we don't have full coverage of epsp_50
df %>% group_by(Experiment, Cell, Key, Type) %>% filter(Sweep == 1) %>% tally() %>% spread(Type, n) %>% View()

Another take on excitability:

binned Cor

# FIXME Column `AUC` doesn't exist
# Goal: extract Absolute measures and differential measures for 
# 1. Burst Amplitude (max mV)
# 2. AUC 
# 3. Stimulus correlation (between)


# read in trace
brian2_trace_files <- list.files(here("data", "predicted_voltage"))

brian2_trace_sets <- brian2_trace_files %>% 
  str_remove(pattern = "-actual_v.rds") %>% 
  str_remove(pattern = "-predict_v.rds") %>% 
  unique()

epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){
  print(i)
  brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])]


  # retrieve data and merge into a single df
  temp <- map(brian2_read_in, function(read_file){
    temp_predict <- readRDS(here("data", "predicted_voltage", read_file))

    if(str_detect(read_file, pattern = "predict_v")){
      #consolidate into one df
      for (j in seq(1, length(temp_predict))){
        temp_predict[[j]]$Sweep <- j
      }
      temp_predict <- do.call(rbind, temp_predict)
    }
    return(temp_predict)
  })

  names(temp) <- brian2_read_in

  temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]]
  temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble()
  temp_a$nrow <- seq(1, nrow(temp_a))
  temp_p$nrow <- seq(1, nrow(temp_p))
  temp_a$Sweep <- as.character(temp_a$Sweep)
  temp_p$Sweep <- as.character(temp_p$Sweep)

  temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow)

  # Show difference between expected/actual
  # i = 1
  # [1] "190808a_0014-epsp-In4-actual_v.rds"  "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome.
  # Here the end of sweep 4 deviates from baseline so the delta is off
  # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline.


  # To get around this in the short term we'll calulate the factors independently for expected/obs
  # downsample_data(temp, len = 10000) %>% 
  # # temp %>% 
  #   mutate(predicted = predicted - offset) %>% 
  #   mutate(difference = In4-predicted) %>% 
  #   ggplot(aes(x = Time))+
  #   geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+
  #   geom_line(aes(y=In4), color = "black")+
  #   geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+
  #   # geom_line(aes(y=difference-offset), color = "purple")+
  #   facet_grid(Sweep~.)

  # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below

  rewrite_names <- F
  if ("In9" %in% names(temp)){
    names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset")
    rewrite_names <- T
  }

  Use.Bin.Size <- 19687

  temp_processed <- temp
  temp_processed$TimeBin <- rep(rep(seq(1, (nrow(temp)/4)/Use.Bin.Size), each = Use.Bin.Size), times = 4)

  # begin to work with data proper
  # Add optional ways to slice the data based on what we expect to occur
  # At the moment we leave this unused
  temp_processed <- temp_processed %>% 
    mutate(
      Period = case_when(
        Time > 0.40  & Time <  4.87  ~ "A",
        Time > 4.87  & Time < 10.74  ~ "B",
        Time > 10.74 & Time < 16.74  ~ "C",
        Time > 16.74 & Time < 19.685 ~ "D"
      ),
      Period_eq = case_when(
        Time > 0.40  & Time <  0.40 + 2.945 ~ "A",
        Time > 4.87  & Time <  4.87 + 2.945 ~ "B",
        Time > 10.74 & Time < 10.74 + 2.945 ~ "C",
        Time > 16.74 & Time < 16.74 + 2.945 ~ "D"
      ),
      Burst = case_when(
        Time > 0.40  & Time <  2    ~ "A",
        Time > 4.87  & Time <  6.17 ~ "B",
        Time > 10.74 & Time < 12.54 ~ "C",
        Time > 16.74 & Time < 18.5  ~ "D"
      ),
      Burst_eq = case_when(
        Time > 0.40  & Time <  2    + 1.30 ~ "A",
        Time > 4.87  & Time <  6.17 + 1.30 ~ "B",
        Time > 10.74 & Time < 12.54 + 1.30 ~ "C",
        Time > 16.74 & Time < 18.5  + 1.30 ~ "D"
      )
    ) %>% 
    # correlation with expected
    group_by(Sweep, TimeBin) %>% 
    mutate(Cor = cor(In4, predicted)) %>% 
    ungroup() %>% 
    gather(Key, mV, c("In4", "predicted")) %>% 
    ## Metrics for whole sweep
    group_by(Key, Sweep) %>% 
    # Min voltage
    mutate(Min.V = min(mV, na.rm = T)) %>%
    # Defining Baseline as average of the lowest 20% of points
    mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>%
    mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>%
    mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>%
    # Max voltage
    mutate(Max.Amplitude = max(mV, na.rm = T)) %>%
    # AUC
    # Using Min.V
    # mutate(AUC = sum(mV - Min.V, na.rm = T)/(max(temp$Time)/Use.Bin.Size)) %>%
    ungroup() 

  temp_processed <- temp_processed[seq(1, nrow(temp_processed), by = Use.Bin.Size), ]



  if (rewrite_names){
    temp_processed <- temp_processed %>% 
      mutate(Key = ifelse(Key == "In4", "In9", Key))
  }


  temp_processed <- temp_processed[, c("Sweep", "TimeBin", "Time", "Cor", "Key", "Min.V", #"Use.in.Base", 
                                       "Max.Amplitude", "AUC")] %>% 
    distinct() #%>% 
    # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", 
    #                                                 "Max.Amplitude", "AUC")) 


  # add annotations to aid merging
  file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-")

  temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1])
  temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf")
  temp_processed$Channel <- as.character(file_name[[1]][3])

  return(temp_processed)
})


epsp_summary <- do.call(rbind, epsp_summary_list)
# epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")]

epsp_summary %>% filter(FileName == "190808a_0014.abf", Sweep == 1) %>% 
  ggplot(aes(x = Time, y = Cor))+
  geom_point()


ggplot(aes(x = Time, y = Cor))+
  scattermore::geom_scattermost(
    as.data.frame(epsp_summary[epsp_summary$Sweep == 1, ]),
    # col=viridisLite::viridis(100, alpha=0.05)[1+99*d[,2]],
    pointsize=1,
    pixels=c(700,700)) +
  ggtitle("geom_scattermost")

Binned by event

# Goal: extract Absolute measures and differential measures for 
# 1. Burst Amplitude (max mV)
# 2. AUC 
# 3. Stimulus correlation (between)


# read in trace
brian2_trace_files <- list.files(here("data", "predicted_voltage"))

brian2_trace_sets <- brian2_trace_files %>% 
  str_remove(pattern = "-actual_v.rds") %>% 
  str_remove(pattern = "-predict_v.rds") %>% 
  unique()

epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){
  print(i)
  brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])]


  # retrieve data and merge into a single df
  temp <- map(brian2_read_in, function(read_file){
    temp_predict <- readRDS(here("data", "predicted_voltage", read_file))

    if(str_detect(read_file, pattern = "predict_v")){
      #consolidate into one df
      for (j in seq(1, length(temp_predict))){
        temp_predict[[j]]$Sweep <- j
      }
      temp_predict <- do.call(rbind, temp_predict)
    }
    return(temp_predict)
  })

  names(temp) <- brian2_read_in

  temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]]
  temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble()
  temp_a$nrow <- seq(1, nrow(temp_a))
  temp_p$nrow <- seq(1, nrow(temp_p))
  temp_a$Sweep <- as.character(temp_a$Sweep)
  temp_p$Sweep <- as.character(temp_p$Sweep)

  temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow)

  # Show difference between expected/actual
  # i = 1
  # [1] "190808a_0014-epsp-In4-actual_v.rds"  "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome.
  # Here the end of sweep 4 deviates from baseline so the delta is off
  # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline.


  # To get around this in the short term we'll calulate the factors independently for expected/obs
  # downsample_data(temp, len = 10000) %>% 
  # # temp %>% 
  #   mutate(predicted = predicted - offset) %>% 
  #   mutate(difference = In4-predicted) %>% 
  #   ggplot(aes(x = Time))+
  #   geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+
  #   geom_line(aes(y=In4), color = "black")+
  #   geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+
  #   # geom_line(aes(y=difference-offset), color = "purple")+
  #   facet_grid(Sweep~.)

  # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below

  rewrite_names <- F
  if ("In9" %in% names(temp)){
    names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset")
    rewrite_names <- T
  }

  # begin to work with data proper
  # Add optional ways to slice the data based on what we expect to occur
  # At the moment we leave this unused
  temp_processed <- temp %>% 
    mutate(
      Segment = case_when(
        Time > 0.40  & Time < 2      ~ "A_B",
        Time > 2     & Time < 4.87   ~ "A_I",
        Time > 4.87  & Time < 6.17   ~ "B_B",
        Time > 6.17  & Time < 10.74  ~ "B_I",        
        Time > 10.74 & Time < 12.54  ~ "C_B",
        Time > 12.54 & Time < 16.74  ~ "C_I",
        Time > 16.74 & Time < 18.5   ~ "D_B",
        Time > 18.5  & Time < 19.685 ~ "D_I"
      )
    ) %>% 
    # correlation with expected
    group_by(Sweep, Segment) %>% 
    mutate(Cor = cor(In4, predicted)) %>% 
    ungroup() %>% 
    gather(Key, mV, c("In4", "predicted")) %>% 
    ## Metrics for whole sweep
    group_by(Key, Sweep) %>% 
    # Min voltage
    mutate(Min.V = min(mV, na.rm = T)) %>%
    # Defining Baseline as average of the lowest 20% of points
    mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>%
    mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>%
    mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>%
    # Max voltage
    mutate(Max.Amplitude = max(mV, na.rm = T)) %>%
    # AUC
    # Using Min.V
    # mutate(AUC = sum(mV - Min.V, na.rm = T)/19.68685) %>%
    ungroup() 


  if (rewrite_names){
    temp_processed <- temp_processed %>% 
      mutate(Key = ifelse(Key == "In4", "In9", Key))
  }


  temp_processed <- temp_processed[, c("Sweep", "Segment", "Cor", "Key", "Min.V", #"Use.in.Base", 
                                       "Max.Amplitude")] %>% 
    distinct() #%>% 
    # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", 
    #                                                 "Max.Amplitude", "AUC")) 


  # add annotations to aid merging
  file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-")

  temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1])
  temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf")
  temp_processed$Channel <- as.character(file_name[[1]][3])

  return(temp_processed)
})


epsp_summary <- do.call(rbind, epsp_summary_list)
# epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")]



epsp_summary <- left_join(epsp_summary, metadata) %>% 
  select(-FileName, -Type) %>% 
  mutate(Cell = case_when(Channel == "In4" ~ In4,
                          Channel == "In9" ~ In9)) %>% 
  mutate(Cell = as.character(Cell))  %>% 
  mutate(Key = case_when(Key %in% c("In4", "In9") ~ "Obs",
                                  Key %in% c("predicted")  ~ "Sim")) %>% 
  pivot_wider(names_from = "Key", values_from  = c("Min.V", "Max.Amplitude")) %>% 
  mutate(Max.Amplitude_Sim = Max.Amplitude_Sim + (Min.V_Obs - Min.V_Sim)) %>% 
  mutate(Max.Amplitude_Delta = Max.Amplitude_Obs - Max.Amplitude_Sim) %>% 
  group_by(Condition, Experiment, Cell, Segment, Sweep) %>% 
  select(-Min.V_Sim) %>% 
  mutate_at(c("Cor", 
              "Min.V_Obs", #"Min.V_Sim", "Min.V_Delta", 
              "Max.Amplitude_Obs", "Max.Amplitude_Sim", "Max.Amplitude_Delta"), median, na.rm = T) %>% 
  distinct() %>% 
  ungroup()


epsp_summary %>% 
  ggplot(aes(x = Segment, y = Cor, group = Sweep, color = Sweep))+
  geom_point(position = position_jitter(width = 0.1), alpha = 0.3)+
  geom_smooth(se = F)+
  facet_grid(.~Condition)+
  ggsci::scale_color_aaas()

epsp_summary %>% 
  filter(!is.na(Segment)) %>% 
  ggplot(aes(x = Cor, y = Sweep, fill = factor(stat(quantile))))+#, group = Sweep, color = Sweep))+
  # ggridges::geom_density_ridges()+
  ggridges::stat_density_ridges(
               geom = "density_ridges_gradient", calc_ecdf = TRUE,
               quantiles = 4, quantile_lines = TRUE
             ) +
               scale_fill_viridis_d(name = "Quartiles")+
  coord_flip()+
  facet_grid(Condition~Segment)+
  theme(legend.position = "")



## Here's the stimulus used in the protocol ====
# sweep duration should be 19.687 seconds
epsp_stim <- readABF_as_matrix(here("inst", "extdata", "epsp_stim_files", "08 current injection.abf"),
                  channels = "Axo1I2")
epsp_stim <- as_tibble(epsp_stim) %>%
  mutate(Time = Time - min(Time, na.rm = T)) %>%
  rename(Stim = Axo1I2)



epsp_stim <- mutate(epsp_stim,
Stim = Stim - min(Stim, na.rm = T),
         Stim = (Stim / 11.9034)-1) 


epsp_summary %>% 
  filter(!is.na(Segment)) %>% 
  mutate(x = case_when(
    Segment == "A_B" ~ 0.40,
    Segment == "A_I" ~ 2,
    Segment == "B_B" ~ 4.87,
    Segment == "B_I" ~ 6.17,
    Segment == "C_B" ~ 10.74,
    Segment == "C_I" ~ 12.54,
    Segment == "D_B" ~ 16.74,
    Segment == "D_I" ~ 18.5
  ),
  xend = case_when(
    Segment == "A_B" ~ 2,
    Segment == "A_I" ~ 4.87,
    Segment == "B_B" ~ 6.17,
    Segment == "B_I" ~ 10.74,
    Segment == "C_B" ~ 12.54,
    Segment == "C_I" ~ 16.74,
    Segment == "D_B" ~ 18.5,
    Segment == "D_I" ~ 19.687
  )) %>% 
  ggplot()+
  geom_segment(aes(x = x, y = Cor, xend = xend, yend = Cor, colour = Cor))+
  facet_grid(Condition~Sweep)+
  geom_line(data = epsp_stim, aes(Time, Stim))+
  theme(legend.position = "")+
  scale_color_viridis_c(direction = -1, option = "B")
#   option  
# A character string indicating the colormap option to use. Four options are available: "magma" (or "A"), "inferno" (or "B"), "plasma" (or "C"), "viridis" (or "D", the default option) and "cividis" (or "E").






epsp_stim %>% 
  mutate(Stim = Stim - min(Stim, na.rm = T),
         Stim = (Stim / 11.9034)-1) %>%
  ggplot(aes(Time, Stim))+
  geom_line()

# 
# 
# shading_annotations <- data.frame(
#   starts = c(0.40,
#              4.87,
#              10.74,
#              16.74),
#   next_start = c(4.87,
#                  10.74,
#                  16.74,
#                  19.685),
#   equal_len = c(0.40,
#                 4.87,
#                 10.74,
#                 16.74) + 2.945,
#   on_end = c(2,
#              6.17,
#              12.54, 
#              18.5),
#   equal_on = c(0.40,
#                4.87,
#                10.74,
#                16.74) + 1.30
# )
# 
# annotation_df <- data.frame(x = c(0.5, 0.5, 2.1, 1.8),
#                             y = seq(0.25, -1.25, length.out = 4),
#                             text = c("Period",
#                                      "Minumum Period",
#                                      "Burst", 
#                                      "Min Burst"))
# 
# segmentation_demo_plt <- ggplot()+
#   geom_vline(xintercept = c(shading_annotations$starts, shading_annotations$equal_len, shading_annotations$next_start, shading_annotations$equal_on), color = "gray")+
#   geom_path(data = epsp_stim, aes(Time, Stim+1.5))+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = next_start, ymin = 0, ymax = 0.5), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[1], color = RColorBrewer::brewer.pal(3, "Set1")[1])+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = equal_len, ymin = -0.5, ymax = 0), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[1], color = RColorBrewer::brewer.pal(3, "Set1")[1])+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = on_end, ymin = -1, ymax = -0.5), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[2], color = RColorBrewer::brewer.pal(3, "Set1")[2])+
#   geom_rect(data = shading_annotations, aes(xmin = starts, xmax = equal_on, ymin = -1.5, ymax = -1), alpha = 0.3, fill = RColorBrewer::brewer.pal(3, "Set1")[2], color = RColorBrewer::brewer.pal(3, "Set1")[2])+
#   theme_classic()+
#   # geom_vline(xintercept = 12.54)+
#   geom_text(data = annotation_df[1:2, ], aes(x=x, y=y, label = text), hjust = "inward", color = RColorBrewer::brewer.pal(3, "Set1")[1])+
#   geom_text(data = annotation_df[3:4, ], aes(x=x, y=y, label = text), hjust = "inward", color = RColorBrewer::brewer.pal(3, "Set1")[2])+
#   ylim(-1.5, 9.5)+
#   labs(title = "Ways to Segement EPSP Stim")
# 
# 
# segmentation_demo_plt

Rolling Cor

# Goal: extract Absolute measures and differential measures for 
# 1. Burst Amplitude (max mV)
# 2. AUC 
# 3. Stimulus correlation (between)


# read in trace
brian2_trace_files <- list.files(here("data", "predicted_voltage"))

brian2_trace_sets <- brian2_trace_files %>% 
  str_remove(pattern = "-actual_v.rds") %>% 
  str_remove(pattern = "-predict_v.rds") %>% 
  unique()

epsp_summary_list <- map(seq(1, length(brian2_trace_sets)), function(i){
  print(i)
  brian2_read_in <- brian2_trace_files[str_detect(brian2_trace_files, brian2_trace_sets[i])]


  # retrieve data and merge into a single df
  temp <- map(brian2_read_in, function(read_file){
    temp_predict <- readRDS(here("data", "predicted_voltage", read_file))

    if(str_detect(read_file, pattern = "predict_v")){
      #consolidate into one df
      for (j in seq(1, length(temp_predict))){
        temp_predict[[j]]$Sweep <- j
      }
      temp_predict <- do.call(rbind, temp_predict)
    }
    return(temp_predict)
  })

  names(temp) <- brian2_read_in

  temp_a <- temp[str_detect(names(temp), pattern = "actual_v")][[1]]
  temp_p <- temp[str_detect(names(temp), pattern = "predict_v")][[1]] %>% as.data.frame() %>% as_tibble()
  temp_a$nrow <- seq(1, nrow(temp_a))
  temp_p$nrow <- seq(1, nrow(temp_p))
  temp_a$Sweep <- as.character(temp_a$Sweep)
  temp_p$Sweep <- as.character(temp_p$Sweep)

  temp <- full_join(temp_a, select(temp_p, -Time)) %>% select(-nrow)

  # Show difference between expected/actual
  # i = 1
  # [1] "190808a_0014-epsp-In4-actual_v.rds"  "190808a_0014-epsp-In4-predict_v.rds" shows why we need to be able to audit the outcome.
  # Here the end of sweep 4 deviates from baseline so the delta is off
  # TODO instead of subtracting a value at the end, we should look at the times with i_inj less than x and then use a percentile from that to estimate the baseline.


  # To get around this in the short term we'll calulate the factors independently for expected/obs
  # downsample_data(temp, len = 10000) %>% 
  # # temp %>% 
  #   mutate(predicted = predicted - offset) %>% 
  #   mutate(difference = In4-predicted) %>% 
  #   ggplot(aes(x = Time))+
  #   geom_ribbon(aes(ymin=In4, ymax=predicted), fill = "cornflowerblue")+
  #   geom_line(aes(y=In4), color = "black")+
  #   geom_line(aes(y=predicted), color = "cornflowerblue", alpha = 0.3)+
  #   # geom_line(aes(y=difference-offset), color = "purple")+
  #   facet_grid(Sweep~.)

  # This is an inelegant workaround for data that is based on in9/12. This prevents duplication of the large set below

  rewrite_names <- F
  if ("In9" %in% names(temp)){
    names(temp) <- c("In4", "In7", "Sweep", "Time", "predicted", "offset")
    rewrite_names <- T
  }

  # begin to work with data proper
  # Add optional ways to slice the data based on what we expect to occur
  # At the moment we leave this unused
  temp_processed <- temp %>% 
    mutate(
      Period = case_when(
        Time > 0.40  & Time <  4.87  ~ "A",
        Time > 4.87  & Time < 10.74  ~ "B",
        Time > 10.74 & Time < 16.74  ~ "C",
        Time > 16.74 & Time < 19.685 ~ "D"
      ),
      Period_eq = case_when(
        Time > 0.40  & Time <  0.40 + 2.945 ~ "A",
        Time > 4.87  & Time <  4.87 + 2.945 ~ "B",
        Time > 10.74 & Time < 10.74 + 2.945 ~ "C",
        Time > 16.74 & Time < 16.74 + 2.945 ~ "D"
      ),
      Burst = case_when(
        Time > 0.40  & Time <  2    ~ "A",
        Time > 4.87  & Time <  6.17 ~ "B",
        Time > 10.74 & Time < 12.54 ~ "C",
        Time > 16.74 & Time < 18.5  ~ "D"
      ),
      Burst_eq = case_when(
        Time > 0.40  & Time <  2    + 1.30 ~ "A",
        Time > 4.87  & Time <  6.17 + 1.30 ~ "B",
        Time > 10.74 & Time < 12.54 + 1.30 ~ "C",
        Time > 16.74 & Time < 18.5  + 1.30 ~ "D"
      )
    ) %>% 
    # correlation with expected
    group_by(Sweep) %>% 
    mutate(Cor = cor(In4, predicted)) %>% 
    ungroup() %>% 
    gather(Key, mV, c("In4", "predicted")) %>% 
    ## Metrics for whole sweep
    group_by(Key, Sweep) %>% 
    # Min voltage
    mutate(Min.V = min(mV, na.rm = T)) %>%
    # Defining Baseline as average of the lowest 20% of points
    mutate(Use.in.Base = quantile(mV, probs = .2, na.rm = T)) %>%
    mutate(Use.in.Base = ifelse(mV <= Use.in.Base, mV, NA)) %>%
    mutate(Use.in.Base = mean(Use.in.Base, na.rm = T)) %>%
    # Max voltage
    mutate(Max.Amplitude = max(mV, na.rm = T)) %>%
    # AUC
    # Using Min.V
    mutate(AUC = sum(mV - Min.V, na.rm = T)/19.68685) %>%
    ungroup() 


  if (rewrite_names){
    temp_processed <- temp_processed %>% 
      mutate(Key = ifelse(Key == "In4", "In9", Key))
  }


  temp_processed <- temp_processed[, c("Sweep", "Cor", "Key", "Min.V", #"Use.in.Base", 
                                       "Max.Amplitude", "AUC")] %>% 
    distinct() #%>% 
    # pivot_wider(names_from = "Key", values_from = c("Min.V", #"Use.in.Base", 
    #                                                 "Max.Amplitude", "AUC")) 


  # add annotations to aid merging
  file_name <- brian2_read_in[[1]] %>% str_split(pattern = "-")

  temp_processed$Experiment <- as.character(str_split(as.character(file_name[[1]][1]), pattern = "_")[[1]][1])
  temp_processed$FileName <- paste0(as.character(file_name[[1]][1]), ".abf")
  temp_processed$Channel <- as.character(file_name[[1]][3])

  return(temp_processed)
})


epsp_summary <- do.call(rbind, epsp_summary_list)
epsp_summary <- epsp_summary[, c("Experiment", "FileName", "Channel", "Sweep", "Cor", "Key", "Min.V", "Max.Amplitude", "AUC")]

Secondary Questions

scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Baseline"), shal, Ia.0)), 
                  X = "shal", 
                  Y = "Ia.0")

scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Compensated"), shal, Ia.0)), 
                  X = "shal", 
                  Y = "Ia.0")

scatter_w_wo_outliers(temp = distinct(select(filter(M_all, Time == "Delayed"), shal, Ia.0)), 
                  X = "shal", 
                  Y = "Ia.0")

# ref ----
# c(
#   ## Ion Channels ====
#   #Kv
#   "Shab", "Shaker", "Shal", "Shaw1", "Shaw2",
#   "KCNQ1", "KCNQ2", 
#   "KCNH1", "KCNH2", "KCNH3",
#   #K
#   "BKKCa", "SKKCa", 
#   "KCNT1", "IRK", "KCNK1", "KCNK2", 
#   #Ca
#   "CaV1", "CaV2", "CaV3", 
#   #Na
#   "CbNaV", 
#   "NALCN", 
#   #Hyp/CyclNuc Activated/Gated
#   "IH", 
#   ## CNGs
#   
#   #TRPs
#   "TRP-A1", "TRP-A-like", "TRP-M1", "TRP-M3", "TRP-M-like", 
#   ##-V5, -V6, -Pyrexia
#   
#   
#   #Biogenic amine activated
#   "HisCL", 
#   
#   ## Receptors Biogenic Amines ====
#   #octopamine
#   
#   #dopamine
#   "DAR1A", "DAR2",  "Dopa-1Br",
#   #serotonin
#   "HTR1A", "HTR2", "HTR7", "5HTr-1Br",  
#   
#   #histamine
#   "His-1r", "His-2r", "His-3r", 
#   #gaba
#   "GABAB-R1",
#   "LCCH3r",
#   "RDLr", 
#   "mGABA2", "mGABA3", 
#   
#   ## Receptors Glutamate/Acetylcholine ====
#   #metabotropic glutamate receptors
#   "mGluR1", "mGluR2", "mGluR4", "mGluR5", "mGluR7",
#   #kainate-like receptors
#   "Kainate-1A", "Kainate-1B", "Kainate-2A", "Kainate-2B", "Kainate-2C",
#   #NMDA-like receptors
#   "NMDA-1A", "NMDA-1B", "NMDA-2A", "NMDA-2B", "NMDA-2-like", 
#   #glutamate-gated chloride channel
#   "GluCl",
#   #acetylcholine receptors
#   "mACHrA", "mACHrB", 
#   
#   # Crustacean cardioactive peptide receptor ===
#   "CCAPr", 
#   
#   #Acetylcholinesterase
#   "ACHE", 
#   # Actylcholine catalyst
#   "ChAT", 
#   
#   #vesicular ach transporter
#   "vAChT", 
#   #vesicular glut transporter
#   "vGluT", 
#   
#   ## Innexins ====
#   "INX1", "INX2", "INX3", "INX4", "INX5"
# )

load(here("data", "ionic.rds"))
load(here("data", "tevc.rds"))
load(here("data", "tecc.rds"))
load(here("data", "mrna.rds"))

mRNAInfo <- readxl::read_excel(here("inst", "extdata", "mRNAInfo.xlsx")) # tables from Northcutt 2016 and annotations

ionic ~ mrna?

Make a joint dataset

M <- left_join(ionic, mrna)

M <- M[!is.na(M$x18s), ]

MeasuredmRNA <- names(mrna)[!(names(mrna) %in% c("Source", "Pharm", "Time", "Sample", "Experiment", "Cell"))]

# temp.plts <- map(seq_along(MeasuredmRNA), function(i){
#   ggplot(M, aes_string(MeasuredmRNA[i], y = "Ihtk.0", color = "Time"))+
#   geom_smooth(method = "lm", se = F)+
#   geom_point()
# })
# 
# cowplot::plot_grid(plotlist = temp.plts)
# 
# mrna

Clean up data cols

PercentNotNA <- map(MeasuredmRNA, function(i){
  mean(!is.na(M[[i]]))
}) %>% unlist()

ThresholdPercentNotNA <- 0.6
MeasuredmRNA <- MeasuredmRNA[PercentNotNA >= ThresholdPercentNotNA] # use in map call
MeasuredKCurrents <- c("Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope") # use in map call

How well does this work for currents we know something about?

Ia ~ shal

# TEXTING WITH DAVE
scatter_w_wo_outliers(temp = filter(M, Time == "0h"), 
                  X = "shal", 
                  Y = "Ia.0")

# broom::tidy(lm(Ia.0 ~ shal, temp))
# broom::tidy(lm(Ia.0 ~ shal, temp[temp$flag, ]))

library(brms)
# Loading required package: StanHeaders
# rstan (Version 2.19.3, GitRev: 2e1f913d3ca3)
# For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores())
# To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)
# For improved execution time, we recommend calling
Sys.setenv(LOCAL_CPPFLAGS = '-march=corei7 -mtune=corei7')
# although this causes Stan to throw an error on a few processors.


temp <- temp[!is.na(temp$Ia.0), ]

tic <- Sys.time()
fm1 <- lm(Ia.0 ~ shal, temp)
toc1 <- Sys.time() - tic

tic <- Sys.time()
fm2 <- brm(Ia.0 ~ shal, data  = temp, family = gaussian())
toc2 <- Sys.time() - tic

tic <- Sys.time()
fm3 <- brm(Ia.0 ~ shal, data  = temp, family = student())
toc3 <- Sys.time() - tic



toc1
toc2
toc3

t1 <- broom::tidy(fm1)
t2 <- broom::tidy(fm2)
t3 <- broom::tidy(fm3)



bayes_ex1 <- 
ggplot(temp, aes(x = shal, y = Ia.0))+
  geom_point()+
  geom_abline(slope = as.numeric(t1[2, "estimate"]), intercept = as.numeric(t1[1, "estimate"]), size = 1, color = "gray")+
  geom_abline(slope = as.numeric(t2[2, "estimate"]), intercept = as.numeric(t2[1, "estimate"]), size = 1, color = "cornflowerblue")+
  geom_abline(slope = as.numeric(t3[2, "estimate"]), intercept = as.numeric(t3[1, "estimate"]), size = 1, color = "firebrick")+
  theme_bw()

Ia ~ shaker

scatter_w_wo_outliers(temp = filter(M, Time == "0h"), 
                  X = "shaker", 
                  Y = "Ia.0")

Model comparison shal best predicts Ia

temp <- filter(M, Time == "0h") 
# What's the best model?
library(AICcmodavg)
# no rm points
aictab(cand.set = list(
  lm(Ia.0 ~ shal, temp),
  lm(Ia.0 ~ shaker, temp),
  lm(Ia.0 ~ shal+shaker, temp),
  lm(Ia.0 ~ shal*shaker, temp)
))

# rm points
temp_no_outliers <- temp %>% 
  mutate(shal = ifelse(win_x_iqr(shal), shal, NA)) %>% 
  mutate(shaker = ifelse(win_x_iqr(shaker), shaker, NA)) %>% 
  mutate(Ia.0 = ifelse(win_x_iqr(Ia.0), Ia.0, NA))


aictab(cand.set = list(
  lm(Ia.0 ~ shal, temp_no_outliers),
  lm(Ia.0 ~ shaker, temp_no_outliers),
  lm(Ia.0 ~ shal+shaker, temp_no_outliers),
  lm(Ia.0 ~ shal*shaker, temp_no_outliers)
))

IHTK ~ bkkca

scatter_w_wo_outliers(temp = filter(M, Time == "0h"), 
                  X = "bkkca", 
                  Y = "Ihtk.0")

How might we exclude outliers? Cooks Distance? Iterated Cooks?

temp <- filter(M, Time == "Baseline") %>%
  mutate(flag = ifelse(bkkca < 3000, T, F))

fm <- lm(Ihtk.0 ~ bkkca, temp)

temp$cooksd <- cooks.distance(fm)

ggplot(temp, aes_string("bkkca", "Ihtk.0", color = "flag"))+
  geom_smooth(method = lm, se = F, color = "gray")+
  geom_smooth(data = temp[temp$flag == T, ], method = lm, se = F, fullrange = T)+
  geom_point(aes(color = cooksd < (mean(cooks.distance(fm))*2.2) ))+
  scale_color_manual(values = c("gray", "black"))+
  theme_bw()+
  theme(legend.position = "")

# broom::tidy(lm(Ihtk.0 ~ bkkca, temp))
# broom::tidy(lm(Ihtk.0 ~ bkkca, temp[temp$flag, ]))
broom::tidy(lm(Ihtk.0 ~ bkkca, temp[temp$cooksd < (mean(cooks.distance(fm))*4) , ]))
# temp %>% filter(flag == F)

Checking the use of iterated Cook's distance Using cooks is going to be challenging. It'll be hard to choose a stopping criteria as

flag_cooks_outliers <- function(
  df = filter(M, Time == "Baseline"),
  mod = "Ihtk.0 ~ bkkca",
  multiplier = 4,
  center = "mean"){

  fm <- lm(as.formula(mod), df)

  cd <- cooks.distance(fm)

  if (center == "mean"){
    cd_outlier <- cd > mean(cd)*multiplier 
  } else if (center == "median") {
    cd_outlier <- cd > median(cd)*multiplier 
  } else {
    warning("center must be mean or median")
  }

  return(cd_outlier)
}

# run 10 iterations of cooks distance. When a cell is identified as an outlier, drop it and note when it was deamed an outlier

temp <- M %>% dplyr::select(Ihtk.0, bkkca, Time) %>% filter(Time == "Baseline")
temp$dropout <- NA

for (i in 1:10){
  current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ],
                    mod = "Ihtk.0 ~ bkkca",
                    multiplier = 4,
                    center = "mean")

  temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i

}

ggplot(temp, aes(x = bkkca, y = Ihtk.0, color = as.factor(dropout)))+
  geom_point()

bkkca_cook_outliers <- temp$dropout
temp %>% group_by(dropout) %>% tally()
# # A tibble: 5 x 2
#   dropout     n
#     <int> <int>
# 1       1     1
# 2       2     1
# 3       3     1
# 4       4     1
# 5      NA    12





temp <- M  %>% filter(Time == "Baseline")
temp$dropout <- NA
for (i in 1:10){
  current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ],
                    mod = "Ia.0 ~ shal",
                    multiplier = 4,
                    center = "mean")
  temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i
}

temp %>% group_by(dropout) %>% tally()


temp <- M  %>% filter(Time == "Baseline")
temp$dropout <- NA
for (i in 1:10){
  current_outliers <- flag_cooks_outliers(df = temp[is.na(temp$dropout), ],
                    mod = "Ia.0 ~ shaker",
                    multiplier = 4,
                    center = "mean")
  temp[is.na(temp$dropout), ][current_outliers, "dropout"] <- i
}

temp %>% group_by(dropout) %>% tally()

contrast cooks with a fit t distribution.

# temp <- filter(M, Time == "Baseline")
# 
# tic <- Sys.time()
# fm0 <- lm(Ihtk.0 ~ bkkca, temp[temp$bkkca < 3000, ])
# toc1 <- Sys.time() - tic
# 
# tic <- Sys.time()
# fm1 <- lm(Ihtk.0 ~ bkkca, temp)
# toc1 <- Sys.time() - tic
# 
# tic <- Sys.time()
# fm2 <- brm(Ihtk.0 ~ bkkca, data  = temp, family = gaussian())
# toc2 <- Sys.time() - tic
# 
# tic <- Sys.time()
# fm3 <- brm(Ihtk.0 ~ bkkca, data  = temp, family = student())
# toc3 <- Sys.time() - tic
# 
# 
# 
# toc1
# toc2
# toc3
# 
# t0 <- broom::tidy(fm0)
# t1 <- broom::tidy(fm1)
# t2 <- broom::tidy(fm2)
# t3 <- broom::tidy(fm3)
# 
# 
# temp$dropout <- bkkca_cook_outliers
# # temp[is.na(temp$dropout), "dropout"] <- 11
# 
# alpha_param <-  0.8
# # bayes_ex1 <- 
# ggplot(temp, aes(x = bkkca, y = Ihtk.0, color = as.factor(dropout)))+
#   
#   geom_abline(slope = as.numeric(t0[2, "estimate"]), intercept = as.numeric(t0[1, "estimate"]), size = 1, alpha = alpha_param, color = "black")+
#   geom_abline(slope = as.numeric(t1[2, "estimate"]), intercept = as.numeric(t1[1, "estimate"]), size = 1, alpha = alpha_param, color = "black")+
#   geom_abline(slope = as.numeric(t2[2, "estimate"]), intercept = as.numeric(t2[1, "estimate"]), size = 1, alpha = alpha_param, color = "cornflowerblue")+
#   geom_abline(slope = as.numeric(t3[2, "estimate"]), intercept = as.numeric(t3[1, "estimate"]), size = 1, alpha = alpha_param, color = "firebrick")+
#   
#   geom_point(size = 2, color = "white")+
#   geom_point(size = 2)+
#   geom_point(size = 2, color = "black", shape = 1)+
#   
#   theme_bw()+
#   theme(legend.position = "bottom")+
#   scale_color_brewer(type = "div", palette = "PuOr")

IHTK ~ skkca

scatter_w_wo_outliers(temp = filter(M, Time == "Baseline"), 
                  X = "skkca", 
                  Y = "Ihtk.0")

How do K channels predict IKv intercepts?

# TODO redo this using the info in mRNAInfo 
# TODO use next block as inspiration.

# filter(M, Time == "Baseline") %>% 
#   select(Ihtk.0, Ihtk.Slope, Ia.0, Ia.Slope, 
#          #Kv
#          shab, shaker, shal, shaw1, shaw2,
#          kcnq1, kcnq2,
#          kcnh1, kcnh2, kcnh3,
#          #k
#          bkkca, skkca,
#          kcnt1, irk, kcnk1, kcnk2
#   ) %>% 
#   pivot_longer(cols = c(
#     #kv
#     "shab", "shaker", "shal", "shaw1", "shaw2", 
#     "kcnq1", "kcnq2",
#     "kcnh1", "kcnh2", "kcnh3", 
#     #k
#     "bkkca", "skkca", 
#     "kcnt1", "irk", "kcnk1", "kcnk2"
#   ), names_to = "mRNA", values_to = "Count") %>% 
#   pivot_longer(cols = c("Ihtk.0", 
#                         # "Ihtk.Slope", 
#                         "Ia.0"#, 
#                         # "Ia.Slope"
#                         ),
#                names_to = "Key", values_to = "Value") %>% 
#   ggplot(aes(x = Count, y = Value))+
#   geom_smooth(method = lm, se = F)+
#   geom_point()+
#   # facet_wrap(Key ~ mRNA, nrow = 4, scales = "free")
#   facet_grid(Key ~ mRNA)+
#   theme_bw()
#   
# 
# PlotList <- 
# map(c(
#   #Kv
#   "Shab", "Shaker", "Shal", "Shaw1", "Shaw2", 
#   "Kcnq1", "Kcnq2",
#   "Kcnh1", "Kcnh2", "Kcnh3", 
#   #K
#   "BkkCa", "SkkCa", 
#   "Kcnt1", "Irk", "Kcnk1", "Kcnk2"
# ), function(X){
#   map(c("Ihtk.0", 
#         # "Ihtk.Slope", 
#         "Ia.0"#, 
#         # "Ia.Slope"
#   ), function(Y){
#     ggplot(filter(M, Time == "Baseline"), aes_string(X, Y))+
#       geom_smooth(method = lm, se = F)+
#       geom_point()+
#       theme_bw()
#   })
# })
#   
# PlotList <- transpose(PlotList)  
# 
# library(cowplot)
# cowplot::plot_grid(plotlist = PlotList[[1]])
# 
# cowplot::plot_grid(plotlist = PlotList[[2]])

Linear models -- what predicts ionics?

This falls under the auspices of EDA.

KCurrent = "Ihtk.0"
mRNA = "BkkCa"

# Make outlier free version of the baseline data

M_baseline_outlier_free <- filter(M, Time == "Baseline")

for (NAME in names(M)[!(names(M) %in% c("Condition" , "Cell", "Experiment", "Source", "Pharm", "Time", "Sample"))]){
  M_baseline_outlier_free[[NAME]] <- ifelse(win_x_iqr(M_baseline_outlier_free[[NAME]]), M_baseline_outlier_free[[NAME]], NA)
}



# Which have relationships at baseline?
fm.list <- map(MeasuredKCurrents, function(KCurrent){
  map(MeasuredmRNA, function(mRNA){
    lm(as.formula(paste0(KCurrent, " ~ ", mRNA)), data = M_baseline_outlier_free)
  })
})


p.list <- map(fm.list, function(ListLvl1){
  map(ListLvl1, function(ListLvl2){
    broom::tidy(ListLvl2)[2, "p.value"] # non-intercept p
  }) %>% unlist()
})

p.df <- do.call(cbind, p.list) %>% as.data.frame()

names(p.df) <- MeasuredKCurrents
p.df$mRNA <- MeasuredmRNA





# Uncorrected
p.df.long <- p.df %>% 
  gather(iK, pValue, all_of(MeasuredKCurrents))
  # dplyr::filter(iK %in% c("Ia.0", "Ihtk.0")) %>% 
  # mutate(pAdj = p.adjust(pValue, method = "fdr")) %>% 

highlight.text.temp.df <- p.df.long %>% 
  mutate(highlight = ifelse(pValue <= 0.05, 1, 0)) %>% 
  select(-pValue) %>% pivot_wider(names_from = iK, values_from = highlight) %>% 
  mutate(highlight = ifelse(Ihtk.0 == 0, 
                     ifelse(Ihtk.Slope == 0,
                     ifelse(Ia.0 == 0, 
                     ifelse(Ia.Slope == 0, 
                            0, 1), 1), 1), 1) ) %>% 
  mutate(highlight = ifelse(highlight == 1, "firebrick", "black")) %>% 
  arrange(mRNA)

plt.fm <-   
p.df.long %>% 
  ggplot(aes(x = mRNA, color = pValue <= 0.05))+
  geom_hline(yintercept = 0.95, color = "gray", linetype = "dashed")+
  # geom_point()+
  # geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2, color = "gray", alpha = 0.0001)+
  geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2)+

  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pValue), size = 2)+
  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pAdj), size = 2)+
  facet_grid(iK~.)+
  scale_color_manual(values = c("black", "firebrick"))+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, color = highlight.text.temp.df[["highlight"]]),
        legend.position = "")

# Corrected with fdr
plt.fm.fdr <- 
p.df %>% 
  gather(iK, pValue, all_of(MeasuredKCurrents)) %>% 
  # dplyr::filter(iK %in% c("Ia.0", "Ihtk.0")) %>% 
  mutate(pAdj = p.adjust(pValue, method = "fdr")) %>% 
  ggplot(aes(x = mRNA, color = pAdj <= 0.05))+
  geom_hline(yintercept = 0.95, color = "gray", linetype = "dashed")+
  # geom_point()+
  geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pValue), size = 2, color = "gray", alpha = 0.0001)+
  geom_rect(aes(xmin = mRNA, xmax = mRNA, ymin = 0, ymax = 1- pAdj), size = 2, color = "black")+

  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pValue), size = 2)+
  # geom_segment(aes(x = mRNA, xend = mRNA, y = 0, yend = 1- pAdj), size = 2)+
  facet_grid(iK~.)+
  scale_color_manual(values = c("black", "firebrick"))+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        legend.position = "")

PCA what loads with ionics?

library(factoextra)
# library(missMDA) # for imputePCA 
library("FactoMineR") #PCA

MNumericOnly <- M_baseline_outlier_free
MNumericOnly <- MNumericOnly[, c(MeasuredKCurrents, MeasuredmRNA)]

ResPCA <- PCA(MNumericOnly, scale.unit = T)

get_eig(ResPCA)

fviz_screeplot(ResPCA, addlabels = T)

fviz_pca_biplot(ResPCA, repel = T)
# Ihtk.Slope, Ihtk.0, Ia.0, Ia.Slope, and Kainate1A are all loading together (roughly)

fviz_pca_var(ResPCA, 
             col.var = "contrib",
             # gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Avoid text overlapping
             )

What correlates with ionics?

library(corrr)

MCorrr <- MNumericOnly %>% 
  correlate()

temp <- MCorrr %>% 
  as_tibble() %>% 
  select(rowname, Ihtk.0, Ihtk.Slope, Ia.0, Ia.Slope) %>% 
  gather(colname, Cor, c("Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope")) 

# temp$rowname <- as.factor(temp$rowname) 
#TODO fix ordering


temp %>% 
  ggplot(aes(rowname, Cor, color = Cor))+
  geom_segment(aes(x = rowname, xend = rowname, y = 0, yend = Cor))+
  geom_point()+
  geom_point(shape = 1, color = "black")+
  geom_hline(yintercept = 0)+
  ylim(-1,1)+
  facet_grid(colname~.)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90, hjust = 1))+
  scale_color_gradient2(low = RColorBrewer::brewer.pal(7, "PuOr")[1],
                         mid = RColorBrewer::brewer.pal(7, "PuOr")[4],
                         high = RColorBrewer::brewer.pal(7, "PuOr")[7],
                         midpoint = 0, 
                         na.value = "#00000000")



# 
# 
# 
# # rplot(MCorrr)
# 
# # You have to zoom or the chords don't show up. ¯\_(ツ)_/¯
# # network_plot(MCorrr, min_cor = .7)
# 
# 
library(ggcorrplot)
# 
MCorr <- cor(MNumericOnly, use = "pairwise.complete.obs")

pMat <- cor_pmat(MNumericOnly)
# 
library(RColorBrewer)
# ggcorrplot(MCorr, 
#            colors = RColorBrewer::brewer.pal(7, "PuOr")[c(1, 4 ,7)]#,
#            # p.mat = pMat
#            # lab = T
#            )
# 
# # library(GGally)
# # GGally::ggscatmat(MNumericOnly)
# GGally::ggcorr(MCorr,
#        # nbreaks = 5,
#        low = RColorBrewer::brewer.pal(7, "PuOr")[1],
#                          mid = RColorBrewer::brewer.pal(7, "PuOr")[4],
#                          high = RColorBrewer::brewer.pal(7, "PuOr")[7])


library(corrplot)
corrplot(MCorr, method = "square", type = "upper", tl.col = "black", order = "hclust", col = brewer.pal(n = 9, name = "PuOr"),
         p.mat = pMat,
         sig.level = 0.01, insig = "blank")

danielkick/mRNA24hLC documentation built on Feb. 6, 2024, 2:18 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

danielkick/mRNA24hLC

In danielkick/mRNA24hLC:

Data preparation

Load data and create a uid

Convert condition labels

Current Clamp

Voltage Clamp (Gap Junction Procedure)

Add metadata to epsp

Update labeling on Brian's cells

Add whether network was active to metadata

Create cross type data, sans Brian's cells

About the data

What conditions exist in both datasets?

How many networks had any extracellular activity by their colleciton point?

How many individuals were there in each group?

set up DV groupings

1

What's happening in Brian's Data?

Prep data -- cull outliers and median interpolate missing values

Correlations

Correlation heatmaps

Correlations ECDFs

Changes in mean mRNA levels

How does time alter control cells?

How does time alter TEA cells?

How do blockers alter cells after all have been incubated?

2

Transition -- Do some of these changes replicate?

Data preparation

Figure 1

Compare Correlations

Compare Directional Changes

3

Primary Questions

set up data

Are the three time points separable in multidimensional space?

PCA

UMAP

Clustering

Correlations

Correlation Heatmaps

Correlation ECDFs

How do these relationships change for TEA over time?

What univariate changes occur?

Produce univariate tables

Run anovas

Format table

produce univariate plots

membrane changes

excitability

mrna

for my curiosity:

mk table

Revisitng excitability 21/03/17

Another take on excitability:

binned Cor

Binned by event

Rolling Cor

Secondary Questions

ionic ~ mrna?

Make a joint dataset

Clean up data cols

How well does this work for currents we know something about?

Linear models -- what predicts ionics?

PCA what loads with ionics?

What correlates with ionics?

R Package Documentation

Browse R Packages

We want your feedback!