In becky-work/MouseCageApp: INCA

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# https://www.sciencedirect.com/science/article/pii/S1550413118304017?via%3Dihub
# https://calrapp.org  

knitr::opts_chunk$set(include = FALSE)
#library(MouseCageApp)

library(here)
library(dplyr)
library(tibble)
library(tidyr)
library(ggplot2)
library(plotly)

#shinycssloaders

options(scipen = 999) # no scientific notation pls
set.seed(72316)

Promethion Cage

These are fancy cages that take a lot of different measurements of the mice inside like vco2, water, food.

Files

There should be multiple files uploaded: sample metadata and the promethion file(s).

The promethion files have a unique cage and measurement pair as the columns (i.e. [variable][cage number]). There is one column that is "kcal_hr[cage number]" and is changed to "kcalhr_[cage number]" for coding ease.

When uploading the meta data, the user should be able to select which column matches

mouse_id - Each subject should have a unique id doesn't have to be a 'mouse' (I don't know what else these cages are used for.)
cage_id - Each cage has a unique id. There could be duplicate cage ids but there should be a unique start day for each.
start_day - When did this mouse go into the cage
time format - Ideally this is consistent but just in case will allow the user to pick
covariates - Select which other columns to keep

A 'subject_id' column is created in the meta data for each unique cage number and start day. Mouse ID is not used because the promethion data does not have mouse ids.

When uploading the promethion files, the minimum date of each cage number by file is found and a 'subject_id' column is created for each unique cage number minimum date pair. If there are duplicate cage numbers, they should be in separate files so the subject ids should be unique. (Assuming that if there will not be duplicate cage numbers in one file.) All the promethion files are converted to a long format and appended.

The sample meta data is joined to the promethion files using the 'subject_id' column. (Not yet: The number of unique subjects is checked and will alert you if you maybe did not upload all the promethion files.)

Assumptions:

Timezone of the promethion files matches real life time zone (Sys.timezone())
Promethion files are 24 hour time
The minimum date of a cage in the promethion file will be the same as the date in the meta data. You put the mouse in the cage on 9/10/2020 then the promethion data will start on 9/10/2020.
Duplicate cage numbers will be in separate promethion files

Note:

I am not sure what the "enviro" columns are in the promethion files. Cage settings? Not sure if they should all be the same or could be different within one study. I am removing these for now

f_dir <- 'test_data'

meta <- read.csv(here::here(f_dir, 'Male_5-6wk_metadata_test1.csv')) %>% 
  janitor::clean_names()
cage1 <- read.csv(here::here(f_dir, 'Run1_201909300914_m_Male.csv'))
cage2 <- read.csv(here::here(f_dir, 'Run2_201910071102_m_Male.csv'))
#f <- read.csv(here::here(f_dir, 'MCData.csv'))

# only csv, allow multiple cage files

# DT::renderDT({shinipsum::random_DT(nrow = 5, ncol = 3)})
# DT::dataTableOutput
#knitr::kable(head(meta))

# select label id (mouse), cage id, day stamp id, time format, experiment id, covariates columns and rename them
# time format should be placed in here
meta <- meta %>% rename(mouse_id = mouse, cage_id = cage, start_day = date) %>% 
  # force date format
  mutate(start_day = as.Date(start_day, format = '%m/%d/%Y'),
         # create a subject_id in case there are duplicate cage numbers (should be the same as mouse unless mouse is repeated later)
         subject_id = paste(cage_id, start_day, sep = '_'))

## Promethion data is var_cage# but Date_Time_1, envirolightlux_1, envirotemp_1, envirorh_1, enviroocupancy_1, envirosound_1 go with all of them (guessing these are preset)


# need to get all files, pivot, and rbind them
flip.cage.file.func <- function(f){
  f %>% 
    # not sure if always the first 6 columns
    pivot_longer(cols = -c(1:6), names_to = 'var', values_to = 'n') %>% 
    # prob with kcal_hr_cage# so just remove the first _
    mutate(var = gsub('kcal_hr','kcalhr',var)) %>% 
    # split up the var column into var and cage_id
    separate(var, into = c('var','cage_id'), sep = "_") %>% 
    janitor::clean_names() %>% 
    # make cage_id numeric
    mutate(cage_id = as.numeric(cage_id)) %>% 
    # fix datetime column
    mutate(day = as.Date(date_time_1, format = '%m/%d/%Y'),
           dt = as.POSIXct(date_time_1, format = '%m/%d/%Y %H:%M')) %>% 
    # find the start day; will need this to match with metadata
    group_by(cage_id) %>% 
    mutate(start_day = min(day),
           end_day = max(day)) %>% 
    ungroup() %>% 
    # get unique subjects because there can be cage_id duplicates
    ## Assume: no duplicate cage_id in one file
     #example: if two cage 1 they will be in different files and since this is ran per file the "start_day" should be correct
    mutate(subject_id = paste(cage_id, start_day, sep = '_'))
}

# rbind all files
cage_df <- flip.cage.file.func(cage1) %>% 
  rbind(flip.cage.file.func(cage2))

n_cage_prom <- n_distinct(cage_df$subject_id)
n_cage_meta <- n_distinct(meta$subject_id)
# these should be equal
# not entirely needed just makes sure all the promethion files were selected
n_cage_prom == n_cage_meta

# if yes then join up with meta data using cage_id column
prom_df <- left_join(cage_df, meta, by = c('subject_id', 'cage_id','start_day')) %>% 
  # fix column names for my sanity
  janitor::clean_names()

## little check that everything is together
# n_distinct(prom_df$subject_id) == n_distinct(meta$subject_id)
# n_distinct(prom_df$subject_id) == n_distinct(cage_df$subject_id)


# not sure what to do with the cage settings (assumuption)
# Do they change? Will they be different within one study?
cage_settings <- prom_df %>% select(contains('date_time'),contains('enviro'),contains('subject_id'))

prom_df <- prom_df %>% 
  # remove cage settings I get direction
  select(!contains('enviro')) %>% 
  # remove unneeded date columns
  select(-c('date_time_1', 'day')) %>% 
  # create new subject_id using mouse id
  mutate(subject_id = paste(mouse_id, 'Cage',cage_id, sep = '_'))

rm(cage_df, cage1, cage2, n_cage_meta, n_cage_prom, flip.cage.file.func)

# should be moved to flip.cage.file.func and not all columns need this
p <- prom_df %>% group_by(subject_id, var) %>% arrange(dt) %>% 
  mutate(diff_var = paste('diff',var,sep = '_'),
         diff_n = n - lag(n))

TO DO

To do

create nicer variable names e.g. foodupa -> aggregated food uptake
change the aggregated variables to non aggregates
fill in any missing time points
- not sure if the cage is set to record data at certain intervals. can that be changed?
outliers?
- don't know how useful this is because I don't know how this app is used in general

Time Series Plot

An interactive plot over time will be displayed for each user selected cage, mouse, date range, and selected measurement. The plot will be color coded by light/dark phases which the user will select (24 hour time)

# user will select but will use 7-7 as an example
start_light <- hms::as_hms('7:15:00') # inclusive
end_light <- hms::as_hms('19:00:00') # make sure the user put the hour that the light stopped aka when did dark start
light_colors <- c(light = "#FFF68F", dark = "#8DEEEE")

p_df <- prom_df %>% 
  # identify if cage is light or dark, calling it light phase so it's obvious that it's about the light
  mutate(light_phase = ifelse((hms::as_hms(dt) >= start_light) & (hms::as_hms(dt) < end_light), 'light','dark')) %>% 
  # name the phases e.g. light phase 2
  # This is hokey but it works
  group_by(subject_id, var) %>% arrange(dt) %>% 
  mutate(i = ifelse(light_phase == lag(light_phase), 0, 1),
         i = ifelse(is.na(i), 0, i)) %>% 
  group_by(subject_id, var, light_phase) %>% arrange(light_phase, dt) %>% 
  mutate(i1 = cumsum(i),
         add_one_to = ifelse(i1 == 0, light_phase, NA)) %>% 
  ungroup() 

# pull out which light/dark that the cage started in i started with 0 so the cumsum is off by one
#### NOTE: might want to leave it as light 0 dark 1 light 1 dark 2 etc so you know easily which phase the cage started in. Just remove the i1+1 part in the next step for this
a <- p_df %>% select(subject_id, add_one_to) %>% 
  na.omit() %>% unique() %>% 
  mutate(p = paste(subject_id, add_one_to))

prom_df <- p_df %>% 
  mutate(i1 = ifelse(paste(subject_id, light_phase) %in% a$p , i1 + 1, i1),
    phase_num = paste(light_phase, i1)) %>% select(-c(i1, add_one_to)) %>% rename(phase_change = i) %>% 
  ## THIS IS IMPORTANT for the plotting background to work
  arrange(subject_id, dt)

rm(a, p_df)

Example

# doing it this way because this will be a function in the app and will probably have to use the bang bangs
select_samp <- prom_df %>% select(mouse_id, var) %>% slice_sample(n = 1)
select_id <- select_samp$mouse_id
select_var <- select_samp$var 

plot_title <- paste('Mouse', select_id, ':', select_var)
x <- prom_df %>% filter(mouse_id == !!(select_id), var == !!(select_var))

# use this for vertical lines and labels of the phases
phase_changes <- x %>% group_by(subject_id, var) %>% 
  mutate(max_y = max(n)) %>% 
  group_by(subject_id, var, phase_num, max_y) %>% 
  summarise(min_dt = min(dt),
            y = max(max_y))

p <- ggplot(x, aes(dt, n)) +
  theme_minimal() +
  labs(title = plot_title,
       y = select_var,
       x = 'Date time') +
  # background colors for light phases
  geom_rect(aes(xmin = dt, xmax = lead(dt), ymin = min(n), ymax = Inf, fill = light_phase), 
            alpha = .75, stat = 'identity') +
  scale_fill_manual(name = 'Phase', values= light_colors)+
  # vertical lines indicating change in light phase
  geom_vline(data=phase_changes, aes(xintercept = min_dt), 
             show.legend = FALSE, color = 'black', linetype = 'dashed', alpha = .5)+
  # label for the lines
  geom_text(data=phase_changes, aes(x = min_dt, y = y, label = phase_num), 
            # move label forward 1 hour (in seconds), rotate text, set font size, set transparency
            nudge_x = 3600, angle = 90, size = 4, alpha = .5
            ) +
  # do this last so points are on top
  geom_point()

p

#geom_rect doesn't translate here
# plotly::ggplotly(p)

Boxplot of Measurements

User can select subject, measurement, and light/dark phase number to show a boxplot of the values

Example

select_samp <- prom_df %>% select(mouse_id, var, phase_num) %>% slice_sample(n = 1)
select_id <- select_samp$mouse_id
select_var <- select_samp$var 
select_phase <- select_samp$phase_num

plot_title <- paste('Mouse', select_id, ':', select_var, 'for Phase', select_phase)
x <- prom_df %>% filter(mouse_id == !!(select_id), var == !!(select_var), phase_num == !!(select_phase))

ggplot(x, aes(x = n, y = 0)) + 
  geom_boxplot() +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(title = plot_title,
       y = select_var)