knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
# https://www.sciencedirect.com/science/article/pii/S1550413118304017?via%3Dihub # https://calrapp.org knitr::opts_chunk$set(include = FALSE) #library(MouseCageApp) library(here) library(dplyr) library(tibble) library(tidyr) library(ggplot2) library(plotly) #shinycssloaders options(scipen = 999) # no scientific notation pls set.seed(72316)
These are fancy cages that take a lot of different measurements of the mice inside like vco2, water, food.
There should be multiple files uploaded: sample metadata and the promethion file(s).
The promethion files have a unique cage and measurement pair as the columns (i.e. [variable][cage number]). There is one column that is "kcal_hr[cage number]" and is changed to "kcalhr_[cage number]" for coding ease.
When uploading the meta data, the user should be able to select which column matches
A 'subject_id' column is created in the meta data for each unique cage number and start day. Mouse ID is not used because the promethion data does not have mouse ids.
When uploading the promethion files, the minimum date of each cage number by file is found and a 'subject_id' column is created for each unique cage number minimum date pair. If there are duplicate cage numbers, they should be in separate files so the subject ids should be unique. (Assuming that if there will not be duplicate cage numbers in one file.) All the promethion files are converted to a long format and appended.
The sample meta data is joined to the promethion files using the 'subject_id' column. (Not yet: The number of unique subjects is checked and will alert you if you maybe did not upload all the promethion files.)
Assumptions:
Note:
I am not sure what the "enviro" columns are in the promethion files. Cage settings? Not sure if they should all be the same or could be different within one study. I am removing these for now
f_dir <- 'test_data' meta <- read.csv(here::here(f_dir, 'Male_5-6wk_metadata_test1.csv')) %>% janitor::clean_names() cage1 <- read.csv(here::here(f_dir, 'Run1_201909300914_m_Male.csv')) cage2 <- read.csv(here::here(f_dir, 'Run2_201910071102_m_Male.csv')) #f <- read.csv(here::here(f_dir, 'MCData.csv')) # only csv, allow multiple cage files
# DT::renderDT({shinipsum::random_DT(nrow = 5, ncol = 3)}) # DT::dataTableOutput #knitr::kable(head(meta))
# select label id (mouse), cage id, day stamp id, time format, experiment id, covariates columns and rename them # time format should be placed in here meta <- meta %>% rename(mouse_id = mouse, cage_id = cage, start_day = date) %>% # force date format mutate(start_day = as.Date(start_day, format = '%m/%d/%Y'), # create a subject_id in case there are duplicate cage numbers (should be the same as mouse unless mouse is repeated later) subject_id = paste(cage_id, start_day, sep = '_'))
## Promethion data is var_cage# but Date_Time_1, envirolightlux_1, envirotemp_1, envirorh_1, enviroocupancy_1, envirosound_1 go with all of them (guessing these are preset) # need to get all files, pivot, and rbind them flip.cage.file.func <- function(f){ f %>% # not sure if always the first 6 columns pivot_longer(cols = -c(1:6), names_to = 'var', values_to = 'n') %>% # prob with kcal_hr_cage# so just remove the first _ mutate(var = gsub('kcal_hr','kcalhr',var)) %>% # split up the var column into var and cage_id separate(var, into = c('var','cage_id'), sep = "_") %>% janitor::clean_names() %>% # make cage_id numeric mutate(cage_id = as.numeric(cage_id)) %>% # fix datetime column mutate(day = as.Date(date_time_1, format = '%m/%d/%Y'), dt = as.POSIXct(date_time_1, format = '%m/%d/%Y %H:%M')) %>% # find the start day; will need this to match with metadata group_by(cage_id) %>% mutate(start_day = min(day), end_day = max(day)) %>% ungroup() %>% # get unique subjects because there can be cage_id duplicates ## Assume: no duplicate cage_id in one file #example: if two cage 1 they will be in different files and since this is ran per file the "start_day" should be correct mutate(subject_id = paste(cage_id, start_day, sep = '_')) } # rbind all files cage_df <- flip.cage.file.func(cage1) %>% rbind(flip.cage.file.func(cage2))
n_cage_prom <- n_distinct(cage_df$subject_id) n_cage_meta <- n_distinct(meta$subject_id) # these should be equal # not entirely needed just makes sure all the promethion files were selected n_cage_prom == n_cage_meta
# if yes then join up with meta data using cage_id column prom_df <- left_join(cage_df, meta, by = c('subject_id', 'cage_id','start_day')) %>% # fix column names for my sanity janitor::clean_names() ## little check that everything is together # n_distinct(prom_df$subject_id) == n_distinct(meta$subject_id) # n_distinct(prom_df$subject_id) == n_distinct(cage_df$subject_id) # not sure what to do with the cage settings (assumuption) # Do they change? Will they be different within one study? cage_settings <- prom_df %>% select(contains('date_time'),contains('enviro'),contains('subject_id')) prom_df <- prom_df %>% # remove cage settings I get direction select(!contains('enviro')) %>% # remove unneeded date columns select(-c('date_time_1', 'day')) %>% # create new subject_id using mouse id mutate(subject_id = paste(mouse_id, 'Cage',cage_id, sep = '_'))
rm(cage_df, cage1, cage2, n_cage_meta, n_cage_prom, flip.cage.file.func)
# should be moved to flip.cage.file.func and not all columns need this p <- prom_df %>% group_by(subject_id, var) %>% arrange(dt) %>% mutate(diff_var = paste('diff',var,sep = '_'), diff_n = n - lag(n))
To do
An interactive plot over time will be displayed for each user selected cage, mouse, date range, and selected measurement. The plot will be color coded by light/dark phases which the user will select (24 hour time)
# user will select but will use 7-7 as an example start_light <- hms::as_hms('7:15:00') # inclusive end_light <- hms::as_hms('19:00:00') # make sure the user put the hour that the light stopped aka when did dark start light_colors <- c(light = "#FFF68F", dark = "#8DEEEE") p_df <- prom_df %>% # identify if cage is light or dark, calling it light phase so it's obvious that it's about the light mutate(light_phase = ifelse((hms::as_hms(dt) >= start_light) & (hms::as_hms(dt) < end_light), 'light','dark')) %>% # name the phases e.g. light phase 2 # This is hokey but it works group_by(subject_id, var) %>% arrange(dt) %>% mutate(i = ifelse(light_phase == lag(light_phase), 0, 1), i = ifelse(is.na(i), 0, i)) %>% group_by(subject_id, var, light_phase) %>% arrange(light_phase, dt) %>% mutate(i1 = cumsum(i), add_one_to = ifelse(i1 == 0, light_phase, NA)) %>% ungroup() # pull out which light/dark that the cage started in i started with 0 so the cumsum is off by one #### NOTE: might want to leave it as light 0 dark 1 light 1 dark 2 etc so you know easily which phase the cage started in. Just remove the i1+1 part in the next step for this a <- p_df %>% select(subject_id, add_one_to) %>% na.omit() %>% unique() %>% mutate(p = paste(subject_id, add_one_to)) prom_df <- p_df %>% mutate(i1 = ifelse(paste(subject_id, light_phase) %in% a$p , i1 + 1, i1), phase_num = paste(light_phase, i1)) %>% select(-c(i1, add_one_to)) %>% rename(phase_change = i) %>% ## THIS IS IMPORTANT for the plotting background to work arrange(subject_id, dt) rm(a, p_df)
# doing it this way because this will be a function in the app and will probably have to use the bang bangs select_samp <- prom_df %>% select(mouse_id, var) %>% slice_sample(n = 1) select_id <- select_samp$mouse_id select_var <- select_samp$var plot_title <- paste('Mouse', select_id, ':', select_var) x <- prom_df %>% filter(mouse_id == !!(select_id), var == !!(select_var)) # use this for vertical lines and labels of the phases phase_changes <- x %>% group_by(subject_id, var) %>% mutate(max_y = max(n)) %>% group_by(subject_id, var, phase_num, max_y) %>% summarise(min_dt = min(dt), y = max(max_y)) p <- ggplot(x, aes(dt, n)) + theme_minimal() + labs(title = plot_title, y = select_var, x = 'Date time') + # background colors for light phases geom_rect(aes(xmin = dt, xmax = lead(dt), ymin = min(n), ymax = Inf, fill = light_phase), alpha = .75, stat = 'identity') + scale_fill_manual(name = 'Phase', values= light_colors)+ # vertical lines indicating change in light phase geom_vline(data=phase_changes, aes(xintercept = min_dt), show.legend = FALSE, color = 'black', linetype = 'dashed', alpha = .5)+ # label for the lines geom_text(data=phase_changes, aes(x = min_dt, y = y, label = phase_num), # move label forward 1 hour (in seconds), rotate text, set font size, set transparency nudge_x = 3600, angle = 90, size = 4, alpha = .5 ) + # do this last so points are on top geom_point() p #geom_rect doesn't translate here # plotly::ggplotly(p)
User can select subject, measurement, and light/dark phase number to show a boxplot of the values
select_samp <- prom_df %>% select(mouse_id, var, phase_num) %>% slice_sample(n = 1) select_id <- select_samp$mouse_id select_var <- select_samp$var select_phase <- select_samp$phase_num plot_title <- paste('Mouse', select_id, ':', select_var, 'for Phase', select_phase) x <- prom_df %>% filter(mouse_id == !!(select_id), var == !!(select_var), phase_num == !!(select_phase)) ggplot(x, aes(x = n, y = 0)) + geom_boxplot() + theme_minimal() + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) + labs(title = plot_title, y = select_var)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.