#' Import meta data from various PureHoney files.
#'
#' Reads meta data from sample databases on Box via `boxr::box_read()`
#' @md
#' @param study_id Character in the standard study name format (i.e. "RNO0101-1" or "RNO0101").
#' @param device Boolean, is this a device experiment? Only used for 'RNO' projects.
#' @param fully_parse Logical indicating whether to try and filter the database to a specific run; Set \code{FALSE} to troubleshoot.
#' @details Requires a column named `STUDY NAME` that contains the study ID in the
#' Excel file being read, in order to use the `fully_parse` option to work properly.
#' PAH experiments were originally recorded in box_file 101838632791 until November 2018.
#' In order to access these older projects adjust the study ID to be 'PAHO'.
#' @return A data frame containing the meta information from a given study or all the meta information from all studies in a program.
#' @examples
#' \dontrun{
#' ## Requires boxR credentials
#' # returns only meta data from study RNO0738
#' meta <- get_ph_metadata("RNO0738")
#' # returns all studies' meta data from RNO07 project
#' all_meta <- get_ph_metadata("RNO0738", fully_parse = F)
#' }
#' @importFrom stats setNames
#' @importFrom boxr box_read
#' @importFrom dplyr bind_rows filter
#' @importFrom purrr map_lgl
#' @export get_ph_metadata
get_ph_metadata <- function(study_id, device = FALSE, fully_parse = TRUE) {
if (length(study_id) > 1) stop("get_ph_metadata() only accepts one study_id at a time.")
# manually adjusted for new experiments
box_files <- c(
101838632791, 161176997524, 161176997524, 33980749645,
185483159790, 71120788417, 243555362303, 101346468166,
161176997524, 161176997524, 161176997524, 318145533469,
318145533469, 318145533469, 325086451911, 409671319284,
342600832787
) %>%
setNames(c(
"PAHO", "PAU", "PAG", "RNO",
"HEM", "RSQ", "PAD", "PAF",
"PAC", "PAB", "PAP", "PAN",
"PAA", "PAM", "PYP", "PAHT",
"PAH"
))
# manually map for sheet names of interest
if (device == FALSE) {
sheet_names <- c(
"PAH", "PAU_Huh7", "PAG_HepG2", "RNO07 Static",
"RNO20 (PBMC)", "HEM01", "RSQ05 MF", "RSQ01 Primary Heps Static",
"RNO21 (PAHTEE)", "RNO08 - fibroblasts", "PAD", "PAF",
"PAC_Cardiomyocytes", "PAB_PBMCs", "RNO01 -cell lines", "PAP_Hap1",
"PAN_Neurons", "PAM_skeletal muscle", "PAA_Astrocytes", "PYP",
"PAHT", "PAH"
) %>%
setNames(c(
"PAHO", "PAU", "PAG", "RNO07",
"RNO20", "HEM", "RSQ05", "RSQ01",
"RNO21", "RNO08", "PAD", "PAF",
"PAC", "PAB", "RNO01", "PAP",
"PAN", "PAM", "PAA", "PYP",
"PAHT", "PAH"
))
}
if (device == TRUE) {
sheet_names <- c("RNO07 Device") %>%
setNames("RNO07")
}
program <- gsub("^(\\D{3,4}).*", "\\1", study_id)
if (program %in% c("RNO", "RSQ")) program <- gsub("^(\\D{3}\\d{2}).*", "\\1", study_id)
meta <- boxr::box_read(box_files[program],
which = sheet_names[program],
col_names = T,
col_types = "text"
)
# reset "fix" for old/new PAH docs before filtering
study_id <- gsub("PAHO", "PAH", study_id)
if (fully_parse) result <- meta[meta$`STUDY NAME` %in% study_id, ]
# auto-catch the pressence/absence of '-1' run suffix
if (nrow(result) == 0) {
if (!grepl("-\\d$", study_id)) {
study_id2 <- paste0(study_id, "-1")
message(glue::glue('No runs matchs{study_id}, trying {study_id2}...'))
result <- meta[meta$`STUDY NAME` %in% study_id2, ]
}
if (grepl("-\\d$", study_id)) {
study_id2 <- gsub("-\\d+$", "", study_id)
message(glue::glue('No runs matchs{study_id}, trying {study_id2}...'))
result <- meta[meta$`STUDY NAME` %in% study_id2, ]
}
}
# drop empty columns
allNA_ix <- purrr::map_lgl(result, ~ sum(is.na(.)) == nrow(result))
result <- result[!allNA_ix]
return(result)
}
#' Download raw data and layout files for a RNO experiment
#'
#' @md
#' @param plate_ids Character vector of plate identifiers (ex. 100000748)
#' @param outdir File path to local destination for downloaded file.
#' @param layout Boolean. Should the assay layout file be downloaded along with the PureHoney raw files?
#' @param overwrite Boolean. Should existing versions on Box be overwritten.
#' Default is `FALSE`, i.e. don't overwrite.
#' @details Reccommend to use the development version of `boxr`, use `devtools::install_github(brenden-r/boxr)`.
#' @examples
#' \dontrun{
#' box_dl_pure_honey("761")
#' }
#' @importFrom boxr box_ls box_dl
#' @importFrom purrr map_df
#' @importFrom dplyr filter select slice bind_rows
#' @importFrom assertthat assert_that
#' @export
box_dl_pure_honey <- function (
plate_ids = 2276:2283,
outdir = getwd(), overwrite = FALSE, layout = TRUE)
{
plate_ids %<>% sprintf("%08d", .) %>% paste0("1", .)
raw_data_box_path <- "All Files/PureHoney-HS Shared Folder/Raw Data"
raw_data_files <- purrr::map_df(plate_ids, ~ boxr::box_search(.) %>%
as.data.frame()) %>%
dplyr::filter(grepl('\\.csv', name), path == raw_data_box_path) %>%
dplyr::select(name, type, id, modified_at)
possible_matches <- purrr::map_df(plate_ids, ~dplyr::filter(raw_data_files,
grepl(., name))) %>% dplyr::select(name, type, id, modified_at)
if (layout) {
coa_box_dir <- 6203931357
layout_files <- boxr::box_ls(dir_id = coa_box_dir) %>%
as.data.frame()
possible_layout_dirs <- purrr::map_df(plate_ids, ~dplyr::filter(layout_files,
grepl(., name))) %>% unique()
if (nrow(possible_layout_dirs) > 0) {
layout_file <- boxr::box_ls(possible_layout_dirs$id) %>%
as.data.frame() %>%
dplyr::filter(grepl("xlsx$", name)) %>%
dplyr::select(name, type, id, modified_at) %>%
dplyr::slice(1)
possible_matches %<>% dplyr::bind_rows(layout_file)
} else {
stop("No layout files found, consider running with `layout = FALSE`")
}
}
assertthat::assert_that(nrow(possible_matches) > 0, msg = "No matching files :(")
cat("The following files matched the specified plate identifiers: \\n")
print(possible_matches)
user_response <- readline(prompt = paste0("Download the files to ",
outdir, "? (y/N)"))
if (trimws(user_response) == "y") {
for (i in 1:length(possible_matches$id)) {
cat("\\nDownloading ", possible_matches$name[i],
"...")
try(boxr::box_dl(file_id = possible_matches$id[i],
local_dir = outdir, overwrite = overwrite))
}
}
if (layout) {
as.numeric(possible_layout_dirs$id)
}
else {
invisible(NULL)
}
}
#' The color palette for isotopes in PureHoney experiments
#' Colors '13C' blue, '12C' grey and 'deuterated' green.
#' @md
#' @return Named character vector for use with `scale_color_manual()`
#' @importFrom stats setNames
#' @export
ph_color_pal <- function() {
c("blue", "grey", "forestgreen") %>%
setNames(c("13C", "12C", "deuterated"))
}
#' The fill palette for isotopes in PureHoney experiments
#' Colors '13C' blue, '12C' grey and 'deuterated' green.
#' @md
#' @return Named character vector for use with `scale_fill_manual()`
#' @importFrom stats setNames
#' @export
ph_fill_pal <- function() {
c("navyblue", "grey20", "darkgreen") %>%
setNames(c("13C", "12C", "deuterated"))
}
#' PureHoney helper function
#'
#' Tries to match standard curve analytes to the paired analytes without a standard curve of their own. i.e. matching 12C-Isobutyrl with D6-Isobutyryl
#'
#' @param targets Vector of targets present in the analysis.
#' @param standard_prefix Regex expression to identify curves that have standard curves.
#' @param drop_acetyl Boolean. Should Acetyl related curve pairs be removed.
#' Usually both standards for 12C-Acetyl-CoA and 13C-Acetyl-CoA and in that case your don't want to curve match them.
#' @examples
#' \dontrun{
#' # purehoney data
#' curve_matcher(ph_data$targ, "D")
#' curve_matcher(ph_data$targ, "13C")
#' }
#'
#' @export
curve_matcher <- function(targets, standard_prefix = "D\\d", drop_acetyl = TRUE) {
unique_targs <- unique(targets)
curve_targs <- unique_targs[grepl(paste0("^(", standard_prefix, ")"), unique_targs)]
non_curve_targs <- setdiff(unique_targs, curve_targs)
matched <- curve_targs[match(gsub("^[^-]*-", "\\1", non_curve_targs),
gsub("^[^-]*-", "\\1", curve_targs))]
curve_pairs <- matched %>% setNames(non_curve_targs)
if (drop_acetyl) {
curve_pairs %<>% .[!grepl("Acetyl", ., ignore.case = TRUE)]
}
curve_pairs[!is.na(curve_pairs)]
}
#' PureHoney helper function
#'
#' Applies a named vector of curve matches (ususally the output of assayr2::curve_matcher()) to data to add the relevant curve id columns
#'
#' @md
#' @param data Date being anlyzed, typically from `assayr2::read_ph_raw()`.
#' @param curve_pairs Named chr vector, with curves analytes as values and matched non-curve analytes as names
#' @param plate_id A numeric or character representing the plate_id of the standards.
#' Useful if standards are read on one plate, but used for multiple sample plates.
#' @param prun A character representing the plate run of the standards.
#' Useful if standard are read on one read, but used for multiple sample reads.
#' @examples
#' \dontrun{
#' # if no special matching is required (all plate reads have dedicated curves)
#' ph_data %>% apply_curve_pairs(curve_pairs)
#'
#' # if targets need to be matched to curves on another read on the same plate
#' ph_data %>% apply_curve_pairs(curve_pairs, prun = "Std_read")
#'
#' # if targets need to be matched to curves on another plate
#' ph_data %>% apply_curve_pairs(curve_pairs, plate_id = 2222)
#' }
#' @importFrom dplyr mutate
#' @export
apply_curve_matches <- function(data, curve_pairs, plate_id = NULL, prun = NULL) {
data %<>% dplyr::mutate(curve_id = ifelse(target %in% names(curve_pairs),
curve_pairs[target], target
))
data$curve_id %<>% ifelse(is.na(data$curve_id), data$target, .)
# make curve_id unique for all plate runs
if (is.null(plate_id)) {plate_id <- data$plate_id}
if (is.null(prun)) {prun <- data$prun}
data$plate_run <- paste(plate_id, prun, sep = "_")
data$curve_id <- paste(data$plate_run, data$curve_id, sep = "_")
data
}
#' PureHoney helper function
#'
#' Joins a data frame of nucleus counts to a PureHoney data frame and performs
#' standard nuclear normalization calculations to determine intracellular concentraion.
#'
#' @param data PureHoney data frame, with the columns 'sample_id' and 'conc_corrected'.
#' @param nuc_data Nucleus count data frame, with the columns 'sample_id' and 'nuc_count'.
#' @examples
#' \dontrun{
#' data %<>% nuc_join(nuc_data)
#' }
#' @importFrom dplyr inner_join mutate
#' @export
nuc_join <- function(data, nuc_data) {
dplyr::inner_join(data, nuc_data) %>%
dplyr::mutate(
cell_volume_well_L = nuc_well * (3.4 * 10^-12),
conc_nmols = conc_corrected * (.0001 + cell_volume_well_L), # conc_corrected in nM want to get at # of mols in 100uL reaction volume (reaction buffer + cell volume)
conc_incell_nM = conc_nmols / cell_volume_well_L,
conc_incell_uM = conc_incell_nM / 1000
)
}
#' A wrapper for saving PureHoney data for used in the Shiny app, HST-Explorer
#'
#' Saves RDSs in the required format for use in `rds_checker.R`.
#' @md
#' @param data A data.frame with analyzed PureHoney data, ready to be QC'd for HemoShine.
#' @param path A valid file path to a directory for RDS outputs.
#' @param cols_split These columns will be combined to create `tx_run`, the unique ID in HST Explorer.
#' Deviating from the default may cause unexpected behavior.
#' @importFrom dplyr mutate
#' @importFrom purrr map2
#' @importFrom tidyr unite
#' @export
save_explorer_rds <- function(data, path = getwd(), cols_split = c("donor", "tx_cmpd", "tx_challenge", "run")) {
# add new column, tx_challenge_abstract, used for selection widget in HST-Explorer
data %<>% dplyr::mutate(
tx_challenge_abstract = gsub("(_|,)? ?\\d+(u|m)M", "", tx_challenge) %>%
gsub("\\+H00.*", "", .) %>%
gsub(" ", "", .),
# Add columns required for QC check
qc = FALSE,
qc_warning = ""
) %>%
tidyr::unite(tx_run, !!cols_split, remove = FALSE) # build unique split var
lst <- data %>% split(.$tx_run)
purrr::map2(lst, names(lst), ~saveRDS(., paste0(path, "/", .y, ".RDS")))
}
#' Compute the C3/C2 ratio on Carntine data from PureHoney
#'
#' Check a data frame for required information and return the orginal + newly
#' computed values.
#' @param data A data frame of PureHoney data. Must contain the columns:
#' "plate_id", "prun", "row", "column", "sample_id", "target", and "conc_corrected".
#' @param propionyl_species A character, either '12C' (default) or '13C' denoting the
#' species of Propionyl-Carnitine to use in calculating the C3/C2 ratio.
#' @examples
#' \dontrun{
#' data %<>% make_c3c2()
#' }
#' @importFrom dplyr ungroup group_by count filter select mutate distinct inner_join bind_rows
#' @importFrom tidyselect starts_with matches
#' @importFrom tidyr spread
#' @importFrom rlang ensym
#' @importFrom magrittr %<>%
#' @export
make_c3c2 <- function(data, propionyl_species = "12C") {
# check propionyl_species is valid
if (!propionyl_species %in% c("12C", "13C")) {
stop("Only '12C' or '13C' is allowed as 'propionyl_species'.")
}
propionyl <- paste0(propionyl_species, "-Propionyl-Car")
# check required fields are present
req_cols <- c("plate_id", "prun", "row", "column", "sample_id", "target", "conc_corrected")
if (!all(req_cols %in% names(data))) {
missing_cols <- req_cols[!req_cols %in% names(data)]
stop(paste0("Missing required columns: ", paste(missing_cols, collapse = ", ")))
}
# check required analytes are detected
req_analytes <- c(propionyl, "12C-Acetyl-Car")
if (!all(req_analytes %in% unique(data$target))) {
missing_analytes <- req_analytes[!req_analytes %in% unique(data$target)]
stop(paste0("Missing required analytes: ", paste(missing_analytes, collapse = ", ")))
}
# check sample_ids are valid
if (any(is.na(data$sample_id))) {
stop("NAs found for in data$sample_id. Double check that sample IDs are valid")
}
ratios <- data %>%
filter(target %in% c("12C-Acetyl-Car", propionyl)) %>%
select(plate_id, prun, sample_id, row, column, target, conc_corrected)
# check coordinates identify a unique sample; required for spreading later
check <- ratios %>%
ungroup() %>%
group_by(plate_id, prun, row, column, sample_id) %>%
count()
if(!all(check$n == 2)) {
stop("All combinations of columns, plate_id, row, column, and sample_id
are not unique. Double check that each unique sample contains measurements
for '12C-Acetyl-Car' and '", propionyl, "'.")
}
ratios %<>%
spread(target, conc_corrected) %>%
mutate(conc_corrected = !!ensym(propionyl) / `12C-Acetyl-Car`) %>%
select(-starts_with("1")) %>%
mutate(target = paste0(propionyl_species, "-C3-C2-ratio-Car"),
curve_plot = "C3-C2-ratio-Car",
isotope = propionyl_species,
c_bool = FALSE)
# add new ratio values onto original data
select(data, -target, -matches("^curve_"), -c_bool, -isotope, -raw, -matches("^conc"), -matches("^log"), - llod_conc) %>%
distinct() %>%
inner_join(ratios, ., by = c("plate_id", "prun", "sample_id", "row", "column")) %>%
fct_sync(data) %>%
bind_rows(data)
}
#' Estimate concentration in tissue
#'
#' @param conc_nm Recovery-adjusted concentration from Pure Honey (nanomolar)
#' @param w_mg Weight of compound administered (milligrams)
#' @return Estimated concentration (micromolar) in tissue (numeric vector)
#' @export
get_tissue_conc <- function(conc_nm, w_mg) {
3750 / w_mg * conc_nm / 1000
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.