# Functions related to reading in data
#' Universal assayr2 function template to read in tables.
#'
#' Wrapper around file format specific read-in patterns.
#' @md
#' @param file `character` path to file or integer of Box folder ID
#' @param rows `integer` defining rows to be read. Only used for XLSX files.
#' @param cols `integer` or `character` defining colums to be read. Only used for XLSX files.
#' @param col_names `logical` should the first non-empty row be converted to column names.
#' @param dropNA `logical` should columns with all NA value be dropped automatically, if `TRUE`
#' (default) will emit at warning in the console.
#' @param snake_names `logical` should column names be converted to snake_case
#' @param ... Arguments passed on to `read.table()`, `openxlsx::read.xlsx()` or `boxr::box_read()`.
#' @return A dataframe without factors.
#' @importFrom utils read.table count.fields
#' @importFrom boxr box_read
#' @importFrom openxlsx read.xlsx
#' @importFrom glue glue
#' @importFrom dplyr mutate_all
#' @export
read_generic <- function(file, rows = NULL, cols = NULL, col_names = FALSE,
dropNA = TRUE, snake_names = TRUE, ...) {
if (is.numeric(file)) {
out <- boxr::box_read(file, ...)
}
else {
file_type <- gsub(".*\\.(.*)$", "\\1", file) %>% tolower()
if (file_type == "txt") {
out_ncol_max <- max(count.fields(file = file, sep = "\t"))
out <- read.table(
file = file, header = col_names, sep = "\t", fill = TRUE,
col.names = 1:out_ncol_max,
stringsAsFactors = FALSE, ...
)
}
else if (file_type == "csv") {
out_ncol_max <- max(count.fields(file = file, sep = ","), na.rm = T)
out <- read.table(
file = file, header = col_names, sep = ",", fill = TRUE,
col.names = 1:out_ncol_max,
stringsAsFactors = FALSE, ...
)
}
else if (file_type == "xlsx") {
if (is.character(cols)) {
col_idx <- chr_to_int(cols)
}
else {
col_idx <- cols
}
out <- openxlsx::read.xlsx(file,
colNames = col_names, cols = col_idx,
rows = rows, ...
)
}
else {
stop("Invalid file type: is not supported. Only TXT, CSV, XLSX are currently,
open an issue on github.com/hemoshear/assayr2.")
}
}
if (dropNA) {
na_indexs <- names(out)[sapply(out, function(x) all(is.na(x)))]
if (length(na_indexs) > 0) {
message(glue::glue('The columns: {paste(na_indexs, collapse= ", ")} contained only NA values and were dropped.'))
out <- out[!names(out) %in% na_indexs]
}
}
# temp fix for dplyr #4094 related to tibble #566
names(out) %<>% gsub("^\\.+", "X", .)
if (snake_names) {
names(out) %<>% to_snake_case()
}
# dplyr::mutate_all(out, as.character)
out
}
#' Parser for Molecular Devices scope output
#'
#' Extracts meta information from table headers and combines individual scope
#' aquisition tables into a single unified data frame.
#' @md
#' @param file A valid path to a .txt or .xlsx file with Molecular Devices scope output.
#' @param measurements Default is NULL, which will auto-detect measurements
#' present and messsage the user in terminal. Alternativly, provide a character
#' vector of regex ready (special characters must be escaped) patterns and only
#' matching measurements will be returned.
#' @param verbose Boolean, should information about parsed columns be printed to the console?
#' @param plate_capture Character, regex capture expression to grab plate identifier
#' from the field "Acquisition Name \[Plate Info\]". Default grabs digits followed by optional "b".
#' The `gsub()` call this feeds into is case insensitive.
#' @param ... Arguments passed to `read_generic()`
#' @return A data frame with columns for run, plate, well, site and measurments.
#' @section Warning:
#' This function will not work with a `file = $BoxFileID`, due to `rio::import()`'s
#' behavior which is baked into `boxr`. So `file` must be a local path.
#' @examples
#' moldev_smaa <- system.file("extdata", "SMAA_Moldev_example.txt", package = "assayr2")
#' smaa <- read_moldev(moldev_smaa)
#'
#' moldev_nilered <- system.file("extdata", "NileRed_Moldev_example.txt", package = "assayr2")
#' nilered <- read_moldev(moldev_nilered)
#' @importFrom dplyr mutate_all select everything mutate bind_rows
#' @importFrom purrr map
#' @export
read_moldev <- function (file, measurements = NULL, verbose = TRUE, plate_capture = ".*Plate ?(\\d*|\\d*b).*", ...)
{
output <- read_generic(file)
output %<>% mutate_all(~gsub("ATF$", "", .))
meta_info <- grep(".*Name \\[Plate Info\\].*", unlist(output[,
1]), value = TRUE) %>% unname()
run_names <- gsub(".*=([A-Z]{3}[0-9]{4}(-?|-\\d+)).*", "\\1",
meta_info) %>% gsub("-$", "", .)
plate_nums <- gsub(plate_capture, "\\1", meta_info,
ignore.case = T)
plate_starts <- grep("(Well Name|Plate ID)", unlist(output[,
1])) + 1
plate_stops <- grep("^\\d$", unlist(output[, 1])) - 1
plate_stops <- plate_stops[-1]
plate_stops <- c(plate_stops, nrow(output))
id_row <- droplevels(output[(plate_starts[1] - 1), ]) %>%
unlist() %>% na.omit()
names(id_row) <- 1:length(id_row)
always_ids <- c("Plate ID", "Well Name", "Site ID", "MEASUREMENT SET ID")
if (is.null(measurements)) {
measure_ids <- id_row[!id_row %in% always_ids]
measurement_decoder <- c(`Nuclear Count (Transfluor)` = "nuc_count",
`Vesicle Integrated Intensity (Transfluor)` = "nile_intensity",
`Cell: smaa area sum (Custom Module)` = "smaa_area",
`Cell: integrated int sum (Custom Module)` = "smaa_intensity",
`Vesicle Count (Transfluor)` = "mac_count", `Cell: Average_Nuc_Size (Custom Module)` = "avg_nuc_size",
`Cell: Live_Nuc_Count (Custom Module)` = "live_nuc_count",
`Cell: Dead_Nuc_Count (Custom Module)` = "dead_nuc_count",
`Cell: Total_Nuc_Count (Custom Module)` = "total_nuc_count",
`Cell: stell nuc count (Custom Module)` = "stellate_count",
`Cell: mac nuc count (Custom Module)` = "mac_count",
`Cell: total nuc count (Custom Module)` = "total_nuc_count")
if (verbose) {
message("Parsing measurements: ", paste(ifelse(measure_ids %in%
names(measurement_decoder), measurement_decoder[measure_ids],
measure_ids), collapse = ", "))
}
measures <- names(measure_ids) %>% as.numeric()
}
else {
measures <- purrr::map(measurements, ~grep(., id_row,
ignore.case = TRUE)) %>% unlist() %>% unique() %>%
as.numeric()
}
well_name <- grep("Well Name", id_row)
site_id <- grep("Site ID", id_row)
columns_we_want <- c(well_name, site_id, measures) %>% as.numeric()
c_names <- id_row[columns_we_want] %>% ifelse(. %in% names(measurement_decoder),
measurement_decoder[.], .) %>% gsub(" .*", "", .) %>%
to_snake_case()
plates <- list()
for (p in 1:length(plate_starts)) {
p_df <- output[plate_starts[p]:plate_stops[p], ]
p_df <- p_df[, columns_we_want]
colnames(p_df) <- c_names
p_df$plate <- plate_nums[p]
p_df$run <- run_names[p]
p_df %<>% dplyr::select(run, plate, dplyr::everything())
if (p_df[1, 3] != "6") {
plates[[p]] <- p_df
}
}
plates %>% bind_rows() %>% mutate_at(measures, as.numeric) %>%
mutate(plate = gsub("^0+", "", plate))
}
#' Extract sample identifiers from a file with discards from imaging.
#'
#' Currently only workds with CSVs, could be extended to XLSX
#' @md
#' @param file A path to a CSV file with discards.
#' @param separate_uid Boolean. Should the UID from the scope be split into 3 separate
#' columns (plate, well, site).
#' @return A data frame.
#' @param ... Arguments passed to `read_generic()`
#' @importFrom stats setNames
#' @importFrom tidyr separate
#' @importFrom dplyr mutate
#' @examples
#' \dontrun{
#' # scope data
#' scope_discards <- read_discards(367357642382) # a file on Box
#' }
#' @export
read_discards <- function(file, separate_uid = TRUE, ...) {
out <- read_generic(file, col_names = TRUE, ...)[2] %>%
setNames(c("uid")) %>%
separate(uid, c("run", "uid"), sep = "late") %>%
dplyr::mutate(
run = gsub("([A-Z]{3}[0-9]{4}(-?|-\\d+)).*", "\\1", run),
uid = gsub("s(\\d+)\\.tif", "\\1", uid)
)
if (separate_uid) {
out %<>% separate(uid, c("plate", "well", "site")) %>%
mutate(plate = gsub("^0+", "", plate))
}
out
}
#' Generic parser for data from the Omega FLUOStar plate analyzer
#'
#' Versatile parser that orders standard columns (ie raw, blank_raw, machine_fit, etc)
#' automatically for \code{rbind()}-ing. Verbose with warnings, returns a tibble.
#' @param file Path to a BMG FLUOstar Omega plate-reader output xlsx.
#' @param plate_capture A regex pattern with a single capture for extracting plate id from file name.
#' @param std_concs A numeric vector of concentrations in descending order. Must be same length as standards
#' in the FLUOstar output file.
#' @param measurements A character specifying the measrements to be extracted from
#' the FLUOstar output file. If `NULL` (the default), assumes all columns that are not
#' 'Well Row', 'Well Col' and 'Content' are to be parsed as measurments.
#' @param verbose Boolean, should information about parsed columns be printed to the console?
#' @param ... Arguments passed to `read_generic()`
#' @examples
#' \dontrun{
#' # example data in assayr2
#' fluostar_file <- system.file("extdata", "FLUOstar_ALbumin_example.xlsx", package = "assayr2")
#' fluostar_data <- read_fluostar(fluostar_file, ".*_(.*)\\..*")
#' }
#' @importFrom stats setNames
#' @importFrom purrr map
#' @importFrom glue glue
#' @importFrom dplyr filter full_join mutate mutate_at select rename
#' @export
read_fluostar <- function(file, plate_capture = "", std_concs = NULL,
measurements = NULL, verbose = TRUE, ...) {
df <- read_generic(file, ...)
# Parse columns of interest and their names
id_row_pos <- grep("^Well", df[, 1])
id_names <- unlist(df[id_row_pos, ]) %>%
setNames(1:length(.)) %>%
to_snake_case() %>%
gsub("well_", "", .)
# could add more gsub()s here to clean up colnames more
# flag values that are always present
always_ids <- c("row", "col", "content")
# assume other values are measuements if measurements = NULL
if (is.null(measurements)) {
measure_ids <- id_names[!id_names %in% always_ids]
# could add key-value pairs here to parse names to specified alternatives
if (verbose) {
message("Parsing measurements: ", paste(measure_ids, collapse = ", "))
}
measures_col_pos <- names(measure_ids) %>% as.numeric()
} else {
measurements <- to_snake_case(measurements)
measures_col_pos <- purrr::map(measurements, ~grep(., id_names, ignore.case = TRUE))
# handle missing matches with a message to user
measures_missing_a_match <- sapply(measures_col_pos, function(x) length(x) == 0)
if (any(measures_missing_a_match)) {
message(glue::glue("The measurements:\n{paste(measurements[measures_missing_a_match], collapse = '\n')}\nDon't match any names in FLUOstar output :("))
}
measures_col_pos <- measures_col_pos %>%
unlist() %>%
unique()
}
# set up
columns_we_want <- c(names(id_names[id_names %in% always_ids]), measures_col_pos) %>%
as.numeric()
column_names <- id_names[columns_we_want]
df <- df[(id_row_pos + 1):nrow(df), columns_we_want] %>%
setNames(column_names)
# handle cases on missing std_concentations
if (nrow(dplyr::filter(df, grepl("Standard", content))) > 0 &&
sum(grepl("standard_concentrations", names(df))) == 0 &&
is.null(std_concs)) {
exp_concs <- df[grepl("Standard", df$content), ]$content %>%
unique() %>%
length()
stop(paste0("No standard concentrations detected. Please provide a numeric vector (length ", exp_concs, ") to the argument 'std_concs'"))
}
if (nrow(dplyr::filter(df, grepl("Standard", content))) > 0 &&
sum(grepl("standard_concentrations", names(df))) == 0 &&
!is.null(std_concs)) {
missing <- df[grepl("Standard", df$content), ]$content %>%
unique()
df <- dplyr::full_join(df, data.frame(content = missing, std_conc = std_concs, stringsAsFactors = FALSE))
}
df %<>% dplyr::mutate_at(.vars = measures_col_pos, .funs = as.numeric)
# warn if saturated measurements are detected
main_measure_id <- measure_ids[which.max(grepl("raw", measure_ids))]
if (any(df[[main_measure_id]] == 3.5)) {
warning("Saturated values detected in: ", main_measure_id)
}
df %>%
dplyr::mutate(plate = gsub(plate_capture, "\\1", file, ignore.case = TRUE)) %>%
dplyr::rename(column = col) %>%
dplyr::select(plate, dplyr::everything())
}
#' Generic parser for Biorad CFX thermocycler
#'
#' Reads in the defualt output csv from CFX Manager 3.1.1621.0826 as a tibble.
#' @md
#' @param file Path to a BioRad CFX output CSV
#' @param plate_capture A regex pattern with a single capture for extracting plate id from file name.
#' @return A data frame with 3 columns (plate, well, cq)
#' @examples
#' # example data in assayr2
#' pcr_file <- system.file("extdata", "CFX_example.csv", package = "assayr2")
#' pcr_data <- read_cfx(pcr_file)
#' @importFrom dplyr mutate select
#' @export
read_cfx <- function(file, plate_capture = "") {
read_generic(file) %>%
.[, 1:2] %>%
setNames(c("well", "cq")) %>%
.[(grep("Well$", .$well) + 1):nrow(.), ] %>%
dplyr::mutate(
plate = gsub(plate_capture, "\\1", file),
cq = as.numeric(cq),
well = gsub("(\\D)0+", "\\1", well)
) %>%
dplyr::select(plate, well, cq) # re-order columns
}
#' Generic parser for PureHoney Assay layouts
#'
#' High level wrapper function that that imports the assay layout for PureHoney as a tibble.
#' Uses fixed offset from document markers to capture specific regions, prone to errors if assay document template is altered.
#' @md
#' @param file Path to .xlsx file with PureHoney assay layout.
#' @param plate_ids Optional character vector with plate numbers to extract.
#' Defaults to NULL, which will extract all plate ids present.
#' @param meta_cols Numeric. Defines the columns to extract from metadata table.
#' Default is `NULL` for all columns.
#' @param meta_names Character. Column names, length must match `legnth(meta_cols)`.`
#' @param skip Number of rows to ignore when looking for plates. Useful for avoiding standard curve references in the header.
#' @param ... Arguments passed to `read_generic()`
#' @examples
#' ph_meta <- system.file("extdata", "PH_Assay_example.xlsx", package = "assayr2")
#' meta <- read_ph_assay(ph_meta)
#' @importFrom purrr map_chr map_dfr map2_dfr
#' @importFrom dplyr cumall filter mutate full_join
#' @importFrom tidyr replace_na
#' @importFrom tibble enframe
#' @export
read_ph_assay <- function(file, plate_ids = NULL, meta_cols = NULL,
meta_names = NULL, skip = 15, ...) {
read <- read_generic(file, ...) %>%
.[-c(1:skip), ]
plates <- grep("^1\\d{8}$", read[, 1], value = T) %>%
gsub("10*([1-9]\\d+)($| .*)", "\\1", .) %>%
unique()
lays <- grep("^A$", read[, 1]) %>% setNames(., plates[1:length(.)])
metas <- grep("^Sample", read[, 1]) %>% setNames(., plates[1:length(.)])
# catch dilution factor (if it is in the specific cell)
dils <- map_chr(metas, ~read[(. - 1), 2]) %>%
as.numeric() %>%
replace_na(1) %>%
setNames(., plates[1:length(.)])
if (!is.null(plate_ids)) {
lays %<>% .[names(lays) %in% plate_ids]
metas %<>% .[names(metas) %in% plate_ids]
dils %<>% .[names(dils) %in% plate_ids]
}
layout <- purrr::map_dfr(lays, ~melt_plate(read[c(.:(. + 7)), 2:13]), .id = "plate_id")
if (is.null(meta_cols)) {
meta_names <- read[metas[1], ] %>%
unlist() %>%
tolower() %>%
unname() %>%
gsub(" ", "_", .) %>%
gsub("sample_#", "content", .) %>%
replace_na("drop")
meta <- purrr::map_dfr(metas, ~read[c((. + 1):(. + 85)), ] %>%
filter(dplyr::cumall(!is.na(.$x_1))), .id = "plate_id") %>%
setNames(c("plate_id", meta_names))
} else {
meta <- map_dfr(metas, ~read[c((. + 1):(. + 85)), meta_cols] %>%
filter(cumall(!is.na(.$x_1))), .id = "plate_id") %>%
setNames(c("plate_id", meta_names))
}
# drop columns if all values are NAs
allNA_ix <- purrr::map_lgl(meta, ~sum(is.na(.)) == nrow(meta))
meta <- meta[!allNA_ix]
dil <- tibble::enframe(dils) %>%
setNames(c("plate_id", "dilution_factor"))
meta %<>% full_join(layout, by = c("plate_id", "content")) %>%
full_join(dil, by = "plate_id") %>%
filter(!is.na(row))
# little prep tweaks
meta %>% mutate(content = gsub("BLANK", "blank", content, ignore.case = TRUE))
}
#' PureHoney Assay Result Reader
#'
#' High level wrapper function that that imports the raw data output from PureHoney as a tibble.
#' @md
#' @param file Path to .csv file with PureHoney raw output.
#' @param plate_capture A single regex capture expression extracting plate id from file name.
#' @param prun_capture A single regex capture expression for plate run information.
#' @return A long data frame containing all of the analyte reads in the plate.
#' @param ... Arguments passed to `read_generic()`.
#' @examples
#' ph_raw <- system.file("extdata", "PH_raw_example.csv", package = "assayr2")
#' ph_data <- read_ph_raw(ph_raw)
#' @importFrom dplyr rename mutate bind_rows
#' @export
read_ph_raw <- function(file, plate_capture = ".*10+([1-9]\\d{3,5}).*", prun_capture = ".*[[:alpha:]]\\.csv", ...) {
output <- read_generic(file, ...)
species_ids <- grep("XIC", output[, 1], value = T) %>% gsub("XIC = xic-", "", .)
plate_starts <- grep("XIC", output[, 1]) + 2
plate_stops <- plate_starts + 7
data <- list()
for (c in 1:length(species_ids)) {
data[[c]] <- output[c(plate_starts[c]:plate_stops[c]), 2:13] %>%
melt_plate() %>%
dplyr::mutate(
target = species_ids[c],
plate_id = file
) %>%
dplyr::rename(raw = content)
}
data %>%
bind_rows() %>%
mutate(
raw = as.numeric(raw),
log10_raw = log10(raw),
prun = gsub(prun_capture, "\\1", plate_id),
plate_id = gsub(plate_capture, "\\1", plate_id)
)
}
#' Parser for MagPix csv outputs
#'
#' Convenience wrapper for importing a MagPix xPONENT analysis file as a tibble.
#' @md
#' @param file A valid path to a .csv file with MagPix xPONENT analysis.
#' @param value A character vector specifying the value to extract.
#' Must be one of: Median, Net MFI, Count, Result, Avg Net MFI, Avg Result.
#' @param ... Arguments passed to `read_generic()`.
#' @return A list with two elements: one data frame with sample values and one data
#' frame with standard values.
#' @examples
#' magpix_raw <- system.file("extdata", "Magpix_example.csv", package = "assayr2")
#' magpix <- read_magpix(magpix_raw)
#' @importFrom dplyr select rename full_join group_by summarise filter
#' @importFrom stats setNames
#' @importFrom tidyr gather
#' @export
read_magpix <- function(file, value = "Net MFI", ...) {
f <- read_generic(file, ...)
# positional anchors
anchors <- grep("^(DataType|-- CRC --)", f[,1]) %>% stats::setNames(., f[,2][.])
# value
start <- anchors[value] + 1
stop <- anchors[which((anchors - start) > 0)][1] - 1 # next anchors value
vals <- f[start:stop, ] %>% setNames(., .[1, ]) %>% .[-1, ] # first row to col_names
vals %<>% .[, colSums(vals != "") != 0] # drop empty columns
vals %<>% select(-`Total Events`) # drop `Total Events`
vals$Location %<>% gsub(".*,(.*)\\)", "\\1", .) # clean to well_id
vals %<>% gather("target", "concentration", -(Location:Sample)) %>% # go long
rename(content = Sample, well = Location) # clean up
colnames(vals)[4] <- "intensity"
suppressWarnings(vals[, 4] %<>% as.numeric())
# warnings
start <- anchors["Warnings/Errors"] + 1
stop <- anchors[which((anchors - start) > 0)][1] - 1
if (start - stop > 0) {
wrngs <- f[start:stop, ] %>% magrittr::set_names(., .[1, ]) %>% .[-1, ]
wrngs %<>% .[, colSums(wrngs != "") != 0]
wrngs$Location %<>% gsub(".*,", "", .)
names(wrngs) %<>% tolower()
wrngs %<>% dplyr::rename(well = location)
if (nrow(wrngs) > 0) { # join if warning are present
vals %<>% dplyr::full_join(wrngs)
}
}
# std_conc
start <- anchors["Standard Expected Concentration"] + 1
stop <- anchors[which((anchors - start) > 0)][1] - 1
stds <- f[start:stop, ] %>% setNames(., .[1, ]) %>% .[-1, ]
stds %<>% .[, colSums(stds != "") != 0]
stds %<>% gather("target", "concentration", -Reagent) %>%
rename(content = Reagent)
suppressWarnings(stds[, 3] %<>% as.numeric())
validate_std_curve_conc <- function(d) {
f <- group_by(d, target) %>%
summarise(all_equal_conc = all(diff(concentration) == 0)) %>%
filter(all_equal_conc)
# return original dataframe if standard concentrations are ok
if (nrow(f) == 0) {
return(d)
} else {
warning(paste0(
"Invalid standard concentrations for ",
f$target,
", add known concentrations to data frame"
))
return(d)
}
}
stds %<>% validate_std_curve_conc()
suppressWarnings(
vals$log10_intensity <- log10(vals$intensity)
)
# return a named list
list(vals, stds) %>%
setNames(paste0(c("values_", "stds_"), file))
}
#' Melt a data frame from plate format to long format
#' @md
#' @param plt A data frame representing an assay plate.
#' @param .id The name of the well content column.
#' @return A melted (long) data frame.
#' @examples
#' plate <- as.data.frame(matrix(rnorm(96), nrow = 8))
#' melt_plate(plate)
#' @importFrom dplyr mutate
#' @importFrom tidyr gather
#' @importFrom stats setNames
#' @export
melt_plate <- function(plt, .id = "content") {
if (is.null(.id) || is.na(.id)) .id <- "content"
plt %>%
setNames(1:ncol(.)) %>%
mutate(row = LETTERS[1:nrow(.)]) %>%
gather(key = "column", value = !!.id, -row)
}
#' Read xlsx data in plate format and convert to long format
#'
#' @description Read in plate format data from an xlsx file and return a melted
#' data frame with the contents and well coordinates.
#' @md
#' @param file A valid file path for the xlsx file.
#' @param rows A numeric vector of the rows to read
#' @param cols A vector of columns to be read. Can be integers or characters, see examples
#' @param col_names Boolean. Should the first non-empty row be converted to column names.
#' @param sheet The name or index of the sheet.
#' @param ... Arguments passed to `read_generic()`.
#' @param .id The name of the well content column.
#' @return A melted data frame.
#' @details A special case is when the first row of contents is empty.
#' The suggested pattern then is to include an extra row with column names from the Excel Sheet and set `col_names = TRUE`.
#' So for a a 8-row plate you would include a 9-row range, see examples. This is because of behavior in
#' the underlying function `openxlsx::read.xlsx()` that skips empty rows at the start
#' of a read-in region if the contents are blank.
#' @examples
#' \dontrun{
#' # generic assay with layouts in 96 well format
#' meta_file <- "data/plate_layouts.xlxs"
#'
#' # all equivilant
#' melt_plate_xlsx(meta_file, rows = 2:9, cols = 2:13, sheet = 1)
#' melt_plate_xlsx(meta_file, rows = 2:9, cols = LETTERS[2:13], sheet = 1)
#' melt_plate_xlsx(meta_file, rows = 2:9, cols = "B:M", sheet = 1)
#'
#' # if the first row was empty
#' melt_plate_xlsx(meta_file, rows = 1:9, cols = "B:M", col_names = T, sheet = 1)
#' }
#' @export
melt_plate_xlsx <- function (file, rows, cols, sheet = 1, col_names = FALSE, ..., .id = "content") {
read_generic(file, rows = rows, cols = cols, sheet = sheet, col_names = col_names,
dropNA = FALSE, skipEmptyRows = FALSE, ...) %>%
melt_plate(.id = .id)
}
#' A parser for plate maps
#'
#' Useful for reading in an Excel sheet full of plate layouts, originally built
#' for PCR layouts. Auto detects plate names, well contents, targets and
#' primer temperatures, if the layout is typical :)
#' @md
#' @param file A valid file path to layout of interest. Currently only supports
#' 'xlsx' files.
#' @param sheet Numeric. The sheet of interest in `pcr_file`.
#' @param skip Numeric. Number of rows to skip, always try to skip header meta
#' material, start at first plate.
#' @param arrange Character vector with elements being either "rows" or "cols"
#' (default), that describe how primers are layed out within plates. If
#' `length(arrange) == length(plate_layouts)`, then each layout will be handled
#' according to the matching `arrange` value of plate layouts.The `arrange` arg is not a magic bullet.
#' @param times Integer. Number of times to replicate targets.
#' @param ... Arguments passed to `read_generic()`
#' @details Relies on finding a single plate identifier starting with the 'Plate' for each layout.
#' This function will handle standard layouts well, but abnormal layouts will require manual TLC, until a
#' color matching solution is ready (so like never).
#' @return A names list for each plate. Useful for fixing a-typical layouts manually.
#' @examples
#' \dontrun{
#' layout_file <- "path/to/my_layout.xlsx"
#' lay <- read_layouts(meta_file, sheet = 1) # returns list
#' # perhaps fix a specific abnormal plate by hand
#' lay %<>% bind_rows() # once every plate is ready
#' }
#' @importFrom purrr map map_lgl keep discard
#' @importFrom dplyr arrange mutate select
#' @importFrom tidyr unite
#' @export
read_layouts <- function(file, sheet = 1, skip = 1, arrange = "cols", times = NULL, ...) {
f <- read_generic(file, sheet = sheet, startRow = skip, ...)
# locate critical anchor columns ------
all_cols <- purrr::map(f, unlist)
# left edge of plates
# start_col <- which(map_lgl(all_cols, ~ na.omit(.)[1] == 'a' | na.omit(.)[1] == 'A'))
start_col <- which(map_lgl(all_cols, ~all(LETTERS[1:8] %in% .)))
# meta-info:plate_name, temperature, targets (usually to the right of column12)
plt_col <- which.max(map_lgl(all_cols, ~grepl("plate", na.omit(.)[1], ignore.case = TRUE)))
# establish offsets ----
# parser requires constant format (spacing) over plates
row_offset <- which.max(all_cols[[start_col]] == "A")
plt_offset <- which.max(!is.na(all_cols[[plt_col]]))
shift <- abs(row_offset - plt_offset) # the $$$ piece
plate_starts <- grep("^A$", f[, start_col], ignore.case = TRUE)
res <- list()
for (p in plate_starts) { # each plate_layout
if (length(arrange) == length(plate_starts)) {
names(arrange) <- plate_starts
p_arrange <- dplyr::arrange[match(p, names(arrange))]
} else {
p_arrange <- arrange[1]
}
# check row for multiple plate_names within a plate_layout
name <- grep("Plate", f[p - shift, ], value = TRUE, ignore.case = TRUE)
names(name) <- NULL
name_pos <- grep("Plate", f[p - shift, ], ignore.case = TRUE)
num_plts <- length(name)
# grab temperatures if they exist
temps <- purrr::map(name_pos, ~f[ (p - (shift - 1)), .] %>%
purrr::keep(~grepl("(\u00B0|\u00BA)", .))) %>%
stats::setNames(name)
# grab targets as everything thats not temps
targs <- purrr::map(name_pos, ~f[ (p - (shift - 1)):(p + 7), .] %>%
na.omit() %>%
purrr::discard(~grepl("(\u00B0|\u00BA)", .))) %>%
stats::setNames(name)
# melt to long form
plt_lay <- melt_plate(f[p:(p + 7), (start_col + 1):(start_col + 12)])
for (n in name) { # each plate name within a plate_layout
plt_lay$plate <- gsub("plate ?", "", n, ignore.case = TRUE)
number_samples <- length(na.omit(unique(plt_lay$content)))
# add targets by plate -----
tmp <- unlist(targs[[n]])
if (length(tmp) > 0) { # only try to add targets if present
temp <- unlist(temps[[n]])
if (length(temp) > 0) plt_lay$temp <- temp # add temperature
# 5 targs fuck things up
if (length(tmp) == 5) tmp %<>% c("") # add 6th empty target
# + row wise ----
if (p_arrange == "rows" | ((length(tmp) == 4) && (number_samples == 12))) {
times <- 24 * ceiling(number_samples / 12)
plt_lay %<>%
dplyr::arrange(row, as.numeric(column)) %>%
dplyr::mutate(target = rep(tmp, each = times, length.out = nrow(plt_lay)))
}
# + column wise ----
else {
times <- 16 * floor(number_samples / 8)
if (length(tmp) == 2) times <- 48
if (length(tmp) == 6) times <- 16
if (times == 0) times <- 2
plt_lay %<>%
dplyr::arrange(as.numeric(column), row) %>%
dplyr::mutate(target = rep(tmp, each = times, length.out = nrow(.)))
}
}
# mild formatting
res[[n]] <- plt_lay %>%
dplyr::select(plate, tidyselect::everything()) %>%
tidyr::unite(well, row, column, sep = "", remove = FALSE)
}
}
res
}
#' Read in a device layout from Box
#'
#' Trim out the rectangle of critial device information, by using column names and NA positions
#' @md
#' @param file Integer or character. If integer must be a valid Box file ID. If
#' character must be a valid local path for a XLSX file.
#' @param sheet Integer for sheet to read in XLSX file.
#' @param ... Arguments passed to `read_generic()`.
#' @examples
#' \dontrun{
#' library(boxr)
#' box_auth()
#' device_finder("HEM0401-2") %>% read_device_layout()
#' }
#' @importFrom dplyr mutate
#' @importFrom purrr map_lgl
#' @export
read_device_layout <- function(file, sheet = 1, ...) {
read <- read_generic(file, sheet = sheet, ...)
# capture names if they exist
run_name_pos <- grep("Project", read[[1]], ignore.case = TRUE)
if (length(run_name_pos) > 0) {
run_name <- gsub("Project:?", "", read[[1]][run_name_pos]) %>% trimws()
}
else if (!is.null(names(file))) {
run_name <- names(file)
}
else {
warning("No project codes detected, assigning project = NA")
run_name <- NA
}
start_pos <- grep("Device", read[[1]], ignore.case = TRUE) # eventual colnames
if (length(start_pos) == 0) {
stop(paste(file, "doesn't look like device layout"))
} else {
stop_pos <- which.max(is.na(read[[1]])) - 1
if (stop_pos == 0) {
stop_pos <- nrow(read)
}
read %<>% .[(start_pos + 1):stop_pos, ] %>%
setNames(read[start_pos, ]) %>%
.[!is.na(names(.))]
# to chop off bad columns on the right
end_pos <- grep("Touching", names(read), ignore.case = TRUE)
# to drop empty header rows
first_column_mask <- !is.na(read[, 1])
read %<>% .[1:end_pos] %>%
.[first_column_mask, ] %>%
dplyr::mutate(run = run_name)
# drop columns where all values are NA
allNA_idx <- purrr::map_lgl(read, ~sum(is.na(.)) == nrow(read))
read[!allNA_idx]
}
}
#' Read in SMAD quantification data post ImageJ analysis
#'
#' @md
#' @param file A txt file containing ImageJ analysis of SMAD staining.
#' @param uid_capture Regex capture expression for extracting UID (plate_well_site) from image names.
#' @param keys Character vector with names of measuements in analysis output. Default is `NULL`,
#' which will guess whether to use NPC or HEP keys based on number of measurements.
#' @param return_all Boolean, should all intermediate value be returned, useful for debugging.
#' @param ... Arguments passed to `read_generic()`.
#' @return A long data frame, with values returned for multiple measurements in the cytoplasm and nucleus.
#' @examples
#' library(assayr2)
#' \dontrun{
#' smad_raw <- system.file("extdata", "SMAD_Quant_example.txt", package = "assayr2")
#' smad <- read_smad(smad_raw)
#' }
#' @importFrom stats setNames
#' @importFrom glue glue
#' @importFrom dplyr mutate filter select
#' @importFrom tidyr fill spread separate
#' @export
read_smad <- function(file, uid_capture = ".*plate(.*)\\.tif",
keys = NULL, return_all = FALSE, ...) {
output <- read_generic(file, ....) %>%
stats::setNames("value")
# guess keys if NULL
if (is.null(keys)) {
hep_keys <- c(
"image_name", "nuclear.count", "nuclear.intensity", "nuclear.area",
"dilated.count", "dilated.intensity", "dilated.area"
)
npc_keys <- hep_keys[-5] # doesn't have the secondary count
num_measures <- grepl("^\\d", output[, 1]) %>% rle() %>% .$lengths %>% max() %>% add(1)
if (num_measures == 6) {
message(glue::glue("Using NPC measurement keys:{paste(c('', npc_keys), collapse = '\n')}"))
keys <- npc_keys
}
else {
message(glue::glue("Using HEP measurement keys:{paste(c('', hep_keys), collapse = '\n')}"))
keys <- hep_keys
}
output$measure <- rep(keys, length.out = nrow(output))
} else {
output$measure <- rep(keys, length.out = nrow(output))
}
output <- dplyr::mutate(output,
uid = ifelse(grepl("tif$", value),
gsub(uid_capture, "\\1", value, ignore.case = TRUE),
NA
)
) %>%
tidyr::fill(uid) %>%
dplyr::mutate(plate_well = gsub("_s\\d$", "", uid)) %>%
tidyr::separate(plate_well, c("plate", "well"), sep = "_", remove = TRUE, extra = "drop") %>%
dplyr::filter(measure != "image_name") %>%
dplyr::mutate(
plate = gsub("^0+", "", plate),
value = as.numeric(value)
) %>%
tidyr::spread(measure, value) %>%
# building "peri" organelle values
dplyr::mutate(
nuclear.intensity = nuclear.intensity * nuclear.count,
nuclear.area = nuclear.area * nuclear.count,
# NPC and HEP macros differ for dilated area measurements, NPC is total, HEP is avg
dilated.intensity = if ("dilated.count" %in% keys) dilated.intensity * dilated.count else dilated.intensity,
dilated.area = if ("dilated.count" %in% keys) dilated.area * dilated.count else dilated.area,
peri.intensity = dilated.intensity - nuclear.intensity,
peri.area = dilated.area - nuclear.area,
nuclear.mean_intensity = nuclear.intensity / nuclear.area,
peri.mean_intensity = peri.intensity / peri.area,
nuclear.log_ratio = log2(nuclear.mean_intensity / peri.mean_intensity)
)
if (!return_all) output <- dplyr::select(output, uid:well, nuclear.count, nuclear.mean_intensity:nuclear.log_ratio)
output
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.