# HELPERS ---------------------------------------------------------------------
#' Interpret characters as well names
#'
#' @param well Character vector.
#' @param as.tibble If \code{TRUE}, a \code{\link[dplyr:tibble]{tibble}} is
#' returned instead of a list.
#' @param to.upper Should well letters be converted to uppercase if they are not?
#' @param zero.padding Number of digits the well number should be padded to.
#'
#' @return A list with \code{well_let}, \code{well_num} and \code{well} or a
#' tibble with the corresponding column names.
#'
#' @examples
#' as_well(c("a1", "B2", "Q23"))
#' as_well("23a")
#' as_well("n_2")
#'
#' @export
as_well <- function(well, as.tibble = FALSE, to.upper = TRUE, zero.padding = 2) {
# pre-processing to handle NULL values in a list
well <- sapply(well, function(x) if(is.null(x)) NA_character_ else x)
# extract parts
res <- list(well_let = NA_character_, well_num = NA_character_, well = NA_character_)
well_raw <- stringr::str_extract(well, "[:alnum:]+")
well_num <- stringr::str_extract(well_raw, "[0-9]+")
well_let <- stringr::str_extract(well_raw, stringr::regex("[A-Z]+", ignore_case = TRUE))
defects <- mapply(function(x, y) any(is.na(x), is.na(y)), well_let, well_num)
defects <- as.integer(defects)
if (to.upper == TRUE) well_let <- toupper(well_let)
well_num <- sprintf(paste0("%0", zero.padding, "d"), as.numeric(well_num))
well_tot <- paste0(well_let, well_num)
well_let[defects] <- NA_character_
well_num[defects] <- NA_character_
well_tot[defects] <- NA_character_
res <- list(well_let = well_let, well_num = well_num, well = well_tot)
if (as.tibble == TRUE) {
res <- dplyr::as_tibble(res)
}
res
}
# DIRECTORY NAVIGATION --------------------------------------------------------
#' Express file paths in their canonical form -- Truly
#'
#' Under Windows and UNIX, \code{\link[base:.Platform]{.Platform$file.sep}} both
#' may expand to "\code{/}", whereas some upstream applications may still use
#' "\code{\\}" to separate folders and files.
#'
#' @param path A character vector of file paths.
#' @inheritParams base::normalizePath
#'
#' @details
#' Calls \code{\link[base:normalizePath]{normalizePath}} with \code{.Platform$file.sep}.
#' In addition, trailing slashes are removed.
#'
#' @return
#' A character vector.
#'
#' @export
normalizePath <- function(path, winslash = .Platform$file.sep, mustWork = FALSE) {
log_debugging("received", sQuote(path))
norm_path <- stringr::str_replace(
base::normalizePath(path, winslash = winslash, mustWork = mustWork),
# remove trailing path separator which is not accepted on Windows
pattern = paste0(.Platform$file.sep, "$"), replacement = ""
)
log_debugging("returned", sQuote(norm_path))
norm_path
}
#' Backup a file if necessary
#'
#' @param path A file path.
#' @param sep A character to separate the existing file path and the stamp added.
#' @param stamp A datestamp (default) and/or timestamp constructed according
#' to this specification (\link[base:strptime]{POSIXct}) is added to pre-existing
#' files in path.
#'
#' @details
#' The stamp is extracted from an existing file's modification time.
#' For details \link[base:file.info]{file.mtime(...)}.
#' If a backup file with the same stamp exists, it is replaced In practical terms
#' this means that by using \code{"\%y\%m\%d"} as stamp, only the last backup per
#' day is preserved.
#'
#' @return
#' \code{TRUE} if succesful, \code{FALSE} otherwise.
#'
#' @export
backup_file <- function(path, sep = "_", stamp = "%y%m%d") {
if (file.exists(path)) {
bkp_file <- summerr::normalizePath(paste0(path, sep, format(file.mtime(path), stamp)))
if (file.exists(bkp_file)) file.remove(bkp_file)
file.rename(path, bkp_file)
} else {
FALSE
}
}
#' Prompt to select from a list of options
#'
#' Helper asking the user to select an item from a list.
#'
#' @param items A character vector of options/items.
#' @param caption The prompt.
#'
#' @return
#' A single character element.
#'
select_from_list <- function(items, caption = "Select an item") {
stopifnot(is.character(items))
if (length(items) == 1) return(items)
choice <- readline(paste0(
caption, "\n",
paste(sprintf(paste0("%", as.integer(log10(length(items)) + 3), "s %s"),
sprintf("[%i]", c(0, seq_along(items))),
c("None of these (skip)", items)),
collapse = "\n"), " "))
choice <- as.numeric(stringr::str_extract(choice, "[0-9]+"))
if (choice %in% seq_along(items)) {
item <- items[choice]
} else {
item <- character()
}
item
}
#' Interactively choose a directory
#'
#' This is a wrapper to select a directory \code{path} to process. Depending on
#' the mode R is invoked from, choosing is interactive and may use the RStudio IDE.
#'
#' @param path A character vector of length 1 to start browsing; defaults to the
#' current working directory.
#' @inheritParams rstudioapi::selectDirectory
#'
#' @return
#' A character with the path of the chosen directory.
#'
#' @export
select_directory <- function(path = getwd(), caption = "Select a directory",
label = "Select") {
if (interactive()) {
if (rstudioapi::isAvailable()) {
Sys.sleep(0.9) # allow some time for flushing previous outputs
path <- rstudioapi::selectDirectory(caption = caption, label = label, path = path)
} else {
dirs <- list.dirs(path = ".", full.names = FALSE, recursive = TRUE)
dirs <- dirs[which(!grepl(pattern = "^\\..+", x = dirs))]
dirs <- dirs[which(nchar(dirs) > 0)]
path <- file.path(path, select_from_list(
caption = caption, items = dirs))
}
}
path <- summerr::normalizePath(path)
path
}
#' Interactively choose a single file
#'
#' This is a convenient wrapper to select one file in the directory \code{path}
#' from multiple files matching \code{pattern}.
#'
#' @param path A character vector of file paths; the default corresponds to the
#' working directory, \code{\link[base]{getwd}()}. Tilde expansion (see \code{
#' \link[base]{path.expand}}) is performed.
#' @param pattern A \link[base:regex]{regular expression}. Only file names which
#' match the regular expression will be returned.
#' @param prefix A regular expression to match the filename (without extension).
#' @param suffix A character to match the file extension, e.g. "csv".
#' @param filetype A human-readable file type designation. Along with \code{suffix}
#' will be used to construct \code{filter} in \code{\link[rstudioapi:selectFile]{rstudioapi::selectFile}}.
#' @inheritParams rstudioapi::selectDirectory
#'
#' @details
#' In case multiple files are found, the user is prompted to choose one option
#' or none.
#'
#' If \code{pattern} is not \code{NULL}, \code{prefix} and \code{suffix} will be
#' ignored.
#'
#' \code{prefix} is ignored when the RStudio API is used.
#'
#' @return
#' A character vector with a single element in case a single file matches \code{pattern}
#' or a single choice was made, else an empty character vector.
#'
#' In a non-interactive session, returns an empty character vector and a warning
#' if multiple files were found.
#'
#' @export
select_single_file <- function(path = getwd(), prefix = ".+", suffix = "*",
pattern = NULL, filetype = "file",
caption = "Select a", label = "Select") {
if (is.null(pattern)) {
pattern <- ifelse(suffix == "*",
paste0(prefix),
paste0(prefix, "\\.", suffix, "$"))
}
dir_files <- list.files(path = path, pattern = pattern, full.names = TRUE,
no.. = TRUE, recursive = FALSE)
dirs <- list.dirs(path = path, full.names = TRUE, recursive = FALSE)
dir_files <- setdiff(dir_files, dirs)
if (length(dir_files) != 1) {
if (interactive()) {
if (rstudioapi::isAvailable(version_needed = "1.1.287")) {
Sys.sleep(0.9) # allow some time for flushing previous outputs
dir_files <- rstudioapi::selectFile(
caption = paste(caption, filetype), label = label,
path = path, filter = ifelse(suffix == "*", "All Files (*)",
paste0(filetype, "(*.", suffix, ")")))
} else {
dir_files <- file.path(path, select_from_list(
caption = caption, items = basename(dir_files)))
}
} else {
# message("Warning: '", pattern, "' matched multiple files in '", path, "'. Skipping.")
#
# dir_files <- character()
path <- file.path(path, select_from_list(
caption = caption, items = dir_files))
}
}
if (is.null(dir_files)) dir_files <- character()
if (length(dir_files) != 1) message("Warning: No files matching '", pattern, "'.")
dir_files <- summerr::normalizePath(dir_files)
dir_files
}
# GENERAL FILE IMPORT AND EXPORT ----------------------------------------------
#' Import a (plate) layout from an Excel file
#'
#' A (square plate) layout assigning each cell (well) to a specific content with
#' metadata to be taken from columns and/or rows parallel to the plate design.
#'
#' @param file A path or URL to an xlsx file or workbook.
#' @inheritParams openxlsx::read.xlsx
#' @param data_upper_left A string specifying the top left corner (i.e. cell)
#' in Excel coordinates of the plate.
#' @param index_row A string specifying the row (in Excel coordinates) where the
#' plate column index (e.g. running 1 through 24) is found.
#' @param index_col A string specifying the column (in Excel coordinates) where
#' the plate rows index (e.g. running A through P) is found.
#' @param meta_row A named character vector to specify additional metadata to be
#' assigned to each plate column.
#' @param meta_col A named character vector to specify additional metadata to be
#' assigned to each plate row.
#' @param plate_nrow Number of rows to import starting from \code{data_upper_left}.
#' @param plate_ncol Number of columns to import starting from \code{data_upper_left}.
#'
#' @details
#' The content of each Excel cell of the plate is read as a string. Merged cells
#' are allowed and will be unmerged (see \code{\link[openxlsx:read.xlsx]{read.xlsx}}).
#'
#' When importing metadata, it is assumed that \code{meta_row} and \code{meta_col}
#' run parallel to the plate. Offset is allowed. The category names of the metadata
#' must be specified explicitly during the setup; they are not imported.
#'
#' In the example below, the first cell of the acutal plate is "B3". Three metadata
#' columns are given: One in row "1" (here: Concentration), which is applied column-wise,
#' and two parallel to the rows in column "Z" (here: Duplicate) and "AA" (here: Source).
#' The name tags highlighted in yellow are not imported since they are not parallel to
#' the actual plate rows. So is the text "xyz" shown in red.
#'
#' \figure{platelayout_maldi.png}{options: width=600 alt="A sample plate layout provided in \code{summerrmass}."}
#'
#' @return
#' A \code{\link[tibble:tibble]{tibble}} in long form with the columns \code{well},
#' \code{well_let} (letters, row index of the plate), \code{well_num} (integers
#' as strings, column index of the plate), \code{content} and a column for each
#' metadata specified.
#'#'
#' @examples
#' layout_file <- system.file("extdata", "platelayout_maldi.xlsx", package = "summerrmass")
#'
#' import_layout_from_excel(layout_file,
#' # metadata stored in row number "1" (Excel coordinates) contains concentration
#' meta_row = c(Concentration = 1),
#' # metadata stored in column "Z" (Excel coordinates) contains rowwise metadata;
#' # missing values are "NA", the order in meta_col determines the column arrangement
#' meta_col = c(Source = "AA", Duplicates = "Z"))
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
#' @export
import_layout_from_excel <- function(
file,
sheet = 1,
data_upper_left = "B3",
index_row = "2",
index_col = "A",
meta_row = c(concentration = "1"),
meta_col = character(),
plate_nrow = 16,
plate_ncol = 24
) {
# convert excel coordinates to R coordinates
from.excel <- function(s) {
# from: https://stackoverflow.com/questions/34537243
s_upper <- toupper(s)
# Convert string to a vector of single letters
s_split <- unlist(strsplit(s_upper, split=""))
# Convert each letter to the corresponding number
s_number <- sapply(s_split, function(x) {which(LETTERS == x)})
# Derive the numeric value associated with each letter
numbers <- 26^((length(s_number)-1):0)
# Calculate the column number
column_number <- sum(s_number * numbers)
column_number
}
# only the first match to a continuous string of numerals or letters is matched
# and interpreted as row/column indexes
from.excel.row <- function(x) as.numeric(stringr::str_extract(x, "[0-9]+"))
from.excel.col <- function(x) from.excel(stringr::str_extract(
x, stringr::regex("[A-Z]+", ignore_case = TRUE)))
index_row <- from.excel.row(index_row)
index_col <- from.excel.col(index_col)
data_row <- from.excel.row(data_upper_left)
data_col <- from.excel.col(data_upper_left)
meta_row <- lapply(meta_row, from.excel.row)
meta_col <- lapply(meta_col, from.excel.col)
plate <- openxlsx::read.xlsx(file, fillMergedCells = TRUE, colNames = FALSE)
# fillMergedCells does not work when rows and cols specification is given; thus
# need to separate from the following code
datap <- plate[
data_row:(data_row + plate_nrow - 1),
data_col:(data_col + plate_ncol - 1)] %>%
# set row and column names according to the values specified by the user within
# the excel sheet; this allows for more flexibility when adjusting plate layout
magrittr::set_rownames(plate[data_row:(data_row + plate_nrow - 1), index_col]) %>%
magrittr::set_colnames(sprintf("%02d", as.numeric(
plate[index_row, data_col:(data_col + plate_ncol - 1)]))) %>%
#
tibble::as_tibble(rownames = "well_let") %>%
# make sure all entries (including NA) are of type character
dplyr::mutate(dplyr::across(tidyselect::vars_select_helpers$where(is.numeric),
as.character)) %>%
# make long form
tidyr::pivot_longer(cols = -.data$well_let, names_to = "well_num",
values_to = "content") %>%
tidyr::unite(.data$well_let, .data$well_num, col = "well", sep = "",
remove = FALSE)
# for each metadata row or column we assume that it is parallel to the plate
get_meta_row <- function(i) tibble::tibble(
# well numbers from the index row
well_num = sprintf("%02d", as.numeric(
plate[index_row, data_col:(data_col + plate_ncol - 1)])),
# data from the metadata row
meta = as.character(plate[i, data_col:(data_col + plate_ncol - 1)])
)
get_meta_col <- function(i) tibble::tibble(
# well letters from the index col
well_let = plate[data_row:(data_row + plate_nrow - 1), index_col],
# data from the metadata col
meta = as.character(plate[data_row:(data_row + plate_nrow - 1), i])
)
if (length(meta_row) > 0) datap <- dplyr::left_join(datap, dplyr::bind_rows(lapply(
as.list(meta_row), get_meta_row), .id = "mtype") %>%
tidyr::pivot_wider(id_cols = "well_num", names_from = .data$mtype, values_from = .data$meta),
by = "well_num")
if (length(meta_col) > 0) datap <- dplyr::left_join(datap, dplyr::bind_rows(lapply(
as.list(meta_col), get_meta_col), .id = "mtype") %>%
tidyr::pivot_wider(id_cols = "well_let", names_from = .data$mtype, values_from = .data$meta),
by = "well_let")
attr(datap, "file") <- file
datap
}
#' Import an experiment layout from nested directories
#'
#' Some data is organized in folders that contain files with similar or even
#' identical names. Given a list of paths pointing to those files, the layout
#' of the experiment is established from the nesting of folders.
#'
#' @param paths A list character vector or list of file paths. See Details.
#' @param pivot A \link[base:regex]{regular expression} describing the
#' name of a single folder in each tree up to which "groups" and from which
#' "replicates" are established. See Details.
#' @param relative_to If not \code{NULL}, this subpath is hidden from the paths
#' for all operations; "" to suppress hiding; \code{NULL} will hide the subpath
#' that is common in all elements of \code{paths}.
#'
#' @details
#' \code{paths} can be a vector such as \code{c("path1", "path2", "path3")}
#' or a list of character vectors, e.g. as a result of averaging data, such as
#' \code{list(c("path1", "path2"), "path3")}. The latter case is usually not
#' exposed to the user; the algorithm applies to "path1" and "path3".
#'
#' \code{pivot} must be a unique match for each path. Nesting above and within
#' the matched folder can differ from path to path; care is taken to handle the
#' grouping accordingly.
#'
#' If \code{pivot} is a regular expression with lookahead or lookbehind, these
#' elements are kept in the path.
#'
#' If \code{pivot} has multiple matches in the path, it is advisable to call this
#' function with \code{relative_to = (common path)} since it will be removed from
#' the paths before a match is sought for. Alternatively, \code{relative_to = NULL}
#' will automatically consume the longest shared path between all \code{paths}.
#'
#' \code{relative_to} is expanded (like all \code{paths}) before the regular
#' expression is looked for.
#'
#' \code{relative_to} is preserved as \code{attr(., "dir")} for future use.
#'
#' \subsection{Sample groups by nesting parent folders}{
#'
#' Sample groups are determined from the enclosing (parent) folders. For example,
#' in
#'
#' \preformatted{
#' /common1/folder1/folderA/0_A1/...
#' ../0_A2/...
#' ../folderB/0_A1/...
#' /common1/folder2/folderA/0_A1/...
#' ../folderB/0_A1/...
#' /0_A2/...
#' }
#'
#' two nested groupings are identified: (1) "folder1" and "folder2" as \code{grp_0},
#' and (2) "folderA" and "folderB" as \code{grp_1}. The common path "common1"
#' will not be used as a grouping variable. The first parent is always used as a
#' grouping variable. The full (unique) grouping is returned under \code{group}.
#'
#' Grouping can allow to anaylze multiple directories simultaneously with their
#' own set of parameters enclosed at an appropriate level of nesting.
#'
#' }
#'
#' \subsection{Replicates by nesting subfolders}{
#'
#' Sample replicates are determined from the enclosed (child) folders. For example,
#' in
#'
#' \preformatted{
#' ../0_A1/1/targetfile
#' /2/targetfile
#' ../0_A2/1/targetfile
#' }
#'
#' the first pivot ("0_A1") contains two replicates, the second pivot ("0_A2") one.
#'
#' }
#'
#' @return
#' A tibble with the experiment layout as determined from the \code{paths}
#' with columns
#' \code{grp_N}, ..., \code{grp_0} specifiying the sample groups,
#' \code{sub_1}, ..., \code{sub_N} specifiying the subfolder nesting,
#' \code{pivot},
#' \code{replicate} and \code{n_replicates}, based on \code{sub_1},
#' the \code{path} of the file relative to \code{relative_to} and its original
#' position in \code{paths} as \code{findex}.
#'
#' @examples
#' demo_paths <- c("folderA/0_A1/1/file.x", "folderA/0_A1/2/file.x",
#' "folderA/0_A2/1/file.x", "folderB/0_A1/1/file.x")
#' import_layout_from_paths(demo_paths, relative_to = NULL)
#' import_layout_from_paths(paste0("folderX/", demo_paths), relative_to = NULL)
#' import_layout_from_paths(paste0("folderX/", demo_paths[1:2]), relative_to = NULL)
#' import_layout_from_paths(paste0("folderY/folderX/", demo_paths), relative_to = NULL)
#'
#' # more complex scenarios
#' import_layout_from_paths(paste0(c("folderY/", "folderX/"), demo_paths), relative_to = NULL)
#' import_layout_from_paths(paste0(c("folderZ/folderY/", "folderX/"), demo_paths), relative_to = NULL)
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
#' @export
import_layout_from_paths <- function(paths, pivot = "[0-9]_[A-Z]+[0-9]+",
relative_to = getwd()) {
if (is.null(paths) || length(paths) == 0) {
datad <- tibble::tibble(grp_0 = character(0),
pivot = character(0),
replicate = NA_integer_, n_replicates = NA_integer_,
findex = NA_integer_,
gindex = NA_integer_,
path_to_files = character(0),
path_to_group = character(0),
group = character(0),
file = character(0))
return(datad)
}
grp_prefix <- "grp_"
sub_prefix <- "sub_"
usr.relative_to <- relative_to # store user argument
# only keep the first element if this was a nested list to # transform paths
# into a plain character vector
first_paths <- sapply(paths, "[[", 1)
datad <- tibble::as_tibble(summerr::normalizePath(first_paths))
# To avoid spurious matching of the regex to a common base directory, remove
# the common path first.
if (is.null(relative_to)) {
relative_to <- ""
if (nrow(datad) == 1) {
relative_to <- stringr::str_extract(datad$value, paste0("^.+(?=", pivot, ")"))
} else {
# determine common basis and replace with "."
folderlist <- stringr::str_split(datad$value, pattern = .Platform$file.sep,
simplify = TRUE)
log_debugging("initial folder list", object = folderlist)
foldersame <- apply(folderlist, MARGIN = 2, FUN = dplyr::n_distinct)
# browser()
if (foldersame[1] == 1) {
# Beware that which(foldersame == 1) may be true for elements after the
# pivot has been identified. This is why we do it more "complicated".
#
# We omit the last common element to account for such cases in which also
# the pivot itself is identical for all elements (and only differences in
# the replicates exist).
foldersame <- folderlist[1, which(cumsum(abs(diff(foldersame))) == 0)]
log_debugging("common folder list", object = foldersame)
if (length(foldersame) > 0) relative_to <- paste0(
foldersame, collapse = .Platform$file.sep)
}
}
}
relative_to <- summerr::normalizePath(relative_to)
if (relative_to != "") {
# The "." will designate the common path designated by the user as input
# or identified as topmost shared directory. If there is no ".", there are
# no common directories.
datad$value <- sapply(datad$value, sub, pattern = relative_to,
replacement = ".", fixed = TRUE)
}
log_debugging("assessed paths relative to", sQuote(relative_to))
# V1: grp parts
# V2: sub parts (or pivot in cases where fill = "left")
datad <- datad %>%
dplyr::mutate(pivot = stringr::str_extract(.data$value, pivot)) %>%
tidyr::separate(.data$value, into = c("V1", "V2"), sep = pivot, fill = "left") %>%
dplyr::mutate(V1 = stringr::str_remove(.data$V1, pattern = paste0(.Platform$file.sep, "$")))
datad <- datad %>%
# Remove trailing path separator as we do not count "empty" directories; the
# grp parts then look like "dir1/dir2/dir3" of which the leftmost may be
# shared across all observations. This should have been done in all cases.
# dplyr::mutate(V1 = cleanup_path(.data$V1)) %>%
dplyr::mutate(
grps_level = stringr::str_count(.data$V1, pattern = .Platform$file.sep),
subs_level = stringr::str_count(.data$V2, pattern = .Platform$file.sep)
) %>%
# unnest the groups: X X grp_N ... grp_0 (pivot)
tidyr::separate(.data$V1, into = paste0(grp_prefix, seq(max(c(0, .$grps_level),
na.rm = TRUE), 0)),
sep = .Platform$file.sep, fill = "left", extra = "drop",
remove = FALSE) %>%
# unnest the replicates: (pivot) sub_1 X X sub_2 ... sub_N (= file)
tidyr::separate(.data$V2, into = c(NA, paste0(sub_prefix, "1"), "V3"),
sep = .Platform$file.sep, fill = "left", extra = "merge",
remove = FALSE) %>%
tidyr::separate(.data$V3, into = paste0(sub_prefix, seq(2, max(c(2, .$subs_level),
na.rm = TRUE))),
sep = .Platform$file.sep, fill = "left", extra = "drop") %>%
# process data
dplyr::rename(group = "V1") %>%
dplyr::rename(file = paste0(sub_prefix, max(.$subs_level))) %>%
# remove trailing path separators
dplyr::mutate(group = stringr::str_replace(.data$group, paste0(.Platform$file.sep,
"$"), "")) %>%
# add file paths as provided by the user as long as the table is in the
# order corresponding to the original argument; if the paths argument was
# a nested list, we only continue with the topmost entry
dplyr::mutate(path_to_files = sapply(paths, "[[", 1)) %>%
dplyr::mutate(path_to_group = summerr::normalizePath(stringr::str_remove(
.data$path_to_files, stringr::str_c(pivot, .data$V2)))) %>%
# preserve the file order index
dplyr::mutate(findex = dplyr::row_number())
log_debugging(object = datad)
datad <- datad %>%
# grp_N is root ("C:" under Windows, "" under UNIX) when all paths are
# normalized, we drop it
dplyr::select(!paste0(grp_prefix, max(.$grps_level, na.rm = TRUE))) %>%
# remove additionally empty groups
dplyr::mutate(dplyr::across(tidyselect::starts_with(grp_prefix), ~ dplyr::na_if(., ""))) %>%
dplyr::select(!tidyselect::vars_select_helpers$where(~ all(is.na(.)))) %>%
# preserve the group index
dplyr::group_by(dplyr::across(tidyselect::starts_with(grp_prefix))) %>%
dplyr::mutate(gindex = dplyr::cur_group_id()) %>%
# sort by well in alphabetical order (in case needed); replicates are at
# level sub_1; if no subfolder exists, sub_1 will equal "file", which is
# acceptable
dplyr::arrange(.data$pivot, .data[[paste0(sub_prefix, 1)]], .by_group = TRUE) %>%
# group replicates per well
dplyr::group_by(.data$pivot, .add = TRUE) %>%
# add replicate information
dplyr::mutate(
replicate = dplyr::case_when(
lengths(.data$path_to_files) > 1 ~ NA_character_,
TRUE ~ .data[[paste0(sub_prefix, 1)]]),
n_replicates = dplyr::case_when(
lengths(.data$path_to_files) > 1 ~ lengths(path_to_files),
TRUE ~ dplyr::n_distinct(.data$replicate)))
if (all(lengths(paths) == 1)) datad <- dplyr::group_by(datad, .data$replicate,
.add = TRUE)
datad <- datad %>%
dplyr::select(!tidyselect::any_of(c("grps_level", "subs_level", "V2"))) %>%
dplyr::select(dplyr::group_vars(datad),
tidyselect::any_of(c("n_replicates", "findex", "gindex",
"path_to_files", "path_to_group")),
tidyselect::everything())
attr(datad, "dir") <- relative_to
datad
}
# PLOTTING ---------------------------------------------------------------------
#' Display a layout with ggplot2
#'
#' The function returns the basic layer to be modified with further geoms by
#' the user, depending on the purpose.
#'
#' @param layout A plate layout.
#' @param ... Arguments passed to the \code{\link[ggplot2:geom_tile]{ggplot2::geom_tile()}}
#' aesthetic.
#'
#' @examples
#' library(ggplot2)
#'
#' layout_file <- system.file("extdata", "platelayout_maldi.xlsx", package = "summerrmass")
#'
#' plate_384 <- import_layout_from_excel(layout_file, meta_row = c(Concentration = 1))
#' display_plate_layout(plate_384) # all defined wells
#' display_plate_layout(plate_384, fill = content)
#' display_plate_layout(plate_384, fill = as.numeric(Concentration))
#' display_plate_layout(plate_384, fill = content, alpha = as.numeric(Concentration)) +
#' labs(alpha = "concentration")
#'
#' @importFrom rlang .data
#'
#' @export
display_plate_layout <- function(layout, ...) {
ggplot2::ggplot(layout, ggplot2::aes(x = .data$well_num, y = forcats::fct_rev(.data$well_let))) +
ggplot2::geom_tile(ggplot2::aes(...)) +
ggplot2::coord_fixed() +
ggplot2::theme_minimal() + ggplot2::theme(
axis.title = ggplot2::element_blank(),
panel.grid = ggplot2::element_blank(),
legend.box = "vertical",
legend.box.just = "left",
legend.position = "bottom"
)
}
#' Display a fitted curve
#'
#' Draw a plot after fitting a two-dimensional model.
#'
#' @inheritParams ggplot2::aes
#' @param df A grouped data frame, created with \code{\link{model_cleanly_groupwise}}
#' @param digits Integer indicating the number of significant digits
#' to be used when fitted parameters are substituted in the model formula.
#' Negative values are allowed; see \link[base:Round]{signif(...)}.
#'
#' @return
#' A \code{\link[ggplot2:ggplot]{ggplot}} that can be further customized.
#'
#' @details
#' The parameters of the model, usually accessible via the \code{.$tidy}
#' column after \code{model_cleanly_groupwise} has been called, are substituted
#' in the fitted formula and can be accessed as \code{.$sub.formula}.
#'
#' @examples
#' library(dplyr)
#' library(ggplot2)
#'
#' # create some noisy data for fitting a linear model
#' x <- seq(1, 10)
#' y1 <- 3 * x + 5 + rnorm(10)
#' y2 <- 1 * x + 1 + rnorm(10)
#'
#' my_data <- bind_rows(data1 = tibble(x, y = y1),
#' data2 = tibble(x, y = y2), .id = "dataset") %>%
#' group_by(dataset)
#'
#' my_data %>%
#' model_cleanly_groupwise(lm, formula = y ~ x) %>%
#' model_display(color = dataset)
#'
#' ## positioning of per-facet text labels in ggplot is inherently
#' ## difficult; the visual of the example could be improved
#'
#' my_data %>%
#' model_cleanly_groupwise(lm, formula = y ~ x) %>%
#' model_display() +
#' # add substituted formula
#' geom_text(aes(x = -Inf, y = Inf, label = sub.formula),
#' hjust = 0, vjust = 1) +
#' # add R square
#' geom_text(data = . %>% tidyr::unnest(glance),
#' aes(x = Inf, y = Inf, label = paste("R^2 =", signif(r.squared, 3))),
#' hjust = 1, vjust = 1) +
#' # show as facet
#' facet_wrap(vars(dataset))
#'
#'
#' @importFrom rlang .data
#'
#' @export
model_display <- function(df, ..., digits = 2) {
# substitute the fitted values
substitute_and_simplify <- function(x, digits = 2) {
FML <- stats::formula(x)
COF <- stats::coef(x)
# substitute formula with coefficients
if (class(x) == "lm") {
# only "unnamed" coefficients given
SFL <- stats::as.formula(paste0(
formula.tools::lhs(FML), "~", paste0(sprintf("%f*%s", signif(COF[-1],
digits = digits),
names(COF[-1])), collapse = "+"),
"+", signif(COF[1], digits = digits)))
} else {
# coefficients are values of parameters
SFL <- do.call("substitute", list(FML, as.list(signif(COF, digits = digits))))
}
LHS <- formula.tools::lhs(SFL)
RHS <- Deriv::Simplify(formula.tools::rhs(SFL))
NFL <- substitute(y ~ x, list(y = LHS, x = RHS))
LHS.VARS <- formula.tools::lhs.vars(NFL)
RHS.VARS <- setdiff(formula.tools::get.vars(NFL), LHS.VARS)
tibble::tibble(sub.formula = list(NFL), lhs = list(LHS), rhs = list(RHS),
lhs.vars = list(LHS.VARS), rhs.vars = list(RHS.VARS))
}
df$.tmp <- lapply(df$model, substitute_and_simplify, digits = digits)
df <- dplyr::mutate(tidyr::unnest(df, ".tmp"), sub.formula = as.character(
.data$sub.formula))
# log_object(df)
# make sure to get a 2D plot
stopifnot(all(length(X_VAR <- unique(unlist(df$rhs.vars))) == 1,
length(Y_VAR <- unique(unlist(df$lhs.vars))) == 1))
X_VAR <- rlang::ensym(X_VAR)
ggplot2::ggplot(data = df, mapping = ggplot2::aes(x = !!X_VAR, ...)) +
# original data points
ggplot2::geom_pointrange(
data = . %>%
dplyr::select(dplyr::group_vars(.), .data$augment_old) %>%
tidyr::unnest(.data$augment_old),
ggplot2::aes(y = .data$.fitted + .data$.resid),
stat = "summary", fun.data = ggplot2::mean_se, fatten = 1) +
# fitted lines
ggplot2::geom_line( ## geom_line connects in order on x axis
data = . %>%
dplyr::select(dplyr::group_vars(.), .data$augment_new) %>%
tidyr::unnest(.data$augment_new),
ggplot2::aes(y = .data$.fitted)) +
# labels
ggplot2::labs(y = Y_VAR)
}
#' Arrange plots on a page
#'
#' This helper can be used to arrange plots with \code{\link[graphics:layout]{layout}}
#' on multiple pages. Unused positions on the grid are indexed with \code{NA}.
#'
#' @param items A list of items to arrange.
#' @param byrow Whether to fill rows or columns first per page.
#'
#' @details
#' As a side-effect of calling this function, the \code{\link[graphics:layout]{layout}}
#' of the current device is set to a matrix of \code{nrow} x \code{ncol}.
#'
#' @return
#' An integer vector of indices that can be used to sort \code{items} so that they
#' are neatly placed on a single page or multiple pages with \code{nrow} x \code{ncol}
#' places. The index is filled with \code{NA} on the last page.
#'
#' @examples
#' op <- graphics::par(no.readonly = TRUE) # reconstitute par(...) afterwards
#' par(mfcol = c(4, 2), mar = rep(0, 4))
#' graphics::layout.show(9) # one single plot on page 2
#'
#' plot_me <- function(n) {
#'
#' for (i in n) {
#' plot.new()
#' text(0.5, 0.5, i)
#' }
#'
#' }
#'
#' plot_me(seq(1, 3)) # appended on same page
#'
#' par(mfcol = c(4, 2), mar = rep(0, 4)) # starts new page, but not always "accessible"
#'
#' plot_me(arrange_on_page(seq(1, 9), byrow = FALSE))
#' plot_me(LETTERS[arrange_on_page(seq(1, 2), byrow = TRUE)])
#'
#' rm(plot_me)
#' par(op)
#'
#' @export
arrange_on_page <- function(items = NULL, byrow = TRUE) {
nrow <- graphics::par("mfcol")[1]
ncol <- graphics::par("mfcol")[2]
ipp <- nrow * ncol # items per page
if (is.null(items)) items <- seq_len(ipp)
nop <- length(items) %/% ipp + (length(items) %% ipp > 0) # number of pages
# call for side-effects on graphics device
graphics::layout(matrix(seq_len(ipp), nrow = nrow, ncol = ncol, byrow = byrow))
# order of elements over pages
c(seq_along(items), rep(NA, ipp - length(items) %% ipp))
}
#' Get the indices of plots that are on page borders
#'
#' Returns indices where \code{items} is next (in the direction of \code{border})
#' to a cell containing \code{NA}; all outer borders are also \code{NA}.
#'
#' @inheritParams arrange_on_page
#' @param border Top (\code{"t"}), bottom (\code{"b"}), left (\code{"l"}), right
#' (\code{"r"}).
#' @param include.widows \code{TRUE} if the last items on a semi-full page should
#' also be returned.
#'
#' @export
get_border_indices <- function(items = NULL, border = "b", byrow = TRUE, include.widows = TRUE) {
nrow <- graphics::par("mfcol")[1]
ncol <- graphics::par("mfcol")[2]
ipp <- nrow * ncol # items per page
if (is.null(items)) items <- seq_len(ipp)
nop <- length(items) %/% ipp + (length(items) %% ipp > 0) # number of pages
# ensure we have unique indices to process
index <- seq_along(items)
index[which(is.na(items))] <- NA # propagate NA
if (byrow) {
all.pages <- matrix(c(index, rep(NA, ipp - length(items) %% ipp)),
nrow = nrow * nop, ncol = ncol, byrow = TRUE)
all.pages <- lapply(seq(1, nop), function(p) all.pages[which(rep(
seq(1, nop), each = nrow) == p), ])
} else {
all.pages <- cbind(t(matrix(c(index, rep(NA, nrow - length(items) %% nrow)),
nrow = length(items) %/% nrow + length(items) %% ncol,
ncol = nrow, byrow = TRUE)),
matrix(rep(NA, nop * ipp - nrow * (length(items) %/% nrow +
length(items) %% ncol)),
nrow = nrow))
all.pages <- lapply(seq(1, nop), function(p) all.pages[, which(rep(
seq(1, nop), each = ncol) == p)])
}
log_debugging("page layout", object = all.pages)
na.b <- function(M) M[which(is.na(apply(M, 2, function(x)
ifelse(is.na(x), 0, c(x[-1], NA)))))]
na.t <- function(M) M[which(is.na(apply(M, 2, function(x)
ifelse(is.na(x), 0, c(NA, x[-1])))))]
na.r <- function(M) M[which(is.na(t(apply(M, 1, function(x)
ifelse(is.na(x), 0, c(NA, x[-1]))))))]
na.l <- function(M) M[which(is.na(t(apply(M, 1, function(x)
ifelse(is.na(x), 0, c(x[-1], NA))))))]
all.pages <- lapply(all.pages, function(x) do.call(paste0("na.", border),
args = list(M = x)))
unlist(all.pages)
}
# MODELLING --------------------------------------------------------------------
#' Fit, tidy and augment a model
#'
#' Fits a modelling call to each group of a grouped data frame, extracts the
#' fitted parameters using \code{\link[broom:tidy]{broom::tidy}} and interpolates
#' missing values using \code{\link[broom:tidy]{broom::augment}} on existing points
#' or the \code{newdata}.
#'
#' @param x Grouped data to apply \code{expr} upon. \code{x} is passed to \code{expr}
#' as first argument.
#' @param FUN A name of a function that returns a model object or another R object
#' with model information.
#' @param ... Arguments to \code{expr}.
#' @param newdata Data points at which to evaluate the model.
#'
#' @details
#' The fitting is wrapped with \code{\link[purrr:possibly]{purrr::possibly}} and
#' will return empty objects instead of errors.
#'
#' @return
#' A \link[dplyr:grouped_df]{grouped data frame} with additional columns, \code{data},
#' \code{model}, \code{tidy}, \code{augment_old}, and \code{augment_new}.
#'
#' @seealso model_display
#'
#' @importFrom rlang .data
#'
#' @export
model_cleanly_groupwise <- function(x, FUN, newdata = NULL, ...) {
# warn if not all dots arguments are named
.call <- match.call(expand.dots = FALSE)
if(any(nchar(names(.call$...)) == 0)) {
log_warn("not all arguments passed to", sQuote(.call$FUN), "are named")
}
# suppress error and warning messages from now on
old_ep <- getOption("show.error.messages")
on.exit(options(show.error.messages = old_ep))
options(show.error.messages = FALSE)
suppressWarnings({
dplyr::mutate(
tidyr::nest(x),
model = purrr::map(.data$data, purrr::possibly(FUN, NULL), ...),
tidy = purrr::map(.data$model, purrr::possibly(broom::tidy, tibble::tibble())),
glance= purrr::map(.data$model, purrr::possibly(broom::glance, tibble::tibble())),
augment_old = purrr::map2(.data$model, .data$data, purrr::possibly(broom::augment, tibble::tibble())),
augment_new = purrr::map2(.data$model, .data$data, purrr::possibly(broom::augment, tibble::tibble()),
newdata = newdata)
)
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.