msdap: Mass Spectrometry Downstream Analysis Pipeline

Documented in add_protein_prettyprint_label as_matrix_except_first_column assert_lme4_functional boxplot_add_text check_parameter_is_boolean check_parameter_is_numeric check_parameter_is_string coefficient_of_variation coefficient_of_variation_vectorized fit_t_dist_fixed_mu format_r_code gg_color_hue initialize_multiprocessing log2_to_ln make_bins matrix_grouped_column_aggregate matrix_to_long minlog10 msdap_version reduce_ggplot_object_size regex_classification remove_by_charlength replace_inf_with_NA replace_nonfinite replace_zero_with_NA scale_between_quantiles smooth_loess_custom split_array strip_common_substring subset_relevant_code_snippet_for_report threshold_numerics

#' return msdap package version as a string
#'
#' simple wrapper around utils::packageVersion()
#' @export
msdap_version = function() {
  as.character(packageVersion("msdap"))
}



#' Reduce the memory footprint of ggplot objects
#'
#' it seems like ggplot objects retain copies of source data, which can result in out of control RAM and RData file sizes.
#' For example, Cscore histograms from 200 Spectronaut files (i.e. 200 basic histograms of ~50k peptides) will results in 100+ GB RData files
#'
#' Reference; https://stackoverflow.com/questions/75594274/why-are-my-ggplot-objects-getting-so-large-in-size
#' Reference; https://github.com/tidyverse/ggplot2/issues/4056#issue-636077598
#' @param x a ggplot object, or a (nested) list of ggplot objects
reduce_ggplot_object_size = function(x) {
  # current param is a single list
  if("ggplot" %in% class(x)) {
    x = ggplot2::ggplotGrob(x)
    x = ggpubr::as_ggplot(x)
  } else {
    # if plain list, iterate elements and call this function recursively
    if(is.list(x)) {
      for(i in seq_along(x)) {
        x[[i]] = reduce_ggplot_object_size(x[[i]])
      }
    }
  }
  return(x)
}



#' add short prettyprint labels to a protein tibble using gene symbols, if available
#'
#' @param tib tibble with at least a protein_id column, but should also contain a gene_symbols_or_id column for truly pretty-print names
#' @param maxlength max string length for each output value
#' @param shorten_ambiguous replace all ambiguous entries with the first value followed by an asterix. Example; `proteinA;proteinB` becomes  `proteinA*`
#' @return input tibble with resulting values in the label column (overwritten if this column already existed in input)
add_protein_prettyprint_label = function(tib, maxlength = 12, shorten_ambiguous = TRUE) {
  if(length(tib) == 0) {
    return(NULL)
  }
  stopifnot(is.data.frame(tib) && length(tib) > 0 && "protein_id" %in% colnames(tib))

  # default label; protein_id
  tib = tib %>% mutate(label = protein_id)

  # use gene symbols if available
  if("gene_symbols_or_id" %in% colnames(tib)) {
    tib = tib %>% mutate(label = gene_symbols_or_id)
    rows_nolabel = is.na(tib$label) | nchar(tib$label) < 2
    tib$label[rows_nolabel] = tib$protein_id[rows_nolabel]
  }

  # reduce the string length of very long labels
  rows = nchar(tib$label) > maxlength
  tib$label[rows] = paste0(substr(tib$label[rows], 1, maxlength-3), "...")

  # find labels containing semi-colons, indicating ambiguous IDs
  if(shorten_ambiguous) {
    rows = grepl(";", tib$label, fixed = T)
    if(any(rows)) {
      # strip ambiguous IDs
      tib$label = gsub(";.*", "", tib$label)
      # mark ambiguous IDs with an asterix
      tib$label[rows] = paste0(tib$label[rows], "*")
    }
  }

  return(tib)
}



#' To facilitate multiprocessing, create a Parallel Socket Cluster object
#'
#' You only need this if a) not using the quickstart function and b) running DEA algorithms msqrob or msqrobsum
#'
#' @param n_thread maximum number of threads
#' @importFrom parallel detectCores makePSOCKcluster
#' @importFrom doParallel registerDoParallel
#' @export
initialize_multiprocessing = function(n_thread = 0) {
  # if no count is specified, use number of available cores minus one
  n_max = parallel::detectCores()
  n_thread = ifelse(length(n_thread) != 1 || !is.finite(n_thread) || n_thread < 1, max(c(1, n_max-1)), ceiling(n_thread))
  # limit to core count
  n_thread = min(n_max, n_thread)
  # create a psock cluster. Parallels package warns against using the "FORK" cluster with GUI front-ends or multi-threaded libraries
  cl = parallel::makePSOCKcluster(n_thread, setup_strategy = "sequential") # the latter argument deals with a bug in parallel in R 4.0; https://github.com/rstudio/rstudio/issues/6692
  doParallel::registerDoParallel(cl)
  append_log(sprintf("using %d threads for multiprocessing", n_thread), type = "info")
  return(cl)
}



#' remove NA, non-character and characters shorter than N from array
#'
#' @param x character array
#' @param minchar integer, minimum character of elements in x
remove_by_charlength = function(x, minchar) {
  x[!is.na(x) & is.character(x) & nchar(x) >= minchar]
}



#' split array into a list of N-sized chunks
#'
#' @param x array to be chunked
#' @param chunk_size integer, maximum chunk size
split_array = function(x, chunk_size) {
  l = list()
  i = 1
  n = length(x)
  while (i <= n) {
    j = min(n, i + chunk_size - 1)
    l[[length(l) + 1]] = x[i:j]
    i = i + chunk_size
  }
  return(l)
}



#' CoV computation for an array of non-log values
#'
#' @param nonlog_array numeric array of non-log values
coefficient_of_variation = function(nonlog_array) {
  sqrt(expm1(sd(log(nonlog_array), na.rm = T)^2))
}



#' fast CoV computation for all rows in a matrix of natural-log transformed values
#'
#' @param matrix_with_naturallog_intensities a matrix with values transformed into natural log
#' @importFrom matrixStats rowSds
coefficient_of_variation_vectorized = function(matrix_with_naturallog_intensities) {
  sqrt(expm1( matrixStats::rowSds(matrix_with_naturallog_intensities, na.rm = T)^2 ))
}


#' placeholder title
#' @param bp todo
#' @param show_N todo
#' @param show_values todo
#' @param ... todo
boxplot_add_text = function(bp, show_N = TRUE, show_values = T, ...) {
  if (show_N) {
    text(x = seq_along(bp$n), y = bp$stats[3, ], labels = paste("n=", bp$n, sep = ""), adj = c(1, -0.5), ...)
  }
  if (show_values) {
    for (i in seq_along(bp$n)) {
      text(x = i, y = bp$stats[, i], labels = sapply(bp$stats[, i], function(x) sprintf("%.2f", x)), adj = c(-0.5, -0.5), ...)
    }
  }
}



#' convert a wide-format data.frame to a matrix
#'
#' after pivoting a long-format tibble to wide-format, use this function to easily convert to a matrix (e.g. for efficient numerical computation using the matrixStats package)
#' @examples
#' \dontrun{
#'   m = as_matrix_except_first_column(dataset$peptides %>%
#'     pivot_wider(id_cols = "peptide_id", names_from = "sample_id", values_from = "intensity")
#'   )
#' }
#'
#' @param x a wide-format data.frame or tibble where the first column are assumed to represent unique row identifiers and other columns assumed 'data' columns for the output matrix
#' @export
as_matrix_except_first_column = function(x) {
  if (is_tibble(x)) {
    x = as.data.frame(x, stringsAsFactors = F)
  }
  rownames(x) = x[, 1]
  as.matrix(x[, -1, drop = F])
}



#' wrapper function around tidyverse's gather to convert a matrix to long-format tibble
#'
#' NA values are removed from output
#'
#' @param mat a matrix
#' @param value_name target column name for the values
#' @param column_name target column name for the row names
#' @param row_name target column name for the column names
#' @importFrom tidyr gather
#' @export
matrix_to_long = function(mat, value_name = "value", column_name = "sample", row_name = "sequence") {
  x = as_tibble(mat)
  x$rownms = rownames(mat)
  res = x %>% gather(colname, value, -rownms, na.rm = T)
  names(res)[1] = row_name
  names(res)[2] = column_name
  names(res)[3] = value_name
  return(res)
}



#' fast grouped matrix aggregation using data.table
#' @examples \dontrun{
#' x = my_integer_matrix
#' x_rowgroups = my_grouping_var
#' result = matrix_grouped_column_aggregate(x, x_rowgroups, FUN = sum)
#'
#' # validate this function yields the same as (slower) stats::aggregate()
#' df = stats::aggregate(x, by = list(groupid = x_rowgroups), FUN = sum)
#' i = match(rownames(result), df$groupid)
#' stopifnot(!is.na(i))
#' stopifnot(count_detect == as.matrix(df[,-1])[i,])
#' stopifnot(rownames(count_detect) == df$groupid[i])
#' }
#' @param x a matrix
#' @param x_rowgroups grouping variable
#' @param FUN function to apply, e.g. `sum`
#' @return a matrix, where rownames match the (unique values of the) grouping variable
matrix_grouped_column_aggregate = function(x, x_rowgroups, FUN) {
  dt = data.table::as.data.table(x)
  # use set() function to add column (fastest according to data.table manual)
  data.table::set(dt, j = "groupid", value = x_rowgroups)
  # aggregate each column by provided function, overwriting dt
  dt = dt[, lapply(.SD, FUN), by = groupid]
  # extract grouping var and the column by setting NULL  (fastest according to data.table manual)
  result_groupid = dt$groupid
  dt[,groupid := NULL]
  # convert to matrix and return
  dt = as.matrix(dt)
  rownames(dt) = result_groupid
  return(dt)
}



#' fast replacement in a data.frame; infinite values to NA
#' https://stackoverflow.com/a/57092321
#' @param df data.frame to replace values in
replace_inf_with_NA <- function(df) {
  rapply(df, function(x) replace(x, is.infinite(x), NA), classes = "numeric", how = "replace")
}



#' fast replacement in a data.frame; zero values to NA
#' https://stackoverflow.com/a/57092321
#' @param df data.frame to replace values in
replace_zero_with_NA <- function(df) {
  rapply(df, function(x) replace(x, x==0, NA), classes = "numeric", how = "replace")
}



#' fast replacement in a vector; nonfinite to target value
#' @param x vector to replace values in
#' @param val the value to replace with
replace_nonfinite = function(x, val = NA) {
  replace(x, !is.finite(x), val)
}



#' generic input validation; all provided arguments should be numeric (not array)
#'
#' @param ... arbitrary set of parameters
#' @param minval_ optionally, a finite number indicating the minimum value for each parameter
#' @param maxval_ optionally, a finite number indicating the maximum value for each parameter
check_parameter_is_numeric = function(..., minval_ = NA, maxval_ = NA) {
  varnames = lapply(substitute(list(...))[-1], deparse)
  arguments <- list(...)
  names(arguments) = unlist(varnames, use.names = F)

  for (n in names(arguments)) {
    x = arguments[[n]]
    if (length(x) != 1) {
      append_log(sprintf("function argument '%s' must be a single value", n), type = "error")
    }
    if (!is.finite(x) || !is.numeric(x)) {
      append_log(sprintf("function argument '%s' must be numeric", n), type = "error")
    }
    if (is.finite(minval_) && x < minval_) {
      append_log(sprintf("function argument '%s' must be greater than %d", n, minval_), type = "error")
    }
    if (is.finite(maxval_) && x > maxval_) {
      append_log(sprintf("function argument '%s' must be smaller than %d", n, maxval_), type = "error")
    }
  }
}



#' generic input validation; all provided arguments should be boolean/logical (not array)
#'
#' @param ... arbitrary set of parameters
check_parameter_is_boolean = function(...) {
  varnames = lapply(substitute(list(...))[-1], deparse)
  arguments <- list(...)
  names(arguments) = unlist(varnames, use.names = F)

  for (n in names(arguments)) {
    x = arguments[[n]]
    if (length(x) != 1) {
      append_log(sprintf("function argument '%s' must be a single value", n), type = "error")
    }
    if (!is.finite(x) || !is.logical(x)) {
      append_log(sprintf("function argument '%s' must be boolean", n), type = "error")
    }
  }
}



#' generic input validation; all provided arguments should be strings (not array)
#'
#' @param ... arbitrary set of parameters
check_parameter_is_string = function(...) {
  varnames = lapply(substitute(list(...))[-1], deparse)
  arguments <- list(...)
  names(arguments) = unlist(varnames, use.names = F)

  for (n in names(arguments)) {
    x = arguments[[n]]
    if (length(x) != 1) {
      append_log(sprintf("function argument '%s' must be a single value", n), type = "error")
    }
    if (!is.character(x)) {
      append_log(sprintf("function argument '%s' must be a string", n), type = "error")
    }
  }
}



#' generate colours analogous to ggplot's default palette
#'
#' https://stackoverflow.com/a/8197703
#'
#' @param n number of colors
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  grDevices::hcl(h = hues, l = 65, c = 100)[1:n]
}



#' Accept pvalues that are exactly zero as the 'best pvalue'
#'
#' @param x values to transform
#' @export
minlog10 = function(x) {
  y = -log10(x)
  y[!is.finite(x)] = NA
  y[is.finite(x) & x == 0] = max(y[is.finite(y)])
  return(y)
}



#' convert base-log from log2 to natural log
#'
#' log_a(x) = log_b(x) / log_b(a)
#'
#' @examples
#' \dontrun{
#'   x = 1:4
#'   cbind(input = x, as_log2 = log2(x), as_ln = log2_to_ln(log2(x)),
#'   ln_then_undo = exp(log2_to_ln(log2(x))) )
#' }
#' @param x numeric values that were log2 transformed
log2_to_ln = function(x) {
  x / log2(exp(1))
}



#' threshold finite numeric values
#'
#' @param x numeric array or matrix
#' @param threshold limit finite values below this val
threshold_numerics = function(x, threshold) {
  x[is.finite(x) & x < threshold] = threshold
  return(x)
}



#' Classify an array of strings by regular expression
#'
#' For example, provide fasta headers and an array of regular expressions to find out which are yeast/human while removing ambigous entries or those matching ecoli:
#' regex = c(human="_HUMA", yeast="_YEAS", discard="_ECOL")
#'
#' @param x array of string where each regex is applied to
#' @param regex named vector of regular expressions. setting proper names is important, these are the classification labels. example: c(human="_HUMA", yeast="_YEAS", discard="_ECOL")
#' @param discard_label default label for classifications that should be discarded downstream. default: "discard"
#' @export
regex_classification = function(x, regex, discard_label = "discard") {
  stopifnot(is.vector(regex) && all(nchar(names(regex))>0))

  # allocate matrix to hold each regex' results
  mat_mask = matrix(F, nrow = length(x), ncol = length(regex), dimnames=list(NULL, names(regex)))
  # apply each regex and store in matrix
  for(i in seq_along(regex)) {
    if(!is.na(regex[i]) & regex[i] != "") {
      mat_mask[,i] = grepl(regex[i], x, ignore.case=T, perl = T) # enable perl regex so we support negative lookahead
    }
  }

  # count how many regex' match each protein / fasta header
  mat_mask_counts = rowSums(mat_mask)
  rows_single = mat_mask_counts == 1

  # default return value; discard
  classification = rep(discard_label, length(x))
  # for each regex, update classification if protein only matches this regex
  for(n in names(regex)) {
    classification[rows_single & mat_mask[,n]] = n
  }

  # proteins that match multiple regex' are discarded
  classification[mat_mask_counts > 1] = discard_label

  return(classification)
}



#' bin an array and return both the bins and their metadata (label and mean value)
#'
#' @param x numeric vector
#' @param from parameter for seq()
#' @param to parameter for seq()
#' @param length length.out parameter for seq()
make_bins = function(x, from, to, length) {
  x_mask = !is.na(x)
  b = seq(from=from, to=to, length.out = length)
  x[x_mask & x<from] = from
  x[x_mask & x>to] = to
  ct = cut(x, breaks = b, include.lowest = T) # ct[as.numeric(ct) == 1]
  # from strings that contain the range of each bin in brackets, extract both numerics and take mean value
  ct_mean_value = unlist(lapply(strsplit(gsub("[^0-9,.-]", "", levels(ct)), split = ",", fixed = T),
                                function(x) mean(as.numeric(x))))
  ct_numeric = as.numeric(ct)
  result = array(ct_numeric, dimnames = list(as.character(ct_mean_value)[ct_numeric]))

  attr(result, "bin_ids") <- seq_along(levels(ct))
  attr(result, "bin_names") <- levels(ct)
  attr(result, "bin_means") <- ct_mean_value
  return(result)
}



#' Loess fit on finite value pairs. Ignores overfit warnings, returns input data on error
#'
#' if a robust loess fit is desired, add parameter family='symmetric' which will be passed to loess() function
#'
#' @param x numeric vector
#' @param y numeric vector
#' @param span span argument for loess() function
#' @param remove_na_yval set all output values where is.na(y) to NA. default is TRUE
#' @param min_values minimum number of values required (if less, input parameter y is returned). default is 10
#' @param ... passed to loess() function
smooth_loess_custom = function(x, y, span=.1, remove_na_yval = TRUE, min_values = 10, ...) {
  # debug; X<<-x; Y<<-y

  # subset entries where both x and y are finite, to be used for loess fit
  x_orig = x
  y_orig = y
  flag_valid = is.finite(x) & is.finite(y)
  x = x[flag_valid]
  y = y[flag_valid]

  if(length(y) >= min_values) {
    # ignore loess warnings (eg; overfitting). Especially important for systems that halt on warning (eg; options(warn = 2) ) as this occurs on many datasets where this function is used in RT QC plots
    z = tryCatch(predict(suppressWarnings(loess(y ~ x, span = span, ...)), x_orig), error=function(e) NULL)

    # ? optionally, we could fall-back and try a wider span on error. eg; add param fallback_spans=c(0.25,0.5,1); for(span in na.omit(fallback_spans)) z=tryCatch(fit loess)... break loop on success

    # if loess fit succeeded, return predicted values
    if(length(z) == length(y_orig)) {
      if(remove_na_yval) {
        z[is.na(y_orig)] = NA
      }
      return(z)
    }
  }

  # default / fallback
  return(y_orig)

  ## v1
  # if(length(y) < 10) return(y_orig)
  # z = predict(loess(y ~ x, span = span), x_orig)
  # if(remove_na_yval) {
  #   z[is.na(y_orig)] = NA
  # }
  # return(z)
}



#' scale an array of numerics between 0~1, thresholding values at lower/upper quantiles
#' non-finite values are ignored (not replaced).
#' example: x=rnorm(1000); hist(x); hist(scale_between_quantiles(x, min_quantile=0.01, max_quantile = 0.99))
#'
#' edge-case; if either lower or upper quantile could not be determined, all finite values of x will be set to 1
#' edge-case; if all non-finite input values are the same, the upper and lower quantiles are the same. All finite values of x will be set to 1
#'
#' @param x values to transform
#' @param min_quantile lower quantile, used as parameter in quantile()
#' @param max_quantile upper quantile, used as parameter in quantile()
scale_between_quantiles = function(x, min_quantile = 0.01, max_quantile = 0.99) {
  # basic input validation
  stopifnot(is.numeric(x) && min_quantile>0 && max_quantile<1 && max_quantile > min_quantile)

  x_mask = is.finite(x)
  q = quantile(x[x_mask], probs = c(min_quantile, max_quantile))

  # edge-case; if all non-finite input values are the same, the upper and lower quantiles are the same. All finite values of x will be set to 1
  if(any(!is.finite(q)) || q[2] <= q[1]) {
    x[x_mask] = 1
    return(x)
  }

  x[x_mask & x < q[1]] = q[1]
  x[x_mask & x > q[2]] = q[2]
  # subtract lower threshold, then divide by upper threshold (which we adjust for the lower threshold as well)
  (x-q[1]) / (q[2] - q[1])
}



#' format a string of R code using the formatR and styler packages
#'
#' parameter can either be a string or an array of strings (eg; lines from R history)
#' @param rcode_lines R code as an array of characters (strings)
#' @importFrom formatR tidy_source
#' @importFrom styler cache_deactivate style_text
format_r_code = function(rcode_lines) {
  rcode_formatr = formatR::tidy_source(text=rcode_lines, width=20, output = F, comment = F, blank = F)
  styler::cache_deactivate(verbose = FALSE)
  rcode_formatr_styler = styler::style_text(rcode_formatr$text.tidy, scope = "line_breaks", strict = T)
  return(as.character(rcode_formatr_styler))
}



#' subset lines of R code (provided as strings) from the last "import_dataset" function on
#'
#' in case users run the pipeline a few times consecutively (eg; testing various parameters) without resetting R history, we only want to keep the relevant portion of the code.
#' simple/transparent way is to show everything from latest dataset import function or source() on
#'
#' @param rcode_lines R code as an array of characters (strings)
subset_relevant_code_snippet_for_report = function(rcode_lines) {
  i = grep("^[^#]+=\\s*import_dataset", rcode_lines)
  # i = grep("^\\s*source\\(|\\s*=\\s*import_dataset", rcode_lines, perl = T)
  if(length(i) > 0) {
    rcode_lines = rcode_lines[tail(i,1):length(rcode_lines)]
  }
  return(rcode_lines)
}



#' remove starting or trailing substrings that ar the same for all strings in a vector
#'
#' eg; `.txt` in all filesnames, or `<experiment data><samplename>`
#'
#' @examples
#' \dontrun{ strip_common_substring(s = unique(peptides$sample_id)) }
#' \dontrun{ strip_common_substring(s = c("sample_a.txt","sample_b1.txt","sample_xyz.txt")) }
#' \dontrun{ strip_common_substring(s = c("","sample_b1.txt","sample_xyz.txt")) }
#' \dontrun{ strip_common_substring(s = c("sample.txt","sample_b1.txt","sample_xyz.txt")) }
#' \dontrun{
#'   assertthat::are_equal(
#'     strip_common_substring(c("sample_a.txt","sample_b1.txt","sample_xyz.txt")),
#'     c("a","b1","xyz")
#'   )
#' }
#'
#' @param s array of strings
#'
#' @importFrom data.table uniqueN
#' @importFrom stringr str_split_fixed str_sub
strip_common_substring = function(s) {
  s_nchar = nchar(s)
  if(any(s_nchar <= 1) || length(s) < 2) return(s)

  ## find common prefix; convert string vector to character matrix, then check which columns are identical
  # character matrix + find similarity across vector s
  m = stringr::str_split_fixed(s, "", n = max(s_nchar))
  m_uniqueN = apply(m, 2, data.table::uniqueN)

  position_first_unique = which(m_uniqueN != 1)
  if(length(position_first_unique) > 0) {
    s = stringr::str_sub(s, position_first_unique[1])
  }


  ## analogously for trailing, take last N characters from s (after prefix filtering) and use the same 'identity in character matrix' trick
  # take trailing
  s_nchar = nchar(s)
  n_trail = min(s_nchar)
  s_end = stringr::str_sub(s, -n_trail) # s_end = stringr::str_sub(s, start = s_nchar - n_trail + 1, end=s_nchar)
  # character matrix + find similarity across vector s
  m_end = stringr::str_split_fixed(s_end, "", n = n_trail)
  m_end_uniqueN = apply(m_end, 2, data.table::uniqueN)
  mask_end = m_end_uniqueN == 1
  if(any(mask_end)) {
    position_last_unique = max(0, tail(which(mask_end != 1), 1))
    n_remove_end = n_trail - position_last_unique
    s = stringr::str_sub(s, 1, -n_remove_end - 1)
  }

  return(s)
}



#' use optim() to fit a t-distribution with fixed mu=0
#' @examples
#' \dontrun{
#' # fit normal
#' x = rnorm(1000, sd = 2)
#' fit = suppressWarnings(fit_t_dist_fixed_mu(x))
#' h = hist(x, breaks = 25, freq = F)
#' curve(dt(x / fit[1], df=fit[2]) / fit[1], col = 2, add = T)
#' }
#' \dontrun{
#' # fit t-distribution
#' x = rt(1000, df = 3)
#' fit = suppressWarnings(fit_t_dist_fixed_mu(x))
#' h = hist(x, breaks = 25, freq = F)
#' curve(dt(x / fit[1], df=fit[2]) / fit[1], col = 2, add = T)
#' }
#' @param x numeric vector of at least 10 finite values
fit_t_dist_fixed_mu = function(x) {
  mydt = function(par) {
    s = par[1]
    df = par[2]
    -sum(stats::dt((x - 0)/s, df, log = TRUE) - log(s))
  }

  x = x[is.finite(x)]
  if(length(x) < 10) return(NULL)

  fit = tryCatch(
    {
      x_sd = sd(x)
      x_mad = mad(x)
      optim(par = c(x_sd*0.5, 10), fn = mydt, method = "L-BFGS-B", lower = c(x_mad*0.5, 1), upper = c(x_sd*1.5, 20))
    },
    error = function(x) NULL
  )
  if(!is.null(fit)) return(c(sigma=fit$par[1], df=fit$par[2]))
}



#' Throw an error if lme4::lmer() cannot be executed
#'
#' @description
#' `stop()` is called when lme4 or Matrix is not installed, or when `lme4::lmer()`
#' cannot be executed (with default settings and mock data).
#' If this function does not throw an error, we may assume `lme4::lmer()` is operational.
#' @export
assert_lme4_functional = function() {
  if(!requireNamespace("lme4", quietly = TRUE)) {
    stop("required package lme4 is not installed", call. = FALSE)
  }
  if(!requireNamespace("Matrix", quietly = TRUE)) {
    stop("required package Matrix is not installed", call. = FALSE)
  }

  result = tryCatch({
    suppressMessages(lme4::lmer(formula = expression ~ (1 | condition), data = data.frame(
      expression = c(14.19, 16.09, 14.94, 15.51, 14.82, 14.87),
      condition = c(1, 1, 1, 2, 2, 2)
    )))
  }, error = function(e) {
    return(e)
  })

  if("error" %in% class(result)) {
    stop(paste0("error while executing function lme4::lmer(), the installed version of R packages lme4 and Matrix are possibly incompatible -> ", result$message))
  }
}
ftwkoopmans/msdap documentation built on March 5, 2025, 12:15 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ftwkoopmans/msdap
Mass Spectrometry Downstream Analysis Pipeline

R/util_generic.R
In ftwkoopmans/msdap: Mass Spectrometry Downstream Analysis Pipeline

R Package Documentation

Browse R Packages

We want your feedback!

ftwkoopmans/msdap Mass Spectrometry Downstream Analysis Pipeline

R/util_generic.R In ftwkoopmans/msdap: Mass Spectrometry Downstream Analysis Pipeline

R Package Documentation

Browse R Packages

We want your feedback!

ftwkoopmans/msdap
Mass Spectrometry Downstream Analysis Pipeline

R/util_generic.R
In ftwkoopmans/msdap: Mass Spectrometry Downstream Analysis Pipeline