R/filter_functions.R
In MultIS: Reconstruction of Clones from Integration Site Readouts and Visualization

Documented in convert_columnwise_relative filter_at_tp_biggest_n filter_at_tp_min filter_combine_measurements filter_is_names filter_match filter_measurement_names filter_names filter_nr_tp_min filter_zero_columns filter_zero_rows

#' Converts a matrix to relative abundances
#'
#' @param data A matrix of readouts that should be converted to relative
#'             abundances
#'
#' @export
#' @return The matrix with all columns in percent
convert_columnwise_relative <- function(data) {
  stopifnot(is.matrix(data))
  stopifnot(nrow(data) > 0)

  clss <- class(data)
  cols <- colSums(data) != 0
  if (length(cols) == 0) {
    return(data)
  }
  data[, cols] <- t(t(data[, cols, drop = FALSE]) /
                     colSums(data[, cols, drop = FALSE], na.rm = TRUE))
  class(data) <- clss
  return(data)
}

#' Filters a matrix of readouts for IS that have a minimum occurrence in some
#' measurement
#'
#' @param data The readout matrix to filter.
#' @param at A filter for the columns/measurements. Only matching
#'           columns/measurements are considered, though all will be returned.
#'           This is a regular expression, so multiple columns/measurements may
#'           match it.
#' @param min The minimum with which an IS has to occur. This could be either
#'            absolute or relative reads. If `at` matches multiple
#'            columns/measurements, the 'rowSum()' over the columns will be
#'            used.
#'
#' @return A matrix with only the IS that occur with a minimum at the
#'         selected measurements.
#' @export
filter_at_tp_min <- function(data, at = "168", min = 0.02) {
  stopifnot(length(min) == 1)
  if (!is.numeric(min)) {
    warning("Argument min is not numeric, converting now")
    min <- as.numeric(min)
  }
  clss <- class(data)
  # given time point
  tp <- data[, grep(paste0("^.*", at, ".*$"), colnames(data)), drop = FALSE]
  # at least min at the last time point
  tp_gt_min <- rowSums(tp >= min, na.rm = TRUE) > 0
  data <- data[tp_gt_min, , drop = FALSE]
  class(data) <- clss
  return(data) # the data for the previous filters
}

#' Filters a matrix of readouts for the n biggest IS at a certain
#' measurement
#'
#' @param data The readout matrix to filter.
#' @param at A filter for the columns/measurement. Only matching
#'           columns/measurements are considered, though all will be returned.
#' @param n The number of biggest IS to return. If `at` matches multiple
#'          columns/measurements, the 'rowSum()' over the columns/measurements
#'          will be used. For ties, more than `n` IS may be returned.
#'
#' @return A matrix with only the n biggest IS at the selected measurements.
#' @export
filter_at_tp_biggest_n <- function(data, at = "168", n = 50) {
  stopifnot(length(n) == 1)
  if (!is.integer(n)) {
    warning("Argument n is not an integer, converting now")
    n <- as.integer(n)
  }
  clss <- class(data)
  # given time point
  tp <- data[, grep(paste0("^.*", at, ".*$"), colnames(data)), drop = FALSE]
  if (ncol(tp) > 1) {
    tp <- rowSums(tp)
    data <- data[rank(tp) > length(tp) - n, ]
  } else {
    data <- data[rank(tp) > nrow(tp) - n, ]
  }

  class(data) <- clss
  return(data)
}

#' Filters for a minimum number of time points/measurements
#'
#' @export
#' @param dat The readout matrix to filter.
#' @param min The minimum number of measurements where an IS needs to have a
#'            value that is not 0 or NA.
#' @return A matrix with only ISs that have more than `min` columns that are
#'         not 0 or NA.
filter_nr_tp_min <- function(dat, min = 6) {
  stopifnot(length(min) == 1)
  if (!is.integer(min)) {
    warning("min is not an integer, converting now")
    min <- as.integer(min)
  }
  clss <- class(dat)
  dat <- dat[rowSums(!is.na(dat), na.rm = TRUE) >= min, , drop = FALSE]
  dat <- dat[rowSums(dat != 0, na.rm = TRUE) >= min, , drop = FALSE]
  class(dat) <- clss
  return(dat)
}

#' Filters a vector of names and returns the shortest common prefix.
#'
#' @export
#' @param names The vector of names to filter.
#' @param by A regexp that splits the string. The default filters by special
#'           characters. A split by character can be achieved by using "." as
#'           the regexp.
#' @return The names shortened to the shortest prefix (in chunks defined by
#'         the regexp) where all names are unique.
filter_names <- function(names, by = "[_():]|[^_():]*") {
  names <- regmatches(names, gregexpr(by, names))

  max_len <- max(unlist(lapply(names, length)))
  for (shrtst in 1:max_len) {
    # might pad names that are too short with NA
    tmp <- lapply(names, `[`, 1:shrtst)
    if (!anyDuplicated(tmp)) {
      break
    }
  }

  return(lapply(tmp, function(x) {
    paste(stats::na.omit(x), collapse = "")
  }))
}

#' Shortens the rownames of a readout matrix to the shortest distinct prefix
#'
#' @export
#' @param dat The readout matrix for which the names should be filtered.
#' @param by The regexp used to split the names.
#' @return A matrix with the names filtered to the shortest unique prefix.
#' @seealso filter.names
filter_is_names <- function(dat, by = "[_():]|[^_():]*") {
  rownames(dat) <- filter_names(rownames(dat), by = by)
  return(dat)
}

#' Filters for columns containing a certain substring.
#'
#' @export
#' @param dat The readout matrix to filter.
#' @param match The substring that columns must match.
#' @return A readout matrix that only contains the columns whose names contain
#'         the substring.
filter_match <- function(dat, match = "E2P11") {
  clss <- class(dat)
  dat <- dat[, grep(paste0(".*", match, ".*"), colnames(dat))]
  class(dat) <- clss
  return(dat)
}

#' Splits a vector of strings by a given regexp, selects and rearranges the
#' parts and joins them again
#'
#' @export
#' @param dat The readout matrix to filter.
#' @param elems The elements to select. They are rearrange in the order that is
#'              given via this argument.
#' @param by The string used for splitting the names of the columns.
#' @return A matrix where the names of the columns are split by the given
#'         string, rearranged and again joined by the string.
filter_measurement_names <- function(dat, elems = c(1, 3), by = "_") {
  clss <- class(dat)
  old_cn <- colnames(dat)
  new_cn <- unlist(lapply(old_cn, function(x) {
    paste(strsplit(x, split = by)[[1]][elems], collapse = by)
  }))
  colnames(dat) <- new_cn
  class(dat) <- clss
  return(dat)
}

#' Combines columns that have the same name. The columns are joined additively.
#'
#' @export
#' @param dat The readout matrix to filter.
#' @param pre_norm Whether to normalize columns before joining them.
#' @param post_norm Whether to normalize columns after they are joined.
#' @return A matrix in which columns that had the same name are added and
#'         (possibly) normalized.
filter_combine_measurements <- function(dat,
                                        pre_norm = TRUE,
                                        post_norm = TRUE) {
  clss <- class(dat)

  nm <- colnames(dat)
  ret <- matrix(data = NA,
                nrow = nrow(dat),
                ncol = length(unique(nm)),
                dimnames = list(rownames(dat), unique(nm)))

  for (ucn in unique(nm)) {
    cols <- nm == ucn
    ss <- dat[, cols, drop = FALSE]
    if (pre_norm) {
      ss <- t(t(ss) / colSums(ss, na.rm = TRUE))
    }
    ret[, ucn] <- rowSums(ss, na.rm = TRUE)
  }

  if (post_norm) {
    ret <- t(t(ret) / colSums(ret, na.rm = TRUE))
  }

  class(ret) <- clss
  return(ret)
}

#' Removes rows that only contain 0 or NA.
#'
#' @export
#' @param dat The readout matrix to filter.
#' @return A matrix where rows that where only 0 or NA are filtered out.
filter_zero_rows <- function(dat) {
  clss <- class(dat)
  dat <- dat[rowSums(dat, na.rm = TRUE) != 0, , drop = FALSE]
  class(dat) <- clss
  return(dat)
}

#' Removes columns that only contain 0 or NA.
#'
#' @export
#' @param dat The readout matrix to filter.
#' @return A matrix where columns that where only 0 or NA are filtered out.
filter_zero_columns <- function(dat) {
  clss <- class(dat)
  dat <- dat[, colSums(dat, na.rm = TRUE) != 0, drop = FALSE]
  class(dat) <- clss
  return(dat)
}

Any scripts or data that you put into this service are public.

MultIS documentation built on Aug. 6, 2021, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MultIS
Reconstruction of Clones from Integration Site Readouts and Visualization

R/filter_functions.R
In MultIS: Reconstruction of Clones from Integration Site Readouts and Visualization

Defines functions filter_zero_columns filter_zero_rows filter_combine_measurements filter_measurement_names filter_match filter_is_names filter_names filter_nr_tp_min filter_at_tp_biggest_n filter_at_tp_min convert_columnwise_relative

Documented in convert_columnwise_relative filter_at_tp_biggest_n filter_at_tp_min filter_combine_measurements filter_is_names filter_match filter_measurement_names filter_names filter_nr_tp_min filter_zero_columns filter_zero_rows

Try the MultIS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MultIS Reconstruction of Clones from Integration Site Readouts and Visualization

R/filter_functions.R In MultIS: Reconstruction of Clones from Integration Site Readouts and Visualization

Defines functions filter_zero_columns filter_zero_rows filter_combine_measurements filter_measurement_names filter_match filter_is_names filter_names filter_nr_tp_min filter_at_tp_biggest_n filter_at_tp_min convert_columnwise_relative

Documented in convert_columnwise_relative filter_at_tp_biggest_n filter_at_tp_min filter_combine_measurements filter_is_names filter_match filter_measurement_names filter_names filter_nr_tp_min filter_zero_columns filter_zero_rows

Try the MultIS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MultIS
Reconstruction of Clones from Integration Site Readouts and Visualization

R/filter_functions.R
In MultIS: Reconstruction of Clones from Integration Site Readouts and Visualization