xcore: xcore expression regulators inference

Documented in applyOverColumnGroups applyOverDFList design2factor estimateStat getCoverage isTRUEorFALSE mae maeSummary mse rsq simplifyInteractionMatrix subsetWithMissing translateCounts

#' re-export magrittr pipe operator
#'
#' @name %>%
#' @rdname pipe
#'
#' @importFrom magrittr %>%
#'
#' @export
NULL

# #' Intersect GRanges objects
# #'
# #' \code{intersectGR} intersect two GRanges objects and return ranges from
# #' \code{a} that were overlapped by ranges in \code{b}.
# #'
# #' @param a GRanges object.
# #' @param b GRanges object.
# #' @param ... other arguments internally passed to
# #'   \code{\link[GenomicRanges]{findOverlaps}}.
# #'
# #' @return GRanges object which is a subset of \code{a}.
# #'
# intersectGR <- function(a, b, ...) {
#   warning("deprecated! use IRanges::subsetByOverlaps instead") # TODO drop from the package
#   stopifnot(is(a, "GRanges"))
#   stopifnot(is(b, "GRanges"))
#
#   IRanges::subsetByOverlaps(x = a, ranges = b, ...)
# }

#' Calculate regions coverage
#'
#' \code{getCoverage} calculates coverage of regions (rows in interaction
#' matrix) by features (columns). It is possible to specify features grouping
#' variable \code{gr} then coverage tells how many distinct groups the region
#' overlap with.
#'
#' @param mat dgCMatrix interaction matrix such as produced by
#'   \code{\link{getInteractionMatrix}}.
#' @param gr factor specifying features groups. Must have length equal
#'   to number of columns in \code{mat}.
#'
#' @return Numeric vector.
#'
#' @examples
#' data("remap_mini")
#' y <- colnames(remap_mini)
#'
#' # simple coverage
#' gr <- seq_along(y) %>% as.factor()
#' getCoverage(remap_mini, gr)
#'
#' # per cell type coverage
#' gr <- sub(".*\\.", "", y) %>% as.factor()
#' getCoverage(remap_mini, gr)
#'
#' @importFrom DelayedArray colsum
#'
#' @export
getCoverage <- function(mat, gr) {
  stopifnot("mat must be an instance of class 'dgCMatrix'" = is(mat, "dgCMatrix"))
  stopifnot("gr must be a factor" = is.factor(gr))
  stopifnot("gr length must equal to number of columns in mat" = length(gr) == ncol(mat))

  DelayedArray::colsum(x = mat, group = gr) %>%
    `>=`(1) %>%
    rowSums()
}

#' Simplify Interaction Matrix
#'
#' @param mat dgCMatrix interaction matrix such as produced by
#'   \code{\link{getInteractionMatrix}}.
#' @param alpha Number between 0 and 1 specifying voting threshold. Eg. for 3
#'   column matrix alpha 0.5 will give voting criteria >= 2.
#' @param colname character giving new column name.
#'
#' @return dgCMatrix
#'
#' @importFrom Matrix rowSums sparseMatrix
#'
simplifyInteractionMatrix <- function(mat, alpha = 0.5, colname = NA) {
  stopifnot(is(mat, "dgCMatrix"))
  stopifnot(is.numeric(alpha) && (length(alpha) == 1))
  stopifnot((alpha > 0) && (alpha <= 1))

  crit <- ceiling(ncol(mat) * alpha)
  i <- which(Matrix::rowSums(mat) >= crit)
  Matrix::sparseMatrix(i = i,
                       j = rep(1L, length(i)),
                       x = 1,
                       dims = c(nrow(mat), 1L),
                       dimnames = list(rownames(mat), colname))
}

#' Apply function over groups of columns
#'
#' Returns a array obtained by applying a function to rows of submatrices of the
#' input matrix, where the submatrices are divided into specified groups of
#' columns.
#'
#' @param mat a matrix.
#' @param groups a vector giving columns grouping.
#' @param f function to be applied.
#' @param ... optional arguments to \code{f}.
#'
#' @return a matrix of dimensions \code{nrow(mat) x nlevels(groups)}.
#'
applyOverColumnGroups <- function(mat, groups, f, ...) {
  stopifnot(is.matrix(mat))
  stopifnot(is.vector(groups) || is.factor(groups))
  stopifnot(length(groups) == ncol(mat))
  stopifnot(is.function(f))

  if (is.vector(groups)) {
    groups <- factor(groups)
  }

  group_f <- lapply(X = levels(groups),
                    FUN = function(g) {
                      apply(X = mat,
                            MARGIN = 1,
                            FUN = function(row) f(row[groups == g], ...))
                    })
  new_mat <- do.call(cbind, group_f)
  colnames(new_mat) <- levels(groups)

  return(new_mat)
}

#' Estimate linear models goodness of fit statistic
#'
#' Estimate goodness of fit statistic of penalized linear regression models.
#' Works with different goodness of fit statistic functions.
#'
#' @inheritParams glmnet::glmnet
#' @param u offset vector as in \code{\link{glmnet}}. \code{"U"} experiment in
#'   mae.
#' @param s user supplied lambda.
#' @param method currently only cross-validation is implemented.
#' @param nfold number of fold to use in cross-validation.
#' @param statistic function computing goodness of fit statistic. Should accept
#'   \code{y}, \code{x}, \code{offset} arguments and return
#'   a numeric vector of the same length. See \code{rsq}, \code{mse} for examples.
#' @param ...  Other arguments that can be passed to glmnet.
#'
#' @return numeric vector of \code{statistic} estimates.
#'
#' @importFrom foreach foreach
#'
estimateStat <- function(x, y, u, s, method = "cv", nfold = 10, statistic = rsq, alpha = 0, ...) {
  if (method == "cv") {
    out <- c()
    part <- sample(seq_len(nfold), size = length(y), replace = TRUE)

    out <- foreach::foreach(p_ = seq_len(nfold), .combine = c) %do%
      {
        py <- y[part != p_]
        px <- x[part != p_, ]
        poffset <- u[part != p_, ]
        mod <- glmnet::glmnet(x = px, y = py, offset = poffset, lambda = s, alpha = alpha, ...)

        # evaluate on held-out fold
        py <- y[part == p_]
        px <- x[part == p_, ]
        poffset <- u[part == p_, ]
        yhat <- stats::predict(mod, newx = px, newoffset = poffset, s = s)
        stat <- statistic(py, yhat, px, poffset)
        out <- c(out, stat)
      }
  }

  return(out)
}

# declare estimateStat foreach variables
utils::globalVariables("p_")

#' Calculate $R^2$
#'
#' @param y numeric vector of observed expression values.
#' @param yhat numeric vector of predicted expression values.
#' @param offset numeric vector giving basal expression level.
#'
#' @return numeric vector
#'
rsq <- function(y, yhat, px, offset) {
  y <- y - offset
  yhat <- yhat - offset
  1 - (sum((y - yhat)^2) / (var(y) * (length(y) - 1)))
}

#' Calculate Mean Squared Error
#'
#' @param y numeric vector of observed expression values.
#' @param yhat numeric vector of predicted expression values.
#' @param ... not used.
#'
#' @return numeric vector
#'
mse <- function(y, yhat, ...) mean((y - yhat)^2)

#' Calculate Mean Absolute Error
#'
#' @param y numeric vector of observed expression values.
#' @param yhat numeric vector of predicted expression values.
#' @param ... not used.
#'
#' @return numeric vector
#'
mae <- function(y, yhat, ...) mean(abs(y - yhat))

#' Transform design matrix to factor
#'
#' @param design design matrix
#'
#' @return factor
#'
#' @examples
#' \dontrun{
#' design <- matrix(data = c(1, 1, 0, 0, 0, 0, 1, 1),
#'                  nrow = 4,
#'                  ncol = 2,
#'                  dimnames = list(c(paste("sample", 1:4)), c("gr1", "gr2")))
#' design2factor(design)
#' }
#'
design2factor <- function(design) {
  # based on edgeR::designAsFactor, but jokes aside
  groups <- factor(rowMeans(design * col(design) * ncol(design)))
  samples_to_keep <- groups != 0
  groups <- groups[samples_to_keep] # omit empty groups
  groups <- droplevels(groups)
  levels(groups) <- colnames(design)[colSums(design) > 0]

  return(groups)
}

#' Check if argument is a binary flag
#'
#' @param x object to test
#'
#' @return binary flag
#'
isTRUEorFALSE <- function(x) {
  (length(x) == 1) && is.logical(x) && (! is.na(x))
}

#' Apply function over selected column in list of data frames
#'
#' \code{applyOverDFList} operates on a list of data frames where all data frames
#' has the same size and columns. Column of interest is extracted from each data
#' frame and column binded in \code{groups}, next \code{fun} is applied over
#' rows. Final result is a matrix with result for each group on a separate column.
#' Function is parallelized over groups.
#'
#' @param list_of_df list of \code{data.frame}s.
#' @param col_name string specifying column in \code{data.frame}s to apply
#'   \code{fun} on.
#' @param fun function to apply, should take a single vector as a argument.
#' @param groups factor defining how elements of \code{list_of_df} should be
#'   grouped.
#'
#' @return matrix with \code{nrow(list_of_df[[1]])} rows and
#'   \code{nlevels(groups)} columns.
#'
applyOverDFList <- function(list_of_df, col_name, fun, groups) {
  stopifnot("all list_of_df names must be included in groups" = setequal(names(list_of_df), names(groups)))
  stopifnot("groups must not have unused levels" = setdiff(levels(groups), groups) == character(0))

  col_fun_mat <- foreach::foreach(gr_ = levels(groups), .combine = cbind) %dopar% # parallel version is bit faster
    {
      i <- groups == gr_
      attr <- lapply(list_of_df[i], function(df) df[[col_name]])
      attr <- do.call(cbind, attr)
      matrix(
        data = apply(X = attr, MARGIN = 1, FUN = fun),
        ncol = 1L,
        dimnames = list(rownames(attr), gr_)
      )
    }

  return(col_fun_mat)
}

# declare applyOverDFList foreach variables
utils::globalVariables("gr_")

#' Subset keeping missing
#'
#' Subset matrix keeping unmatched rows as NA.
#'
#' @param mat matrix
#' @param rows character
#'
#' @return a matrix
#'
subsetWithMissing <- function(mat, rows) {
  i <- match(x = rows, table = rownames(mat), nomatch = 0)
  i[i == 0] <- NA
  smat <- mat[i, ]
  rownames(smat) <- rows
  smat
}

#' Helper summarizing MAE object
#'
#' @param mae MultiAssayExperiment object.
#'
#' @return named list giving number of rows and columns, overall mean and
#'   standard deviation in \code{mae}'s experiments.
#'
#' @importFrom MultiAssayExperiment experiments
#' @importFrom Matrix mean
#' @importFrom stats sd
#'
#'
maeSummary <- function(mae) {
  lapply(MultiAssayExperiment::experiments(mae), function(x) c(dim(x), Matrix::mean(x), sd(x)))
}


#' Translate counts matrix rownames
#'
#' \code{translateCounts} renames counts matrix rownames according to supplied
#' \code{dict}ionary. Function can handle many to one assignments by taking a
#' sum or an average over \code{counts} rows. Other types of ambiguous
#' assignments are not supported.
#'
#' @param counts matrix of expression values.
#' @param dict named character vector mapping \code{counts} rownames to new
#'   values. Values of vector should correspond to new desired rownames, and its
#'   names to current rownames.
#'
#' @return matrix of expression values with new rownames.
#'
#' @examples
#' counts <- matrix(
#' data = c(5, 4, 3, 2),
#' nrow = 2,
#'    dimnames = list(
#'      c("ENSG00000130700", "ENSG00000089225"),
#'      c("treatment", "control")
#'    )
#'  )
#' dict <- c(ENSG00000130700 = "GATA5", ENSG00000089225 = "TBX5")
#' translateCounts(counts, dict)
#'
#' @export
translateCounts <- function(counts, dict) {
  stopifnot("counts must be an instance of class 'matrix'" = is(counts, "matrix"))
  stopifnot("counts must have its rownames defined" = ! is.null(rownames(counts)))
  stopifnot("counts rownames must be unique" = ! anyDuplicated(rownames(counts)))
  stopifnot("dict must be an instance of class 'character'" = is(dict, "character"))
  stopifnot("dict must be a named character vector" = ! is.null(names(dict)))
  stopifnot("dict names must be unique" = ! anyDuplicated(names(dict)))
  stopifnot("counts rownames must correspond to dict names" = any(rownames(counts) %in% names(dict)))

  new_counts <- counts[rownames(counts) %in% names(dict), ]
  new_counts <- rowsum(x = new_counts, group = dict[rownames(new_counts)]) # TODO a simple and useful extension would be to implement option to take an average instead of sum, this could be easly done by dividing by the groups size

  return(new_counts)
}

bkaczkowski/xcore documentation built on Jan. 26, 2024, 6:24 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

bkaczkowski/xcore
xcore expression regulators inference

R/utils.R
In bkaczkowski/xcore: xcore expression regulators inference

Defines functions translateCounts maeSummary subsetWithMissing applyOverDFList isTRUEorFALSE design2factor mae mse rsq estimateStat applyOverColumnGroups simplifyInteractionMatrix getCoverage

Documented in applyOverColumnGroups applyOverDFList design2factor estimateStat getCoverage isTRUEorFALSE mae maeSummary mse rsq simplifyInteractionMatrix subsetWithMissing translateCounts

R Package Documentation

Browse R Packages

We want your feedback!

bkaczkowski/xcore xcore expression regulators inference

R/utils.R In bkaczkowski/xcore: xcore expression regulators inference

Defines functions translateCounts maeSummary subsetWithMissing applyOverDFList isTRUEorFALSE design2factor mae mse rsq estimateStat applyOverColumnGroups simplifyInteractionMatrix getCoverage

Documented in applyOverColumnGroups applyOverDFList design2factor estimateStat getCoverage isTRUEorFALSE mae maeSummary mse rsq simplifyInteractionMatrix subsetWithMissing translateCounts

R Package Documentation

Browse R Packages

We want your feedback!

bkaczkowski/xcore
xcore expression regulators inference

R/utils.R
In bkaczkowski/xcore: xcore expression regulators inference