R/get_sig_feature_association.R
In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

Documented in get_sig_feature_association

#' Calculate Association between Signature Exposures and Other Features
#'
#' Association of signature exposures with other features will be performed using one of two procedures:
#' for a continuous association variable (including ordinal variable), correaltion is performed;
#' for a binary association variable, samples will be divided into two groups and Mann-Whitney U-test
#' is performed to test for differences in signature exposure medians between the two groups.
#' See [get_tidy_association] for cleaning association result.
#'
#' @param data a `data.frame` contains signature exposures and other features
#' @param cols_to_sigs colnames for signature exposure
#' @param cols_to_features colnames for other features
#' @param type a character vector containing 'ca' for categorical variable and 'co' for continuous variable,
#' it must have the same length as `cols_to_features`.
#' @param method_co method for continuous variable, default is "spearman", could also be "pearson" and "kendall".
#' @param method_ca method for categorical variable, default is "wilcox.test"
#' @param min_n a minimal fraction (e.g. 0.01) or a integer number (e.g. 10) for filtering some variables with few positive events.
#' Default is 0.01.
#' @param verbose if `TRUE`, print extra message.
#' @param ... other arguments passing to test functions, like `cor.test`.
#'
#' @return a `list`. For 'co' features, 'measure' means correlation coefficient.
#' For 'ca' features, 'measure' means difference in means of signature exposure.
#' @export
#' @seealso [get_tidy_association]
get_sig_feature_association <- function(data, cols_to_sigs, cols_to_features, type = "ca",
                                        method_co = c("spearman", "pearson", "kendall"), method_ca = stats::wilcox.test,
                                        min_n = 0.01, verbose = FALSE, ...) {
  if (!all(type %in% c("ca", "co"))) {
    stop("all elements in 'type' must be 'ca' for 'categorical' variable and 'co' for 'continuous' variable.")
  }
  if (!is.data.frame(data)) {
    stop("'data' must be a data.frame object.")
  }
  method_co <- match.arg(method_co)
  method_ca <- match.fun(method_ca)
  if (!dplyr::is.tbl(data)) {
    data <- dplyr::as_tibble(data)
  }
  data <- data[, c(cols_to_sigs, cols_to_features)]
  cols_co <- cols_to_features[type == "co"]
  cols_ca <- cols_to_features[type == "ca"]
  if (verbose) {
    message("-> Detecting and transforming possibly ordinal variables...")
  }
  data <- data %>% dplyr::mutate_if(is.ordered, as.numeric)
  data_co <- data[, c(cols_to_sigs, cols_co)]
  data_ca <- data[, c(cols_to_sigs, cols_ca)]
  pairwise_count <- function(x, y = NULL, diagonal = TRUE) {
    if (is.null(y)) {
      n <- t(!is.na(x)) %*% (!is.na(x))
    }
    else {
      n <- t(!is.na(x)) %*% (!is.na(y))
    }
    if (!diagonal & is.null(y)) {
      diag(n) <- NA
    }
    return(n)
  }
  matrix_cortest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat <- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
      for (j in (i + 1):n) {
        tryCatch(
          {
            suppressWarnings(tmp <- cor.test(x = mat[
              ,
              i
            ], y = mat[, j], ...))
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
          },
          error = function(e) {
            p.mat[i, j] <- NA
          }
        )
      }
    }
    rownames(p.mat) <- colnames(mat)
    colnames(p.mat) <- colnames(mat)
    p.mat
  }
  cor.ca <- function(df, cols_ca, cols_to_sigs, .fun = stats::t.test,
                     min_n = 0.01, verbose = FALSE, ...) {
    nest_df <- lapply(cols_ca, function(x) {
      df[, c(cols_to_sigs, x)] %>%
        tidyr::gather_(
          "signature",
          "exposure", cols_to_sigs
        ) %>%
        dplyr::group_by(signature) %>%
        tidyr::nest()
    })
    names(nest_df) <- cols_ca
    test_fun <- function(df, .fun = stats::t.test, min_n = 0.05,
                         ...) {
      var_name <- setdiff(colnames(df), "exposure")
      df <- df[!is.na(df[["exposure"]]) & !is.na(df[[var_name]]), ]
      var_tb <- table(df[[var_name]])
      if (min_n < 1) {
        if (any(prop.table(var_tb) < min_n)) {
          return(NA)
        }
      }
      if (min_n > 1) {
        if (any(var_tb < min_n)) {
          return(NA)
        }
      }
      var_name2 <- ifelse(isValidAndUnreserved(var_name),
        var_name, paste0("`", var_name, "`")
      )
      f <- as.formula(paste("exposure", "~", var_name2))
      mod <- .fun(f, data = df, ...)
      mod$counts <- sum(var_tb, na.rm = TRUE)

      mod$measure <- df %>%
        split(., .[[var_name]]) %>%
        purrr::map_dbl(., ~ mean(.$exposure, na.rm = TRUE))

      # if (grepl("wilcox", body(.fun)[[2]])) {
      #   mod$measure <- df %>% split(., .[[var_name]]) %>%
      #     purrr::map_dbl(., ~median(.$exposure, na.rm = TRUE))
      # }
      # else if (grepl("t.test", body(.fun)[[2]])) {
      #   mod$measure <- df %>% split(., .[[var_name]]) %>%
      #     purrr::map_dbl(., ~mean(.$exposure, na.rm = TRUE))
      # }
      return(mod)
    }
    if (verbose) {
      message("--> building test models, be patient...")
    }
    tt <- lapply(nest_df, function(x, .fun, min_n, ...) {
      x %>%
        dplyr::ungroup() %>%
        dplyr::transmute(signature,
          model = purrr::map(data, test_fun,
            .fun = .fun,
            min_n = min_n
          )
        )
    }, .fun = .fun, min_n = min_n, ...)
    if (verbose) {
      message("--> done")
    }
    if (verbose) {
      message("--> obtaining model results...")
    }
    res <- lapply(tt, function(x) {
      x %>% dplyr::mutate(measure = purrr::map_dbl(
        .data$model,
        ~ ifelse(all(is.na(.)), NA, ifelse(is.null(.$measure),
          NA, diff(.$measure)
        ))
      ), count = purrr::map_int(
        .data$model,
        ~ ifelse(all(is.na(.)), NA, .$counts)
      ), p = purrr::map_dbl(
        .data$model,
        ~ ifelse(all(is.na(.)), NA, .$p.value)
      ))
    })
    if (verbose) {
      message("--> done")
    }
    if (verbose) {
      message("--> collecting data...")
    }
    res <- dplyr::bind_rows(res, .id = "features")
    res_list <- list()
    res_list$measure <- res[, c(
      "signature", "features",
      "measure"
    )] %>%
      tidyr::spread_(
        key_col = "features",
        value_col = "measure"
      ) %>%
      dplyr::arrange(match(
        signature,
        cols_to_sigs
      )) %>%
      dplyr::select(c("signature", cols_ca)) %>%
      tibble::column_to_rownames(var = "signature") %>%
      as.matrix() %>%
      signif(digits = 3)
    res_list$count <- res[, c("signature", "features", "count")] %>%
      tidyr::spread_(key_col = "features", value_col = "count") %>%
      dplyr::arrange(match(signature, cols_to_sigs)) %>%
      dplyr::select(c("signature", cols_ca)) %>%
      tibble::column_to_rownames(var = "signature") %>%
      as.matrix()
    res_list$p <- res[, c("signature", "features", "p")] %>%
      tidyr::spread_(key_col = "features", value_col = "p") %>%
      dplyr::arrange(match(signature, cols_to_sigs)) %>%
      dplyr::select(c("signature", cols_ca)) %>%
      tibble::column_to_rownames(var = "signature") %>%
      as.matrix() %>%
      signif(digits = 3)
    res_list$data <- res
    if (verbose) {
      message("--> done")
    }
    res_list
  }


  corr_co <- list()
  corr_ca <- list()


  if (length(cols_co) > 0) {
    if (verbose) {
      message(
        "-> Calculating correlations for continuous variables using ",
        method_co, "..."
      )
    }
    if (!all(sapply(data_co, is.numeric))) {
      stop("All continuous variables must be numeric (ordinal variable should be factor type)")
    }

    corr_co$measure <- cor(data_co,
      use = "pairwise.complete.obs",
      method = method_co
    )[cols_to_sigs, c(cols_to_sigs, cols_co)] %>%
      signif(digits = 3)
    corr_co$count <- pairwise_count(data_co)[cols_to_sigs, c(
      cols_to_sigs,
      cols_co
    )]
    corr_co$p <- matrix_cortest(data_co, method = method_co)[
      cols_to_sigs,
      c(cols_to_sigs, cols_co)
    ] %>% signif(digits = 3)
  }


  if (length(cols_ca) > 0) {
    if (verbose) {
      message(
        "-> Calculating correlations for categorical variables using ",
        as.list(method_ca)[[3]] %>% as.character() %>% .[[2]],
        "..."
      )
    }
    if (!all(sapply(data_ca[, cols_ca], is.logical))) {
      stop("All categorical variables must be logical, TRUE for positive events")
    }
    corr_ca <- cor.ca(data_ca, cols_ca, cols_to_sigs,
      .fun = method_ca,
      min_n = min_n, verbose = verbose, ...
    )
  }

  message("-> Done")
  list(corr_co = corr_co, corr_ca = corr_ca)
}

Any scripts or data that you put into this service are public.

sigminer documentation built on May 29, 2024, 3:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

sigminer
Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

R/get_sig_feature_association.R
In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

Defines functions get_sig_feature_association

Documented in get_sig_feature_association

Try the sigminer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sigminer Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

R/get_sig_feature_association.R In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

Defines functions get_sig_feature_association

Documented in get_sig_feature_association

Try the sigminer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sigminer
Extract, Analyze and Visualize Mutational Signatures for Genomic Variations

R/get_sig_feature_association.R
In sigminer: Extract, Analyze and Visualize Mutational Signatures for Genomic Variations