R/S4-Correlation.R

Defines functions validTypesCorrelation validShapesCorrelation validMethodsCorrelation melt_rcorr initiateCorrelation distinct_corr_df corr_mtr_to_df adjust_corr_mtr

Documented in adjust_corr_mtr corr_mtr_to_df initiateCorrelation melt_rcorr validMethodsCorrelation validShapesCorrelation validTypesCorrelation

#' @include S4-AnalysisAspect.R
NULL




# S4-classes --------------------------------------------------------------

#' @title The \code{Correlation}-class
#'
#' @description S4-class for convenient correlation analysis.
#'
#' @slot data data.frame. The data on which the analysis bases on.
#' @slot key_name character. The name of the variable that is used to identify
#' each observation uniquely.
#' @slot meta data.frame. Data that was part of the input data but is not supposed
#' to be included in analysis steps.
#' @slot methods list. A list of objects of S4-class \code{ClusteringMethod}.
#' @slot variables_grouping character. The names of all grouping variables
#' of the input data - variables of class character or factor. (Does not include
#' variable of slot @@key_name)
#' @slot variables_logical character. The names of all logical variables of
#' the input data.
#' @slot variables_numeric character. The names of all numeric variables
#' based on which outlier detection is conducted.

Correlation <- setClass(Class = "Correlation",
                        slots = list(),
                        contains = "AnalysisAspect"
                        )


CorrelationMethod <- setClass(Class = "CorrelationMethod",
                              slots = list(
                                key_name = "character",
                                method = "character",
                                results = "list",
                                results_across = "list"
                              ))


CorrelationPearson <- setClass(Class = "CorrelationPearson",
                               slots = list(),
                               contains = "CorrelationMethod"
                               )

CorrelationSpearman <- setClass(Class = "CorrelationSpearman",
                                slots = list(),
                                contains = "CorrelationMethod"
                                )


# -----



# r-objects ---------------------------------------------------------------

valid_methods_corr <- c("pearson", "spearman")

valid_types_corr <- c("lower", "upper", "complete")

valid_shapes_corr <- c("circle", "rect", "tile")

# -----


# functions ---------------------------------------------------------------

#' @title Adjust correlation matrix
#'
#' @description
#'
#' @param mtr Correlation or p-value matrix.
#' @inherit corr_dummy params
#'
#' @return Adjusted input matrix.
#' @export
#'

adjust_corr_mtr <- function(mtr, type = "complete", diagonal = TRUE){

  if(type == "lower"){

    mtr[base::upper.tri(mtr, diag = !diagonal)] <- NA

  } else if(type == "upper"){

    mtr[base::lower.tri(mtr, diag = !diagonal)] <- NA

  }

  return(mtr)

}

#' @title Convert correlation matrix to data.frame
#'
#' @description Converts a correlation matrix to a tidy data.frame in
#' which observations are correlation pairs.
#'
#' @param mtr A correlation matrix.
#' @inherit corr_dummy params
#' @export
corr_mtr_to_df <- function(mtr, type = "complete", diagonal = TRUE, distinct = FALSE){

  mtr <- adjust_corr_mtr(mtr = mtr, type = type, diagonal = diagonal)

  corr_df <-
    reshape2::melt(data = mtr, varnames = c("var1", "var2"), value.name = "corr") %>%
    tibble::as_tibble()

  if(base::isTRUE(distinct)){

    corr_df <- distinct_corr_df(corr_df)

  }

  return(corr_df)

}

#' @export
distinct_corr_df <- function(corr_df){

  if(base::names(corr_df)[1] != "var1"){

    stop("Input data.frame must not contain data across groups.")

  }

  vars <- base::unique(corr_df$var1) %>% base::as.character()

  comb_df <-
    utils::combn(x = vars, m = 2) %>%
    base:::t() %>%
    base::as.data.frame() %>%
    magrittr::set_colnames(value = c("var1", "var2"))

  out <-
    dplyr::left_join(x = comb_df, y = corr_df, by = c("var1", "var2")) %>%
    tibble::as_tibble()

  return(out)

}

#' @rdname initiateAnalysisAspect
#' @export
initiateCorrelation <- function(data,
                                key_name,
                                key_prefix = "ID",
                                lgl_to_group = TRUE,
                                meta_names = character(0),
                                verbose = TRUE){

  object <-
    initiateAnalysisAspect(
      data = data,
      key_name = key_name,
      key_prefix = key_prefix,
      meta_names = meta_names,
      lgl_to_group = lgl_to_group,
      verbose = verbose,
      analysis_aspect = "Correlation"
    )

  return(object)

}


#' @title Melt \code{rcorr}
#'
#' @description Melts object of class \code{rcorr} to a data.frame.
#'
#' @param rcorr_obj An object of class \code{rcorr}.
#' @inherit corr_dummy params
#'
#' @return A data.frame with the following columns:
#'
#' \itemize{
#'  \item{\emph{var1}:}{ Factor. First variable of the correlated variable pair.}
#'  \item{\emph{var2}:}{ Factor. Second variable of the correlated variable pair.},
#'  \item{\emph{corr}:}{ Numeric. The correlation vaule.},
#'  \item{\emph{pval}:}{ Numeric. The corresponding p-value.},
#'  }
#'
#' @export
#'
melt_rcorr <- function(rcorr_obj, type = "complete", diagonal = TRUE, distinct = FALSE){

  pval_df <-
    adjust_corr_mtr(mtr = base::as.matrix(rcorr_obj$P), type = type, diagonal = diagonal) %>%
    reshape2::melt(
      data = .,
      varnames = c("var1", "var2"),
      value.name = "pval"
    )

  corr_df <-
    adjust_corr_mtr(mtr = base::as.matrix(rcorr_obj$r), type = type, diagonal = diagonal) %>%
    reshape2::melt(
      varnames = c("var1", "var2"),
      value.name = "corr"
    ) %>%
    tibble::as_tibble() %>%
    dplyr::left_join(x = ., y= pval_df, by = c("var1", "var2"))

  if(base::isTRUE(distinct)){

    corr_df <- distinct_corr_df(corr_df)

  }

  return(corr_df)

}

#' @export
rcorr_to_df <- melt_rcorr


#' @rdname validInput
#' @export
validMethodsCorrelation <- function(){

  return(valid_methods_corr)

}

#' @rdname validInput
#' @export
validShapesCorrelation <- function(){

  return(valid_shapes_corr)

}

#' @rdname validInput
#' @export
validTypesCorrelation <- function(){

  return(valid_types_corr)

}

# -----




# methods for external generics -------------------------------------------

methods::setOldClass(Classes = "corr_df")

#' @rdname computeCorrelation
setMethod(
  f = "computeCorrelation",
  signature = "Correlation",
  definition = function(object,
                        across = NULL,
                        methods_corr = "pearson",
                        verbose = TRUE,
                        ...
                        ){

    check_one_of(
      input = methods_corr,
      against = validMethodsCorrelation()
    )

    n_vars <- base::length(object@variables_numeric)

    is_vec(x = across, mode = "character", skip.allow = TRUE, skip.val = NULL)

    for(method_corr in methods_corr){

      corr_obj <- object@methods[[method_corr]]

      if(base::is.null(corr_obj)){

        class_name <-
          stringr::str_c("Correlation",  make_capital_letters(method_corr))

        give_feedback(
          msg = glue::glue("Creating new object of class {class_name}."),
          verbose = verbose
          )

        corr_obj <-
          methods::new(
            Class = class_name,
            key_name = object@key_name,
            method = method_corr
          )

      }

      give_feedback(
        msg = glue::glue("Correlating {n_vars} variables according to method '{method_corr}'."),
        verbose = verbose
        )

      if(base::is.null(across)){

        mtr <- getMtr(object)

        corr_obj@results <-
          Hmisc::rcorr(x = mtr, type = method_corr, ...) %>%
          magrittr::set_attr(which = "class", value = "list")

      } else {

        all_across <- base::unique(across)

        base::rm(across)

        for(across in all_across){

          print(across)

          check_one_of(
            input = across,
            against = object@variables_grouping
          )

          give_feedback(msg = glue::glue("Correlating {n_vars} variables across '{across}'."), verbose = verbose)

          df <- getDf(object, numeric = TRUE, grouping = TRUE)

          groups <- unique_safely(df[[across]])

          corr_obj@results_across[[across]] <-
            purrr::map(.x = groups, .f = function(group){

              mtr <-
                dplyr::filter(df, !!rlang::sym(across) == {{group}}) %>%
                tibble::column_to_rownames(var = object@key_name) %>%
                dplyr::select_if(.predicate = base::is.numeric) %>%
                base::as.matrix()

              out <-
                tryCatch({

                  res <-
                    Hmisc::rcorr(x = mtr, type = method_corr) %>%
                    magrittr::set_attr(which = "class", value = "list")

                  res

                },
                error = function(error){

                  give_feedback(
                    msg = glue::glue(
                      "The following error occured in group '{group}': ",
                      "'{error$message}.'"
                    ),
                    verbose = TRUE
                  )

                })

              return(out)

            }) %>%
            purrr::set_names(nm = groups)

        }

      }

      object@methods[[method_corr]] <- corr_obj

    }

    give_feedback(msg = "Done.", verbose = verbose)

    return(object)

  })


#' @rdname getCorrDf
#' @export
setMethod(
  f = "getCorrDf",
  signature = "Correlation",
  definition = function(object,
                        method_corr = "pearson",
                        across = NULL,
                        across_subset = NULL,
                        pval_threshold = 0.05,
                        type = "complete",
                        diagonal = TRUE,
                        distinct = FALSE,
                        digits = 2,
                        verbose = TRUE,
                        sep = " & ",
                        ...){

    is_value(sep, mode = "character")
    is_value(digits, mode = "numeric")

    check_one_of(
      input = type,
      against = validTypesCorrelation()
    )

    if(base::is.null(across)){

      corr_res <- getRcorr(object, method_corr = method_corr)

      corr_df <- melt_rcorr(corr_res, type = type, diagonal = diagonal, distinct = distinct)

    } else {

      corr_df <-
        getRcorr(
          object = object,
          method_corr = method_corr,
          across = across,
          across_subset = across_subset
        ) %>%
        purrr::imap_dfr(
          .x = .,
          .f = function(corr_res, group){

            if(base::is.character(corr_res)){

              msg <-
                glue::glue(
                  "No correlation results for group '{group}' due to error: ",
                  "{corr_res}"
                )

              give_feedback(msg = msg, verbose = verbose, with.time = FALSE)

              out <- NULL

            } else {

              out <-
                melt_rcorr(rcorr_obj = corr_res, type = type, diagonal = diagonal, distinct = distinct) %>%
                dplyr::mutate({{across}} := {{group}})

            }

            return(out)

          }
        ) %>%
        dplyr::mutate({{across}} := base::as.factor(x = !!rlang::sym(across)))

    }

    corr_df <-
      dplyr::mutate(
        .data = corr_df,
        var1 = base::as.factor(var1),
        var2 = base::as.factor(var2),
        var_pair = stringr::str_c(var1, var2, sep = sep) %>% base::as.factor(),
        corr = base::round(corr, digits = digits),
        pval_threshold = {{pval_threshold}},
        signif = pval < pval_threshold,
        signif = tidyr::replace_na(signif, replace = TRUE),
        method_corr = {{method_corr}}
      ) %>%
      dplyr::select(
        dplyr::any_of(across), var1, var2, var_pair, corr, pval, pval_threshold, signif, method_corr
      )

    base::class(corr_df) <-
      c("corr_df", base::class(corr_df))

    return(corr_df)

  }
)

#' @inherit corr_dummy params
#' @rdname getCorrMtr
#' @export
setMethod(
  f = "getCorrMtr",
  signature = "Correlation",
  definition = function(object,
                        method_corr = "pearson",
                        across = NULL,
                        across_subset = NULL,
                        type = "complete",
                        diagonal = TRUE,
                        flatten = TRUE){

    if(base::is.null(across)){

      out <-
        getRcorr(object = object, method_corr = method_corr, as_list = TRUE)[["r"]] %>%
        adjust_corr_mtr(type = type, diagonal = diagonal)

    } else {

      out <-
        getRcorr(
          object = object,
          method_corr = method_corr,
          across = across,
          across_subset = across_subset,
          as_list = TRUE
        ) %>%
        purrr::map(
          .f = ~ adjust_corr_mtr(mtr = .x[["r"]], type = type, diagonal = diagonal)
          )

      if(base::length(out) == 1 & base::isTRUE(flatten)){

        out <- out[[1]]

      }

    }

    return(out)

  }
)

#' @rdname getRcorr
#' @export
setMethod(
  f = "getRcorr",
  signature = "Correlation",
  definition = function(object,
                        method_corr = "pearson",
                        across = NULL,
                        across_subset = NULL,
                        as_list = FALSE,
                        flatten = TRUE){

    check_one_of(
      input = method_corr,
      against = validMethodsCorrelation()
    )

    corr_obj <- getResults(object, method = method_corr)

    out <-
      getRcorr(
        object = corr_obj,
        method_corr = method_corr,
        across = across,
        across_subset = across_subset,
        as_list = as_list
      )

  }
)


#' @rdname getRcorr
#' @export
setMethod(
  f = "getRcorr",
  signature = "CorrelationMethod",
  definition = function(object,
                        method_corr = "pearson",
                        across = NULL,
                        across_subset = NULL,
                        as_list = FALSE,
                        stop_if_null = TRUE){

    check_one_of(
      input = method_corr,
      against = validMethodsCorrelation()
    )

    if(base::is.null(across)){

      out <- object@results

      if(base::is.null(out) & base::isTRUE(stop_if_null)){

        stop(
          glue::glue(
            "No rcorr object found for method '{method_corr}'."
          )
        )

      }

      if(base::isFALSE(as_list)){

        out <- magrittr::set_attr(x = out, which = "class", value = "rcorr")

      }


    } else {

      out <- object@results_across[[across]]

      if(base::is.null(out)){

        stop(
          glue::glue(
            "No results found for method '{method_corr}' across '{across}'."
          )
        )

      }

      if(base::is.null(across_subset)){

        across_subset <- base::names(out)

      } else {

        check_one_of(
          input = across_subset,
          against = base::names(out)
        )

      }

      if(base::isFALSE(as_list)){

        out <-
          purrr::map(
            .x = out[across_subset],
            .f = ~ magrittr::set_attr(.x, which = "class", value = "rcorr")
          )

      }


      if(base::length(out) == 1 & base::isTRUE(flatten)){

        out <- out[[1]]

      }

    }

    return(out)

  }
)

#' @rdname plotCorrplot
#' @export
setMethod(
  f = "plotCorrplot",
  signature = "Correlation",
  definition = function(object,
                        method_corr = "pearson",
                        across = NULL,
                        across_subset = NULL,
                        variables_subset = NULL,
                        relevel = FALSE,
                        pval_threshold = NULL,
                        type = "lower",
                        diagonal = TRUE,
                        color_low = "darkred",
                        color_high = "steelblue",
                        color_limits = c(-1,1),
                        shape = "tile",
                        size_by_corr = TRUE,
                        size_max = 15,
                        size_limits = c(-1, 1),
                        display_values = TRUE,
                        values_alpha = 0.9,
                        values_color = "black",
                        values_digits = 2,
                        values_size = 4,
                        display_grid = TRUE,
                        grid_color = "grey",
                        grid_size = 0.5,
                        nrow = NULL,
                        ncol = NULL,
                        verbose = TRUE){


    check_one_of(
      input = shape,
      against = validShapesCorrelation()
    )

    check_one_of(
      input = type,
      against = validTypesCorrelation()
    )

    # allows option to relevel both axes
    if(base::length(relevel) == 1){

      relevel <- c(relevel, relevel)

    }

    corr_df <-
      getCorrDf(
        object = object,
        method_corr = method_corr,
        across = across,
        type = type,
        diagonal = diagonal,
        digits = values_digits,
        pval_threshold = base::ifelse(test = base::is.null(pval_threshold), 0.5, pval_threshold),
        verbose = verbose
      ) %>%
      check_across_subset(
        across = across,
        across.subset = across_subset,
        relevel = relevel[1]
      )

    if(!base::is.null(variables_subset)){

      if(is_list(variables_subset)){

        corr_df <-
          check_across_subset(
            df = corr_df,
            across = "var1",
            across.subset = variables_subset[["x"]],
            relevel = relevel[1],
          ) %>%
          check_across_subset(
            df = .,
            across = "var2",
            across.subset = variables_subset[["y"]],
            relevel = relevel[2]
          )

      } else {

        corr_df <-
          check_across_subset(
            df = corr_df,
            across = "var1",
            across.subset = variables_subset,
            relevel = relevel[1],
          ) %>%
          check_across_subset(
            df = .,
            across = "var2",
            across.subset = variables_subset,
            relevel = relevel[2]
          )

      }



    }

    # baseline plot
    p <-
      ggplot2::ggplot(data = corr_df, mapping = ggplot2::aes(x = var1, y = var2)) +
      ggplot2::scale_color_gradient2(
        midpoint = 0, low = color_low, high = color_high,
        na.value = "white", limits = color_limits
        ) +
      ggplot2::scale_size_area(max_size = size_max, limits = size_limits) +
      ggplot2::theme_void() +
      ggplot2::theme(
        axis.text = ggplot2::element_text(),
        axis.text.x = ggplot2::element_text(angle = 90)
      ) +
      ggplot2::guides(size = FALSE) +
      ggplot2::coord_fixed()

    # add facets
    if(base::is.character(across)){

      facet_by <- rlang::sym(across)

      facet_add_on <- ggplot2::facet_wrap(facets = facet_by, nrow = nrow, ncol = ncol)

    } else {

      facet_add_on <- NULL

    }

    if(base::is.numeric(pval_threshold)){

      insignif_df <- dplyr::filter(corr_df, base::isFALSE(signif))

      insignif_add_on <-
        ggplot2::geom_point(
          data = insignif_df,
          size = size_max,
          color = "black"
        )

    } else {

      insignif_add_on <- NULL

    }

    # create aes mapping
    if(base::isTRUE(size_by_corr)){

      mapping <- ggplot2::aes(x = var1, y = var2, color = corr, size = corr_abs)

    } else {

      mapping <- ggplot2::aes(x = var1, y = var2, color = corr)

    }

    # add geometry

    # turns negative correlation values into positives
    # for display by size
    shape_df <-
      dplyr::filter(corr_df, !base::is.na(corr)) %>%
      dplyr::mutate(corr_abs = base::abs(corr))

    if(shape == "circle"){

      shape_num <- 19

      if(base::isTRUE(size_by_corr)){

        geom_add_on <- ggplot2::geom_point(data = shape_df, shape = shape_num, mapping = mapping)

      } else {

        geom_add_on <- ggplot2::geom_point(data = shape_df, shape = shape_num, mapping = mapping, size = size_max)

      }

    } else if(shape == "rect"){

      shape_num <- 15

      if(base::isTRUE(size_by_corr)){

        geom_add_on <- ggplot2::geom_point(data = shape_df, shape = shape_num, mapping = mapping)

      } else {

        geom_add_on <- ggplot2::geom_point(data = shape_df, shape = shape_num, mapping = mapping, size = size_max)

      }

    } else if(shape == "tile"){

      geom_add_on <-
        list(
          ggplot2::geom_tile(mapping = ggplot2::aes(x = var1, y = var2, fill = corr)),
          ggplot2::scale_fill_gradient2(
            midpoint = 0, limits = color_limits,
            low = color_low, high = color_high, na.value = "white")
        )

    }

    # add grid
    if(base::isTRUE(display_grid)){

      grid_add_on <-
        ggplot2::geom_tile(
          fill = NA, color = grid_color, size = grid_size,
          data = shape_df
        )

    } else {

      grid_add_on <- NULL

    }

    # add values
    if(base::isTRUE(display_values)){

      value_df <- dplyr::filter(corr_df, !base::is.na(corr))

      if(base::is.numeric(pval_threshold)){

        value_df <- dplyr::filter(value_df, signif)

      }

      values_add_on <-
        list(
          ggplot2::geom_text(
            mapping = ggplot2::aes(label = corr),
            alpha = values_alpha,
            color = values_color,
            size = values_size,
            data = value_df),
          ggplot2::guides(fill = FALSE)
        )

    } else {

      values_add_on <- NULL

    }

    p +
      facet_add_on +
      grid_add_on +
      geom_add_on +
      values_add_on +
      insignif_add_on


  }
)
kueckelj/confuns documentation built on June 28, 2024, 9:19 a.m.