R/cor_df.R

Defines functions cor_df

Documented in cor_df

#' Correlate Dataframe
#'
#' Computes the pairwise correlations between all numeric columns of a
#' dataframe and returns them in long format, with one row per unordered pair
#' of columns. Before correlating, non-numeric columns are deselected,
#' constant columns and empty rows/columns are removed, and every row that
#' still contains an `NA` in any remaining column is dropped.
#' The output of this function has utility in producing network graphs in
#' conjunction with the tidygraph and ggraph packages.
#'
#' @param df A dataframe.
#' @param method The method to supply to the cor() function - "pearson",
#'   "spearman" or "kendall".
#' @return A three-column dataframe (tibble) - `colA` and `colB` hold the two
#'   column names of each pair (ordered so that `colA` sorts before `colB`)
#'   and `corr` holds their correlation coefficient. Each pair appears once
#'   and self-correlations are omitted. The result is not grouped or rowwise.
#' @export

cor_df <- function(df, method = "pearson") {
  stopifnot(is.data.frame(df))

  # Bind the pipe locally so the function does not rely on magrittr being
  # attached by the caller.
  `%>%` <- magrittr::`%>%`

  df %>%
    dplyr::select(where(is.numeric)) %>%
    janitor::remove_constant() %>%
    # `which` is spelled out (it is the default) to stop janitor emitting a
    # "not specified" message on every call.
    janitor::remove_empty(which = c("rows", "cols")) %>%
    tidyr::drop_na() %>%
    stats::cor(method = method) %>%
    as.data.frame() %>%
    tibble::rownames_to_column(var = "col1") %>%
    tidyr::pivot_longer(-col1, names_to = "col2", values_to = "corr") %>%
    # Put each pair into a canonical order so (a, b) and (b, a) collapse to
    # the same key. pmin()/pmax() are vectorised, so no rowwise() is needed
    # and the result is a plain tibble.
    dplyr::mutate(
      colA = pmin(col1, col2),
      colB = pmax(col1, col2),
      .keep = "unused"
    ) %>%
    # De-duplicate on the pair only; comparing the floating-point `corr`
    # values as well would make the result depend on exact float equality.
    dplyr::distinct(colA, colB, .keep_all = TRUE) %>%
    dplyr::filter(colA != colB) %>%
    dplyr::relocate(corr, .after = colB)
}
jack-davison/jdavisonmisc documentation built on Jan. 1, 2021, 4:26 a.m.