R/getDegreeInfo.R

Defines functions getDegreeInfo

Documented in getDegreeInfo

#' \code{getDegreeInfo}
#'
#' Calculate the connectivity degree of every gene in the co-function network
#' and, optionally, assign each gene to a quantile-based degree bin. Called by
#' \code{PFDataLoader}, not intended to be directly invoked by user.
#'
#' The degree of a gene is the number of rows of \code{cfn_data} that share its
#' value in column \code{X1}. When \code{bin=TRUE}, bin edges are the
#' \code{n_bins + 1} evenly spaced quantiles of the degree distribution, with
#' duplicated edges collapsed. If that leaves fewer than \code{n_bins} bins, a
#' warning is raised and the reduced set of bins is used.
#'
#' @param cfn_data A tibble containing co-function network data generated by
#'   \code{loadCFNData}. Must contain the columns \code{X1} and \code{X3}.
#' @param bin Boolean on whether to bin the degrees for each gene. Default is
#'   \code{TRUE}.
#' @param n_bins Number of bins to separate data into if \code{bin=TRUE}.
#'   Default is \code{128}.
#' @return A tibble with one row per gene and the columns \code{gene} and
#'   \code{degree}, plus a factor column \code{bin} when \code{bin=TRUE}.
#'
#'
getDegreeInfo <- function(cfn_data, bin = TRUE, n_bins = 128) {
  degree_info <- cfn_data %>%
    group_by(X1) %>%
    summarize("degree" = length(X3))
  names(degree_info)[1] <- "gene"

  if (bin) {
    # Quantile binning: n_bins intervals are delimited by n_bins + 1 edges.
    probs <- seq(0, 1, length.out = n_bins + 1)
    breaks <- as.numeric(quantile(degree_info$degree, probs))

    # `cut` requires unique breaks, so duplicated quantiles (common when many
    # genes share the same degree) are collapsed into a single edge.
    breaks <- unique(breaks)

    # If every gene has the same degree only one edge survives, and `cut`
    # would interpret a scalar `breaks` as a *number of intervals*. Pad it so
    # that all genes land in one well-defined bin instead.
    if (length(breaks) == 1L && !is.na(breaks)) {
      breaks <- c(breaks - 1, breaks)
    }

    # The number of bins is one less than the number of edges. Warn when
    # collapsing duplicates left fewer bins than requested, but carry on with
    # the bins that remain.
    n_unique_bins <- length(breaks) - 1L
    if (n_unique_bins != n_bins) {
      warning(
        paste0("The number of unique bins (",
               n_unique_bins,
               ") is less than the desired number of bins (",
               n_bins,
               "). Consider using fewer bins, or removing extreme outliers.")
      )
    }

    # Intervals are right-closed, so without `include.lowest` the genes with
    # the minimum degree (equal to the first edge) would be assigned NA.
    degree_info$bin <- cut(degree_info$degree,
                           breaks = breaks,
                           include.lowest = TRUE)
  }

  as_tibble(degree_info)
}
princeew/PFFindR documentation built on Dec. 31, 2020, 2:06 a.m.