R/calculate_complexities.R
In hagis: Analysis of Plant Pathogen Pathotype Complexities, Distributions and Diversity

Documented in autoplot.hagis.complexities calculate_complexities

#' Calculate Distribution of Complexities by Sample
#'
#' @description Calculate the distribution of susceptibilities by sample id.
#' 
#' @inheritParams summarize_gene
#' @examplesIf interactive()
#' 
#' # Using the built-in data set, `P_sojae_survey`
#' data(P_sojae_survey)
#'
#' P_sojae_survey
#'
#' # calculate susceptibilities with a 60 % cutoff value
#' complexities <- calculate_complexities(x = P_sojae_survey,
#'                                        cutoff = 60,
#'                                        control = "susceptible",
#'                                        sample = "Isolate",
#'                                        gene = "Rps",
#'                                        perc_susc = "perc.susc")
#' complexities
#'
#' summary(complexities)
#'
#' @return An object of class `hagis.complexities`.
#'
#' An object of class `hagis.complexities` is a `list` containing the following
#'  components
#'   \describe{
#'     \item{grouped_complexities}{a [data.table::data.table()] object of
#'       grouped complexities}
#'     \item{individual_complexities}{a [data.table::data.table()] object of
#'       individual complexities}
#'   }
#'   
#' @autoglobal
#' @export calculate_complexities

calculate_complexities <- function(x,
                                   cutoff,
                                   control,
                                   sample,
                                   gene,
                                   perc_susc) {
  # check inputs and rename columns to work with this package
  x <- .check_inputs(
    .x = x,
    .cutoff = cutoff,
    .control = control,
    .sample = sample,
    .gene = gene,
    .perc_susc = perc_susc
  )
  
  # The susceptible control is removed from all samples in the data set so that
  #  it will not affect complexity calculations and a new data set is made that
  #  it does not contain susceptible controls.
  x <- subset(x, gene != control)

  # summarise the reactions, create susceptible.1 column, see
  # internal_functions.R
  x <- .binary_cutoff(.x = x, .cutoff = cutoff)

  # Individual isolate complexities as calculated by grouping by "sample" and
  # then summarising the number of "1"s for each "sample" in the "susceptible.1"
  # column
  individual_complexities <- .create_summary_isolate(.y = x)

  # Frequency for each complexity (%) ------------------------------------------
  # Percent frequency is calculated by taking the individual complexity of each
  # Isolate and grouping all Isolates by their complexity

  # create an object of the number of genes in the data
  n_gene <- length(unique(x[, gene]))

  # create an object of the number of samples in the data
  n_sample <- length(unique(x[, sample]))

  # create an empty list to populate with frequency values
  complexities <- vector(mode = "list", length = n_gene)
  names(complexities) <- seq_len(n_gene)

  for (i in seq_len(n_gene)) {
    complexities[[i]] <-
      length(which(individual_complexities[, N_samp == i]) / n_sample * 100)
  }

  grouped_complexities <-
    as.data.table(utils::stack(complexities))
  names(grouped_complexities) <- c("frequency", "N_samp")

  # distribution of complexity (counts)
  dist <- individual_complexities[, .N, by = N_samp]
  dist[, N_samp := as.factor(N_samp)]
  grouped_complexities[dist, on = "N_samp", distribution := i.N]

  # set NA to 0 for distribution
  grouped_complexities[is.na(distribution), distribution := 0]
  setcolorder(grouped_complexities,
                          neworder = c("N_samp", "frequency", "distribution"))
  setnames(grouped_complexities,
                       c("complexity", "frequency", "distribution"))
  complexities <-
    list(grouped_complexities, individual_complexities)
  names(complexities) <-
    c("grouped_complexities", "indvidual_complexities")

  # Set new class
  class(complexities) <- union("hagis.complexities", class(x))

  return(complexities)
}

#' Plot hagis Complexities Objects
#'
#' @description Creates a \CRANpkg{ggplot2} object of the frequency of
#'  complexity (percent per complexity) or a \CRANpkg{ggplot2} object of the
#'  distribution (number per complexity) calculated by
#'  [calculate_complexities()].
#' @param object a \CRANpkg{hagis} `complexities` object generated by
#'  [calculate_complexities()]. `Character`.
#' @param type a vector of values for which the bar plot is desired. Specify
#'  whether to return a graph of the frequency of complexities as a percentage,
#'  "`percentage`", or as the count, "`count`". `Character`.
#' @param color a named or hexadecimal color value to use for the bar color
#' @param order sort the x-axis of the bar chart by ascending or descending
#' order of `frequency`. Accepts `ascending` or `descending` input values.
#' Defaults to `complexity` value. `Character`.
#' @param ... passed to the chosen `geom(s)`
#'
#' @autoglobal
#' @examplesIf interactive()
#' # Using the built-in data set, `P_sojae_survey`
#' data(P_sojae_survey)
#'
#' # calculate susceptibilities with a 60 % cutoff value
#' complexities <- calculate_complexities(x = P_sojae_survey,
#'                                        cutoff = 60,
#'                                        control = "susceptible",
#'                                        sample = "Isolate",
#'                                        gene = "Rps",
#'                                        perc_susc = "perc.susc")
#'
#' # Visualize the distribution (count or actual values)
#' autoplot(complexities, type = "count")
#'
#' # Visualize the frequency (percentages)
#' autoplot(complexities, type = "percentage")
#'
#' @return A \CRANpkg{ggplot2} object
#' @method autoplot hagis.complexities
#' @export

autoplot.hagis.complexities <-
  function(object,
           type,
           color = NULL,
           order = NULL,
           ...) {

    # create a single data.frame to use in the ggplot call
    z <- object[[1]]

    # order cols based on user input
    if (!is.null(order)) {
      if (order == "ascending") {
        setorder(x = z,
                             cols = frequency)
        z$order <- seq_len(nrow(z))
      } else if (order == "descending") {
        setorder(x = z,
                             cols = -frequency)
        z$order <- seq_len(nrow(z))
      }
    } else
      # if no order is specified
      setorder(x = z, cols = complexity)
    z$order <- seq_len(nrow(z))

    plot_percentage <- function(.data, .color) {
      perc_plot <- ggplot2::ggplot(data = .data,
                                   ggplot2::aes(x = stats::reorder(complexity,
                                                                   order),
                                                y = frequency)) +
        ggplot2::labs(y = "Percent of samples",
                      x = "Complexity") +
        ggplot2::ggtitle("Percentage of isolates per complexity")

      if (!is.null(.color)) {
        perc_plot +
          ggplot2::geom_col(fill = .color,
                            colour = .color)
      } else {
        perc_plot +
          ggplot2::geom_col()
      }
    }

    plot_count <- function(.data, .color) {
      num_plot <- ggplot2::ggplot(data = .data,
                                  ggplot2::aes(x = stats::reorder(complexity,
                                                                  order),
                                               y = distribution)) +
        ggplot2::labs(y = "Number of samples",
                      x = "Complexity") +
        ggplot2::ggtitle("Number of samples per pathotype complexity")

      if (!is.null(.color)) {
        num_plot +
          ggplot2::geom_col(fill = .color,
                            colour = .color)
      } else {
        num_plot +
          ggplot2::geom_col()
      }
    }

    if (type == "percentage") {
      plot_percentage(.data = z, .color = color)
    } else if (type == "count") {
      plot_count(.data = z, .color = color)
    } else {
      stop(.call = FALSE,
           "You have entered an invalid `type`.")
    }
  }

#' Create Summary Table of Binary Reactions by Sample
#'
#' Tally a summary by sample or isolate. This code takes the "Susceptible.1"
#'  column and summarises it by gene for your total "Isolates" pathogenic on
#'  each sample.
#'
#' @param x A \CRANpkg{hagis} `complexities` object generated by
#'  [calculate_complexities()]. `Character`.
#' @return A `data.table` that tallies the results by sample.
#' @noRd
.create_summary_isolate <- function(.y) {
  .y <- .y[, list(N_samp = sum(susceptible.1)), by = list(sample)]
  return(.y)
}

#' Summarises {hagis} Complexity Objects
#'
#' Custom [summary()] method for \pkg{hagis} `complexities` objects.
#'
#' @param x A hagis class object (a list of two `data.table`s)
#' @param ... ignored
#' @noRd
#' @export
summary.hagis.complexities <- function(object, ...) {
  mn <- mean(object$indvidual_complexities$N_samp)
  sd <- sd(object$indvidual_complexities$N_samp)
  se <- sqrt(
    stats::var(object$indvidual_complexities$N_samp) /
      length(object$indvidual_complexities$N_samp)
  )

  x <- data.frame(mn, sd, se)
  names(x) <- c("mean", "sd", "se")
  class(x) <- "summary.complexities"
  x
}

#' Prints summary.hagis Object for Complexities
#'
#' Custom [print()] method for \CRANpkg{hagis} `summary.complexity` objects.
#'
#' @param x a summary.complexities object
#' @param ... ignored
#' @export
#' @noRd
print.summary.complexities <- function(x,
                                       digits = max(3L,
                                                    getOption("digits") - 3L),
                                       ...) {
  cat("\nMean of Complexities\n")
  cat(x$mean, "\n")
  cat("\nStandard Deviation of Complexities\n")
  cat(x$sd, "\n")
  cat("\nStandard Error of Complexities\n")
  cat(x$se)
  invisible(x)
}

#' Pander Method for hagis Summary Complexities
#'
#' Prints a \CRANpkg{hagis} complexities summary in Pandoc's markdown.
#' @param x a `complexities` object
#' @param caption caption (string) to be shown under the table
#' @param ... optional parameters passed to raw `pandoc.table` function
#' @importFrom pander pander
#' @export
#' @noRd
pander.summary.complexities <-
  function(x, caption = attr(x, "caption"), ...) {
    pander::pandoc.table(data.frame(
      "Mean" = x[[1]],
      "SD" = x[[2]],
      "SE" = x[[3]]
    ))
  }

#' Prints hagis.complexities Object
#'
#' Custom [print()] method for `hagis.complexity` objects.
#'
#' @param x a hagis.complexities object
#' @param ... ignored
#' @export
#' @noRd
print.hagis.complexities <- function(x,
                                     digits = max(3L, getOption("digits") - 3L),
                                     ...) {
  cat("\nGrouped Complexities\n")
  print(x[[1]])
  cat("\n")
  cat("\nIndividual Complexities\n")
  print(x[[2]])
  cat("\n")
  invisible(x)
}

Any scripts or data that you put into this service are public.

hagis documentation built on May 29, 2024, 12:01 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

hagis
Analysis of Plant Pathogen Pathotype Complexities, Distributions and Diversity

R/calculate_complexities.R
In hagis: Analysis of Plant Pathogen Pathotype Complexities, Distributions and Diversity

Defines functions print.hagis.complexities pander.summary.complexities print.summary.complexities summary.hagis.complexities .create_summary_isolate autoplot.hagis.complexities calculate_complexities

Documented in autoplot.hagis.complexities calculate_complexities

Try the hagis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hagis Analysis of Plant Pathogen Pathotype Complexities, Distributions and Diversity

R/calculate_complexities.R In hagis: Analysis of Plant Pathogen Pathotype Complexities, Distributions and Diversity

Defines functions print.hagis.complexities pander.summary.complexities print.summary.complexities summary.hagis.complexities .create_summary_isolate autoplot.hagis.complexities calculate_complexities

Documented in autoplot.hagis.complexities calculate_complexities

Try the hagis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

hagis
Analysis of Plant Pathogen Pathotype Complexities, Distributions and Diversity

R/calculate_complexities.R
In hagis: Analysis of Plant Pathogen Pathotype Complexities, Distributions and Diversity