R/6-propdViz.R
In propr: Calculating Proportionality Between Vectors of Compositional Data

Documented in bowtie decomposed gemini geyser pals parallel propd2propr shale

#' Calculate PALs for Pairs
#'
#' This function finds the Popular Adjacent Ligand or Self (PALs)
#'  for each feature in the \code{@@results} slot of a \code{propd}
#'  object. Specifically, we define PALs as the adjacent node with
#'  the highest amount of connectivity. If node itself has more
#'  connectivity than any of its neighbors, it is its own PAL.
#'
#' @inheritParams all
#' @return A named vector of PALs, ordered by decreasing
#'  connectivity of the input nodes. The names of the result
#'  refer to the input nodes themselves.
pals <- function(object, k){

  # Get entire graph object
  g <- suppressMessages(plot(object, cutoff = 1, prompt = FALSE, plotSkip = TRUE))

  # Get graph parameters
  d <- sort(igraph::degree(g), decreasing = TRUE)
  el <- igraph::get.edgelist(g)

  pal <- vector("integer", length(d))
  names(pal) <- names(d)
  for(i in 1:length(d)){

    # Extract all partners for the i-th object
    node.i <- names(d)[i]
    el.i <- el[node.i == el[, 1] | node.i == el[, 2], , drop = FALSE]
    z <- union(el.i[, 1], el.i[, 2])

    # Index most popular partner
    pal[i] <- names(d)[names(d) %in% z][1]
  }

  if(!missing(k)){

    # Only keep the k largest PALs
    keep <- names(sort(table(pal), decreasing = TRUE))[1:k]
    pal[!pal %in% keep] <- "Missing"
  }

  return(pal)
}

#' @rdname visualize
#' @section \code{propd} Functions:
#' \code{propd2propr:}
#'  Transforms a \code{propd} object into a \code{propr} object
#'  where the \code{@@matrix} slot contains \eqn{1 - \theta}.
#'  Allows the user to interrogate theta using any
#'  visualization built for \code{propr} objects.
#' @rdname propd
#' @export
propd2propr <- function(object, ivar){

  prop <- new("propr")
  prop@matrix <- 1 - half2mat(object@results$theta)
  return(prop)
}

#' @rdname visualize
#' @section \code{propd} Functions:
#' \code{plot:}
#'  Plots the interactions between pairs as a network.
#'  When plotting disjointed proportionality, red edges
#'  indicate that LRM1 > LRM2 while blue edges indicate
#'  that LRM1 < LRM2. When plotting emergent proportionality,
#'  red edges indicate that VLR1 < VLR2 while blue edges
#'  indicate that VLR1 > VLR2. Group labels numbered based on
#'  the order of the \code{group} argument to \code{propd}.
#'  Use \code{col1} and \code{col2} arguments to color nodes.
#'  For more control over the visualization of the network,
#'  consider exporting the table from \code{shale} to a
#'  network visualization tool like Cytoscape.
#' @export
setMethod("plot", signature(x = "propd", y = "missing"),
          function(x, y, cutoff = 1000, col1, col2, propr, prompt = TRUE, d3 = FALSE, plotSkip = FALSE){

            if(length(unique(x@group)) > 2){
              stop("This method only supports the analysis of 2 groups.")
            }

            names <- colnames(x@counts)
            group <- unique(x@group)

            # Apply cutoff of [0, 1] or a large integer for top theta
            if(cutoff > nrow(x@results)) cutoff <- nrow(x@results)
            if(cutoff > 1) cutoff <- sort(x@results$theta)[cutoff]
            type <- x@active
            x <- x@results[x@results$theta <= cutoff, ]
            if(nrow(x) == 0) stop("No theta remain after cutoff.")
            if(prompt) promptCheck(nrow(x))

            partners <- names[x$Partner]
            pairs <- names[x$Pair]

            # Color edges based on direction of log-fold change in VLR
            g <- igraph::make_empty_graph(directed = FALSE)
            g <- migraph.add(g, partners, pairs)

            if(!missing(propr)){

              ff <- tempfile()
              grDevices::png(filename = ff)
              cyt <- suppressMessages(propr::cytescape(propr, prompt = FALSE))
              grDevices::dev.off()
              unlink(ff)

              g <- migraph.add(g, cyt$Partner, cyt$Pair)
              g <- migraph.color(g, cyt$Partner, cyt$Pair, "forestgreen")
              message("Green: Pair positively proportional across all samples.")

              invProp <- cyt[, 3] < 0
              if(any(invProp)){

                g <- migraph.color(g, cyt$Partner[invProp], cyt$Pair[invProp], "burlywood4")
                message("Brown: Pair inversely proportional across all samples.")
              }
            }

            if(type == "theta_e" | type == "theta_h"){

              g <- migraph.color(g, partners[x$lrv1 < x$lrv2], pairs[x$lrv1 < x$lrv2], "coral1") # red
              g <- migraph.color(g, partners[x$lrv1 > x$lrv2], pairs[x$lrv1 > x$lrv2], "lightseagreen") # blue
              message("Red: Nearly all of total LRV explained by ", group[2])
              message("Blue: Nearly all of total LRV explained by ", group[1])

            }else{

              g <- migraph.color(g, partners[x$lrm1 > x$lrm2], pairs[x$lrm1 > x$lrm2], "coral1") # red
              g <- migraph.color(g, partners[x$lrm1 < x$lrm2], pairs[x$lrm1 < x$lrm2], "lightseagreen") # blue
              message("Red: Pair has higher LRM in group ", group[1], " than in group ", group[2])
              message("Blue: Pair has higher LRM in group ", group[2], " than in group ", group[1])
            }

            # Optional coloring of nodes
            if(!missing(col1)) g <- migraph.color(g, col1, col = "darkred")
            if(!missing(col2)) g <- migraph.color(g, col2, col = "darkslateblue")
            g <- migraph.clean(g)

            # Used by pals function
            if(plotSkip) return(g)

            if(d3){

              packageCheck("rgl")
              coords <- igraph::layout_with_fr(g, dim = 3)
              suppressWarnings(igraph::rglplot(g, layout = coords))

            }else{

              plot(g)
            }

            return(g)
          }
)

#' Build \code{propd} Results Table
#'
#' Builds a table of within-group and total log-ratio
#'  variances, log-ratio means, and PALs (see: \code{\link{pals}}).
#'  If the argument \code{k} is provided, the table will
#'  label at most \code{k} top PALs. Just as each node
#'  gets assigned a PAL, \code{shale} aims to assign
#'  each edge a PAL. Edges that have a top PAL as one
#'  and only one of their nodes get assigned that PAL.
#'  Edges that have top PALs as both of their nodes
#'  get assigned "Bridged". Edges without a top PAL
#'  as one of their nodes will get assigned a PAL if
#'  either (a) both nodes have the same neighbor PAL
#'  or (b) one node has a "Missing" neighbor PAL.
#'  The \code{cutoff} argument guides the maximum value of
#'  theta above which to exclude the pair. A large integer
#'  \code{cutoff} will instead retrieve the top N pairs as
#'  ranked by theta.
#'
#' @inheritParams all
shale <- function(object, cutoff = 1000, k, prompt = TRUE, clean = FALSE){

  if(class(object) != "propd") stop("This function requires a 'propd' object.")
  if(!"lrm1" %in% colnames(object@results)) stop("This function requires an 'lrm1' column!")
  if(!"lrm2" %in% colnames(object@results)) stop("This function requires an 'lrm2' column!")

  # Apply cutoff of [0, 1] or a large integer for top theta
  if(cutoff > nrow(object@results)) cutoff <- nrow(object@results)
  if(cutoff > 1) cutoff <- sort(object@results$theta)[cutoff]
  object@results <- object@results[object@results$theta <= cutoff, ]
  df <- object@results
  if(nrow(df) == 0) stop("No theta remain after cutoff.")
  if(prompt) promptCheck(nrow(df))

  # Retrieve useful information
  colnames(df)[colnames(df) %in% c("lrv", "lrv1", "lrv2", "lrm1", "lrm2")] <-
    c("LRV", "LRV1", "LRV2", "LRM1", "LRM2")
  df$FoldLRV <- log(df$LRV1 / df$LRV2)
  df$FoldLRV[is.na(df$FoldLRV)] <- 0
  df$DiffLRM <- df$LRM1 - df$LRM2
  df$PartnerName <- colnames(object@counts)[df$Partner]
  df$PairName <- colnames(object@counts)[df$Pair]

  # Get PALs for each node
  pal <- pals(object, k)
  df$PartnerPAL <- pal[df$PartnerName]
  df$PairPAL <- pal[df$PairName]

  # Assign an edge a PAL if only one node is a top PAL
  topPALs <- unique(pal)
  df$PAL <- "Missing"
  df$PAL <- ifelse(df$PartnerName %in% topPALs, df$PartnerName, df$PAL)
  df$PAL <- ifelse(df$PairName %in% topPALs, df$PairName, df$PAL)

  # Assign "Bridged" if both nodes are a top PAL
  df$PAL <- ifelse(df$PartnerName %in% topPALs & df$PairName %in% topPALs,
                   "Bridged", df$PAL)

  # Assign PAL to edges still without a top PAL
  df$PAL <- ifelse(df$PAL == "Missing" &
                     df$PartnerPAL != "Missing" & df$PairPAL == "Missing",
                   df$PartnerPAL, df$PAL)
  df$PAL <- ifelse(df$PAL == "Missing" &
                     df$PartnerPAL == "Missing" & df$PairPAL != "Missing",
                   df$PairPAL, df$PAL)
  df$PAL <- ifelse(df$PAL == "Missing" &
                     df$PartnerPAL == df$PairPAL,
                   df$PartnerPAL, df$PAL)

  # Calculate difference in LRM relative to PAL
  df$LRM1byPAL <- ifelse(df$PairName == df$PAL,
                         df$LRM1, ifelse(df$PartnerName == df$PAL,
                                         -1 * df$LRM1, 0))
  df$LRM2byPAL <- ifelse(df$PairName == df$PAL,
                         df$LRM2, ifelse(df$PartnerName == df$PAL,
                                         -1 * df$LRM2, 0))
  df$DiffLRMbyPAL <- df$LRM1byPAL - df$LRM2byPAL

  # Clean data for plot -- used by 'geyser', 'bowtie', and 'gemini'
  if(clean) return(df[df$PairName == df$PAL | df$PartnerName == df$PAL, ])

  return(df)
}

#' @rdname visualize
#' @section \code{propd} Functions:
#' \code{geyser:}
#'  Plots indexed pairs based on the within-group
#'  log-ratio variance (VLR) for each group. Pairs near the
#'  origin show a highly proportional relationship in
#'  both groups. Each line away from the \code{y = x} line
#'  indicates a doubling of VLR compared to the other group.
#'  All pairs colored based on PAL
#'  (see: \code{\link{pals}}).
#'  See \code{gemini}.
#' @export
geyser <- function(object, cutoff = 1000, k = 5, prompt = TRUE, plotly = FALSE){

  if(length(unique(object@group)) > 2){
    stop("This method only supports the analysis of 2 groups.")
  }

  df <- shale(object, cutoff, k, prompt, clean = TRUE)

  maxLim <- max(c(df$LRV1, df$LRV2))
  g <-
    ggplot2::ggplot(
      df, ggplot2::aes_string(Partner = "PartnerName",
                              Pair = "PairName", PartnerPAL = "PartnerPAL",
                              PairPAL = "PairPAL", theta = "theta")) +
    ggplot2::geom_point(ggplot2::aes_string(x = "LRV1", y = "LRV2", colour = "PAL")) +
    ggplot2::theme_bw() +
    ggplot2::scale_colour_brewer(palette = "Set2", name = "PAL") +
    ggplot2::xlab(paste("Log-Ratio Variance (VLR) of Pair in",
                        unique(object@group)[1])) +
    ggplot2::ylab(paste("Log-Ratio Variance (VLR) of Pair in",
                        unique(object@group)[2])) +
    ggplot2::ggtitle("Distribution of Pairs by Within-Group VLR") +
    ggplot2::xlim(0, maxLim) +
    ggplot2::ylim(0, maxLim) +
    ggplot2::geom_abline(slope = 1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 2/1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 1/2, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 4/1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 1/4, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 8/1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 1/8, intercept = 0, color = "lightgrey") +
    ggplot2::theme(text = ggplot2::element_text(size=18),
                   plot.title = ggplot2::element_text(size=24))

  if(plotly){

    packageCheck("plotly")
    return(plotly::ggplotly(g))
  }

  return(g)
}

#' @rdname visualize
#' @section \code{propd} Functions:
#' \code{bowtie:}
#'  Plots indexed pairs based on the log-ratio means
#'  (LRM), relative to its PAL, for each group. Pairs near
#'  the origin show comparable LRM, relative to its PAL, in
#'  both groups. Each line away from the \code{y = x} line
#'  indicates a doubling of LRM compared to the other group.
#'  All pairs colored based on PAL
#'  (see: \code{\link{pals}}).
#'  See \code{gemini}.
#' @export
bowtie <- function(object, cutoff = 1000, k = 5, prompt = TRUE, plotly = FALSE){

  if(length(unique(object@group)) > 2){
    stop("This method only supports the analysis of 2 groups.")
  }

  df <- shale(object, cutoff, k, prompt, clean = TRUE)

  maxLim <- max(abs(c(df$LRM1byPAL, df$LRM2byPAL)))
  g <-
    ggplot2::ggplot(
      df, ggplot2::aes_string(Partner = "PartnerName",
                              Pair = "PairName", PartnerPAL = "PartnerPAL",
                              PairPAL = "PairPAL", theta = "theta")) +
    ggplot2::geom_point(ggplot2::aes_string(x = "LRM1byPAL", y = "LRM2byPAL", colour = "PAL")) +
    ggplot2::theme_bw() +
    ggplot2::scale_colour_brewer(palette = "Set2", name = "PAL") +
    ggplot2::xlab(paste("Log-Ratio Mean (LRM) Relative to PAL in",
                        unique(object@group)[1])) +
    ggplot2::ylab(paste("Log-Ratio Mean (LRM) Relative to PAL in",
                        unique(object@group)[2])) +
    ggplot2::ggtitle("Distribution of Pairs by Within-Group LRM Relative to PAL") +
    ggplot2::xlim(-1 * maxLim, maxLim) +
    ggplot2::ylim(-1 * maxLim, maxLim) +
    ggplot2::geom_abline(slope = 1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 2/1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 1/2, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 4/1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 1/4, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 8/1, intercept = 0, color = "lightgrey") +
    ggplot2::geom_abline(slope = 1/8, intercept = 0, color = "lightgrey") +
    ggplot2::theme(text = ggplot2::element_text(size=18),
                   plot.title = ggplot2::element_text(size=24))

  if(plotly){

    packageCheck("plotly")
    return(plotly::ggplotly(g))
  }

  return(g)
}

#' @rdname visualize
#' @section \code{propd} Functions:
#' \code{gemini:}
#'  Plots indexed pairs based on the log-fold difference
#'  in log-ratio variance (VLR) between the two groups
#'  versus the difference in log-ratio means (LRM). In this
#'  figure, the LRM for each group is signed (i.e., positive
#'  or negative) such that the PAL is the denominator
#'  of the log-ratio. This allows for a fluid comparison
#'  between pairs within the same PAL module. Pairs with a
#'  "Bridged" or "Missing" PAL get excluded from this graph.
#'  Remember that an increase in VLR suggests less
#'  proportionality. All pairs colored based on PAL
#'  (see: \code{\link{pals}}).
#' @export
gemini <- function(object, cutoff = 1000, k = 5, prompt = TRUE, plotly = FALSE){

  if(length(unique(object@group)) > 2){
    stop("This method only supports the analysis of 2 groups.")
  }

  df <- shale(object, cutoff, k, prompt, clean = TRUE)

  maxLimx <- max(abs(df$DiffLRMbyPAL))
  maxLimy <- max(abs(df$FoldLRV[df$FoldLRV != Inf & df$FoldLRV != -Inf]))
  g <-
    ggplot2::ggplot(
      df, ggplot2::aes_string(Partner = "PartnerName",
                              Pair = "PairName", PartnerPAL = "PartnerPAL",
                              PairPAL = "PairPAL", theta = "theta",
                              LRV1 = "LRV1", LRV2 = "LRV2")) +
    ggplot2::geom_point(ggplot2::aes_string(x = "DiffLRMbyPAL", y = "FoldLRV", colour = "PAL")) +
    ggplot2::theme_bw() +
    ggplot2::scale_colour_brewer(palette = "Set2", name = "PAL") +
    ggplot2::xlab(paste0("Difference in LRM Relative to PAL (",
                         unique(object@group)[1], " - ", unique(object@group)[2], ")")) +
    ggplot2::ylab(paste0("Log-Fold Change in VLR (",
                         unique(object@group)[1], " / ", unique(object@group)[2], ")")) +
    ggplot2::ggtitle("Distribution of Pairs by VLR and LRM Difference") +
    ggplot2::xlim(-1 * maxLimx, maxLimx) +
    ggplot2::ylim(-1 * maxLimy, maxLimy) +
    ggplot2::geom_abline(slope = 0, intercept = 0, color = "lightgrey") +
    ggplot2::geom_vline(xintercept = 0, color = "lightgrey") +
    ggplot2::theme(text = ggplot2::element_text(size=18),
                   plot.title = ggplot2::element_text(size=24))

  if(plotly){

    packageCheck("plotly")
    return(plotly::ggplotly(g))
  }

  return(g)
}

#' @rdname visualize
#' @section \code{propd} Functions:
#' \code{decomposed:}
#'  Plots the decomposition of log-ratio variance into
#'  (weighted) group variances and between-group variance.
#'  Useful for visualizing how a theta type selects pairs.
#' @export
decomposed <- function(object, cutoff = 1000){

  if(length(unique(object@group)) > 2){
    stop("This method only supports the analysis of 2 groups.")
  }

  packageCheck("compositions")
  df <- getResults(object, cutoff)

  # Generalized decomposition of LRV for weighted theta types
  decomp <- matrix(0, nrow = nrow(df), ncol = 3)
  decomp[, 1] <- df$p1 * df$lrv1 / (df$p * df$lrv)
  decomp[, 2] <- df$p2 * df$lrv2 / (df$p * df$lrv)
  decomp[, 3] <- df$p1 * df$p2 * (df$lrm2 - df$lrm1)^2 / (df$p^2 * df$lrv)

  x <- suppressWarnings(compositions::acomp(decomp))
  suppressWarnings(
    plot(x, pch = 20, col = grDevices::rgb(0.1, 0.1, 0.1, 0.1),
         labels = c("group 1  ", "  group 2", "between-group"), axes = TRUE)
  )
}

#' @rdname visualize
#' @section \code{propd} Functions:
#' \code{parallel:}
#'  Plots the sample-wise log-ratio abundance across all
#'  pairs selected by the provided cutoff. Use the
#'  \code{reference} argument to subset the plot to only
#'  include pairs that contain this reference.
#' @export
parallel <- function(object, cutoff = 1000, include = NA, or = TRUE, plotly = FALSE){

  df <- getRatios(object, cutoff, include = include, or = or, melt = TRUE)
  df$variable <- factor(df$variable, levels = unique(df$variable))
  df$group <- object@group

  g <- ggplot2::ggplot(df, ggplot2::aes_string(x = "variable", y = "value", group = "id", col = "group")) +
    ggplot2::geom_line() + ggplot2::theme_bw() +
    ggplot2::scale_colour_brewer(palette = "Set2", name = "Group") +
    ggplot2::xlab("Feature Pair") + ggplot2::ylab("Log-Ratio Abundance") +
    ggplot2::ggtitle("Sample-wise Distribution of Log-Ratios Across Pairs") +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 1)) +
    ggplot2::theme(text = ggplot2::element_text(size=18),
                   plot.title = ggplot2::element_text(size=24))

  if(plotly){

    packageCheck("plotly")
    return(plotly::ggplotly(g))
  }

  return(g)
}
Any scripts or data that you put into this service are public.
propr documentation built on Dec. 16, 2019, 9:30 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
propr
Calculating Proportionality Between Vectors of Compositional Data

R/6-propdViz.R
In propr: Calculating Proportionality Between Vectors of Compositional Data

Defines functions pals propd2propr shale geyser bowtie gemini decomposed parallel

Documented in bowtie decomposed gemini geyser pals parallel propd2propr shale

Try the propr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

propr Calculating Proportionality Between Vectors of Compositional Data

R/6-propdViz.R In propr: Calculating Proportionality Between Vectors of Compositional Data

Defines functions pals propd2propr shale geyser bowtie gemini decomposed parallel

Documented in bowtie decomposed gemini geyser pals parallel propd2propr shale

Try the propr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

propr
Calculating Proportionality Between Vectors of Compositional Data

R/6-propdViz.R
In propr: Calculating Proportionality Between Vectors of Compositional Data