R/prep_plt_ord.R

Defines functions axis_num make_pcoa_df make_rda_df make_ord_df

Documented in axis_num make_ord_df make_pcoa_df make_rda_df

## make_ord_df --------------------------------------------------------------

#' Make a data frame of the ordination
#'
#' Take a phyloseq object and generate a data frame of the distance-based
#' ordination of the samples for plotting. This function has been tested with
#' ordination methods 'PCoA' and 'RDA'. I can't make any promises if you use a
#' different ordination method.
#'
#' @param physeq A phyloseq object with an OTU table and sample data. The table
#'   should be normalized (rarefied, relative abundance, some kind of log-ratio
#'   transformation) so that the distance metrics will be meaningful. Make sure
#'   that the transformation you have used is appropriate for the distance
#'   method you choose.
#' @param dist_meth The distance method to be use. Must be one of the methods
#'   accepted by the [phyloseq::distance()] function. Default is
#'   'bray'. If 'jaccard' is used, adds the `binary = TRUE` argument. If
#'   you have taken a clr transform of your data and want Aitchison distances,
#'   choose `'euclidean'`. Currently, tree-based methods like
#'   `'unifrac'` and `'wunifrac'` are not supported.
#' @param ord_meth The ordination method. Must be one of the methods accepted by
#'   the [phyloseq::ordinate()] function. Default is 'PCoA'. Make sure
#'   the ordination method you choose is appropriate for your distance method.
#' @param scree_only If `TRUE`, this function will print the scree plot of
#'   the requested ordination and then exit. Good for deciding how many axes you
#'   care about. Default is `FALSE`.
#' @param axes A vector of integers indicating which ordination axes to include
#'   in the data frame. Defaults to `1:4`.
#' @export
make_ord_df = function(physeq, dist_meth = 'bray', ord_meth = 'PCoA',
                  scree_only = FALSE, axes = 1:4){

    # Check if dist and ord methods are known/tested
    if (!(dist_meth %in% c('jaccard','bray','euclidean'))){
        warn(paste('This function has only been tested with jaccard,',
                   'bray, and euclid distance methods. Other methods may',
                   'work but you are responsible for making sure what',
                   'you\'re doing is sensible.'))
    }
    if (!(ord_meth %in% c('PCoA','RDA'))){
        stop(paste('This function currently only works with PCoA and RDA',
                   'ordination methods. Pull requests are welcome.'))
    }

    if (ord_meth == 'RDA' & dist_meth != 'euclidean'){
        stop(paste('RDA ordination is not sensible with non-metric distance',
                   'metrics. For now only euclidean distance is accepted with',
                   'RDA, but pull requests are welcome for other metric',
                   'distance metrics which you may wish to implement and test.'))
    }
    # Get the distance object and do the ordination
    if (dist_meth == 'jaccard'){
        d = phyloseq::distance(physeq, method = dist_meth, binary = TRUE)
    } else {
        d = phyloseq::distance(physeq, method = dist_meth)
    }
    ord = phyloseq::ordinate(physeq, ord_meth, d)

    # Scree
    if (scree_only) {
        print(phyloseq::plot_scree(ord))
        return()
    }

    if (ord_meth == 'PCoA'){
        ord_long = make_pcoa_df(ord, physeq, axes)
    } else if (ord_meth == 'RDA'){
        ord_long = make_rda_df(ord, physeq, axes)
    } else {
        ord_long = tryCatch(make_pcoa_df(ord, physeq, axes))
        if (is.data.frame(ord_long)){
            warn(paste('Data frame produced, but untested with this',
                        'ordination method. Double check before plotting.'))
        } else {
            ord_long = tryCatch(make_rda_df(ord, physeq, axes))
            if (is.data.frame(ord_long)) {
                warn(paste('Data frame produced, but untested with this',
                            'ordination method. Double check before plotting.'))
            } else {
                stop(paste('Data frame could not be produced. Try \'PCoA\'',
                           'or \'RDA\' ordination methods.'))
            }
        }

        return(ord_long)
    }

    return(ord_long)

}
## make_rda_df----------------------------------------------------------------

#' Make a data frame of the ordination if the ordination method is RDA. Not
#' exported.
#' @section Description: Take a phyloseq object and generate a data frame of the
#'   distance-based ordination of the samples for plotting when the ordination
#'   method was RDA. Used internally by [make_ord_df()].
#'
#' @param ord The ordination object generated by
#'   [phyloseq::ordinate()].
#' @param physeq The phyloseq object we're making things from
#' @param axes A numeric vector of the axes to plot
make_rda_df = function(ord, physeq, axes){
    # Calculate the axis weights

    eigs = ord$CA$eig
    weights = round(eigs/sum(eigs), 3) * 100

    # Make the data frame
    ord_df = phyloseq::plot_ordination(physeq, ord, axes = axes, justDF = TRUE)
    ord_long = (ord_df
                %>% tidyr::gather(AxisX, ValueX, starts_with('PC'))
                %>% dplyr::left_join(ord_df)
                %>% tidyr::gather(AxisY, ValueY, starts_with('PC'))
                %>% dplyr::mutate(AxisX = factor(paste(AxisX,
                                                       paste(weights[AxisX],'%',
                                                      sep = ''))),
                           AxisY = factor(paste(AxisY,
                                                paste(weights[AxisY], '%',
                                                      sep = '')))))

    return(ord_long)
}

## make_pcoa_df---------------------------------------------------------------

#' Make a data frame of the ordination if the ordination method is PCoA. Not
#' exported
#'
#' Take a phyloseq objet and generate a data frame of the distance-based
#' ordination of the samples for plotting when the ordination method was PCoA.
#' Used internally by `make_ord_df()`
#'
#' @param ord The ordination object generated by `phyloseq::ordinate()`.
#' @param physeq The phyloseq object we're making things from.
#' @param axes A numeric vector of the axes to include.
make_pcoa_df = function(ord, physeq, axes){
    # Calculate the axis weights
    weights = round(ord$values$Relative_eig, 3) * 100

    # Make the data frame
    ord_df = phyloseq::plot_ordination(physeq, ord, axes = axes, justDF = TRUE)
    ord_long = (ord_df
                %>% tidyr::gather(AxisX, ValueX, starts_with('Axis.'))
                %>% dplyr::left_join(ord_df)
                %>% tidyr::gather(AxisY, ValueY, starts_with('Axis.'))
                %>% dplyr::mutate(AxisX = factor(paste(AxisX,
                                        paste(weights[axis_num(AxisX)], '%',
                                                 sep = ''))),
                           AxisY = factor(paste(AxisY,
                                        paste(weights[axis_num(AxisY)], '%',
                                                 sep = '')))))
    return(ord_long)
}


## axis_num ------------------------------------------------------------------

#' Get the axis number
#' Take a character vector of the form 'Axis.N' where N is a number and return
#' a numeric vector of N in the same order
#' @param AxisX The character vector of axes. Must only contain values of the
#' form 'Axis.N'
axis_num = function(AxisX){
    AxisX %>%
        strsplit('\\.') %>%
        unlist() -> tmp
    axes = as.numeric(tmp[seq(2,length(tmp),2)])

    return(axes)
}
JCSzamosi/aftersl1p documentation built on July 3, 2025, 8:37 p.m.