#' Computes the dissimilarity measure \emph{psi} on two or more sequences.
#'
#' @description If the sequences are not aligned (\code{paired.samples = FALSE}), the function executes these steps.
#' \itemize{
#' \item Computes the distance matrix with \code{\link{distanceMatrix}}.
#' \item Uses the distance matrix to compute the least cost matrix with \code{\link{leastCostMatrix}}.
#' \item Computes the least cost path with \code{\link{leastCostPath}} and, if \code{ignore.blocks = TRUE}, removes its blocks with \code{\link{leastCostPathNoBlocks}}.
#' \item Extracts the cost of the least cost path with \code{\link{leastCost}}.
#' \item Computes the autosum of the sequences with \code{\link{autoSum}}.
#' \item Computes the dissimilarity measure \emph{psi} with the function \code{\link{psi}}.
#' \item Delivers an output of type "dataframe" (default), "matrix" or "list", depending on the user input; formats other than "list" are produced by \code{\link{formatPsi}}.
#' }
#'
#' If the sequences are aligned (\code{paired.samples = TRUE}), these steps are executed:
#' \itemize{
#' \item Sums the distances between paired samples with \code{\link{distancePairedSamples}}.
#' \item Computes the autosum of the sequences with \code{\link{autoSum}}.
#' \item Computes the dissimilarity measure \emph{psi} with the function \code{\link{psi}}.
#' \item Delivers an output of type "dataframe" (default), "matrix" or "list", depending on the user input; formats other than "list" are produced by \code{\link{formatPsi}}.
#' }
#'
#' @usage workflowPsi(
#' sequences = NULL,
#' grouping.column = NULL,
#' time.column = NULL,
#' exclude.columns = NULL,
#' method = "manhattan",
#' diagonal = FALSE,
#' format = "dataframe",
#' paired.samples = FALSE,
#' same.time = FALSE,
#' ignore.blocks = FALSE,
#' parallel.execution = TRUE
#' )
#'
#' @param sequences dataframe with multiple sequences identified by a grouping column generated by \code{\link{prepareSequences}}.
#' @param grouping.column character string, name of the column in \code{sequences} to be used to identify separate sequences within the file.
#' @param time.column character string, name of the column with time/depth/rank data.
#' @param exclude.columns character string or character vector with column names in \code{sequences} to be excluded from the analysis.
#' @param method character string naming a distance metric. Valid entries are: "manhattan", "euclidean", "chi", and "hellinger". Invalid entries will throw an error.
#' @param diagonal boolean, if \code{TRUE}, diagonals are included in the computation of the least cost path. Defaults to \code{FALSE}, as the original algorithm did not include diagonals in the computation of the least cost path. If \code{paired.samples} is \code{TRUE}, then \code{diagonal} is irrelevant.
#' @param format string, type of output. One of: "dataframe" (default), "matrix", or "list". If \code{NULL} or empty, a list is returned.
#' @param paired.samples boolean, if \code{TRUE}, the sequences are assumed to be aligned, and distances are computed for paired-samples only (no distance matrix required). Default value is \code{FALSE}.
#' @param same.time boolean. If \code{TRUE}, samples in the sequences to compare will be tested to check if they have the same time/age/depth according to \code{time.column}. This argument is only useful when the user needs to compare two sequences taken at different sites but same time frames.
#' @param ignore.blocks boolean. If \code{TRUE}, the function \code{\link{leastCostPathNoBlocks}} analyzes the least-cost path of the best solution, and removes blocks (straight-orthogonal sections of the least-cost path), which happen in highly dissimilar sections of the sequences, and inflate output psi values.
#' @param parallel.execution boolean, if \code{TRUE} (default), execution is parallelized, and serialized if \code{FALSE}.
#'
#' @return A list, matrix, or dataframe, with sequence names and psi values.
#'
#' @author Blas Benito <blasbenito@gmail.com>
#'
#' @examples
#'
#' \donttest{
#' data("sequencesMIS")
#' #prepare sequences
#' MIS.sequences <- prepareSequences(
#' sequences = sequencesMIS,
#' grouping.column = "MIS",
#' if.empty.cases = "zero",
#' transformation = "hellinger"
#' )
#'
#'#execute workflow to compute psi
#'MIS.psi <- workflowPsi(
#' sequences = MIS.sequences[MIS.sequences$MIS %in% c("MIS-1", "MIS-2"), ],
#' grouping.column = "MIS",
#' time.column = NULL,
#' exclude.columns = NULL,
#' method = "manhattan",
#' diagonal = FALSE,
#' parallel.execution = FALSE
#' )
#'
#'MIS.psi
#'
#'}
#'
#' @export
workflowPsi <- function(sequences = NULL,
                        grouping.column = NULL,
                        time.column = NULL,
                        exclude.columns = NULL,
                        method = "manhattan",
                        diagonal = FALSE,
                        format = "dataframe",
                        paired.samples = FALSE,
                        same.time = FALSE,
                        ignore.blocks = FALSE,
                        parallel.execution = TRUE){

  # Orchestrates the psi computation by chaining the package helpers.
  # Two mutually exclusive routes are available, selected by 'paired.samples':
  #   FALSE -> "elastic" method (distance matrix + least cost path)
  #   TRUE  -> "step-lock" method (distances between paired samples)
  # Both routes leave a list of psi values in 'psi.value', which is then
  # optionally converted by formatPsi() according to 'format'.

  if(paired.samples == FALSE){

    # SAMPLES ARE NOT PAIRED: ELASTIC METHOD

    # distance matrix between the samples of the sequences
    distance.matrix <- distanceMatrix(
      sequences = sequences,
      grouping.column = grouping.column,
      time.column = time.column,
      exclude.columns = exclude.columns,
      method = method,
      parallel.execution = parallel.execution
    )

    # least cost matrix derived from the distance matrix
    least.cost.matrix <- leastCostMatrix(
      distance.matrix = distance.matrix,
      diagonal = diagonal,
      parallel.execution = parallel.execution
    )

    # least cost path through the least cost matrix
    least.cost.path <- leastCostPath(
      distance.matrix = distance.matrix,
      least.cost.matrix = least.cost.matrix,
      diagonal = diagonal,
      parallel.execution = parallel.execution
    )

    # BLOCKS ARE IGNORED: the path is replaced by its block-free version.
    # When ignore.blocks is FALSE the path computed above is used as is.
    if(ignore.blocks == TRUE){
      least.cost.path <- leastCostPathNoBlocks(
        least.cost.path = least.cost.path,
        parallel.execution = parallel.execution
      )
    }

    # cost of the least cost path (with or without blocks, see above)
    least.cost <- leastCost(
      least.cost.path = least.cost.path,
      parallel.execution = parallel.execution
    )

    # autosum of the sequences; it receives the same path used for the cost
    autosum.sequences <- autoSum(
      sequences = sequences,
      least.cost.path = least.cost.path,
      grouping.column = grouping.column,
      time.column = time.column,
      exclude.columns = exclude.columns,
      method = method,
      parallel.execution = parallel.execution
    )

    # psi from least cost and autosum
    psi.value <- psi(
      least.cost = least.cost,
      autosum = autosum.sequences,
      parallel.execution = parallel.execution
    )

    # psi values are shifted by +1 only when diagonals were used
    if(diagonal == TRUE){
      psi.value <- lapply(X = psi.value, FUN = function(x){x + 1})
    }

  } else if(paired.samples == TRUE){

    # SAMPLES ARE PAIRED: STEP-LOCK METHOD

    # summed distances between paired samples play the role of the least cost
    least.cost <- distancePairedSamples(
      sequences = sequences,
      grouping.column = grouping.column,
      time.column = time.column,
      exclude.columns = exclude.columns,
      same.time = same.time,
      method = method,
      sum.distances = TRUE,
      parallel.execution = parallel.execution
    )

    # autosum of the sequences; the paired distances are handed over through
    # the least.cost.path argument.
    # NOTE(review): presumably autoSum() only uses it to identify the pairs of
    # sequences - confirm against autoSum().
    autosum.sequences <- autoSum(
      sequences = sequences,
      least.cost.path = least.cost,
      grouping.column = grouping.column,
      time.column = time.column,
      exclude.columns = exclude.columns,
      method = method,
      parallel.execution = parallel.execution
    )

    # psi from least cost and autosum
    psi.value <- psi(
      least.cost = least.cost,
      autosum = autosum.sequences,
      parallel.execution = parallel.execution
    )

    # psi values are always shifted by +1 in the paired route
    psi.value <- lapply(X = psi.value, FUN = function(x){x + 1})

  } else {

    # neither route matched: fail here with a clear message instead of a
    # later "object 'psi.value' not found"
    stop("Argument 'paired.samples' must be TRUE or FALSE.", call. = FALSE)

  }

  # FORMATTING PSI
  # A NULL or empty 'format' is documented as "return a list"; it must be
  # caught before the comparison below, because a zero-length condition makes
  # if() fail with "argument is of length zero".
  return.list <- is.null(format) || length(format) == 0L || identical(format, "")

  if(!return.list && format != "list"){
    psi.value <- formatPsi(
      psi.values = psi.value,
      to = format
    )
  }

  psi.value
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.