#' Slots two sequences into a single composite sequence.
#'
#' @description Generates a composite sequence, constrained by sample order, from two sequences, by minimizing the dissimilarity between adjacent samples of each input sequence. The algorithm computes the distance matrix, least cost matrix, and least cost path of two sequences, and uses the least cost path file to find the slotting that better minimizes the dissimilarity between adjacent samples. The algorithm assumes that the samples are not aligned or paired.
#'
#'
#' @usage workflowSlotting(
#' sequences = NULL,
#' grouping.column = NULL,
#' time.column = NULL,
#' exclude.columns = NULL,
#' method = "manhattan",
#' plot = TRUE
#' )
#'
#' @param sequences dataframe with two sequences identified by a grouping column generated by \code{\link{prepareSequences}}.
#' @param grouping.column character string, name of the column in \code{sequences} to be used to identify separates sequences within the file.
#' @param time.column character string, name of the column with time/depth/rank data.
#' @param exclude.columns character string or character vector with column names in \code{sequences} to be excluded from the analysis.
#' @param method character string naming a distance metric. Valid entries are: "manhattan", "euclidean", "chi", and "hellinger". Invalid entries will throw an error.
#' @param plot boolean, if \code{TRUE}, plots the distance matrix and the least-cost path.
#'
#' @return A dataframe with the same number of rows as \code{sequences}, ordered according to the best solution found by the least-cost algorithm.
#'
#' @author Blas Benito <blasbenito@gmail.com>
#'
#' @examples
#'
#' \donttest{
#' #loading the data
#' data(pollenGP)
#'
#' #getting first 20 samples
#' pollenGP <- pollenGP[1:20, ]
#'
#' #sampling indices
#' set.seed(10) #to get same result every time
#' sampling.indices <- sort(sample(1:20, 10))
#'
#' #subsetting the sequence
#' A <- pollenGP[sampling.indices, ]
#' B <- pollenGP[-sampling.indices, ]
#'
#' #preparing the sequences
#' AB <- prepareSequences(
#' sequence.A = A,
#' sequence.A.name = "A",
#' sequence.B = B,
#' sequence.B.name = "B",
#' grouping.column = "id",
#' exclude.columns = c("depth", "age"),
#' transformation = "hellinger"
#' )
#'
#' AB.combined <- workflowSlotting(
#' sequences = AB,
#' grouping.column = "id",
#' time.column = "age",
#' exclude.columns = "depth",
#' method = "manhattan",
#' plot = FALSE
#' )
#'
#' AB.combined
#'
#'}
#'
#'@export
workflowSlotting <- function(sequences = NULL,
grouping.column = NULL,
time.column = NULL,
exclude.columns = NULL,
method = "manhattan",
plot = TRUE){
#checking there are only two sequences
if(is.null(grouping.column)){
stop("The argument 'grouping.column' is empty.")
} else {
if(length(unique(sequences[, grouping.column])) != 2){
stop("The argument 'sequences' must be a dataframe with two sequences identified by 'grouping.column'. The dataframe provided does not have two sequences.")
}
}
#distance matrix
distance.matrix <- distanceMatrix(
sequences = sequences,
grouping.column = grouping.column,
time.column = time.column,
exclude.columns = exclude.columns,
method = method,
parallel.execution = FALSE
)
#least cost matrix
least.cost.matrix <- leastCostMatrix(
distance.matrix = distance.matrix,
diagonal = FALSE,
parallel.execution = FALSE
)
#least cost path
least.cost.path <- leastCostPath(
distance.matrix = distance.matrix,
least.cost.matrix = least.cost.matrix,
diagonal = FALSE,
parallel.execution = FALSE
)
#visualizing output
if(plot == TRUE){
plotMatrix(
distance.matrix = distance.matrix,
least.cost.path = least.cost.path,
color.palette = "viridis"
)
}
#sequence names
sequence.names <- unique(sequences[, grouping.column])
#extracting and flipping least.cost.path
path <- least.cost.path[[1]]
path <- path[nrow(path):1, ]
rownames(path)<-1:nrow(path)
#adding index column to the sequences
for(k in 1:2){
sequences[sequences[, grouping.column] == sequence.names[k], "index"] <- order(unique(path[, sequence.names[k]]))
}
sequences <- data.frame(index = sequences$index, sequences[, colnames(sequences) != "index"])
#creating output dataframe
sequences.combined <- sequences[0, ]
#vectors to introduce used indices
used <- list()
used[[1]] <- vector()
used[[2]] <- vector()
names(used) <- sequence.names
#starting values for dynamic variables
target.index <- 1
target.sequence <- sequence.names[1]
#it works better if it starts with the sequence with the first index repeated
if(!(sum(path[,target.sequence] == target.index) > 1)){
target.sequence <- sequence.names[sequence.names != target.sequence]
}
#first row of path
j <- 1
#iterating through AB.index.unique
##################################
while(j < nrow(path)){
#1. IS NEW?
###########
if(!(path[j, target.sequence] %in% used[[target.sequence]])){
#1. YES: ADD
#------------
sequences.combined <- rbind(sequences.combined, sequences[which(sequences[ , grouping.column] == target.sequence & sequences$index == path[j, target.sequence]), ])
#adds the index to the used indices
used[[target.sequence]] <- c(used[[target.sequence]], path[j, target.sequence])
#2. IS DIFFERENT TO THE NEXT ONE
################
if(path[j, target.sequence] != path[j + 1, target.sequence]){
#2. YES: NEXT
#------------
j <- j + 1
next
} else {
#2. NOPE: SWITCH
#---------------
target.sequence <- sequence.names[sequence.names != target.sequence]
next
}
} else {
#1. NOPE: NEXT
#-------------
j <- j + 1
next
}
}
#adding the last one
sequences.combined <- rbind(sequences.combined, sequences[which(sequences[ , grouping.column] == target.sequence & sequences$index == path[j, target.sequence]), ])
#getting index column
index.column <- sequences.combined$index
sequences.combined$index <- NULL
#getting the grouping column
grouping.column.data <- sequences.combined[, grouping.column]
sequences.combined[, grouping.column] <- NULL
#adding it them the beginning
sequences.combined <- data.frame(groups = grouping.column.data, index = index.column, sequences.combined)
colnames(sequences.combined)[1] <- grouping.column
colnames(sequences.combined)[2] <- "original.index"
return(sequences.combined)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.