Nothing
#' Computing and spotting transitions among a dataset
#'
#' The purpose of \code{seqTrans.R} is to spot transitions
#' in a dataset.
#'
#' @param OD \code{matrix} object containing sequences of a variable with missing data (coded as \code{NA}).
#' @param trans \code{character} vector gathering the impossible transitions. For example: trans <- c("1->3","1->4","2->1","4->1","4->3")
#'
#' @author Andre Berchtold <andre.berchtold@@unil.ch>, Kevin Emery
#'
#' @return It returns a matrix whose rows each are the indices of an impossible transition.
#'
#' @examples
#' data(OD)
#'
#' seqTransList <- seqTrans(OD = OD, trans = c("yes->no"))
#'
#' @importFrom stringr str_count
#' @importFrom stringr str_detect
#' @importFrom stringr str_locate
#' @importFrom stringr str_locate_all
#'
#' @importFrom graphics plot
#'
#' @importFrom stats as.formula
#' @importFrom stats cutree
#' @importFrom stats lm
#' @importFrom stats predict
#' @importFrom stats rnorm
#' @importFrom stats runif
#'
#' @importFrom utils capture.output
#'
#' @importFrom Amelia missmap
#'
#' @importFrom TraMineR seqdef
#' @importFrom TraMineR seqfplot
#' @importFrom TraMineR seqdplot
#' @importFrom TraMineR seqsubm
#' @importFrom TraMineR seqdist
#'
#' @importFrom cluster agnes
#'
#' @importFrom plyr mapvalues
#'
#' @importFrom dfidx dfidx
#'
#' @importFrom rms lrm
#'
#' @importFrom mice as.mids
#'
#' @importFrom mlr makeClassifTask
#'
#' @importFrom ranger ranger
#'
#' @importFrom stats model.matrix
#'
#' @importFrom utils setTxtProgressBar
#' @importFrom utils txtProgressBar
#'
#' @importFrom dplyr n_distinct
#'
#' @importFrom foreach foreach
#' @importFrom foreach %dopar%
#'
#' @export
seqTrans <- function(OD, trans) {
impTrans <- trans
# Naming the number of rows and columns of OD
nr <- nrow(OD)
nc <- ncol(OD)
# test
# 1. Initial tests on parameters --------------------------------------------------------------------------------------------------------------------
# 1.1 Testing the class of the variables of the original dataset OD ---------------------------------------------------------------------------------
ODClass <- class(OD[1, 1])
if ((ODClass != "factor") & (ODClass != "numeric")) {
stop("/!\\ The class of the variables contained in your original dataset
should be either 'factor' or 'numeric'")
}
if (ODClass == "factor") {
k <- length(sort(unique(as.vector(as.matrix(OD)))))
} else {
k <- max(OD)
}
# 1.3 Indicating the existence of entire rows of OD filled only with NAs to the user ----------------------------------------------------------------
i <- 1
numbOfNAFilledLines <- 0
while (i <= nrow(OD)) {
if (all(is.na(OD[i, ]))) {
OD <- OD[-i, ]
numbOfNAFilledLines <- numbOfNAFilledLines + 1
}
i <- i + 1
}
# if (numbOfNAFilledLines == 1) {
# warning(paste("Your data matrix contains 1 row filled solely with NAs. This row won't be included into the next statistics about NAs."),sep='')
# }
# if (numbOfNAFilledLines > 1) {
# warning(paste("Your data matrix contains",numbOfNAFilledLines,"rows filled solely with NAs."),sep='',"\n",
# " These rows have been removed and thus haven't been included in","\n",
# " the analysis of the impossible transitions among your dataset.")
# }
# Updating the number of rows in OD
nr <- nrow(OD)
# 1.4 Test on input parameter impTrans -----------------------------------------
for (i in 1:length(impTrans)) {
if (!str_detect(impTrans[i], "->")) {
stop("/!\\ Warning, you should construct your transition(s) vector trans with little arrows as follow: trans <- c('...->...', '...->...', etc.).")
}
# Testing if we are effectively analyzing a transition or not
locDash <- str_locate(impTrans[i], "-")
firstState <- substr(impTrans[i], 1, locDash - 1)
locSpike <- str_locate(impTrans[i], ">")
secondState <- substr(impTrans[i], locSpike + 1, nchar(impTrans[1]))
# if (firstState==secondState) {
# stop("/!\\ You have typed in two same states (two times '",firstState,"') on both sides of the arrow. This doesn't correspond to a transition.")
# }
}
# 3. Spotting impossible transitions ----------------------------------------------------------------------------------------------------------------
## Setup
#
# Transforming every line of OD into class character and adding a dash inbetween
# every state
dashes <- replicate(nc, "->")
CharAndDashes <- function(OD) {
mytestChar <- paste(as.vector(rbind(OD, dashes)), collapse = "")
}
ODCharAndDashes <- apply(OD, 1, CharAndDashes)
# 3.1 Number of listed impossible transitions -------------------------------------------------------------------------------------------------------
# Identifying the patterns of the impossible transitions in each line of OD
countImpTrans <- function(ODCharAndDashes) {
str_count(ODCharAndDashes, pattern = paste0("(?=", impTrans, ")"))
}
numbOfImpTransByRow <- sapply(ODCharAndDashes, countImpTrans)
if (length(impTrans) > 1) {
numbOfImpTrans <- rowSums(numbOfImpTransByRow)
} else {
numbOfImpTrans <- sum(numbOfImpTransByRow)
}
# Interrupting the program in case no impossible transitions among impTrans have
# been found
if (sum(numbOfImpTrans) == 0) {
message("Your dataset has no impossible transitions!")
seqTransList <- matrix(0, 0, 2)
colnames(seqTransList) <- c("row", "col")
return(seqTransList)
}
if (length(impTrans) == 1) {
numbOfImpTransByRow <- t(as.matrix(numbOfImpTransByRow))
}
# Computation of the matrix startLocMat of size length(impTrans)*ncol(OD)
# Each line correspond to a transition specified by the user. In each column j,
# we have the indices corresponding to the specified transition of observations j in the
# original dataset OD under the form of a string. For example, if the transition
# 3 and 14 of observation j correspond to the transition i specified by the user,
# we will have "3 14" in place i,j
startLocMat <- matrix(NA, nrow = length(impTrans), ncol = nrow(OD))
for (i in 1:length(impTrans)) {
# We need this roundabout with "(?=)" because in the case of a transition
# yes->yes->yes for example, if the user is interested in the transition
# yes->yes, the simple version without "(?=)" won't spot
# the second transtion because a part of it is used in the first one.
Tmplist <- str_locate_all(ODCharAndDashes, paste0("(?=", impTrans[i], ")"))
for (j in 1:length(ODCharAndDashes)) {
tempMat <- Tmplist[[j]]
if (nrow(tempMat) > 0) {
if (nrow(tempMat) > 1) {
tempMat <- paste(tempMat[, 1], collapse = " ")
}
startLocMat[i, j] <- as.matrix(tempMat[1])
}
}
}
# Function to extract the different digits when a string is composed
# of multiple digits
Numextract <- function(string) {
unlist(regmatches(string, gregexpr("[[:digit:]]+\\.*[[:digit:]]*", string)))
}
# Function that translates the value corresponding to where the transition
# occures in the string to where it occures in a observation
# To do so, we extract the substring until the desired a transition occures
# in the string and we compute the number +1 of ">" in this substring.
# For example, if the observation j is of the form "yes->yes->no->yes->yes"
# and the value 15 is stored in StartLocMat, we extract the substring
# "yes->yes->no->", we compute 3 + 1. The target transition therefore occurs
# in position 4.
GetRealColPosition <- function(ODCharAndDashes, RowPos, ColPos) {
ODCharAndDashesSubstring <- substr(ODCharAndDashes[RowPos], 1, ColPos)
realColPosition <- str_count(ODCharAndDashesSubstring, pattern = ">") + 1
return(realColPosition)
}
list_rows <- list()
list_cols <- list()
for (i in 1:length(impTrans)) {
if (numbOfImpTrans[i] > 0) {
TmpRows <- c()
TmpCols <- c()
for (j in 1:ncol(startLocMat)) {
if (numbOfImpTransByRow[i, j] > 0) {
detectTemp <- str_detect(startLocMat[i, j], " ")
if (detectTemp == FALSE) {
TmpRows <- c(TmpRows, j)
TmpCols <- c(TmpCols, GetRealColPosition(ODCharAndDashes = ODCharAndDashes, RowPos = j, ColPos = as.numeric(startLocMat[i, j])))
} else {
hiddenCol <- str_count(startLocMat[i, j], " ")
TmpRows <- c(TmpRows, rep(j, hiddenCol + 1))
extractedNumbers <- Numextract(startLocMat[i, j])
TmpVec <- c()
for (k in 1:length(extractedNumbers)) {
TmpVec[k] <- GetRealColPosition(ODCharAndDashes = ODCharAndDashes, RowPos = j, ColPos = as.numeric(extractedNumbers[k]))
}
TmpCols <- c(TmpCols, TmpVec)
}
}
}
list_rows[[i]] <- TmpRows
list_cols[[i]] <- TmpCols
}
}
MaxNumTransitions <- max(sapply(list_cols, function(x) length(x)))
rowMat <- matrix(NA, length(impTrans), MaxNumTransitions)
colMat <- matrix(NA, length(impTrans), MaxNumTransitions)
#
# # Converting into a dataframe
# rowMat <- as.data.frame(rowMat)
# # Renaming the columns of rowMat
# colnames_rowMat <- paste(1:ncol(rowMat),")",sep='')
# rownames(rowMat) <- impTrans
# colnames(rowMat) <- colnames_rowMat
#
# # Converting into a dataframe
# colMat <- as.data.frame(colMat)
# # Renaming the columns of colMat
# colnames_colMat <- paste(1:ncol(colMat),")",sep='')
# rownames(colMat) <- impTrans
# colnames(colMat) <- colnames_colMat
#
for (i in 1:length(impTrans)) {
if (numbOfImpTrans[i] > 0) {
rowMat[i, 1:length(list_rows[[i]])] <- list_rows[[i]]
colMat[i, 1:length(list_cols[[i]])] <- list_cols[[i]]
}
}
if (length(impTrans) > 1) {
rowMat <- rowMat[rowSums(!is.na(rowMat)) > 0, ]
colMat <- colMat[rowSums(!is.na(colMat)) > 0, ]
}
# 3.3 Summary data frame ----------------------------------------------------------------------------------------------------------------------------
#
# Update of impTrans with an arrow
impTransOverview <- data.frame(c(impTrans, "", "Total:"), c(numbOfImpTrans, "", sum(numbOfImpTrans)))
colnames(impTransOverview) <- c("Transitions", "Occurence")
if (length(rowMat) > 0) {
seqTransList <- matrix(0, length(rowMat), 2)
seqTransList[, 1] <- rowMat
seqTransList[, 2] <- colMat
}
colnames(seqTransList) <- c("row", "col")
return(seqTransList)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.