#' Create a pathway variable
#'
#' Feature Requests:
#' 1) a time variable and business rules based on date times;
#' 2) step grouping (IE: step1, step2, step3 = phase1; step4, step5 = phase2; etc.). Takes a dataframe column you want to group by, and a column you want to make a pathway out of and returns a pathway vector the size of your original data. Used when you want to know unique combinations of steps in order to count or group by. A medical pathway or business process steps are good use cases.
#'
#' @param groupcol The column you want to group by. Generally it's a person or employee.
#' @param pathcol The column you want to create a path from. IE: service_type, location, step
#' @param sep The seperator that goes between the parts of the pathway. The default is hyphen (\code{-}).
#' @param subset A boolean flag to indicate if you want to use every possible part/step in the pathway or if you just want to track certain steps. Default is \code{FALSE} (use all values). Must use the keepvalues parameter if the subset flag is \code{TRUE} (use certain values).
#' @param keepvalues A character vector of the pathway parts/steps you want to use. Only use when the subset flag is \code{TRUE}.
#' @param ordered A boolean flag to indicate whether or not the path should care about occurence order (when the step occured). Default is \code{TRUE}. If flag is set to \code{FALSE} the pathway vector will be sorted alphabetically.
#' @param keepconsec A boolean flag to indicate if you want to keep or remove duplicated steps in the pathway. Default is \code{TRUE}.
#' @param n.cores An integer value that indicates the number of cores you want to run the process on. The default is 1 less than the total number of available cores on the CPU for UNIX flavored OSs, while the only option (currently) on Windows OS is 1.
#'
#' @keywords path pathway steps
#'
#' @examples
#' asd <- data.frame(
#' id = rep(letters, times = 4)
#' , service = sample(
#' c('ps1', 'ps2', 'ps3', 'ps4', 'ps5', 'ps6', 'ps7'
#' , 'install1', 'install2', 'install3', 'other'
#' )
#' , size = 26 * 4
#' , replace = TRUE
#' )
#' , stringsAsFactors = FALSE
#' )
#'
#' asd$path1 <- makepath(
#' groupcol = asd$id
#' , pathcol = asd$service
#' , n.cores = 1
#' )
#' asd$path2 <- makepath(
#' groupcol = asd$id
#' , pathcol = asd$service
#' , subset = TRUE
#' , keepvalues = c('ps1', 'ps2', 'ps3')
#' , n.cores = 1
#' )
#' asd$path3 <- makepath(
#' groupcol = asd$id
#' , pathcol = asd$service
#' , subset = TRUE
#' , keepvalues = c('ps1', 'ps2', 'ps3')
#' , ordered = FALSE
#' , n.cores = 1
#' )
#' asd$path4 <- makepath(
#' groupcol = asd$id
#' , pathcol = asd$service
#' , subset = TRUE
#' , keepvalues = c('ps1', 'ps2', 'ps3')
#' , ordered = FALSE
#' , keepconsec = TRUE
#' , n.cores = 1
#' )
#'
#' asd
#'
#' @rdname makepath
#' @export
makepath <- function(
groupcol
, pathcol
, sep = '-'
, subset = FALSE
, keepvalues
, ordered = TRUE
, keepconsec = TRUE
, n.cores = parallel::detectCores() - 1
){
# group each person's obs together
personH <- hashcol(groupcol, n.cores)
pathVec <- function(KeyX, subset = subset){
# get the persons values
person <- pathcol[ personH[[KeyX]] ]
# subset to keep only the wanted values
if(subset == TRUE){
person <- person[which(person %in% keepvalues)]
}
# order the pathway
if(ordered == FALSE){
person <- sort(person)
}
# remove duplicate steps in the pathway
if(keepconsec == FALSE){
person <- unique(person)
}
## Feature Request: CODE USED FOR TIME ORDERED/SENSITIVE PATHWAYS
## la <- split(test_in, test_in$epi.id)
## for(i in 1:length(la)){
## lal <- la[[i]]
## keep <- rep(TRUE, each = nrow(lal))
## if(nrow(lal) > 1){
## for(j in 2:nrow(lal)){
## if(lal$service[j] == lal$service[j-1]){
## if(lal$clm.from.dt[j] - lal$clm.thru.dt[j-1] < 2){
## keep[j] <- FALSE
## }
## }
## }
## }
## tree <- paste(lal[keep, "service"], collapse = "-")
## lal$serv.tree <- tree
## la[[i]] <- lal
## }
## test_out <- rbind_all(la)
# put together the path value
if(length(person) > 0){
pathVal <- paste0(person, collapse = sep)
} else {
pathVal <- NA
}
# assign the path value to a path variable
pathVal
}
# parallelize it
pathH <- hash::hash(
hash::keys(personH)
, parallel::mclapply(
hash::keys(personH)
, function(X) pathVec(X, subset = subset)
)
)
# assign path values to the path vector
df_path <- data.frame(
id = hash::keys(pathH)
, path = hash::values(pathH)
)
df_groupcol <- data.frame(id = as.character(groupcol))
# return the path vector (while preserving original row order)
df_path$path[match(df_groupcol$id, df_path$id)]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.