#' Results for Irregular Activity Sequence Detection with Regular Sequence Score as an Input
#'
#' @param regseqscore Regular sequence scores.
#' @param newseqdf Dataframe of new sequences to be tested: first column time
#' stamps in POSIXct format, second column sequence vector in factor format.
#' @param seqdf Dataframe of regular sequences: first column time
#' stamps in POSIXct format, second column sequence vector in factor format.
#' @param timetype ime window for separating the dataset, i.e. day = '%Y-%m-%d', week = '%Y-%W' and month '%Y-%m'.
#' @param k Maximum length of sequence silhouette.
#' @param beta Score parameter.
#' @param lam Score parameter.
#' @param percentpwr Significance level.
#' @param bndwidth Bandwidth for the KDE (refer to stats::density function for options).
#' @param kern Kernel of choice for the KDE (refer to stats::density function for options).
#'
#' @return A list with the results of the test, showing which SQS have flagged as irregular.
#' @export
#'
#' @details
#' Ensure lam(k-1)k/2<beta to comply with the method constraints.
#'
#' @examples
#' seqdf <- data.frame(Time = c(
#' "2020-01-01 00:10:09", "2020-01-01 01:12:34", "2020-01-02 06:38:09",
#' "2020-01-02 07:21:51"
#' ), Cat = as.factor(c("A", "B", "A", "C")))
#' newseqdf <- data.frame(
#' Time = c("2020-01-03 01:30:20", "2020-01-03 04:19:14", "2020-01-03 06:51:29"),
#' Cat = as.factor(c("A", "B", "A"))
#' )
#' # For daily data:
#' regseqscore <- RegScoreWrapper(seqdf, "%Y-%m-%d", 2, 1, 0.5)
#' IRASD_RegScoreInput(regseqscore, newseqdf, seqdf, "%Y-%m-%d", 2, 1, 0.5, 0.05, "nrd0", "gaussian")
IRASD_RegScoreInput <- function(regseqscore, newseqdf, seqdf, timetype, k, beta, lam, percentpwr, bndwidth, kern) {
if (class(newseqdf) != "data.frame") {
stop("newseqdf is not a data frame. Try data.frame()")
}
if (class(seqdf) != "data.frame") {
stop("seqdf is not a data frame. Try data.frame()")
}
if (dim(seqdf)[2] != 2) {
stop("newseqdf does not have two columns. See documentation for instructions.")
}
if (dim(newseqdf)[2] != 2) {
stop("seqdf does not have two columns. See documentation for instructions.")
}
if (class(seqdf[, 2]) != "factor") {
stop("newseqdf second column is not a factor. Try as.factor()")
}
if (class(newseqdf[, 2]) != "factor") {
stop("seqdf second column is not a factor. Try as.factor()")
}
reglev <- levels(seqdf[, 2])
newlev <- levels(newseqdf[, 2])
catvec <- unique(c(reglev, newlev))
if (length(catvec) == 0) {
stop("No categories found, check column two of your dataframes and the instructions in the documentation.")
}
if (length(catvec) == 1) {
warning("Only one category found!")
}
seq1 <- seqdf
colnames(seq1) <- c("Time", "Cat")
probabilities <- seq1 %>%
dplyr::group_by(Cat) %>%
dplyr::tally() %>%
dplyr::mutate(freq = n / sum(n)) %>%
dplyr::select(freq) %>%
unlist(use.names = FALSE)
seqlist <- SeqList(seqdf, "%Y-%m-%d")
if (length(seqlist) == 1) {
stop("Need more than one regular sequence for analysis.")
}
if (length(seqlist) < 14) {
warning("For a better false positive rate, more regular sequences are needed. Over 30 is preferred. ")
}
newseqlist <- SeqList(newseqdf, "%Y-%m-%d")
dimseq <- sapply(seqlist, function(x) dim(x)[1])
dimnewseq <- sapply(newseqlist, function(x) dim(x)[1])
if (!all(dimseq > length(catvec))) {
warning("Inconsistent results may occur with sequences smaller than the number of categories.")
}
numtestseq <- length(newseqlist)
allsqs <- AllSQS(catvec, k)
sqslist <- SQSList(seqlist, k)
sqsindex <- SQSIndex(allsqs, sqslist)
newsqslist <- SQSList(newseqlist, k)
newsqsindex <- SQSIndex(allsqs, newsqslist)
scoretable <- vector("list", numtestseq)
for (i in 1:numtestseq) {
scoretable[[i]] <- ScoreTable(regseqscore, newseqlist[[i]], seqlist, allsqs, newsqsindex[, i], sqsindex, catvec, probabilities, k, beta, lam)
}
dataset <- DensEst(scoretable, percentpwr, bndwidth, kern)
dataset <- dataset[[1]]
return(dataset)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.