R/IRASD_RegScoreInput.R

Defines functions IRASD_RegScoreInput

Documented in IRASD_RegScoreInput

#' Results for Irregular Activity Sequence Detection with Regular Sequence Score as an Input
#'
#' @param regseqscore Regular sequence scores.
#' @param newseqdf Dataframe of new sequences to be tested: first column time
#' stamps in POSIXct format, second column sequence vector in factor format.
#' @param seqdf Dataframe of regular sequences: first column time
#' stamps in POSIXct format, second column sequence vector in factor format.
#' @param timetype ime window for separating the dataset, i.e. day = '%Y-%m-%d', week = '%Y-%W' and month '%Y-%m'.
#' @param k Maximum length of sequence silhouette.
#' @param beta Score parameter.
#' @param lam Score parameter.
#' @param percentpwr Significance level.
#' @param bndwidth Bandwidth for the KDE (refer to stats::density function for options).
#' @param kern Kernel of choice for the KDE (refer to stats::density function for options).
#'
#' @return A list with the results of the test, showing which SQS have flagged as irregular.
#' @export
#'
#' @details
#' Ensure lam(k-1)k/2<beta to comply with the method constraints.
#'
#' @examples
#' seqdf <- data.frame(Time = c(
#'   "2020-01-01 00:10:09", "2020-01-01 01:12:34", "2020-01-02 06:38:09",
#'   "2020-01-02 07:21:51"
#' ), Cat = as.factor(c("A", "B", "A", "C")))
#' newseqdf <- data.frame(
#'   Time = c("2020-01-03 01:30:20", "2020-01-03 04:19:14", "2020-01-03 06:51:29"),
#'   Cat = as.factor(c("A", "B", "A"))
#' )
#' # For daily data:
#' regseqscore <- RegScoreWrapper(seqdf, "%Y-%m-%d", 2, 1, 0.5)
#' IRASD_RegScoreInput(regseqscore, newseqdf, seqdf, "%Y-%m-%d", 2, 1, 0.5, 0.05, "nrd0", "gaussian")
IRASD_RegScoreInput <- function(regseqscore, newseqdf, seqdf, timetype, k, beta, lam, percentpwr, bndwidth, kern) {
  if (class(newseqdf) != "data.frame") {
    stop("newseqdf is not a data frame. Try data.frame()")
  }
  if (class(seqdf) != "data.frame") {
    stop("seqdf is not a data frame. Try data.frame()")
  }
  if (dim(seqdf)[2] != 2) {
    stop("newseqdf does not have two columns. See documentation for instructions.")
  }
  if (dim(newseqdf)[2] != 2) {
    stop("seqdf does not have two columns. See documentation for instructions.")
  }
  if (class(seqdf[, 2]) != "factor") {
    stop("newseqdf second column is not a factor. Try as.factor()")
  }
  if (class(newseqdf[, 2]) != "factor") {
    stop("seqdf second column is not a factor. Try as.factor()")
  }
  reglev <- levels(seqdf[, 2])
  newlev <- levels(newseqdf[, 2])
  catvec <- unique(c(reglev, newlev))
  if (length(catvec) == 0) {
    stop("No categories found, check column two of your dataframes and the instructions in the documentation.")
  }
  if (length(catvec) == 1) {
    warning("Only one category found!")
  }
  seq1 <- seqdf
  colnames(seq1) <- c("Time", "Cat")
  probabilities <- seq1 %>%
    dplyr::group_by(Cat) %>%
    dplyr::tally() %>%
    dplyr::mutate(freq = n / sum(n)) %>%
    dplyr::select(freq) %>%
    unlist(use.names = FALSE)
  seqlist <- SeqList(seqdf, "%Y-%m-%d")
  if (length(seqlist) == 1) {
    stop("Need more than one regular sequence for analysis.")
  }
  if (length(seqlist) < 14) {
    warning("For a better false positive rate, more regular sequences are needed. Over 30 is preferred. ")
  }
  newseqlist <- SeqList(newseqdf, "%Y-%m-%d")
  dimseq <- sapply(seqlist, function(x) dim(x)[1])
  dimnewseq <- sapply(newseqlist, function(x) dim(x)[1])
  if (!all(dimseq > length(catvec))) {
    warning("Inconsistent results may occur with sequences smaller than the number of categories.")
  }

  numtestseq <- length(newseqlist)
  allsqs <- AllSQS(catvec, k)
  sqslist <- SQSList(seqlist, k)
  sqsindex <- SQSIndex(allsqs, sqslist)
  newsqslist <- SQSList(newseqlist, k)
  newsqsindex <- SQSIndex(allsqs, newsqslist)
  scoretable <- vector("list", numtestseq)
  for (i in 1:numtestseq) {
    scoretable[[i]] <- ScoreTable(regseqscore, newseqlist[[i]], seqlist, allsqs, newsqsindex[, i], sqsindex, catvec, probabilities, k, beta, lam)
  }

  dataset <- DensEst(scoretable, percentpwr, bndwidth, kern)
  dataset <- dataset[[1]]

  return(dataset)
}
jgillam13/IRASD documentation built on Feb. 10, 2021, 9:38 a.m.