#' Results for Irregular Activity Sequence Detection
#'
#' Validates both inputs, splits them into sequences by the time window given
#' in `timetype`, scores every new sequence against the regular sequences and
#' returns the first element of the density-estimation result.
#'
#' @param newseqdf Dataframe of new sequences to be tested: first column time
#' stamps in POSIXct format, second column sequence vector in factor format.
#' @param seqdf Dataframe of regular sequences: first column time
#' stamps in POSIXct format, second column sequence vector in factor format.
#' @param timetype Time window for separating the dataset, i.e. day = '%Y-%m-%d',
#' week = '%Y-%W' and month = '%Y-%m'. Passed to `SeqList()` for both data frames.
#' @param k Maximum length of sequence silhouette.
#' @param beta Score parameter.
#' @param lam Score parameter.
#' @param percentpwr Significance level.
#' @param bndwidth Bandwidth for the KDE (refer to stats::density function for options).
#' @param kern Kernel of choice for the KDE (refer to stats::density function for options).
#'
#' @return A list with the results of the test, showing which SQS have been
#' flagged as irregular (the first element of the `DensEst()` output).
#' @export
#'
#' @details
#' Ensure lam(k-1)k/2<beta to comply with the method constraints.
#'
#' An error is raised when either input is not a two-column data frame whose
#' second column is a factor, when no categories are found, when fewer than two
#' regular sequences are available, or when `newseqdf` yields no sequences.
#' Warnings are raised when only one category is found, when fewer than 14
#' regular sequences are available, and when a regular sequence is not longer
#' than the number of categories.
#'
#' @examples
#' seqdf<-data.frame(Time=c("2020-01-01 00:10:09", "2020-01-01 01:12:34" , "2020-01-02 06:38:09",
#' "2020-01-02 07:21:51"),Cat=as.factor(c('A','B','A','C')))
#' newseqdf<-data.frame(Time=c("2020-01-03 01:30:20", "2020-01-03 04:19:14" , "2020-01-03 06:51:29"),
#' Cat=as.factor(c('A','B','A')))
#' #For daily data:
#' IRASD(newseqdf, seqdf, '%Y-%m-%d',2, 1, 0.5,0.05,'nrd0','gaussian')
IRASD <- function(newseqdf, seqdf, timetype, k, beta, lam, percentpwr, bndwidth, kern) {
  # One validator for both inputs, so each message always names the argument
  # that was actually checked.
  check_input <- function(df, label) {
    # inherits()/is.factor() stay scalar for multi-class objects (tibbles,
    # ordered factors), unlike comparing class() with a string.
    if (!inherits(df, "data.frame")) {
      stop(label, " is not a data frame. Try data.frame()", call. = FALSE)
    }
    if (ncol(df) != 2) {
      stop(label, " does not have two columns. See documentation for instructions.",
           call. = FALSE)
    }
    if (!is.factor(df[[2]])) {
      stop(label, " second column is not a factor. Try as.factor()", call. = FALSE)
    }
    invisible(TRUE)
  }
  check_input(newseqdf, "newseqdf")
  check_input(seqdf, "seqdf")

  # Work on plain data frames so data-frame subclasses are indexed the same way
  # as base data frames from here on.
  newseqdf <- as.data.frame(newseqdf)
  seqdf <- as.data.frame(seqdf)

  # Category alphabet: union of the factor levels of both inputs.
  reglev <- levels(seqdf[[2]])
  newlev <- levels(newseqdf[[2]])
  catvec <- unique(c(reglev, newlev))
  if (length(catvec) == 0) {
    stop('No categories found, check column two of your dataframes and the instructions in the documentation.')
  }
  if (length(catvec) == 1) {
    warning('Only one category found!')
  }

  # Relative frequency of each category among the regular observations.
  # NOTE(review): frequencies are taken from seqdf only, while catvec also
  # contains levels that appear only in newseqdf - confirm the scoring helpers
  # expect this.
  seq1 <- seqdf
  colnames(seq1) <- c('Time', 'Cat')
  probabilities <- seq1 %>%
    dplyr::group_by(Cat) %>%
    dplyr::tally() %>%
    dplyr::mutate(freq = n / sum(n)) %>%
    dplyr::select(freq) %>%
    unlist(use.names = FALSE)

  # Split the regular data by the caller-supplied time window (previously a
  # hard-coded daily format, which ignored `timetype`).
  seqlist <- SeqList(seqdf, timetype)
  if (length(seqlist) < 2) {
    stop('Need more than one regular sequence for analysis.')
  }
  if (length(seqlist) < 14) {
    warning('For a better false positive rate, more regular sequences are needed. Over 30 is preferred. ')
  }

  newseqlist <- SeqList(newseqdf, timetype)
  numtestseq <- length(newseqlist)
  if (numtestseq == 0) {
    stop('No new sequences found to test, check newseqdf.')
  }

  # Number of observations in each regular sequence.
  dimseq <- vapply(seqlist, nrow, integer(1))
  if (!all(dimseq > length(catvec))) {
    warning('Inconsistent results may occur with sequences smaller than the number of categories.')
  }

  # Score the regular sequences, then index the new sequences against the same
  # set of candidate SQS.
  allsqs <- AllSQS(catvec, k)
  sqslist <- SQSList(seqlist, k)
  sqsindex <- SQSIndex(allsqs, sqslist)
  regseqscore <- RegSeqScore(seqlist, allsqs, sqsindex, catvec, probabilities, k, beta, lam)
  newsqslist <- SQSList(newseqlist, k)
  newsqsindex <- SQSIndex(allsqs, newsqslist)

  # One score table per new sequence; column i of newsqsindex belongs to
  # new sequence i.
  scoretable <- lapply(seq_len(numtestseq), function(i) {
    ScoreTable(regseqscore, newseqlist[[i]], seqlist, allsqs, newsqsindex[, i],
               sqsindex, catvec, probabilities, k, beta, lam)
  })

  dataset <- DensEst(scoretable, percentpwr, bndwidth, kern)
  dataset[[1]]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.