#' Multiple Hypothesis Test using KDE: Finding the p-values.
#'
#' @param ScoreTables Table of scores for each new sequence. Must be in list format.
#' @param percentpwr Significance level.
#' @param bndwidth Bandwidth for the KDE.
#' @param kern Kernel of choice for the KDE.
#'
#' @return P-values for the multiple hypothesis test.
#' @export
#'
#' @examples
#' df<-data.frame(Time=c("2020-01-01 00:10:09", "2020-01-01 01:12:34" , "2020-01-02 06:38:09",
#' "2020-01-02 07:21:51"),Cat=c('A','B','A','C'))
#' newdf<-data.frame(Time=c("2020-01-03 01:30:20", "2020-01-03 04:19:14" , "2020-01-03 06:51:29"),
#' Cat=c('A','B','A'))
#' #For daily data:
#' seqlist<-SeqList(df,'%Y-%m-%d')
#' sqslist<-SQSList(seqlist,2)
#' newseqlist<-SeqList(newdf,'%Y-%m-%d')
#' newsqslist<-SQSList(newseqlist,2)
#' allsqs<-AllSQS(c('A','B','C'),2)
#' sqsindex<-SQSIndex(allsqs,sqslist)
#' newsqsindex<-SQSIndex(allsqs,newsqslist)
#' regseqscore<-RegSeqScore(seqlist,allsqs,sqsindex,c('A','B','C'),c(0.5,0.25,0.25),2,1,0.5)
#' ScoreTables<-ScoreTable(regseqscore, newseqlist[[1]], seqlist, allsqs, newsqsindex, sqsindex,
#' c('A','B','C'),c(0.5,0.25,0.25), 2, 1, 0.5)
#' DensEst(list(ScoreTables),0.05,'nrd0','gaussian')
DensEst <- function(ScoreTables, percentpwr, bndwidth, kern) {
tryCatch(stats::density(stats::rnorm(100),bw=bndwidth,kernel=kern),
error=function(e) print('Bandwidth and/or kernel chosen are incorrect. Check stats::density() for other options.'))
numtestseq <- length(ScoreTables)
pvals1 <- vector("list", numtestseq)
pvals <- vector("list", numtestseq)
irregcount <- vector("list", numtestseq)
for (j in 1:numtestseq) {
newtabl <- dplyr::select(as.data.frame(ScoreTables[[j]]), dplyr::last_col(offset = 2), Difference, SQS)
regtable <- dplyr::select(ScoreTables[[j]], -dplyr::last_col(offset = 2))
quant1 <- dplyr::select(regtable, -Difference, -SQS)
quant2 <- dplyr::select(newtabl, -Difference, -SQS)
pval <- rep(NA, dim(quant1)[1])
for (i in 1:dim(quant1)[1]) {
if (all(is.na(quant1[i, ])) & is.na(quant2[i, 1])) {
pval[i] <- NA
} else if (all(is.na(quant1[i, ])) & !is.na(quant2[i, 1])) {
pval[i] <- 0
} else {
ex <- stats::density(as.numeric(quant1[i, ]), bw = bndwidth, kernel = kern, na.rm = T)
cdf <- spatstat::CDF(ex)
pval[i] <- cdf(quant2[i, 1])
}
}
notna <- sum(!is.na(pval))
pvals[[j]] <- cbind(pval, 1:dim(quant1)[1])
pvals1[[j]] <- cbind(pvals[[j]][order(pvals[[j]][, 1]), ], 1:dim(quant1)[1])
pvals2 <- apply(pvals1[[j]], 1, function(x) x[1] < (x[3] * percentpwr) / notna)
counts <- dplyr::mutate(dplyr::select(newtabl, SQS), lowcount = rep(0, dim(quant1)[1]))
num <- which(pvals2 == 1)
num1 <- which(is.na(pvals2))
counts[pvals1[[j]][num, 2], 2] <- 1
counts[pvals1[[j]][num1, 2], 2] <- NA
countsum <- as.data.frame(t(c("SQSSum", sum(as.numeric(counts[, 2]), na.rm = T))))
colnames(countsum) <- c("SQS", "Flagged Irregular")
colnames(counts) <- c("SQS", "Flagged Irregular")
irregcount[[j]] <- as.data.frame(rbind(counts, countsum))
}
return(list(irregcount, pvals1))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.