R/DenEst.R

Defines functions DensEst

Documented in DensEst

#' Multiple Hypothesis Test using KDE: Finding the p-values.
#'
#' @param ScoreTables Table of scores for each new sequence. Must be in list format.
#' @param percentpwr Significance level.
#' @param bndwidth Bandwidth for the KDE.
#' @param kern Kernel of choice for the KDE.
#'
#' @return P-values for the multiple hypothesis test.
#' @export
#'
#' @examples
#' df<-data.frame(Time=c("2020-01-01 00:10:09", "2020-01-01 01:12:34" , "2020-01-02 06:38:09",
#' "2020-01-02 07:21:51"),Cat=c('A','B','A','C'))
#' newdf<-data.frame(Time=c("2020-01-03 01:30:20", "2020-01-03 04:19:14" , "2020-01-03 06:51:29"),
#' Cat=c('A','B','A'))
#' #For daily data:
#' seqlist<-SeqList(df,'%Y-%m-%d')
#' sqslist<-SQSList(seqlist,2)
#' newseqlist<-SeqList(newdf,'%Y-%m-%d')
#' newsqslist<-SQSList(newseqlist,2)
#' allsqs<-AllSQS(c('A','B','C'),2)
#' sqsindex<-SQSIndex(allsqs,sqslist)
#' newsqsindex<-SQSIndex(allsqs,newsqslist)
#' regseqscore<-RegSeqScore(seqlist,allsqs,sqsindex,c('A','B','C'),c(0.5,0.25,0.25),2,1,0.5)
#' ScoreTables<-ScoreTable(regseqscore, newseqlist[[1]], seqlist, allsqs, newsqsindex, sqsindex,
#' c('A','B','C'),c(0.5,0.25,0.25), 2, 1, 0.5)
#' DensEst(list(ScoreTables),0.05,'nrd0','gaussian')
DensEst <- function(ScoreTables, percentpwr, bndwidth, kern) {
  tryCatch(stats::density(stats::rnorm(100),bw=bndwidth,kernel=kern),
           error=function(e) print('Bandwidth and/or kernel chosen are incorrect. Check stats::density() for other options.'))
  numtestseq <- length(ScoreTables)
  pvals1 <- vector("list", numtestseq)
  pvals <- vector("list", numtestseq)
  irregcount <- vector("list", numtestseq)
  for (j in 1:numtestseq) {
    newtabl <- dplyr::select(as.data.frame(ScoreTables[[j]]), dplyr::last_col(offset = 2), Difference, SQS)
    regtable <- dplyr::select(ScoreTables[[j]], -dplyr::last_col(offset = 2))

    quant1 <- dplyr::select(regtable, -Difference, -SQS)
    quant2 <- dplyr::select(newtabl, -Difference, -SQS)

    pval <- rep(NA, dim(quant1)[1])

    for (i in 1:dim(quant1)[1]) {
      if (all(is.na(quant1[i, ])) & is.na(quant2[i, 1])) {
        pval[i] <- NA
      } else if (all(is.na(quant1[i, ])) & !is.na(quant2[i, 1])) {
        pval[i] <- 0
      } else {
        ex <- stats::density(as.numeric(quant1[i, ]), bw = bndwidth, kernel = kern, na.rm = T)
        cdf <- spatstat::CDF(ex)
        pval[i] <- cdf(quant2[i, 1])
      }
    }
    notna <- sum(!is.na(pval))
    pvals[[j]] <- cbind(pval, 1:dim(quant1)[1])

    pvals1[[j]] <- cbind(pvals[[j]][order(pvals[[j]][, 1]), ], 1:dim(quant1)[1])

    pvals2 <- apply(pvals1[[j]], 1, function(x) x[1] < (x[3] * percentpwr) / notna)

    counts <- dplyr::mutate(dplyr::select(newtabl, SQS), lowcount = rep(0, dim(quant1)[1]))

    num <- which(pvals2 == 1)
    num1 <- which(is.na(pvals2))
    counts[pvals1[[j]][num, 2], 2] <- 1
    counts[pvals1[[j]][num1, 2], 2] <- NA

    countsum <- as.data.frame(t(c("SQSSum", sum(as.numeric(counts[, 2]), na.rm = T))))
    colnames(countsum) <- c("SQS", "Flagged Irregular")

    colnames(counts) <- c("SQS", "Flagged Irregular")
    irregcount[[j]] <- as.data.frame(rbind(counts, countsum))
  }
  return(list(irregcount, pvals1))
}
jgillam13/IRASD documentation built on Feb. 10, 2021, 9:38 a.m.