R/RegSeqScore.R

Defines functions RegSeqScore

Documented in RegSeqScore

#' Calculation of the Regular Sequence Scores
#'
#' @param seqlist List of sequences separated by time window.
#' @param allsqs Matrix containing all the possible SQS for the possible categories.
#' @param sqsindex Index Matrix indicating which sequences have which SQS's.
#' @param catvec Vector of the different categories found in the sequences.
#' @param probabilities Estimated proportion of each category seen in the sequences.
#' @param k Maximum length of sequence silhouette.
#' @param beta Score parameter.
#' @param lam Score parameter.
#'
#' @return Regular sequences score matrix.
#' @export
#'
#' @examples
#' df<-data.frame(Time=c("2020-01-01 00:10:09", "2020-01-01 01:12:34" , "2020-01-02 06:38:09",
#' "2020-01-02 07:21:51"),Cat=c('A','B','A','C'))
#' #For daily data:
#' seqlist<-SeqList(df,'%Y-%m-%d')
#' sqslist<-SQSList(seqlist,2)
#' allsqs<-AllSQS(c('A','B','C'),2)
#' sqsindex<-SQSIndex(allsqs,sqslist)
#' RegSeqScore(seqlist,allsqs,sqsindex,c('A','B','C'),c(0.5,0.25,0.25),2,1,0.5)
RegSeqScore <- function(seqlist, allsqs, sqsindex, catvec, probabilities, k, beta, lam) {
  lengthofregset <- length(seqlist)
  variablenames <- colnames(sqsindex)
  SeqScore <- matrix(NA, dim(sqsindex)[1], dim(sqsindex)[2])
  SeqScore1 <- matrix(NA, dim(sqsindex)[1], dim(sqsindex)[2])
  SeqScore2 <- matrix(NA, dim(sqsindex)[1], dim(sqsindex)[2])
  wt <- 0
  wx <- 0
  for (i in 1:lengthofregset) {
    Newlist <- seqlist
    Newlist[[i]] <- NULL
    Newlist[[length(Newlist) + 1]] <- seqlist[[i]]
    NewIndex <- dplyr::select(sqsindex, variablenames[-i], variablenames[i])
    GamFun <- GammaFunction(Newlist, allsqs, NewIndex, catvec, beta, lam)
    testlength <- sapply(Newlist, function(x) dim(x)[[1]])[length(Newlist)]
    reglength <- sapply(Newlist, function(x) dim(x)[[1]])[-length(Newlist)]
    for (j in 1:dim(allsqs)[1]) {
      if (all(is.na(GamFun[j, ]))) {
        SeqScore[j, i] <- NA
        SeqScore1[j, i] <- NA
        SeqScore2[j, i] <- NA
      } else if (all(GamFun[j, ] == 0)) {
        SeqScore[j, i] <- 0
        SeqScore1[j, i] <- 0
        SeqScore2[j, i] <- 0
      } else {
        SeqScore[j, i] <- sum(GamFun[j, ], na.rm = T) / (lengthofregset - 1)
        if (allsqs[j, 3] == "0") {
          k <- 0
        } else {
          k <- as.numeric(allsqs[j, 3])
        }

        if (k > 1) {
          if (floor((testlength %% (2 * k)) > k)) {
            wt <- floor((testlength %% (2 * k)) %% (k))
          } else {
            wt <- 0
          }

          v <- sum(1:(k - 1))
          if (k > 2) {
            for (c in 1:k - 2) {
              v[c + 1] <- sum(1:(k - c - 1)) + c * (k - c - 1)
            }
          }
          v[k] <- 0

          v1 <- matrix(0, k, k)
          M <- matrix(0, k - 1, k - 1)
          for (q in 1:(k - 1)) {
            M[, q] <- sum(1:q)
            v2 <- v
            v2[q] <- v[1]
            v2[1] <- v[q]
            v1[, q] <- v2
          }
          v2 <- v
          v2[k] <- v[1]
          v2[1] <- v[k]
          v1[, k] <- v2
          M1 <- M
          M2 <- M
          gdata::upperTriangle(M1, diag = T) <- 0
          gdata::lowerTriangle(M2, diag = T) <- 0

          AB <- M1 - M2 + 2 * t(M1)
          AB1 <- cbind(rep(0, dim(AB)[1]), AB)
          AB1 <- rbind(rep(0, dim(AB1)[2]), AB1)

          Sc <- v1 + AB1
          Sc <- beta + lam * Sc
        }
        if (testlength != 0 & sum(reglength) != 0 & allsqs[j, 1] == allsqs[j, 2] & (testlength - 1) >= k) {
          if (k <= 1) {
            SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^(min(2, k + 1)) *
              (probabilities[which(catvec == allsqs[j, 1])]^(min(2, k + 1))) *
              (testlength - k) *
              (sum(reglength - k)) * beta

            SeqScore2[j, i] <- (testlength - k) * (sum(reglength - k)) * beta / (lengthofregset - 1)
          } else {
            SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^(min(2, k + 1)) *
              (probabilities[which(catvec == allsqs[j, 1])]^(min(2, k + 1))) *
              (testlength - k) *
              (sum(reglength - k)) * (beta + lam * sum(1:(k - 1)))

            SeqScore2[j, i] <- (testlength - k) * (sum(reglength - k)) * (beta + lam * sum(1:(k - 1))) / (lengthofregset - 1)
          }
        } else if (testlength != 0 & sum(reglength) != 0 & (testlength - 1) >= k) {
          if (k == 1) {
            SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^2 * probabilities[which(catvec == allsqs[j, 1])] *
              probabilities[which(catvec == allsqs[j, 2])] *
              floor(testlength / 2) * (sum(floor(reglength / 2))) * beta

            SeqScore2[j, i] <- floor(testlength / 2) * (sum(floor(reglength / 2))) * beta / (lengthofregset - 1)
          }
          if (k > 1) {
            wx <- apply(as.data.frame(reglength), 1, function(x) {
              if ((x %% (2 * k)) > (k)) {
                (x %% (2 * k)) %% (k)
              } else {
                0
              }
            })
            s1 <- apply(as.data.frame(wx), 1, function(x) {
              if (x > 0) {
                sum(Sc[1:x, ])
              } else {
                0
              }
            })
            s2 <- apply(as.data.frame(wx), 1, function(x) {
              if (x > 0 & wt > 0) {
                sum(Sc[1:x, 1:wt])
              } else {
                0
              }
            })

            SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^2 * probabilities[which(catvec == allsqs[j, 1])] *
              probabilities[which(catvec == allsqs[j, 2])] *
              ((floor(testlength / (2 * k)) * sum(Sc) + if (wt > 0) {
                sum(Sc[, 1:wt])
              } else {
                0
              }) *
                (sum((floor(reglength / (2 * k)) * k))) + sum(floor(testlength / (2 * k)) * s1) + sum(s2))

            SeqScore2[j, i] <- ((floor(testlength / (2 * k)) * sum(Sc) + if (wt > 0) {
              sum(Sc[, 1:wt])
            } else {
              0
            }) *
              (sum((floor(reglength / (2 * k)) * k))) + sum(floor(testlength / (2 * k)) * s1) + sum(s2)) / (lengthofregset - 1)
          }
        } else {
          SeqScore1[j, i] <- 0
          SeqScore2[j, i] <- 0
        }
      }
    }
  }
  return(list(SeqScore, SeqScore1, SeqScore2))
}
jgillam13/IRASD documentation built on Feb. 10, 2021, 9:38 a.m.