#' Calculation of the Regular Sequence Scores
#'
#' @param seqlist List of sequences separated by time window.
#' @param allsqs Matrix containing all the possible SQS for the possible categories.
#' @param sqsindex Index Matrix indicating which sequences have which SQS's.
#' @param catvec Vector of the different categories found in the sequences.
#' @param probabilities Estimated proportion of each category seen in the sequences.
#' @param k Maximum length of sequence silhouette.
#' @param beta Score parameter.
#' @param lam Score parameter.
#'
#' @return Regular sequences score matrix.
#' @export
#'
#' @examples
#' df<-data.frame(Time=c("2020-01-01 00:10:09", "2020-01-01 01:12:34" , "2020-01-02 06:38:09",
#' "2020-01-02 07:21:51"),Cat=c('A','B','A','C'))
#' #For daily data:
#' seqlist<-SeqList(df,'%Y-%m-%d')
#' sqslist<-SQSList(seqlist,2)
#' allsqs<-AllSQS(c('A','B','C'),2)
#' sqsindex<-SQSIndex(allsqs,sqslist)
#' RegSeqScore(seqlist,allsqs,sqsindex,c('A','B','C'),c(0.5,0.25,0.25),2,1,0.5)
RegSeqScore <- function(seqlist, allsqs, sqsindex, catvec, probabilities, k, beta, lam) {
lengthofregset <- length(seqlist)
variablenames <- colnames(sqsindex)
SeqScore <- matrix(NA, dim(sqsindex)[1], dim(sqsindex)[2])
SeqScore1 <- matrix(NA, dim(sqsindex)[1], dim(sqsindex)[2])
SeqScore2 <- matrix(NA, dim(sqsindex)[1], dim(sqsindex)[2])
wt <- 0
wx <- 0
for (i in 1:lengthofregset) {
Newlist <- seqlist
Newlist[[i]] <- NULL
Newlist[[length(Newlist) + 1]] <- seqlist[[i]]
NewIndex <- dplyr::select(sqsindex, variablenames[-i], variablenames[i])
GamFun <- GammaFunction(Newlist, allsqs, NewIndex, catvec, beta, lam)
testlength <- sapply(Newlist, function(x) dim(x)[[1]])[length(Newlist)]
reglength <- sapply(Newlist, function(x) dim(x)[[1]])[-length(Newlist)]
for (j in 1:dim(allsqs)[1]) {
if (all(is.na(GamFun[j, ]))) {
SeqScore[j, i] <- NA
SeqScore1[j, i] <- NA
SeqScore2[j, i] <- NA
} else if (all(GamFun[j, ] == 0)) {
SeqScore[j, i] <- 0
SeqScore1[j, i] <- 0
SeqScore2[j, i] <- 0
} else {
SeqScore[j, i] <- sum(GamFun[j, ], na.rm = T) / (lengthofregset - 1)
if (allsqs[j, 3] == "0") {
k <- 0
} else {
k <- as.numeric(allsqs[j, 3])
}
if (k > 1) {
if (floor((testlength %% (2 * k)) > k)) {
wt <- floor((testlength %% (2 * k)) %% (k))
} else {
wt <- 0
}
v <- sum(1:(k - 1))
if (k > 2) {
for (c in 1:k - 2) {
v[c + 1] <- sum(1:(k - c - 1)) + c * (k - c - 1)
}
}
v[k] <- 0
v1 <- matrix(0, k, k)
M <- matrix(0, k - 1, k - 1)
for (q in 1:(k - 1)) {
M[, q] <- sum(1:q)
v2 <- v
v2[q] <- v[1]
v2[1] <- v[q]
v1[, q] <- v2
}
v2 <- v
v2[k] <- v[1]
v2[1] <- v[k]
v1[, k] <- v2
M1 <- M
M2 <- M
gdata::upperTriangle(M1, diag = T) <- 0
gdata::lowerTriangle(M2, diag = T) <- 0
AB <- M1 - M2 + 2 * t(M1)
AB1 <- cbind(rep(0, dim(AB)[1]), AB)
AB1 <- rbind(rep(0, dim(AB1)[2]), AB1)
Sc <- v1 + AB1
Sc <- beta + lam * Sc
}
if (testlength != 0 & sum(reglength) != 0 & allsqs[j, 1] == allsqs[j, 2] & (testlength - 1) >= k) {
if (k <= 1) {
SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^(min(2, k + 1)) *
(probabilities[which(catvec == allsqs[j, 1])]^(min(2, k + 1))) *
(testlength - k) *
(sum(reglength - k)) * beta
SeqScore2[j, i] <- (testlength - k) * (sum(reglength - k)) * beta / (lengthofregset - 1)
} else {
SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^(min(2, k + 1)) *
(probabilities[which(catvec == allsqs[j, 1])]^(min(2, k + 1))) *
(testlength - k) *
(sum(reglength - k)) * (beta + lam * sum(1:(k - 1)))
SeqScore2[j, i] <- (testlength - k) * (sum(reglength - k)) * (beta + lam * sum(1:(k - 1))) / (lengthofregset - 1)
}
} else if (testlength != 0 & sum(reglength) != 0 & (testlength - 1) >= k) {
if (k == 1) {
SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^2 * probabilities[which(catvec == allsqs[j, 1])] *
probabilities[which(catvec == allsqs[j, 2])] *
floor(testlength / 2) * (sum(floor(reglength / 2))) * beta
SeqScore2[j, i] <- floor(testlength / 2) * (sum(floor(reglength / 2))) * beta / (lengthofregset - 1)
}
if (k > 1) {
wx <- apply(as.data.frame(reglength), 1, function(x) {
if ((x %% (2 * k)) > (k)) {
(x %% (2 * k)) %% (k)
} else {
0
}
})
s1 <- apply(as.data.frame(wx), 1, function(x) {
if (x > 0) {
sum(Sc[1:x, ])
} else {
0
}
})
s2 <- apply(as.data.frame(wx), 1, function(x) {
if (x > 0 & wt > 0) {
sum(Sc[1:x, 1:wt])
} else {
0
}
})
SeqScore1[j, i] <- (1 / (lengthofregset - 1)^2) * length(catvec)^2 * probabilities[which(catvec == allsqs[j, 1])] *
probabilities[which(catvec == allsqs[j, 2])] *
((floor(testlength / (2 * k)) * sum(Sc) + if (wt > 0) {
sum(Sc[, 1:wt])
} else {
0
}) *
(sum((floor(reglength / (2 * k)) * k))) + sum(floor(testlength / (2 * k)) * s1) + sum(s2))
SeqScore2[j, i] <- ((floor(testlength / (2 * k)) * sum(Sc) + if (wt > 0) {
sum(Sc[, 1:wt])
} else {
0
}) *
(sum((floor(reglength / (2 * k)) * k))) + sum(floor(testlength / (2 * k)) * s1) + sum(s2)) / (lengthofregset - 1)
}
} else {
SeqScore1[j, i] <- 0
SeqScore2[j, i] <- 0
}
}
}
}
return(list(SeqScore, SeqScore1, SeqScore2))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.