Nothing
# '''
# 1. This file contains functions that are used for supporting the other
# functions in the package.
# 2. It is worth noting that none of the following functions are available
# to the user directly, apart from the last one, `create_sequence()`.
# 3. For the user to gain access to these functions, he has to use
# dsmmR:::foo()
# '''
# ==============================================================================
# Check for equality.
# ==============================================================================
all_equal <- function(obj1, obj2) isTRUE(all.equal(obj1, obj2))
all_equal_numeric <- function(obj1, obj2, tol = sqrt(.Machine$double.eps))
all(abs(obj1 - obj2) < tol)
all_equal_numeric_vector <- function(obj1, obj2,
tol = sqrt(.Machine$double.eps))
abs(obj1 - obj2) < tol
# ==============================================================================
# Check for types and classes of objects.
# ==============================================================================
is_vector <- function(obj)
is.vector(obj) && !is.list(obj) && !is.expression(obj)
is_logical <- function(obj) {
# '''
# Returns TRUE or FALSE if `obj` is a TRUE or FALSE logical vector
# with ___length 1____.
# '''
is_vector(obj) && (length(obj) == 1) && (isTRUE(obj) || isFALSE(obj))
}
is_number <- function(obj) {
# '''
# Checks if `obj` is a vector of numbers with length = 1.
# '''
is_vector(obj) && (length(obj) == 1) && is.numeric(obj) &&
!is.logical(obj) && is.finite(obj) && !is.nan(obj)
}
is_double <- function(obj) {
# '''
# Checks if `obj` is a double vector with length = 1.
# '''
is_number(obj) && is.double(obj)
}
is_double_vector <- function(obj) # Used in `valid_initial_dist()`.
is_vector(obj) && is.double(obj) && !is_number(obj)
is_double_matrix <- function(obj) {
# '''
# This is used in `is_fdist_parametric` for checking `params` array.
# The `params` array should contain NA values.
# '''
is.matrix(obj) && !any(is.nan(obj)) &&
!any(is.infinite(obj)) && all(is.double(obj))
}
is_double_array <- function(obj) {
# '''
# This is used in `is_fdist_parametric` for checking `params` array.
# The `params` array should contain NA values.
# '''
is.array(obj) && !is.matrix(obj) && !any(is.nan(obj)) &&
!any(is.infinite(obj)) && all(is.double(obj))
}
is_integer <- function(obj) {
# '''
# Checks if `obj` is a number that can be represented as an integer.
# E.g. for the double 10 not equal to the integer 10L, it returns TRUE.
# '''
is_number(obj) && obj > 0 && obj - as.integer(obj) == 0
}
is_integer_vector <- function(obj) # used in `valid_soj_times()`.
is_vector(obj) && all(sapply(obj, is_integer))
is_prob <- function(prob) {
# '''
# This function checks whether 'obj' is `sufficiently close`
# to the domain [0, 1]
# (`sufficiently` is with accordance to `sqrt(.Machine$double.eps)`.
# E.g. 0.00001, 1.00000001, -0.00000001, 0 and 1 all return TRUE, but
# -0.0000001 returns FALSE (not `sufficiently` far from 0).
# '''
!( # The opposite of the below is a valid way to define a probability.
( (!all_equal_numeric(min <- min(prob), 0)) && min < 0) ||
# `prob` not close to 0 & less than 0 OR
( (!all_equal_numeric(max <- max(prob), 1)) && max > 1)
# `prob` not close to 1 & more than 1
)
}
is_prob_vector <- function(prob) {
# '''
# This function checks whether 'obj' is `sufficiently close`
# to the domain [0, 1]
# (`sufficiently` is with accordance to `sqrt(.Machine$double.eps)`.
# E.g. 0.00001, 1.00000001, -0.00000001, 0 and 1 all return TRUE, but
# -0.0000001 returns FALSE (not `sufficiently` far from 0).
# '''
!( # The opposite of the below is a valid way to define a probability.
( (!all_equal_numeric_vector(prob, 0)) & prob < 0) |
# `prob` not close to 0 & less than 0 OR
( (!all_equal_numeric_vector(prob, 1)) & prob > 1)
# `prob` not close to 1 & more than 1
)
}
# ==============================================================================
# Check for the valid use of the different arguments that the user can input
# ==============================================================================
valid_sequence <- function(sequence, s, seq_id = NULL) {
# '''
# Used in `fit_dsmm`.
# '''
warning_message = ''
if (!is.null(seq_id)) warning_message <- paste0("In the list `sequences`, ",
"for the sequence numbered (",
seq_id,
"), this warning occured:")
if (!is.character(sequence)){
stop(paste0(warning_message,
"\nThe `sequence` argument should be a character vector."))
} else if (!missing(s) && length(unique(sequence)) < s) {
# check for (very) short sequence.
stop(paste0(warning_message,
"\nThe `sequence` argument should have more than s = ",
s, " values."))
}
TRUE
}
valid_seq <- function(emc) {
# '''
# This function is for checking the correct use of the 'trimmed'
# sequence that has all sojourn times equal to 1. This is not
# about the correct use of the original sequence given e.g. for
# `fit_dsmm`.
# Intended for use in the function `is.dsmm`.
# '''
if (!is.character(emc)){
stop("\n`emc` argument should be a character vector.")
} else if (!all_equal(emc, rle(emc)$values)) {
# check for emc not having any recurrent states.
stop("\n`emc` argument should have all sojourn times equal to 1.")
}
TRUE
}
valid_soj_times <- function(soj_times, length_seq) {
# '''
# Intended for use in `is.dsmm`.
# '''
if (!is_integer_vector(soj_times)) {
stop("\nAttribute `soj_times` should be an integer vector.")
} else if (!all_equal_numeric(length(soj_times), length_seq)) {
stop("\nAttribute `soj_times` should have length equal to that of ",
"attribute `emc`.")
}
TRUE
}
valid_k_max <- function(k_max, soj_times) {
# '''
# This functions checks for correct behavior of the `k_max` parameter.
# Intended for use in `fit.dsmm`, `is.dsmm` & `nonparametric_dsmm`.
# '''
if (is_integer(k_max)) {
if (k_max == max(soj_times)) {
return(TRUE)
} else {
stop("\nThe maximum sojourn time attribute `k_max` should be",
" an integer,\nequal to the maximum of `soj_times`.")
}
}
stop("\nThe maximum sojourn time `k_max` should be an integer.")
}
valid_model_size <- function(model_size, length_seq) {
# '''
# Intended for use in `is.dsmm`.
# `length_seq` is with regards to the sequence without the
# sojourn times, e.g. from the `rle()`function.
# '''
if (is_integer(model_size)) {
if (!all_equal_numeric(model_size, length_seq - 1)) {
stop("\n`model_size` should be equal to the length of `emc`.")
}
}
TRUE
}
valid_states <- function(states) {
# '''
# This functions checks for correct behavior of the `states` parameter.
# '''
if (!is.character(states) ||
!is_vector(states) ||
!all_equal(states, unique(states))) {
stop("\nThe `states` of the state space should be a character vector",
" of unique values.")
} else if (length(states) == 1L) {
# Check for a single state.
stop("\nState space `states` should have more than one states.")
} else if (!all_equal(states, unique(states))) {
# Check for uniqueness of states.
stop("\nThe state space given in `states` contains duplicate values.")
} else if(min(prob <- (sapply(strsplit(
x = gsub(" ", "", states), split = ""), length))) == 0) {
# Check for a state equal to an empty string "" or `character(0)`.
stop("\nState space `states` includes a state without a given name,\n",
"at position(s): ", which(prob == 0))
}
TRUE
}
valid_state <- function(state, states) {
# '''
# Intended for use in `get_kernel`.
# '''
if (!is.character(state)) {
stop("\nAttribute `state` should be a character.")
}
stopifnot(valid_states(states))
if (!state %in% states) {
stop("\nState `", state, "` is not included in the state",
" space that is given,\nE = (", paste(states, collapse = ', '),
").")
}
TRUE
}
valid_length_states <- function(s, states) {
# '''
# Intended for use in `is.dsmm`.
# '''
if (!(is_integer(s) && s == (lstates <- length(states)))) {
stop("\nAttribute `s` = ", s,
" should be an integer,\nequal to the length of `states` = ",
lstates)
}
TRUE
}
valid_degree <- function(degree, model_size) {
# '''
# This functions checks for correct behaviour of the `degree` parameter.
# '''
if (!is_integer(degree)) {
stop("\nThe the polynomial `degree` should be a positive integer.")
}
if (!missing(model_size)) {
if (degree > model_size) {
stop("\nThe polynomial `degree` = ", degree,
" should not be larger than the model size = ", model_size,
",\nwhich is the length of the EMC of the sequence, minus 1.",
"\nSee `Definition` section at ?dsmmR for more information.")
}
}
TRUE
}
valid_initial_dist <- function(initial_dist, s) {
# '''
# This functions checks for correct behaviour of the `initial_dist`
# parameter.
# '''
if (!is_double_vector(initial_dist)) {
stop("\nThe `initial_dist` should be a vector of 'double' ",
"numeric values.")
} else if ((n_init <- length(initial_dist)) != s) {
stop("\nThe `initial_dist` has ", n_init, " values and it ",
"should be equal",
" to the number of `states` given, ", s, ".")
} else if (!is_prob(initial_dist)) {
stop("\nThe `initial_dist` should contain numeric values ",
"between 0 and 1.")
} else if (!sum(initial_dist) == 1) {
stop("\nThe sum of the `initial_dist` should be exactly ",
"equal to 1.")
}
TRUE
}
valid_initial_dist_char <- function(obj) {
# '''
# Returns TRUE or FALSE if `obj` is a single character in the list
# ['unif', 'freq'].
# Used in `fit_dsmm()`.
# '''
if (!(is.character(obj) && length(obj) == 1 &&
(obj == "unif" || obj == "freq"))) {
stop('\n`initial_dist` should be either "unif" or "freq".')
}
TRUE
}
valid_model <- function(p_is_drifting, f_is_drifting) {
# '''
# Intended for use in `is.dsmm_fit` & `fit_dsmm`, as well as
# `dsmm_parametric`, `dsmm_nonparametric`,
# `is.dsmm_parametric` and `is.dsmm_nonparametric`
# '''
if (!p_is_drifting && !f_is_drifting) {
# Neither p or f are drifting.
stop("\nAt least p or f should be drifting,\nto use a drifting ",
"semi-Markov model for the estimation.\n",
"Otherwise, a semi-Markov model should be used.")
}
TRUE
}
valid_estimation <- function(estimation, fpar, s, f_is_drifting, degree,
states) {
# '''
# This function is used in `fit_dsmm` with the purpose of
# checking whether the attributes `estimation` and `fpar`
# correspond to either "nonparametric" and NULL
# or to "nonparametric" and `f_parametric`.
# '''
if (!is.character(estimation) ||
(estimation != "nonparametric" && estimation != "parametric")) {
stop('\nThe `estimation` attribute should be either "nonparametric",\n',
'for the non-parametric estimation, or "parametric" for the\n',
'parametric estimation.')
}
if (estimation == "nonparametric") {
if (!is.null(fpar)) {
stop('\nWhen `estimation = "nonparametric"`, attribute `fpar` ',
'should be NULL')
}
} else if (estimation == "parametric") {
stopifnot(valid_fdist_parametric(fdist = fpar, params = NULL, s = s,
f_is_drifting = f_is_drifting,
degree = degree, states = states))
}
TRUE
}
valid_p_dist <- function(p_dist, s, degree, p_is_drifting, states) {
# '''
# Intended for use in `dsmm_nonparametric` & `dsmm_parametric`.
# The parametric case only concerns the sojourn dime distribution, f.
# In other words, we do not need to separate cases for `valid_p_dist`,
# only the non-parametric case of p is concerned.
# '''
D <- degree + 1L
if (p_is_drifting) {
# Case when p is drifting, a.k.a. `p_is_drifting` = TRUE.
if (!is_double_array(p_dist)) {
stop("\nSince `p_is_drifting` is TRUE,\n`p_dist` should be a ",
"numeric array,\n with dimensions: (s, s, degree + 1).")
} else if (!is_prob(p_dist)) {
non_prob <- which(!is_prob_vector(p_dist))
stop("\nArray `p_drift` contains values that are not ",
"probabilities:\n",
paste0(p_dist[non_prob], collapse = ", "))
} else if (!all_equal((dimension <- dim(p_dist)),
c(s, s, D))) {
stop("\nSince `p_is_drifting` = TRUE,\n`p_dist` should be an ",
"array\n",
"with dimensions: (s, s, degree + 1) = (",
paste(c(s, s, D), collapse = ', '), ")\n",
"when it has dimensions of = (",
paste(dimension, collapse = ', '), ").")
} else if (!all_equal_numeric(
sum(p_dist * array(diag(s), dim = c(s, s, D))), 0)) {
stop("\nThe diagonal values of `p_dist` should all be",
" equal to 0.")
} else if (!all_equal_numeric(
(p_drift_rowsum <- c(sapply(1:D,
function(u) rowSums(p_dist[ , , u])))),
no_diag <- sapply(p_drift_rowsum, function(i) if (i > 0) 1 else 0))
) {
# Case where, for every d + 1 matrix, the sums over v for every u
# are not equal to 1.
logical_vector <- sapply(p_drift_rowsum,
function(u) !all_equal(u, 1))
possible_cases <- paste0("u = ", rep(states, s - 1), ", ",
rep(names_i_d(degree, kernel_name = "p"),
each = s))
stop("\nFor the transition probability matrices",
" contained in `p_dist`,\nthe probabilities of transistioning",
" from previous state `u`,\nover all the possible next states",
" `v`, should be equal to 1,\nfor all the (d + 1) matrices",
" p_(i/d).\nWhat follows are the cases that violate",
" this principle:\n",
paste("\n", possible_cases[logical_vector]))
} else if (any(same <- sapply(1:D, function(d1) {
sapply(1:D, function(d2) {
if (d1 != d2) {
return(all_equal(p_dist[, , d1], p_dist[, , d2]))
}
FALSE
})}))) {
# case where p is NOT drifting, but given as drifting.
which_same <- which(same)
arrays <- array(names_i_d(degree, 'p'), dim = c(D, D))
same_arrays <- arrays[which_same] # always even in number...
odd <- same_arrays[x <- seq(1, length(same_arrays), by = 2)]
even <- same_arrays[x + 1]
same_pairs <- paste0("\n\n", odd, " and ", even, collapse = ',\n ')
stop("\nThe values given in `p_dist`",
" are the same for the arrays:",
same_pairs, ".")
}
} else {
# Case when p is not drifting, a.k.a. `p_is_drifting` = FALSE.
if (!is_double_matrix(p_dist)) {
stop("\nSince `p_is_drifting` = FALSE,\n`p_dist` should be",
" a square matrix with dimensions: (s, s)")
} else if (!is_prob(p_dist)) {
non_prob <- which(!is_prob_vector(p_dist))
stop("\nArray `p_drift` contains values that are not ",
"probabilities:\n",
paste0(p_dist[non_prob], collapse = ", "))
} else if (!all((dimension <- dim(p_dist)) == c(s, s))) {
stop("\nSince `p_is_drifting` = FALSE,\n`p_dist` should be",
" a square matrix\n",
" with dimensions: (s, s) = (",
paste(c(s,s), collapse = ', '), "),\n",
" when it has dimensions = (",
paste(dimension, collapse = ', '), ").")
} else if (!all_equal(sum(p_dist * diag(s)), 0)) {
stop("\nThe diagonal values of `p_dist` should all be equal to 0.")
} else if (!all_equal(as.vector(p_notdrift_rowsum <- rowSums(p_dist)),
rep(1, s))) {
# Case where the sums over v for every u are not equal to 1.
logical_vector <- sapply(p_notdrift_rowsum,
function(u) !all_equal(u, 1))
stop("\nFor the transition probability matrice contained in ",
"`p_dist`,\nthe probabilities of transistioning from",
"previous state `u`,\nover all the possible next states ",
"`v`, should be equal to 1.\nWhat follows are ",
"the states which violate this principle.\n",
paste("\n", states[logical_vector]))
}
}
TRUE
}
# ==============================================================================
# Functions specific to the nonparametric object.
# ==============================================================================
valid_fdist_nonparametric <- function(f_dist, states, s, degree,
f_is_drifting, k_max) {
# '''
# Intended for use in `dsmm_nonparametric`.
# '''
D <- degree + 1L
if (f_is_drifting) {
# Case when f is drifting, a.k.a. `f_is_drifting` == TRUE.
if (!is_double_array(f_dist)) {
stop("\nSince `f_is_drifting` is TRUE,\n`f_dist` should be a ",
"numeric array with dimensions:\n (s, s, k_max, degree + 1).")
} else if (!is_prob(f_dist)) {
non_prob <- which(!is_prob_vector(f_dist))
stop("\nArray `f_drift` contains values that are not ",
"probabilities:\n",
paste0(f_dist[non_prob], collapse = ", "))
} else if (!all((dimension <- dim(f_dist)) == c(s, s, k_max, D))) {
stop("\nSince `f_is_drifting` is TRUE,\n`f_dist` should be an ",
"array with dimensions:\n",
" (s, s, k_max, degree + 1) = (",
paste(c(s, s, k_max, D), collapse = ', '), '),\n',
' when it has dimensions = (', paste(dimension, collapse = ', '),
").\nThese dimensions",
" should correspond to previous states `u`,\n",
"current states `v`, ",
"maximum sojourn time `k_max`\nand degree + 1.")
} else if (!sum(f_dist * array(diag(s),
dim = c(s, s, k_max, D))) == 0) {
# Diagonal values are not equal to 0.
stop("\nThe diagonal values of `f_dist` should all be equal to 0.")
} else if (!all_equal_numeric(
f_drift_l_sum <- apply(f_dist, c(1,2,4), sum),
no_diag <- sapply(f_drift_l_sum,
function(i) if (i > 0) 1 else 0)
# array(array(1, dimension[1:2]) -
# base::diag(dimension[1]),
# dim = dimension[-3]))
)) {
# Sums over l are not equal to 1.
array_names <- sapply(
names_i_d(as.integer(degree), "f"), function(d)
sapply(states, function(v)
sapply(states, function(u)
paste(c(paste("u =", u),
paste("v =", v),
paste("Array =", d)),
collapse = ', '))))
diffs <- sapply(f_drift_l_sum - no_diag, function(diff)
!all_equal_numeric(diff, 0))
stop('\nThe sums over l of `f_dist` are not equal to 1,\nfor the ',
'following cases of u, v and array:\n',
paste0(array_names[diffs], collapse = '\n'))
} else if (any(same <- sapply(1:D, function(d1) {
sapply(1:D, function(d2) {
if (d1 != d2) {
return(all_equal(f_dist[, , , d1], f_dist[, , , d2]))
}
FALSE
})}))) {
# case where f is NOT drifting, but given as drifting.
which_same <- which(same)
arrays <- array(names_i_d(degree, 'f'), dim = c(D, D))
same_arrays <- arrays[which_same] # always even in number...
odd <- same_arrays[x <- seq(1, length(same_arrays), by = 2)]
even <- same_arrays[x + 1]
same_pairs <- paste0("\n\n", odd, " and ", even, collapse = ',\n ')
stop("\nThe values given in `f_dist` are the same for the arrays:",
"\n ", same_pairs, ".")
}
} else {
# Case when f is not drifting, a.k.a. `f_is_drifting` = FALSE.
if (!is_double_array(f_dist)) {
stop("\nSince `f_is_drifting` is FALSE,\n`f_dist` should be a ",
"numeric array with dimensions:\n(s, s, k_max).")
} else if (!is_prob(f_dist)) {
non_prob <- which(!is_prob_vector(f_dist))
stop("\nArray `f_drift` contains values that are not ",
"probabilities:\n",
paste0(f_dist[non_prob], collapse = ", "))
} else if (!all((dimension <- dim(f_dist)) == c(s, s, k_max))) {
stop("\nSince `f_is_drifting` is FALSE,\n`f_dist` should be an ",
"array\n",
" with dimensions: (s, s, k_max) = (",
paste(c(s, s, k_max), collapse = ', '), '),\n',
' when it has dimensions = (',
paste(dimension, collapse = ', '),
").\nThese dimensions",
" should correspond to previous states `u`,\n",
"current states `v`, ",
"and maximum sojourn time `k_max`")
} else if (!all_equal_numeric(sum(f_dist *
array(diag(s), dimension)), 0)) {
stop("\nThe diagonal values of `f_dist` should all be equal to 0.")
} else if (!all_equal_numeric(
f_notdrift_l_sum <- apply(f_dist, c(1,2), sum),
no_diag <- sapply(f_notdrift_l_sum,
function(i) if (i > 0) 1 else 0)
# array(1, dimension[1:2]) - base::diag(dimension[1]))
)) {
array_names <- sapply(states, function(v)
sapply(states, function(u)
paste(c(paste("u =", u),
paste("v =", v)),
collapse = ', ')))
diffs <- sapply(f_notdrift_l_sum - no_diag, function(diff)
!all_equal_numeric(diff, 0))
stop('\nThe sums over l of `f_dist` are not equal to 1 for the ',
'following cases of u and v:\n',
paste0(
array_names[diffs],
collapse = '\n')
)
}
}
TRUE
}
# Specific to the parametric object. -------------------------------------------
# Check the validity of the f distribution given.
valid_fdist_parametric <- function(fdist, params, degree, s, f_is_drifting,
states) {
# '''
# * To be used in `dsmm_parametric` as a check for valid parameters given.
# * This is used before `get_fdist_parametric`.
# * Also used in `fit_dsmm()` for the parametric estimation, for which
# we define a special case when `params` attribute is equal to NULL.
# In this case, we check specifically for when `fdist` is a character
# array.
# '''
# fdist check.
# both drift & nondrift cases.
D <- degree + 1L
if (!is.array(fdist) && !is.character(fdist)) {
stop("\n`f_dist` should be a character array.")
} else if (all(is.na(fdist))) {
stop("\n`f_dist` should not have all if its values equal to NA.")
} else if (!all((unqf <- unique(fdist[!is.na(fdist)])) %in%
(dist_vector <- c('dweibull', 'geom',
'nbinom', 'pois', 'unif')))) {
stop('\n`f_dist` should only contain the following values: \nNA, "',
paste0(dist_vector, collapse = '", "'), '", when it contains:\n',
unqf)
}
# Checking f.
if (f_is_drifting) {
if (!all((tmp_d <- dim(fdist)) == (real_dim <- c(s, s, D)))) {
stop("\nSince `f_is_drifting` = TRUE,",
"\n`f_dist` should be an array\n",
" with dimensions: (s, s, degree + 1) = (",
paste(real_dim, collapse = ', '), ')\n',
' when it has dimensions = (',
paste(tmp_d, collapse = ', '), ").")
} else if (!all(apply(fdist, c(3), function(matrix_uv)
all(is.na(diag(matrix_uv)))))) {
stop("\n`f_dist` should have all the diagonal values equal to NA.")
}
} else {
if (!all((tmp_d <- dim(fdist)) == (real_dim <- c(s, s)))) {
stop("\nSince `f_is_drifting` = FALSE,\n",
"`f_dist` should be a square matrix\n",
" with dimensions: (s, s) = (",
paste(real_dim, collapse = ', '), '),\n',
' when it has dimensions = (',
paste(tmp_d, collapse = ', '), ").")
} else if (!all(is.na(diag(fdist)))) {
stop("\n`f_dist` should have all the diagonal values equal to NA.")
}
}
# Parametric ESTIMATION case - `fit_dsmm()`.
if (is.null(params)) {
return(TRUE) # checks for parameters do not occur.
}
# Parametric OBJECT case - `parametric_dsmm()`.
if (!is_double_array(params)) {
stop("\n`f_dist_parameters` should be a array with double ",
"and NA values.")
} else if (all(is.na(params))) {
stop("\n`f_dist_parameters` should not have all if its values ",
"equal to NA.")
}
# Checking parameters
if (f_is_drifting) {
if (!all((par_tmp_d <- dim(params)) == (par_real_d <- c(s, s, 2L, D)))) {
stop("\nSince `f_is_drifting` = TRUE,\n`f_dist_parameters`",
"should be an array\n",
" with dimensions: (s, s, 2, degree + 1) = (",
paste(par_real_d, collapse = ', '), "),\n",
' when it has dimensions = (',
paste(par_tmp_d, collapse = ', '), ").")
} else if (!all(apply(params, c(3, 4), function(matrix_uvd)
all(is.na(diag(matrix_uvd)))))) {
stop("\n`f_dist_parameters` should have all the diagonal",
" values equal to NA.")
} else if (any(same <- sapply(1:D, function(d1) {
sapply(1:D, function(d2) {
if (d1 != d2) {
return(all_equal(params[, , ,d1], params[, , ,d2]))
}
FALSE
})}))) {
# case where f is NOT drifting, but given as drifting.
which_same <- which(same)
arrays <- array(names_i_d(degree, 'f_pars'), dim = c(D, D))
same_arrays <- arrays[which_same] # always even in number...
odd <- same_arrays[x <- seq(1, length(same_arrays), by = 2)]
even <- same_arrays[x + 1]
same_pairs <- paste0("\n\n", odd, " and ", even, collapse = ',\n ')
stop("\nThe values given in `f_dist_pars`",
" are the same for the arrays:",
same_pairs, ".")
}
# Check for the validity of the parameters, for every i, j, d.
is_valid_f_param <- sapply(1:D, function(d) {
dist_ij <- fdist[,,d]
params_ij <- params[,,,d]
sapply(1:s, function(j) {
dist_i <- dist_ij[, j]
params_i <- params_ij[, j, ]
sapply(1:s, function(i) {
row <- c(dist_i[i], params_i[i,])
valid_parameters(row = row, i = i, j = j, d = d,
degree = degree, states = states)
})
})
})
} else {
if (!all((par_tmp_d <- dim(params)) == (par_real_d <- c(s, s, 2L)))) {
stop("\nSince `f_is_drifting` = FALSE,\n",
"`f_dist_parameters` should be an array\n",
" with dimensions: (s, s, 2) = (",
paste(par_real_d, collapse = ', '),
',\n when it has dimensions = (',
paste(par_tmp_d, collapse = ', '), ").")
} else if (!all(
apply(params, c(3),
function(matrix_uvd) all(is.na(diag(matrix_uvd)))))) {
stop("\n`f_dist_parameters` should have all the diagonal",
" values equal to NA.")
}
is_valid_f_param <- sapply(1:s, function(j) {
dist_i <- fdist[,j]
params_i <- params[,j,]
sapply(1:s, function(i) {
row <- c(dist_i[i], params_i[i,])
valid_parameters(row = row, i = i, j = j, d = 0,
degree = degree, states = states)
})
})
}
all(is_valid_f_param)
}
# These functions are used in conjunction, therefore they are packed together.
# Check the ''row'' values.
valid_parameters <- function(row, i, j, d, degree, states) {
# '''
# This is to be used in function `valid_fdist_parametric`.
# `row` argument is supposed to have 3 values:
# The first one : a string in ('dweibull', 'geom', 'pois', 'nbinom',
# 'unif').
# The second & third one : a numeric value to be given to the
# corrresponding distributions.
# '''
if (all(is.na(row))) return(TRUE)
distr <- row[1]
p1 <- as.numeric(row[2])
p2 <- as.numeric(row[3])
msg <- paste0("\nCurrently for the sojourn time distribution ",
names_i_d(d = degree, kernel_name = "f")[d],
",\n for the previous state u = ", states[i],
" and the next state v = ", states[j],
",\n we have that the first parameter is equal to ", p1,
" and the second parameter is equal to ", p2, ".")
if (distr == "unif") {
if (is.na(p1) || !(is.na(p2))) {
stop("\nFor Uniform distributions, the first parameter must",
" be specified,\nand the second parameter must be NA.", msg)
} else if (!((p1 >= 0) && ((p1 %% 1) == 0))) {
stop("\nFor Uniform distributions,\nthe parameter",
" must be a positive integer.", msg)
}
} else if (distr == "geom") {
if (is.na(p1) || !(is.na(p2))) {
stop("\nFor Geometric distributions, the first parameter must",
" be specified,\nand the second parameter must be NA.", msg)
} else if (p1 <= 0 || p1 >= 1) {
stop("\nFor Geometric distributions,\nthe parameter",
" must be a probability in (0, 1).", msg)
}
} else if (distr == "pois") {
if (is.na(p1) || !(is.na(p2))) {
stop("\nFor Poisson distributions, the first parameter must",
" be specified,\nand the second parameter must be NA.", msg)
} else if (p1 <= 0) {
stop("\nFor Poisson distributions,\nthe parameter ",
"must be a positive number.", msg)
}
} else if (distr == "dweibull") {
if (anyNA(c(p1, p2))) {
stop("\nFor Discrete Weibull distributions, both ",
"parameters must be specified.", msg)
} else if (p1 <= 0 || p1 >= 1 || p2 <= 0) {
stop("\nFor Discrete Weibull distributions,\n",
"the first parameter must be a probability in (0, 1),\n",
"and the second parameter must be a positive number.",
msg)
}
} else if (distr == "nbinom") {
if (anyNA(c(p1, p2))) {
stop("\nFor Negative Binomial distributions, both parameters ",
"must be specified.", msg)
} else if (p1 <= 0 || p2 <= 0 || p2 >= 1) {
stop("\nFor Negative Binomial distributions,\nthe first parameter ",
"must be a positive number (overdispersion),\n",
"and the second parameter must be a probability in ",
"(0, 1).", msg)
}
}
TRUE
}
# Get the numerical simulated values for a big limit of `klim` sojourn times.
get_fdist_parametric <- function(fdist, params, klim) {
# '''
# This is intended to be used in S3 methods `get_kernel` and `simulate`,
# for the class `dsmm_parametric`.
# Therefore, no check is required for `fdist` and `params`.
# '''
if (length(dim(params)) == 4) { # drifting case, dim(params) == 4
m <- matrix(c(fdist, params[, , 1, ], params[, , 2, ]), ncol = 3)
} else if (length(dim(params)) == 3) { # non-drifting case, dim(params) == 3
m <- matrix(c(fdist, params[, , 1], params[, , 2]), ncol = 3)
}
# No `nrow` argument is given above,
# because by not defining the number of rows,
# we simultaneously define the drifting & non-drifting cases.
ans <- apply(m, c(1), function(row, klim) get_dist_param(row, klim),
k = klim)
return(ans)
}
# Get the ''row'' values.
get_dist_param <- function(row, k) {
# '''
# This is a helper function intended for use in `get_fdist_parametric`.
# '''
if (all(is.na(row))) return(rep(0, k))
dist <- row[1]
par1 <- as.numeric(row[2])
par2 <- as.numeric(row[3]) # if it is NA, it is not given as a parameter.
if (dist == 'unif') {
unif_value <- 1/par1
distklim <- sapply(1:k, function(x) if (x <= par1) unif_value else 0)
} else if (dist == 'geom') {
distklim <- dgeom(0:(k - 1), prob = par1)
} else if (dist == 'pois') {
distklim <- dpois(0:(k - 1), lambda = par1)
} else if (dist == 'nbinom') {
distklim <- dnbinom(0:(k - 1), size = par1, prob = par2)
} else if (dist == 'dweibull') {
distklim <- DiscreteWeibull::ddweibull(1:k, q = par1,
beta = par2,
zero = FALSE)
# We keep zero = false because of the parametric estimation.
}
if ((sumd <- sum(distklim)) < 1) {
distklim[k] <- 1 - sumd
}
distklim
}
valid_estimation_fit <- function(estimation, degree, states, s, k_max,
p_is_drifting, f_is_drifting, fdist, pdist) {
# '''
# This function is acting on the OBJECT `dsmm_fit_nonparametric` and
# `dsmm_fit_parametric`. Specifically, it is used when printing through
# `check_attributes()`, in order to prevent someone from defining
# their own object in a `vile` manner.
# '''
if (estimation != 'nonparametric' && estimation != 'parametric') {
stop("`estimation` attribute should be either 'nonparametric',\nor ",
" 'parametric'.")
}
stopifnot(valid_p_dist(p_dist = pdist,
states = states,
s = s, degree = degree,
p_is_drifting = p_is_drifting))
if (estimation == 'nonparametric') {
stopifnot(valid_fdist_nonparametric(f_dist = fdist,
states = states,
s = s,
degree = degree,
f_is_drifting = f_is_drifting,
k_max = k_max))
} else {
stopifnot(valid_estimation(estimation = estimation,
fpar = fdist, s = s,
f_is_drifting = f_is_drifting,
degree = degree, states = states),
valid_fdist_parametric(fdist = fdist,
params = NULL,
s = s, degree = degree,
f_is_drifting = f_is_drifting))
}
TRUE
}
# ------------------------------------------------------------------------------
# Regarding the drifting kernels.
poly_coeff <- function(degree) {
# '''
# Returns a matrix with the coefficients of the polynomials `A_i`.
# '''
x <- (s0 <- seq(0, degree)) / degree
x <- sapply(x, function(v) sapply(s0, function(i) v ** i))
ans <- solve(x)
colnames(ans) <- paste0("t^", s0)
rownames(ans) <- paste0("A_", s0)
ans
}
names_i_d <- function(d, kernel_name = "q") {
# '''
# Generate names for the drifting cases.
# '''
kernel_name <- paste0(kernel_name, "_")
kernel_0 <- paste0(kernel_name, "0")
kernel_1 <- paste0(kernel_name, "1")
if (is_integer(d) && d > 1) {
return(c(paste0(kernel_name, "0"),
paste0(kernel_name, "(", seq(1, d-1), "/", d, ")"),
paste0(kernel_name, "1"))
)
} else if (d == 1) {
return(c(kernel_0, kernel_1))
} else if (d == 0) {
return(kernel_name)
}
}
seq_states_to_id <- function(emc, states) {
# Transforming a sequence of states into numbers.
list_which <- sapply(states, function(u) which(emc == u) )
for (i in seq_along(states)) {
emc[list_which[[i]]] <- i
}
emc
}
# Get different values needed to compute the different distributions. ----------
get_1_u <- function(id_seq, l, n, id_states, s) {
# '''
# Used in `fit_dsmm()` to get the indicator function 1_u.
# `id_seq` is the sequence of the embedded Markov chain where we
# substituted the states for positive integers.
# `id_states` is seq(1, s).
# '''
zero_v <- rep(0, n)
vector_1_u <- sapply(id_states,
function(u, seq_u, zero_v) {
zero_v[which(u == seq_u)] <- 1
return(zero_v)},
seq_u = id_seq[-l], # minus the last state
zero_v = zero_v)
rownames(vector_1_u) <- paste0("t = ", 1:n)
vector_1_u
}
get_1_uvl <- function(id_seq, l, n, X, k_max, id_states, s) {
# '''
# Used in `fit_dsmm()` to get the indicator function 1_uvl.
# `id_seq` is the sequence of the embedded Markov chain where we
# substituted the states for positive integers.
# `id_states` is seq(1, s).
# '''
seq_uvl <- paste(id_seq[-l], c(id_seq[-1]), X[-l])
Es <- rep(id_states, s)
pEs <- paste(Es, sort(Es))
possible_uvl_states <- paste(rep(pEs, k_max), sort(rep(1:k_max, s*s)))
zero_v <- rep(0, n)
vector_1_uvl <- sapply(X = possible_uvl_states,
FUN = function(uvl, seq_uvl, zero_v) {
zero_v[which(uvl == seq_uvl)] <- 1
return(zero_v)},
seq_uvl = seq_uvl, zero_v = zero_v)
rownames(vector_1_uvl) <- paste0("t = ", 1:n)
vector_1_uvl
}
get_model <- function(p_is_drifting, f_is_drifting) {
# '''
# Should only be used AFTER `valid_model(...)` function has been used!
# Used in `fit_dsmm`, `parametric_dsmm` and `nonparametric_dsmm`.
# '''
Model_1 <- p_is_drifting && f_is_drifting
Model_2 <- p_is_drifting && !f_is_drifting
Model_3 <- !p_is_drifting && f_is_drifting
model_id <- which(c(Model_1, Model_2, Model_3) == TRUE)
model <- c("Model_1", "Model_2", "Model_3")[model_id]
model
}
get_vl <- function(t, u, vl_names, kernel, states) {
# '''
# This function is intended to be used in the generic function
# `simulate.dsmm`.
# It returns a vector of two character values:
# The first is the name of the next state `v`, and the second is the
# sojourn time `spend on previous state
# '''
vl <- sample(vl_names, prob = kernel[u, , , t], size = 1)
# ## Because we have `size = 1`,
# ## the `replace` argument is irrelevant
# ## See `sample()` for more details.
ans <- strsplit(vl, " ")[[1]] # Splitting the space added in `paste()`.
if ((l_ans <- length(ans)) > 2) {
# If the state contains spaces, we take all but the last values of
# `ans` and paste them together. The last value of `ans` is the
# corresponding sojourn time of the state.
v <- paste(ans[1:(l_ans-1)], collapse = " ")
l <- ans[l_ans]
ans <- c(v, l)
}
ans
}
get_A_i <- function(degree, model_size) {
# '''
# For intended use in `fit_dsmm` and `get_kernel`.
# '''
n <- model_size
coeff <- poly_coeff(degree = degree)
t_n <- sapply((t <- 0:n)/n , function(tn) tn**(0:degree))
res <- coeff %*% t_n
colnames(res) <- paste0("t = ", t)
return(res)
}
# Normalization of f.
get_f <- function(v) (a <- abs(v)) / sum(a)
# Check the validity of the computed kernel and return it.
get_valid_kernel <- function(Ji, Ai, s, n, k_max, states) {
N <- n + 1
ans <- Ji %*% Ai # ans has dimensions of (s*s*k_max, n+1)
kernel <- array(ans, dim = c(s, s, k_max, N))
if (!all_equal_numeric(apply(kernel, c(1, 4), sum), 1)) {
stop("\nThe sums of the kernel are not equal to 1.")
}
if (!is_prob(kernel)) {
kernel_pos <- apply(kernel, c(1,4), get_f)
dim(kernel_pos) <- c(s, k_max, s, N)
kernel <- aperm(kernel_pos, c(3,1,2,4))
}
dimnames(kernel) <- list(
as.list(states),
as.list(states),
as.list(paste0("l = ", 1:k_max)),
as.list(paste0("t = ", 0:n))
)
kernel
}
# Random sequence creation
#' @title Simulate a sequence for states of choice.
#' @description This is a wrapper function around \code{sample()}.
#'
#' @param states Character vector of unique values. If given the value
#' "DNA" the values c("a", "c", "g", "t") are given instead.
#' @param len Optional. Positive integer with the default value
#' equal to 5000.
#' @param probs Optional. Numeric vector with values interpreted as
#' probabilities for each of the states in \code{states}. Default value
#' is equal to 1 over the number of states given, for every state.
#' @param seed Optional. Object specifying the initialization of the random
#' number generator (see more in \code{\link[base:set.seed]{set.seed}}).
#'
#' @seealso
#' For the simulation of a sequence with a drifting semi-Markov kernel:
#' \link{simulate.dsmm}.
#'
#' The original function: \code{\link[base:sample]{sample}}.
#'
#' About random number generation in R: \code{\link[base:RNG]{RNG}}.
#'
#' For the theoretical background of drifting semi-Markov models: \link{dsmmR}.
#'
#' @return A character sequence of length \code{len}.
#' @export
#' @examples
#' # This is equal to having the argument `probs = c(1/4, 1/4, 1/4, 1/4)`.
#' rand_dna_seq <- create_sequence(states = "DNA")
#' table(rand_dna_seq)
#'
#' random_letters <- sample(letters, size = 5, replace = FALSE)
#' rand_dna_seq2 <- create_sequence(
#' states = random_letters,
#' probs = c(0.6, 0.3, 0.05, 0.025, 0.025),
#' len = 10000)
#' table(rand_dna_seq2)
create_sequence <- function(states, len = 5000, probs = NULL, seed = NULL) {
# '''
# This is a convenience function intended for fast sequence simulation.
# '''
if (missing(states)) {
stop("\nPlease input the character vector of `states`.")
} else if (identical(states, "DNA")) {
states <- c("a", "c", "g", "t")
}
if (!is.null(seed)) {
set.seed(seed)
}
stopifnot(valid_states(states), is_integer(len))
s <- length(states)
if (is.null(probs)) {
overs <- 1/s
probs <- rep(overs, s)
}
stopifnot(is_prob(probs))
seq <- sample(x = states, size = len, replace = TRUE, prob = probs)
seq
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.