#' Commpute initial state probabilites
#' @keywords internal
#'
#' \code{initial_est} return the initial state probabilities for each level in the passed \code{by_factor},
#' defined as the expectation of the vectors for each level. These probabilities correspond to Small_Gamma_j,b^(t+1)(q)
#' at step 4 of Algorithm 1 in Woodard et a., 2017
#'
#' @param state_prob An \code{n x Q} matrix of initial state probabilities for \code{n} observations and \code{Q} states
#' @param by_factor A vector of factors (link + time bin) of size \code{n} (a factor for each row of \code{state_prob}).
#' @param subset A vector indexing the rows of \code{state_prob} for which states probabilities are computed using only this subset, defualt is \code{NULL}
#'
#' @details NULL
#'
#' @return An \code{m x Q} matrix of probabilites for the m levels of \code{by_factor}.
#'
#' @examples
#' \dontrun{
#' x = runif(10)
#' x = cbind(x, 1-x)
#' initial_est(x, factor(sample(c("A", "B"), 10, replace=TRUE)))
#' }
#' @references
#' {Woodard, D., Nogin, G., Koch, P., Racz, D., Goldszmidt, M., Horvitz, E., 2017. Predicting travel time reliability using mobile phone GPS data. Transportation Research Part C, 75, 30-44.}
#' @export
initial_est <- function(state_prob, by_factor, subset=NULL) {
# Basic verfication of inputs; stop if any is incorrect.
if(!is.matrix(state_prob))
stop('state_prob is not a matrix!')
if(!is.factor(by_factor))
stop('by_factor is not a factor')
if(nrow(state_prob) != length(by_factor))
stop('legnth of factors is not the same is the number of rows of state_prob.')
if(!is.null(subset) & !is.vector(subset)) # If not null, 'subset' needs to be a vector.
stop('subset must be vector.')
nQ = ncol(state_prob) # Number of states as defined elsewhere
# Compute a matrix of dimensions nQ x m (m = number of levels: link + time bin), either using the full set of observations
# or the subset defined in "subset". Each row contains the expected values (in vector form) for the level's
# initial probability estimates.
if(is.null(subset)){
rawinit = vapply(split(state_prob, by_factor),
function(r) .colMeans(r,length(r)/nQ, nQ, na.rm = TRUE), numeric(nQ), USE.NAMES = FALSE)
}else{
rawinit = vapply(split(state_prob[subset, ], by_factor[subset]),
function(r) .colMeans(r,length(r)/nQ, nQ, na.rm = TRUE), numeric(nQ), USE.NAMES = FALSE)
}
# Clean and transpose to get the final m x nQ matrix
return(matrix(drop(rawinit), ncol = nQ, byrow = TRUE))
}
#' Compute transition matrix probabilities
#' @keywords internal
#'
#' \code{tmat_est} returns the transition state probabilities for each level in the passed \code{by_factor}. These probabilities
#' correspond to Big_Gamma_j,b^(t+1)(q', q) at step 4 of Algorithm 1 in Woodard et a., 2017
#'
#' @param joint_prob An \code{n-1 x Q^2} matrix of joint state probabilities for \code{n} observations and \code{Q} states
#' @param state_prob An \code{n-1 x Q} matrix of initial state probabilities for \code{n} observations and \code{Q} states
#' @param init_ids A vector of indices of the initial state observation for each trip
#' @param by_factor A vector of factors (link + time bin) of size \code{n} (a factor for each row of \code{state_prob}).
#'
#' @return An \code{m x Q} matrix of probabilites for the m levels of \code{by_factor}.
#'
#' @export
tmat_est <- function(joint_prob,state_prob, init_ids, by_factor){
# Basic verification of inputs; stop if any is incorrect.
if(!is.matrix(joint_prob))
stop('state_prob is not a matrix!')
if(!is.matrix(state_prob))
stop('state_prob is not a matrix!')
if(!is.factor(by_factor))
stop('by_factor is not a factor')
if(nrow(joint_prob) != length(by_factor)-1)
stop('legnth of factors does not match the number of rows of joint_prob.')
if(nrow(state_prob) != length(by_factor))
stop('legnth of factors is not the same is the number of rows of state_prob.')
nQ <- ncol(state_prob) # Number of states
nQ2 <- nQ^2 # Compute nQ2, the square of nQ, which will be used often
nObs <- nrow(state_prob) # Number of observations
# We need to remove the first observation of each trip, except the very first one
# which is already out.
joint_prob = joint_prob[-c(init_ids[-1] - 1), ] # removing first obs per trip
by_factor = by_factor[-init_ids] # removing first obs per trip
# Numerator (num): compute a matrix of dimensions m x nQ^2 (where m = the number of factors)...
num = vapply(split(joint_prob, by_factor), function(r) .colSums(r,length(r)/nQ2, nQ2), numeric(nQ2), USE.NAMES = FALSE)
num = matrix(drop(num), ncol = nQ2, byrow = TRUE) # ...which becomes a cleaner nQ^2 x m matrix.
# Denominator (den): compute a matrix of dimensions m x nQ (where m = the number of factors)...
den <- state_prob[-c(init_ids[-1] - 1, nObs),] # removing last obs per trip
den = vapply(split(den, by_factor), function(r) .colSums(r, length(r)/nQ, nQ), numeric(nQ), USE.NAMES = FALSE)
den = matrix(drop(den), ncol = nQ, byrow = TRUE) # ...which becomes a cleaner m x nQ matrix.
# We return the ratio of 'num' over a modified 'den' with each column replicated nQ times consecutively.
return(num/den[, rep(1:nQ, each = nQ)])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.