Nothing
#' Load the HMDP model defined in the binary files. The model are created in memory
#' using the external C++ library.
#'
#' @param prefix A character string with the prefix added to `binNames`. Used to identify a
#' specific model.
#' @param binNames A character vector of length 7 giving the names of the binary
#' files storing the model.
#' @param eps The sum of the transition probabilities must at most differ `eps` from one.
#' @param check Check if the MDP seems correct.
#' @param verbose More output when running algorithms.
#' @param getLog Output the log messages.
#'
#' @return A list containing relevant information about the model such as model file names
#' (`binNames`), time horizon (`timeHorizon`), number of states (`states`), number of states at
#' last stage of the founder process (`founderStatesLast`), number of actions (`actions`),
#' number of levels (`levels`), names of the weights associated to each action (`weightNames`)
#' and a pointer `ptr` to the model object in memory. Note for models with an infinite
#' time-horizon the states at the founder level is repeated at stage two so have something aka
#' a double array representation.
#' @example inst/examples/machine-ex.R
#' @export
loadMDP <-
function(prefix = "",
binNames = c(
"stateIdx.bin",
"stateIdxLbl.bin",
"actionIdx.bin",
"actionIdxLbl.bin",
"actionWeight.bin",
"actionWeightLbl.bin",
"transProb.bin",
"externalProcesses.bin",
"transWeight.bin",
"transWeightLbl.bin"
),
eps = 0.00001,
check = TRUE,
verbose = FALSE,
getLog = TRUE
) {
binNames<-paste(prefix,binNames,sep="")
if (!is.logical(verbose)) verbose = FALSE
mdp<-methods::new(HMDP, binNames, verbose)
.makeMDPList(mdp, binNames, eps = eps, check = check, getLog = getLog)
}
.makeMDPList <- function(mdp,
binNames = character(),
eps = 0.00001,
check = TRUE,
getLog = TRUE) {
if (!mdp$okay) {
message(mdp$getLog())
rm(mdp)
return(invisible(NULL))
}
else if (getLog) message(mdp$getLog())
if (check) {
msg<-mdp$checkHMDP(as.numeric(eps))
if (msg==2) {
stop(mdp$getLog(), call. = FALSE)
return(invisible(NULL))
}
else if (getLog) message(mdp$getLog())
}
timeHorizon = mdp$timeHorizon
if (timeHorizon>=1000000000) timeHorizon = Inf
if (timeHorizon>=Inf) {
founderStatesLast<-mdp$getStateSizeStage("1")
states<-mdp$getStateSize()-founderStatesLast
} else {
founderStatesLast<-mdp$getStateSizeStage(as.character(timeHorizon-1))
states<-mdp$getStateSize()
}
actions <- mdp$getActionSize()
levels <- mdp$levels
weightActionNames <- mdp$wActionNames
weightTransNames <- mdp$wTransNames
weightNames <- mdp$wNames
v<-list(binNames=binNames, timeHorizon=timeHorizon, states=states,
founderStatesLast=founderStatesLast, actions=actions, levels=levels,
weightNames=weightNames, weightActionNames=weightActionNames,
weightTransNames=weightTransNames, ptr=mdp)
if (mdp$externalProc) {
v$external <- as.data.frame(matrix(mdp$getExternalInfo(),ncol = 2, byrow = TRUE), stringsAsFactors=FALSE)
colnames(v$external) <- c("stageStr","prefix")
}
class(v)<-c("HMDP", "list")
return(v)
}
#' Internal function. Check if the indexes given are okay. Should not be used except you know what you are doing
#'
#' @aliases .checkWDurIdx
#' @param iW Index of the weight we want to optimize.
#' @param iDur Index of the duration/time.
#' @param wLth Number of weights in the model.
#' @return Nothing.
#' @name checkWDurIdx
#' @keywords internal
.checkWDurIdx<-function(iW, iDur, wLth) {
if (length(iW)!=1) stop("Index iW must be of length one!",call. = FALSE)
if (iW>wLth-1) stop("Global weight index iW must be less than ",wLth,"!",call. = FALSE)
if (iW<0) stop("Global weight index iW must be greater or equal zero!",call. = FALSE)
if (!is.null(iDur)) {
if (length(iDur)!=1) stop("Index iDur must be of length one!",call. = FALSE)
if (iW==iDur) stop("Indices iW and iDur must not be the same!",call. = FALSE)
if (iDur>wLth-1) stop("Global duration weight index iDur must be less than ",wLth,"!",call. = FALSE)
if (iDur<0) stop("Global duration weight index iDur must be greater or equal zero!",call. = FALSE)
}
invisible()
}
#' Internal function. Check if the index of the weight is okay. Should not be used except you know what you are doing
#'
#' @aliases .checkWIdx
#' @param iW Index of the weight we want to optimize.
#' @param wLth Number of weights in the model.
#' @return Nothing.
#' @name checkWIdx
#' @keywords internal
.checkWIdx<-function(iW, wLth) {
if (max(iW)>wLth-1) stop("Global weight index iW must be less than ",wLth,"!",call. = FALSE)
if (min(iW)<0) stop("Global weight index iW must be greater or equal zero!",call. = FALSE)
invisible()
}
#' Return the index of a weight in the model. Note that index always start from zero (C++ style), i.e. the first weight, the first state at a stage etc has index 0.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param wLbl The label/string of the weight.
#' @return The index (integer).
#' @export
getWIdx <- function(mdp, wLbl) {
idx <- which(mdp$weightNames == wLbl)
if (length(idx) == 0) idx <- which(grepl(wLbl, mdp$weightNames, fixed = TRUE))
if (length(idx) == 0)
stop("The weight name does not seem to exist in the global weight namespace.", call. = FALSE)
if (length(idx) > 1)
stop("The weight name is ambiguous in the global weight namespace.", call. = FALSE)
return(idx - 1)
}
.optSenseIdx <- function(objective) {
objective <- match.arg(objective, c("max", "min"))
if (objective == "max") return(0)
1
}
.bellmanOpIdx <- function(bellmanOp, includeVariance = TRUE) {
if (length(bellmanOp) > 1) bellmanOp <- bellmanOp[1]
choices <- c("auto", "discount", "average", "expected", "min", "max", "secondMoment")
if (includeVariance) choices <- c(choices, "variance")
bellmanOp <- match.arg(bellmanOp, choices)
switch(bellmanOp,
discount = 0,
average = 1,
expected = 2,
min = 5,
max = 6,
secondMoment = 7,
variance = 8,
auto = NA_integer_)
}
#' Perform policy iteration using the average expected-weight Bellman operator on the MDP.
#'
#' The policy can afterwards be received using functions `getPolicy` and `getPolicyW`.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we optimize.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param maxIte Max number of iterations. If the model does not satisfy the unichain assumption the algorithm may loop.
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param getLog Output the log messages.
#'
#' @return The optimal gain (g) calculated.
#' @seealso [getPolicy()].
#' @export
runPolicyIteAve<-function(mdp, w, dur, maxIte=100, objective = c("max", "min"), getLog = TRUE) {
iW<-getWIdx(mdp,w)
iDur<-getWIdx(mdp,dur)
sense <- .optSenseIdx(objective)
.checkWDurIdx(iW,iDur,length(mdp$weightNames))
g<-mdp$ptr$policyIte(1, sense, as.integer(maxIte), as.integer(iW), as.integer(iDur), discountFactor=1)
#message(mdp$ptr$getLog())
if (getLog) cat(mdp$ptr$getLog())
return(g)
}
#' Perform policy iteration using the discounted expected-weight Bellman operator on the MDP.
#'
#' The policy can afterwards be received using functions `getPolicy` and `getPolicyW`.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we optimize.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param rate The interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param maxIte Max number of iterations. If the model does not satisfy the unichain assumption the algorithm may loop.
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param getLog Output the log messages.
#'
#' @return Nothing.
#' @seealso [getPolicy()].
#' @export
runPolicyIteDiscount<-function(mdp, w, dur, rate = 0, rateBase = 1, discountFactor = NULL, maxIte = 100,
discountMethod="continuous", objective = c("max", "min"), getLog = TRUE) {
iW<-getWIdx(mdp,w)
iDur<-getWIdx(mdp,dur)
sense <- .optSenseIdx(objective)
.checkWDurIdx(iW,iDur,length(mdp$weightNames))
if (is.null(discountFactor)) {
if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
}
g<-mdp$ptr$policyIte(0, sense, as.integer(maxIte), as.integer(iW), as.integer(iDur), discountFactor)
if (getLog) cat(mdp$ptr$getLog())
invisible()
}
#' Perform value iteration on the MDP.
#'
#' If the MDP has a finite time-horizon then arguments `times` and `eps`
#' are ignored.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we optimize.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param rate Interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param maxIte The max number of iterations value iteration is performed.
#' @param eps Stopping tolerance. If $max(w(t)-w(t+1)) < `eps`$ then stop the algorithm, i.e the policy becomes epsilon optimal (see Puterman p161).
#' @param termValues The terminal values used (values of the last stage in the MDP).
#' @param g Average weight. If specified then do a single iteration using the update equations under the average expected-weight Bellman operator with the specified g value.
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param bellmanOp Bellman operator. Use `"auto"` for existing behavior, `"min"` for the minimum-successor operator, `"max"` for the maximum-successor operator, or `"secondMoment"` for the second moment of total accumulated weight.
#' @param getLog Output the log messages.
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#'
#' @return NULL (invisible)
#' @references Puterman, M. Markov Decision Processes, Wiley-Interscience, 1994.
#' @example inst/examples/machine-ex.R
#' @export
runValueIte<-function(mdp, w, dur = NULL, rate = 0, rateBase = 1, discountFactor = NULL, maxIte = 100,
eps = 1e-05, termValues = NULL, g=NULL, objective = c("max", "min"),
bellmanOp = c("auto", "expected", "discount", "average", "min", "max", "secondMoment"),
getLog = TRUE, discountMethod="continuous") {
iW<-getWIdx(mdp,w)
iDur<-NULL
sense <- .optSenseIdx(objective)
op <- .bellmanOpIdx(bellmanOp, includeVariance = FALSE)
if (!is.null(dur)) iDur<-getWIdx(mdp,dur)
.checkWDurIdx(iW,iDur,length(mdp$weightNames))
if (is.null(discountFactor)) {
if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
}
if (is.null(termValues)) termValues<-rep(0,mdp$founderStatesLast)
if (!is.na(op)) {
if (op %in% c(0L, 1L) && is.null(iDur))
stop("A duration index must be specified for this Bellman operator.", call. = FALSE)
if (op == 1L && is.null(g))
stop("The average weight g must be specified for the average Bellman operator.", call. = FALSE)
mdp$ptr$valueIte(op, sense, as.integer(ifelse(mdp$timeHorizon>=Inf, maxIte, 1)),
as.numeric(eps), as.integer(iW), as.integer(ifelse(is.null(iDur), 0, iDur)),
as.numeric(termValues), as.numeric(ifelse(is.null(g), 0, g)), as.numeric(discountFactor))
} else if (is.null(g)) {
if (mdp$timeHorizon>=Inf) {
if (is.null(iDur)) stop("A duration index must be specified under infinite time-horizon!")
mdp$ptr$valueIte(0, sense, as.integer(maxIte),
as.numeric(eps), as.integer(iW), as.integer(iDur), as.numeric(termValues),
as.numeric(0), as.numeric(discountFactor) )
} else {
if (!is.null(iDur)) mdp$ptr$valueIte(0, sense, as.integer(1),
as.numeric(0), as.integer(iW), as.integer(iDur), as.numeric(termValues),
as.numeric(0), as.numeric(discountFactor) )
if (is.null(iDur)) mdp$ptr$valueIte(2, sense, as.integer(1),
as.numeric(0), as.integer(iW), as.integer(0), as.numeric(termValues),
as.numeric(0), as.numeric(1) )
}
} else { # value ite under average expected-weight Bellman operator
if (is.null(iDur)) stop("A duration index must be specified under the average expected-weight Bellman operator!")
mdp$ptr$valueIte(1, sense, as.integer(1),
as.numeric(eps), as.integer(iW), as.integer(iDur), as.numeric(termValues),
as.numeric(g), as.numeric(1) )
}
if (getLog) cat(mdp$ptr$getLog())
invisible(NULL)
}
#' Get parts of the optimal policy.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param sId Vector of id's of the states we want to retrieve.
#' @param stageStr Stage string. If specified then find `sId` based on the stage string.
#' @param stateLabels Add state labels.
#' @param actionLabels Add action labels of policy.
#' @param actionIdx Add action index.
#' @param rewards Add weights calculated for each state.
#' @param stateStr Add the state string for each state.
#' @param external A vector of stage strings corresponding to external processes we want the optimal policy of.
#' @param ... Parameters passed on when find the optimal policy of the external processes.
#'
#' Note if external is specified then it must contain stage strings from `mdp$external`. Moreover you
#' must specify further arguments passed on to [runValueIte()] used for recreating the optimal policy e.g.
#' the g value and the label for weight and duration. See the vignette about external processes.
#'
#' @return The policy (data frame).
#' @example inst/examples/machine-ex.R
#' @export
getPolicy<-function(mdp, sId = ifelse(mdp$timeHorizon>=Inf, mdp$founderStatesLast+1,1):
ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)-1,
stageStr = NULL, stateLabels = TRUE, actionLabels = TRUE, actionIdx = TRUE,
rewards = TRUE, stateStr = TRUE, external = NULL, ...) {
if (!is.null(stageStr)) sId = mdp$ptr$getStateIdsStages(stageStr)
maxS<-ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)
if (max(sId)>=maxS | min(sId)<0)
stop("Out of range (sId). Need to be a subset of 0,...,",maxS-1,"!")
cols <- 1 + stateLabels + actionIdx + actionLabels + rewards + stateStr
policy<-data.frame(matrix(NA, nrow = length(sId), ncol = cols))
cols<-1
policy[,cols] <- sId
colNames = "sId"; cols = cols + 1
if (stateStr) {
policy[,cols]<-mdp$ptr$getStateStr(sId)
colNames = c(colNames, "stateStr"); cols = cols + 1
}
if (stateLabels) {
policy[,cols] = mdp$ptr$getStateLabel(sId);
colNames = c(colNames, "stateLabel"); cols = cols + 1
}
if (actionIdx) {
policy[,cols] = mdp$ptr$getPolicy(sId)
colNames = c(colNames, "aIdx"); cols = cols + 1
}
if (actionLabels) {
policy[,cols] = mdp$ptr$getPolicyLabel(sId)
colNames = c(colNames, "actionLabel"); cols = cols + 1
}
if (rewards) {
policy[,cols] = mdp$ptr$getPolicyW(sId)
colNames = c(colNames, "weight"); cols = cols + 1
}
colnames(policy) <- colNames
if (!is.null(external)) {
policy <- list(main=policy)
for (s in external) {
prefix <- subset(mdp$external, stageStr == s, select = "prefix", drop = TRUE)
lastStage<-mdp$ptr$getNextStageStr(s)
termValues <- getPolicy(mdp, stageStr = lastStage)$weight
extMDP<-loadMDP(prefix, getLog = FALSE)
valueIte(extMDP, termValues=termValues, getLog = FALSE, ...)
extPolicy<-getPolicy(extMDP)
policy[[s]]<-extPolicy
rm(extMDP)
}
}
return(dplyr::as_tibble(policy))
}
#' Information about the MDP
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param sId The id of the state(s) considered.
#' @param stateStr A character vector containing the index of the state(s) (e.g. "n0,s0,a0,n1,s1").
#' Parameter `sId` are ignored if not NULL.
#' @param stageStr A character vector containing the index of the stage(s) (e.g. "n0,s0,a0,n1").
#' Parameter `sId` and `idxS` are ignored if not NULL.
#' @param withList Output info as a list `lst`.
#' @param withDF Output the info as a data frame.
#' @param dfLevel If `withDF` and equal `"state"` the data frame contains a row for each state. If equal `"action"` the data frame contains a row for each action.
#' @param asStringsState Write state vector as a string; otherwise, output it as columns.
#' @param asStringsActions Write action vectors (weights, transitions and probabilities) as strings; otherwise, output it as columns.
#' @param withHarc Output a hyperarcs data frame. Each row contains a hyperarc with the first column denoting the
#' head (`sId`), the tails (`sId`) and the label.
#'
#' @return A list containing the list, data frame(s).
#' @example inst/examples/machine-ex.R
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
getInfo<-function(mdp,
sId=1:ifelse(mdp$timeHorizon<Inf, mdp$states, mdp$states+mdp$founderStatesLast)-1,
stateStr=NULL, stageStr=NULL,
withList = TRUE,
withDF = TRUE, dfLevel = "state", asStringsState = TRUE, asStringsActions = FALSE,
withHarc = FALSE
) {
if (!is.null(stageStr)) {
sId<-mdp$ptr$getStateIdsStages(stageStr)
stateStr<-mdp$ptr$getStateStr(sId)
}else {
if (!is.null(stateStr)) sId<-mdp$ptr$getStateIdsStates(stateStr)
else stateStr<-mdp$ptr$getStateStr(sId)
}
maxS<-ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)
if (max(sId)>=maxS | min(sId)<0)
stop("Out of range (sId). Need to be a subset of 0,...,",maxS-1,"!")
l<-vector("list", length(sId))
lapply(l,function(x) x<-list(sId=NULL,stateStr=NULL,label=NULL,actions=NULL))
labels<-mdp$ptr$getStateLabel(sId)
for (i in 1:length(l)) {
l[[i]]$sId <- sId[i]
l[[i]]$stateStr <- stateStr[i]
l[[i]]$label <- labels[i]
l[[i]]$actions <- mdp$ptr$getActionInfo(sId[i])
}
names(l) <- sId
lst <- list()
if (withList) lst$lst <- l
if (withDF) {
df <- dplyr::tibble(sId = l) # add list
df <- df %>% tidyr::unnest_wider(sId) # convert states to columns
if (dfLevel == "action") {
df <- df %>%
tidyr::unnest_longer("actions") %>% # convert actions (one row for each action)
tidyr::unnest_wider("actions", names_repair = tidyr::tidyr_legacy) # convert action to columns
df <- df %>%
dplyr::rename(label_action = "label1")
if (asStringsActions) {
df <- df %>%
dplyr::mutate(weights = sapply(.data$weights, function(x) paste0(x, collapse = ",")),
trans = sapply(.data$trans, function(x) paste0(x, collapse = ",")),
pr = sapply(.data$pr, function(x) paste0(x, collapse = ","))) %>%
dplyr::mutate(weights = dplyr::na_if(.data$weights, ""),
trans = dplyr::na_if(.data$trans, ""),
pr = dplyr::na_if(.data$pr, ""))
}
} else {
if (asStringsActions) {
df <- df %>%
tidyr::unnest_longer("actions") %>% # convert actions (one row for each action)
tidyr::unnest_wider("actions", names_repair = tidyr::tidyr_legacy) # convert action to columns
df <- df %>%
dplyr::rename(label_action = .data$label1)
df <- df %>%
dplyr::mutate(weights = sapply(.data$weights, function(x) paste0(x, collapse = ",")),
trans = sapply(.data$trans, function(x) paste0(x, collapse = ",")),
pr = sapply(.data$pr, function(x) paste0(x, collapse = ","))) %>%
dplyr::mutate(weights = dplyr::na_if(.data$weights, ""),
trans = dplyr::na_if(.data$trans, ""),
pr = dplyr::na_if(.data$pr, ""))
df <- df %>%
dplyr::group_by(sId, .data$stateStr, .data$label) %>%
tidyr::nest() %>%
dplyr::mutate(data = lapply(.data$data, function(x) {if (all(is.na(x$aIdx))) return(NULL) else return(x)})) %>%
dplyr::rename(actions = .data$data)
}
}
if (!asStringsState) {
levels <- (max(stringr::str_count(df$stateStr, ",")) + 1) %/% 3 + 1
if (levels == 1)
nm <- paste(c("n", "s"), levels - 1, sep = "")
if (levels > 1)
nm <-
c(paste(c("n", "s", "a"), rep(0:(levels - 2), each = 3), sep = ""),
paste(c("n", "s"), levels - 1, sep = ""))
df <- df %>%
tidyr::separate(.data$stateStr, into = nm, sep = ",", remove = FALSE, fill = "right")
}
lst$df <- df
}
if (withHarc) {
df <- dplyr::tibble(sId = l) %>%
tidyr::unnest_wider(sId) %>%
tidyr::unnest_longer("actions") %>% # convert actions (one row for each action)
tidyr::unnest_wider("actions", names_repair = tidyr::tidyr_legacy) %>%
tidyr::unnest_wider(.data$trans, names_sep = "") %>%
dplyr::filter(!is.na(.data$aIdx)) %>%
dplyr::select(.data$sId, tidyr::contains("trans"), label = .data$label1)
colnames(df) <- stringr::str_replace(colnames(df), "trans", "tail")
colnames(df)[1] <- "head"
lst$harcDF <- df
}
return(lst)
}
#' Modify the current policy by setting policy action of states.
#'
#' If the policy does not contain all states then the actions from the previous optimal
#' policy are used.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param policy A data frame with two columns state id `sId` and action index `aIdx`.
#' @return NULL (invisible)
#' @example inst/examples/machine-ex.R
#' @export
setPolicy<-function(mdp, policy) {
if (!all(c("sId", "aIdx") %in% colnames(policy))) stop("You must specify `sId` and action index `aIdx`.")
#if (dim(policy)[2]!=2) stop("You must specify two columns in policy.")
mdp$ptr$setPolicy(as.integer(policy$sId),as.integer(policy$aIdx))
return(invisible(NULL))
}
#' Calculate weights based on current policy. Normally run after an optimal policy has been found.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param wLbl The label of the weight we consider.
#' @param criterion The Bellman operator shortcut. If `expected` use expected weights, if `discount` use discounted expected weights, if `average` use average expected weights, if `min` use minimum-successor weights, if `max` use maximum-successor weights, if `secondMoment` use the second moment of total accumulated weight, and if `variance` use the law-of-total-variance recursion under the current policy.
#' @param durLbl The label of the duration/time such that discount rates can be calculated.
#' @param rate The interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param termValues The terminal values used (values of the last stage in the MDP).
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#'
#' @return Nothing.
#' @example inst/examples/machine-ex.R
#' @export
runCalcWeights<-function(mdp, wLbl, criterion="expected", durLbl = NULL, rate = 0, rateBase = 1,
discountFactor = NULL, termValues = NULL, discountMethod = "continuous") {
iW<-getWIdx(mdp,wLbl)
if (!is.null(durLbl)) iDur<-getWIdx(mdp,durLbl)
.checkWIdx(iW,length(mdp$weightNames))
if (is.null(discountFactor)) {
if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
}
if (mdp$timeHorizon<Inf) {
if (is.null(termValues)) stop("Terminal values must be specified under finite time-horizon!")
if (criterion=="expected") mdp$ptr$calcPolicy(2,iW,0,1,discountFactor)
if (criterion=="min") mdp$ptr$calcPolicy(5,iW,0,1,discountFactor)
if (criterion=="max") mdp$ptr$calcPolicy(6,iW,0,1,discountFactor)
if (criterion=="secondMoment") mdp$ptr$calcPolicy(7,iW,0,1,discountFactor)
if (criterion=="variance") {
mdp$ptr$setTerminalW(as.numeric(termValues))
mdp$ptr$calcPolicy(8,iW,0,1,discountFactor)
}
if (criterion=="discount") mdp$ptr$calcPolicy(0,iW,0,iDur,discountFactor)
} else {
if (criterion=="discount") mdp$ptr$policyIteFixedPolicy(0,iW,iDur,discountFactor)
if (criterion=="average") return( mdp$ptr$policyIteFixedPolicy(1,iW,iDur,discountFactor) )
#if (criterion=="expected") .Call("MDP_CalcWeightsFinite", mdp$ptr, as.integer(iW), as.numeric(termValues), PACKAGE="MDP")
}
invisible(NULL)
}
#' Calculate the retention pay-off (RPO) or opportunity cost for some states.
#'
#' The RPO is defined as the difference between
#' the weight of the state when using action `iA` and the maximum
#' weight of the node when using another predecessor different from `iA`.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we calculate RPO for.
#' @param iA The action index we calculate the RPO with respect to (same size as `sId`).
#' @param sId Vector of id's of the states we want to retrieve.
#' @param criterion The Bellman operator shortcut. If `expected` use expected weights, if `discount` use discounted expected weights, if `average` use average expected weights.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param rate The interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param g The optimal gain (g) calculated (used if `criterion = "average"`).
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#' @param stateStr Output the state string.
#'
#' @return The RPO (matrix/data frame).
#' @importFrom magrittr %>%
#' @export
getRPO<-function(mdp, w, iA, sId = ifelse(mdp$timeHorizon>=Inf, mdp$founderStatesLast+1,1):
ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)-1,
criterion="expected", dur = "", rate = 0, rateBase = 1, discountFactor = NULL,
g = 0, objective = c("max", "min"), discountMethod="continuous", stateStr = TRUE) {
iW<-getWIdx(mdp,w)
sense <- .optSenseIdx(objective)
if (criterion != "expected" && !nzchar(dur))
stop("A duration weight must be specified for this Bellman operator.", call. = FALSE)
iDur<-if (nzchar(dur)) getWIdx(mdp,dur) else 0
.checkWIdx(iW,length(mdp$weightNames))
if (is.null(discountFactor)) {
if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
}
maxS<-ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)
if (max(sId)>=maxS | min(sId)<0)
stop("Out of range (sId). Need to be a subset of 0,...,",maxS-1,"!")
if (length(sId)!=length(iA))
stop("Vectors sId and iA must have same length!")
rpo<-NA
if (criterion=="expected") rpo<-mdp$ptr$calcRPO(2, sense, as.integer(sId), iW, as.integer(iA), g, iDur, discountFactor)
if (criterion=="discount") rpo<-mdp$ptr$calcRPO(0, sense, as.integer(sId), iW, as.integer(iA), g, iDur, discountFactor)
if (criterion=="average") rpo<-mdp$ptr$calcRPO(1, sense, as.integer(sId), iW, as.integer(iA), g, iDur, discountFactor)
rpo[rpo <= -1.8e+16]<-NA # less than 2 actions
rpo <- dplyr::tibble(sId=sId, rpo=rpo)
if (stateStr) {
rpo <- rpo %>%
dplyr::transmute(sId, stateStr = mdp$ptr$getStateStr(sId), rpo)
}
return(rpo)
}
#' Save the MDP to binary files
#'
#' Currently do not save external files.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param prefix A character string with the prefix added to `binNames`. Used to identify a specific model.
#' @param getLog Output the log as a message.
#'
#' @return ???
#' @export
saveMDP<-function(mdp,prefix="", getLog=TRUE) {
mdp$ptr$save2Binary(prefix)
if (getLog) message(mdp$ptr$getLog())
}
#' Calculate the steady state transition probabilities for the founder process (level 0).
#'
#' Assume that we consider an ergodic/irreducible time-homogeneous Markov chain specified using a policy in the MDP.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param getLog Output log text.
#'
#' @return A vector with steady state probabilities for all the states at the founder level.
#' @export
getSteadyStatePr<-function(mdp, getLog=FALSE) {
pr<-mdp$ptr$steadyStatePr()
if (getLog) message(mdp$ptr$getLog())
return(pr)
}
# #' Set the weight of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param w The weight.
# #' @param sId The state id of the state.
# #' @param idxA The action index.
# #' @param wLbl The label of the weight we consider.
# #' @return Nothing.
# #' @example inst/examples/machine.R
# #' @export
# setActionWeight<-function(mdp, w, sId, iA, wLbl) {
# iW<-getWIdx(mdp,wLbl)
#
# .Call("MDP_SetActionW", mdp$ptr, as.numeric(w), as.integer(sId), as.integer(iA), as.integer(iW), PACKAGE="MDP")
# invisible(NULL)
# }
#
#
#
#
# #' Fix the action of a state. That is, the other actions are removed from the HMDP.
# #'
# #' The actions can be reset using \code{resetActions}.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param sId The state id of the state we want to fix the action for.
# #' @param iA The action index of the state.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @seealso \code{\link{resetActions}}, \code{\link{removeAction}}.
# #' @export
# fixAction<-function(mdp, sId, iA) {
# .Call("MDP_FixAction", mdp$ptr, as.integer(sId), as.integer(iA), PACKAGE="MDP")
# invisible(NULL)
# }
#
#
# #' Remove the action of a state from the HMDP.
# #'
# #' The actions can be reset using \code{resetActions}.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param sId The state id of the state we want to remove the action for.
# #' @param iA The action index of the state.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @seealso \code{\link{resetActions}}, \code{\link{fixAction}}.
# #' @example inst/examples/machine.R
# #' @export
# removeAction<-function(mdp, sId, iA) {
# .Call("MDP_RemoveAction", mdp$ptr, as.integer(sId), as.integer(iA), PACKAGE="MDP")
# invisible(NULL)
# }
#
#
# #' Reset the actions of a state.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @seealso \code{\link{resetActions}}, \code{\link{fixAction}}.
# #' @example inst/examples/machine.R
# #' @export
# resetActions<-function(mdp) {
# .Call("MDP_ResetActions", mdp$ptr, PACKAGE="MDP")
# invisible(NULL)
# }
#
#
#
# #' Set the weight of a state.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param w The weight.
# #' @param sId The state id of the state.
# #' @param wLbl The label of the weight we consider.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @export
# setStateWeight<-function(mdp, w, sId, wLbl) {
# iW<-getWIdx(mdp,wLbl)
# .Call("MDP_SetStateW", mdp$ptr, as.numeric(w), as.integer(sId), as.integer(iW), PACKAGE="MDP")
# invisible(NULL)
# }
#
#
# #' Return ids for states in a stage.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param stages A char vector of index in the form "n0,s0,a0,n1", i.e. 3*level+1 elements in the string.
# #' @return A vector of ids for the states.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getIdSStages<-function(mdp, stages) {
# v<-.Call("MDP_GetIdSStage", mdp$ptr, as.character(stages), PACKAGE="MDP")
# return(v)
# }
#
#
# #' Return the index strings for states having id idS.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS A vector of state ids.
# #' @return A vector of index for the states.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getStrIdxS<-function(mdp, idS) {
# n<- mdp$states + ifelse(mdp$timeHorizon>=Inf,mdp$founderStatesLast,0)
# idS <- idS[idS<n & idS>=0]
# v<-.Call("MDP_GetIdxS", mdp$ptr, as.integer(idS), PACKAGE="MDP")
# return(v)
# }
#
#
# #' Return the label of states having id idS.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS A vector of state ids.
# #' @return A vector of labels for the states.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getLabel<-function(mdp, idS) {
# n<- mdp$states + ifelse(mdp$timeHorizon>=Inf,mdp$founderStatesLast,0)
# idS <- idS[idS<n & idS>=0]
# v<-.Call("MDP_GetLabel", mdp$ptr, as.integer(idS), PACKAGE="MDP")
# return(v)
# }
#
#
# #' Get the weights of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS The state id.
# #' @param idxA The action index.
# #' @return A vector of weights for the action.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getActionW<-function(mdp, idS, idxA) {
# l<-info(mdp, idS[1])
# l<-l[[1]]$actions[idxA+1]
# l<-substring(l,regexpr("w",l)+3)
# l<-gsub(").*","",l)
# zz<-textConnection(l)
# l<-scan(zz, sep=",", quiet = TRUE)
# close(zz)
# return(l)
# }
#
#
# #' Get the ids of the transition states of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS The state id.
# #' @param idxA The action index.
# #' @return A vector of weights for the action.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getActionTransIdS<-function(mdp, idS, idxA) {
# l<-info(mdp, idS[1])
# l<-l[[1]]$actions[idxA+1]
# l<-substring(l,regexpr("trans",l)+7)
# l<-gsub(").*","",l)
# zz<-textConnection(l)
# l<-scan(zz, sep=",", quiet = TRUE)
# close(zz)
# return(l)
# }
#
#
# #' Get the transition probabilities of the transition states of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS The state id.
# #' @param idxA The action index (c++ style starting from zero).
# #' @return A vector of weights for the action.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getActionTransPr<-function(mdp, idS, idxA) {
# return(.Call("MDP_GetActionTransPr", mdp$ptr, as.integer(idS), as.integer(idxA), PACKAGE="MDP") )
# }
# # getActionTransPr<-function(mdp, idS, idxA) {
# # l<-info(mdp, idS[1])
# # l<-l[[1]]$actions[idxA+1]
# # l<-substring(l,regexpr("pr",l)+4)
# # l<-gsub(").*","",l)
# # zz<-textConnection(l)
# # l<-scan(zz, sep=",", quiet = TRUE)
# # close(zz)
# # return(l)
# # }
#
#
#
# #' Get the transition probability matrix P for the founder process (level 0).
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @return The state probability matrix.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @export
# getTransPr<-function(mdp) {
# v<-.Call("MDP_GetTransPr", mdp$ptr, PACKAGE="MDP")
# v<-matrix(v,nrow=mdp$states)
# return(v)
# }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.