R/mdp.R
In MDP2: Markov Decision Processes (MDPs)

Documented in .checkWDurIdx .checkWIdx getInfo getPolicy getRPO getSteadyStatePr getWIdx loadMDP runCalcWeights runPolicyIteAve runPolicyIteDiscount runValueIte saveMDP setPolicy

#' Load the HMDP model defined in the binary files. The model are created in memory
#' using the external C++ library.
#'
#' @param prefix A character string with the prefix added to `binNames`. Used to identify a 
#'   specific model.
#' @param binNames A character vector of length 7 giving the names of the binary
#'     files storing the model.
#' @param eps The sum of the transition probabilities must at most differ `eps` from one.
#' @param check Check if the MDP seems correct.
#' @param verbose More output when running algorithms.
#' @param getLog Output the log messages.
#' 
#' @return A list containing relevant information about the model such as model file names 
#'    (`binNames`), time horizon (`timeHorizon`), number of states (`states`), number of states at 
#'    last stage of the founder process (`founderStatesLast`), number of actions (`actions`), 
#'    number of levels (`levels`), names of the weights associated to each action (`weightNames`)
#'    and a pointer `ptr` to the model object in memory. Note for models with an infinite 
#'    time-horizon the states at the founder level is repeated at stage two so have something aka
#'    a double array representation. 
#' @example inst/examples/machine-ex.R
#' @export
loadMDP <-
   function(prefix = "",
            binNames = c(
               "stateIdx.bin",
               "stateIdxLbl.bin",
               "actionIdx.bin",
               "actionIdxLbl.bin",
               "actionWeight.bin",
               "actionWeightLbl.bin",
               "transProb.bin",
               "externalProcesses.bin",
               "transWeight.bin",
               "transWeightLbl.bin"
            ),
            eps = 0.00001,
            check = TRUE,
            verbose = FALSE,
            getLog = TRUE
   ) {
	binNames<-paste(prefix,binNames,sep="")
	if (!is.logical(verbose)) verbose = FALSE
	mdp<-methods::new(HMDP, binNames, verbose)
   .makeMDPList(mdp, binNames, eps = eps, check = check, getLog = getLog)
}

.makeMDPList <- function(mdp,
                         binNames = character(),
                         eps = 0.00001,
                         check = TRUE,
                         getLog = TRUE) {
	if (!mdp$okay) {
		message(mdp$getLog())
		rm(mdp)
		return(invisible(NULL))
	}
	else if (getLog) message(mdp$getLog())

	if (check) {
	   msg<-mdp$checkHMDP(as.numeric(eps))
      if (msg==2) {
         stop(mdp$getLog(), call. = FALSE)
         return(invisible(NULL))
      }
	   else if (getLog) message(mdp$getLog())
	}

	timeHorizon = mdp$timeHorizon
	if (timeHorizon>=1000000000) timeHorizon = Inf
	if (timeHorizon>=Inf) {
	   founderStatesLast<-mdp$getStateSizeStage("1")
		states<-mdp$getStateSize()-founderStatesLast
	} else {
	   founderStatesLast<-mdp$getStateSizeStage(as.character(timeHorizon-1))
		states<-mdp$getStateSize()
	}
	actions <- mdp$getActionSize()
	levels <- mdp$levels
	weightActionNames <- mdp$wActionNames
	weightTransNames <- mdp$wTransNames
	weightNames <- mdp$wNames
	v<-list(binNames=binNames, timeHorizon=timeHorizon, states=states, 
	     founderStatesLast=founderStatesLast, actions=actions, levels=levels, 
	     weightNames=weightNames, weightActionNames=weightActionNames,
	     weightTransNames=weightTransNames, ptr=mdp)
	if (mdp$externalProc) {
	   v$external <- as.data.frame(matrix(mdp$getExternalInfo(),ncol = 2, byrow = TRUE), stringsAsFactors=FALSE)
	   colnames(v$external) <- c("stageStr","prefix")
	}
	class(v)<-c("HMDP", "list")
	return(v)
}



#' Internal function. Check if the indexes given are okay. Should not be used except you know what you are doing
#'
#' @aliases .checkWDurIdx
#' @param iW Index of the weight we want to optimize.
#' @param iDur Index of the duration/time.
#' @param wLth Number of weights in the model.
#' @return Nothing.
#' @name checkWDurIdx
#' @keywords internal
.checkWDurIdx<-function(iW, iDur, wLth) {
	if (length(iW)!=1) stop("Index iW must be of length one!",call. = FALSE)
	if (iW>wLth-1) stop("Global weight index iW must be less than ",wLth,"!",call. = FALSE)
	if (iW<0) stop("Global weight index iW must be greater or equal zero!",call. = FALSE)
	if (!is.null(iDur)) {
		if (length(iDur)!=1) stop("Index iDur must be of length one!",call. = FALSE)
		if (iW==iDur) stop("Indices iW and iDur must not be the same!",call. = FALSE)
		if (iDur>wLth-1) stop("Global duration weight index iDur must be less than ",wLth,"!",call. = FALSE)
		if (iDur<0) stop("Global duration weight index iDur must be greater or equal zero!",call. = FALSE)
	}
	invisible()
}


#' Internal function. Check if the index of the weight is okay. Should not be used except you know what you are doing
#'
#' @aliases .checkWIdx
#' @param iW Index of the weight we want to optimize.
#' @param wLth Number of weights in the model.
#' @return Nothing.
#' @name checkWIdx
#' @keywords internal
.checkWIdx<-function(iW, wLth) {
	if (max(iW)>wLth-1) stop("Global weight index iW must be less than ",wLth,"!",call. = FALSE)
	if (min(iW)<0) stop("Global weight index iW must be greater or equal zero!",call. = FALSE)
	invisible()
}


#' Return the index of a weight in the model. Note that index always start from zero (C++ style), i.e. the first weight, the first state at a stage etc has index 0.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param wLbl The label/string of the weight.
#' @return The index (integer).
#' @export
getWIdx <- function(mdp, wLbl) {
   idx <- which(mdp$weightNames == wLbl)
   if (length(idx) == 0) idx <- which(grepl(wLbl, mdp$weightNames, fixed = TRUE))
   if (length(idx) == 0)
      stop("The weight name does not seem to exist in the global weight namespace.", call. = FALSE)
   if (length(idx) > 1)
      stop("The weight name is ambiguous in the global weight namespace.", call. = FALSE)
   return(idx - 1)
}

.optSenseIdx <- function(objective) {
   objective <- match.arg(objective, c("max", "min"))
   if (objective == "max") return(0)
   1
}

.bellmanOpIdx <- function(bellmanOp, includeVariance = TRUE) {
   if (length(bellmanOp) > 1) bellmanOp <- bellmanOp[1]
   choices <- c("auto", "discount", "average", "expected", "min", "max", "secondMoment")
   if (includeVariance) choices <- c(choices, "variance")
   bellmanOp <- match.arg(bellmanOp, choices)
   switch(bellmanOp,
          discount = 0,
          average = 1,
          expected = 2,
          min = 5,
          max = 6,
          secondMoment = 7,
          variance = 8,
          auto = NA_integer_)
}


#' Perform policy iteration using the average expected-weight Bellman operator on the MDP.
#'
#' The policy can afterwards be received using functions `getPolicy` and `getPolicyW`.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we optimize.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param maxIte Max number of iterations. If the model does not satisfy the unichain assumption the algorithm may loop.
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param getLog Output the log messages.
#' 
#' @return The optimal gain (g) calculated.
#' @seealso [getPolicy()].
#' @export
runPolicyIteAve<-function(mdp, w, dur, maxIte=100, objective = c("max", "min"), getLog = TRUE) {
	iW<-getWIdx(mdp,w)
	iDur<-getWIdx(mdp,dur)
	sense <- .optSenseIdx(objective)
	.checkWDurIdx(iW,iDur,length(mdp$weightNames))
	g<-mdp$ptr$policyIte(1, sense, as.integer(maxIte), as.integer(iW), as.integer(iDur), discountFactor=1)
	#message(mdp$ptr$getLog())
	if (getLog) cat(mdp$ptr$getLog())
	return(g)
}


#' Perform policy iteration using the discounted expected-weight Bellman operator on the MDP.
#'
#' The policy can afterwards be received using functions `getPolicy` and `getPolicyW`.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we optimize.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param rate The interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param maxIte Max number of iterations. If the model does not satisfy the unichain assumption the algorithm may loop.
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param getLog Output the log messages.
#' 
#' @return Nothing.
#' @seealso [getPolicy()].
#' @export
runPolicyIteDiscount<-function(mdp, w, dur, rate = 0, rateBase = 1, discountFactor = NULL, maxIte = 100, 
                            discountMethod="continuous", objective = c("max", "min"), getLog = TRUE) {
	iW<-getWIdx(mdp,w)
	iDur<-getWIdx(mdp,dur)
	sense <- .optSenseIdx(objective)
	.checkWDurIdx(iW,iDur,length(mdp$weightNames))
	if (is.null(discountFactor)) {
	   if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
	   if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
	}
	g<-mdp$ptr$policyIte(0, sense, as.integer(maxIte), as.integer(iW), as.integer(iDur), discountFactor)
	if (getLog) cat(mdp$ptr$getLog())
	invisible()
}


#' Perform value iteration on the MDP.
#'
#' If the MDP has a finite time-horizon then arguments `times` and `eps`
#' are ignored.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we optimize.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param rate Interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param maxIte The max number of iterations value iteration is performed.
#' @param eps Stopping tolerance. If $max(w(t)-w(t+1)) < `eps`$ then stop the algorithm, i.e the policy becomes epsilon optimal (see Puterman p161).
#' @param termValues The terminal values used (values of the last stage in the MDP).
#' @param g Average weight. If specified then do a single iteration using the update equations under the average expected-weight Bellman operator with the specified g value.
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param bellmanOp Bellman operator. Use `"auto"` for existing behavior, `"min"` for the minimum-successor operator, `"max"` for the maximum-successor operator, or `"secondMoment"` for the second moment of total accumulated weight.
#' @param getLog Output the log messages.
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#' 
#' @return NULL (invisible)
#' @references Puterman, M. Markov Decision Processes, Wiley-Interscience, 1994.
#' @example inst/examples/machine-ex.R
#' @export
runValueIte<-function(mdp, w, dur = NULL, rate = 0, rateBase = 1, discountFactor = NULL, maxIte = 100, 
                   eps = 1e-05, termValues = NULL, g=NULL, objective = c("max", "min"),
                   bellmanOp = c("auto", "expected", "discount", "average", "min", "max", "secondMoment"),
                   getLog = TRUE, discountMethod="continuous") {
	iW<-getWIdx(mdp,w)
	iDur<-NULL
	sense <- .optSenseIdx(objective)
	op <- .bellmanOpIdx(bellmanOp, includeVariance = FALSE)
	if (!is.null(dur)) iDur<-getWIdx(mdp,dur)
	.checkWDurIdx(iW,iDur,length(mdp$weightNames))
	if (is.null(discountFactor)) {
	   if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
	   if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
	}
	if (is.null(termValues)) termValues<-rep(0,mdp$founderStatesLast)
	if (!is.na(op)) {
	   if (op %in% c(0L, 1L) && is.null(iDur))
	      stop("A duration index must be specified for this Bellman operator.", call. = FALSE)
	   if (op == 1L && is.null(g))
	      stop("The average weight g must be specified for the average Bellman operator.", call. = FALSE)
	   mdp$ptr$valueIte(op, sense, as.integer(ifelse(mdp$timeHorizon>=Inf, maxIte, 1)),
            as.numeric(eps), as.integer(iW), as.integer(ifelse(is.null(iDur), 0, iDur)),
            as.numeric(termValues), as.numeric(ifelse(is.null(g), 0, g)), as.numeric(discountFactor))
	} else if (is.null(g)) {
   	if (mdp$timeHorizon>=Inf) {
   		if (is.null(iDur)) stop("A duration index must be specified under infinite time-horizon!")
	      mdp$ptr$valueIte(0, sense, as.integer(maxIte),
   			as.numeric(eps), as.integer(iW), as.integer(iDur), as.numeric(termValues),
   			as.numeric(0),	as.numeric(discountFactor) )
   	} else {
	      if (!is.null(iDur)) mdp$ptr$valueIte(0, sense, as.integer(1),
                as.numeric(0), as.integer(iW), as.integer(iDur), as.numeric(termValues),
                as.numeric(0), as.numeric(discountFactor) )
	      if (is.null(iDur)) mdp$ptr$valueIte(2, sense, as.integer(1),
               as.numeric(0), as.integer(iW), as.integer(0), as.numeric(termValues),
               as.numeric(0), as.numeric(1) )
   	}
	} else {  # value ite under average expected-weight Bellman operator
	   if (is.null(iDur)) stop("A duration index must be specified under the average expected-weight Bellman operator!")
	   mdp$ptr$valueIte(1, sense, as.integer(1),
           as.numeric(eps), as.integer(iW), as.integer(iDur), as.numeric(termValues),
           as.numeric(g), as.numeric(1) )
	}
	if (getLog) cat(mdp$ptr$getLog())
	invisible(NULL)
}


#' Get parts of the optimal policy.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param sId Vector of id's of the states we want to retrieve.
#' @param stageStr Stage string. If specified then find `sId` based on the stage string.
#' @param stateLabels Add state labels.
#' @param actionLabels Add action labels of policy.
#' @param actionIdx Add action index.
#' @param rewards Add weights calculated for each state.
#' @param stateStr Add the state string for each state.
#' @param external A vector of stage strings corresponding to external processes we want the optimal policy of.
#' @param ... Parameters passed on when find the optimal policy of the external processes.
#' 
#' Note if external is specified then it must contain stage strings from `mdp$external`. Moreover you 
#' must specify further arguments passed on to [runValueIte()] used for recreating the optimal policy e.g. 
#' the g value and the label for weight and duration. See the vignette about external processes.
#' 
#' @return The policy (data frame).
#' @example inst/examples/machine-ex.R
#' @export
getPolicy<-function(mdp, sId = ifelse(mdp$timeHorizon>=Inf, mdp$founderStatesLast+1,1):
                       ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)-1, 
                    stageStr = NULL, stateLabels = TRUE, actionLabels = TRUE, actionIdx = TRUE, 
                    rewards = TRUE, stateStr = TRUE, external = NULL, ...) {
	if (!is.null(stageStr)) sId = mdp$ptr$getStateIdsStages(stageStr)
   maxS<-ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)
	if (max(sId)>=maxS | min(sId)<0)
		stop("Out of range (sId). Need to be a subset of 0,...,",maxS-1,"!")
   cols <- 1 + stateLabels + actionIdx + actionLabels + rewards + stateStr
   policy<-data.frame(matrix(NA, nrow = length(sId), ncol = cols))
   cols<-1
   policy[,cols] <- sId
   colNames = "sId"; cols = cols + 1
   if (stateStr) {
      policy[,cols]<-mdp$ptr$getStateStr(sId)
      colNames = c(colNames, "stateStr"); cols = cols + 1
   }
   if (stateLabels) {
      policy[,cols] = mdp$ptr$getStateLabel(sId); 
      colNames = c(colNames, "stateLabel"); cols = cols + 1
   }
   if (actionIdx) {
      policy[,cols] = mdp$ptr$getPolicy(sId) 
      colNames = c(colNames, "aIdx"); cols = cols + 1       
   }
   if (actionLabels) {
      policy[,cols] = mdp$ptr$getPolicyLabel(sId)
      colNames = c(colNames, "actionLabel"); cols = cols + 1
   }
   if (rewards) {
      policy[,cols] = mdp$ptr$getPolicyW(sId) 
      colNames = c(colNames, "weight"); cols = cols + 1
   }
   colnames(policy) <- colNames
   
   if (!is.null(external)) {
      policy <- list(main=policy)
      for (s in external) {
         prefix <- subset(mdp$external, stageStr == s, select = "prefix", drop = TRUE)
         lastStage<-mdp$ptr$getNextStageStr(s)
         termValues <- getPolicy(mdp, stageStr = lastStage)$weight
         extMDP<-loadMDP(prefix, getLog = FALSE)
         valueIte(extMDP, termValues=termValues, getLog = FALSE, ...)
         extPolicy<-getPolicy(extMDP)
         policy[[s]]<-extPolicy
         rm(extMDP)
      }
   }
	return(dplyr::as_tibble(policy))
}




#' Information about the MDP
#' 
#' @param mdp The MDP loaded using [loadMDP()].
#' @param sId The id of the state(s) considered.
#' @param stateStr A character vector containing the index of the state(s) (e.g. "n0,s0,a0,n1,s1"). 
#'   Parameter `sId` are ignored if not NULL.
#' @param stageStr A character vector containing the index of the stage(s) (e.g. "n0,s0,a0,n1"). 
#'   Parameter `sId` and `idxS` are ignored if not NULL.
#' @param withList Output info as a list `lst`.  
#' @param withDF Output the info as a data frame.
#' @param dfLevel If `withDF` and equal `"state"` the data frame contains a row for each state. If equal `"action"` the data frame contains a row for each action.
#' @param asStringsState Write state vector as a string; otherwise, output it as columns.
#' @param asStringsActions Write action vectors (weights, transitions and probabilities) as strings; otherwise, output it as columns.
#' @param withHarc Output a hyperarcs data frame. Each row contains a hyperarc with the first column denoting the
#'   head (`sId`), the tails (`sId`) and the label.
#'   
#' @return A list containing the list, data frame(s).
#' @example inst/examples/machine-ex.R
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
getInfo<-function(mdp, 
                  sId=1:ifelse(mdp$timeHorizon<Inf, mdp$states, mdp$states+mdp$founderStatesLast)-1,
                  stateStr=NULL, stageStr=NULL, 
                  withList = TRUE, 
                  withDF = TRUE, dfLevel = "state", asStringsState = TRUE, asStringsActions = FALSE,
                  withHarc = FALSE
                  ) {
   if (!is.null(stageStr)) {
      sId<-mdp$ptr$getStateIdsStages(stageStr)
      stateStr<-mdp$ptr$getStateStr(sId)
   }else {
      if (!is.null(stateStr)) sId<-mdp$ptr$getStateIdsStates(stateStr)
      else stateStr<-mdp$ptr$getStateStr(sId)
   }
   maxS<-ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)
   if (max(sId)>=maxS | min(sId)<0)
      stop("Out of range (sId). Need to be a subset of 0,...,",maxS-1,"!")
   l<-vector("list", length(sId))
   lapply(l,function(x) x<-list(sId=NULL,stateStr=NULL,label=NULL,actions=NULL))

   labels<-mdp$ptr$getStateLabel(sId)
   for (i in 1:length(l)) {
      l[[i]]$sId <- sId[i]
      l[[i]]$stateStr <- stateStr[i]
      l[[i]]$label <- labels[i]
      l[[i]]$actions <- mdp$ptr$getActionInfo(sId[i])
   }
   names(l) <- sId
   lst <- list()
   if (withList) lst$lst <- l
   if (withDF) {
      df <- dplyr::tibble(sId = l) # add list 
      df <- df %>% tidyr::unnest_wider(sId)  # convert states to columns
      if (dfLevel == "action") {
         df <- df %>% 
            tidyr::unnest_longer("actions") %>% # convert actions (one row for each action)
            tidyr::unnest_wider("actions", names_repair = tidyr::tidyr_legacy) # convert action to columns
         df <- df %>% 
            dplyr::rename(label_action = "label1")
         if (asStringsActions) {
            df <- df %>% 
               dplyr::mutate(weights = sapply(.data$weights, function(x) paste0(x, collapse = ",")),
                      trans = sapply(.data$trans, function(x) paste0(x, collapse = ",")),
                      pr = sapply(.data$pr, function(x) paste0(x, collapse = ","))) %>% 
               dplyr::mutate(weights = dplyr::na_if(.data$weights, ""),
                      trans = dplyr::na_if(.data$trans, ""),
                      pr = dplyr::na_if(.data$pr, ""))
         }
      } else {
         if (asStringsActions) {
            df <- df %>% 
               tidyr::unnest_longer("actions") %>% # convert actions (one row for each action)
               tidyr::unnest_wider("actions", names_repair = tidyr::tidyr_legacy) # convert action to columns
            df <- df %>% 
               dplyr::rename(label_action = .data$label1)
            df <- df %>% 
               dplyr::mutate(weights = sapply(.data$weights, function(x) paste0(x, collapse = ",")),
                      trans = sapply(.data$trans, function(x) paste0(x, collapse = ",")),
                      pr = sapply(.data$pr, function(x) paste0(x, collapse = ","))) %>% 
               dplyr::mutate(weights = dplyr::na_if(.data$weights, ""),
                      trans = dplyr::na_if(.data$trans, ""),
                      pr = dplyr::na_if(.data$pr, ""))
            df <- df %>% 
               dplyr::group_by(sId, .data$stateStr, .data$label) %>% 
               tidyr::nest() %>% 
               dplyr::mutate(data = lapply(.data$data, function(x) {if (all(is.na(x$aIdx))) return(NULL) else return(x)})) %>% 
               dplyr::rename(actions = .data$data)
         }
      }
      if (!asStringsState) {
         levels <- (max(stringr::str_count(df$stateStr, ",")) + 1) %/% 3 + 1
         if (levels == 1)
            nm <- paste(c("n", "s"), levels - 1, sep = "")
         if (levels > 1)
            nm <-
               c(paste(c("n", "s", "a"), rep(0:(levels - 2), each = 3), sep = ""),
                 paste(c("n", "s"), levels - 1, sep = ""))
         df <- df %>% 
            tidyr::separate(.data$stateStr, into = nm, sep = ",", remove = FALSE, fill = "right")
      }
      lst$df <- df
   }
   if (withHarc) {
      df <- dplyr::tibble(sId = l)  %>% 
         tidyr::unnest_wider(sId) %>% 
         tidyr::unnest_longer("actions") %>% # convert actions (one row for each action)
         tidyr::unnest_wider("actions", names_repair = tidyr::tidyr_legacy) %>% 
         tidyr::unnest_wider(.data$trans, names_sep = "") %>% 
         dplyr::filter(!is.na(.data$aIdx)) %>% 
         dplyr::select(.data$sId, tidyr::contains("trans"), label = .data$label1)
      colnames(df) <- stringr::str_replace(colnames(df), "trans", "tail")
      colnames(df)[1] <- "head"
      lst$harcDF <- df
   }
   return(lst)
}


#' Modify the current policy by setting policy action of states. 
#' 
#' If the policy does not contain all states then the actions from the previous optimal 
#' policy are used.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param policy A data frame with two columns state id `sId` and action index `aIdx`.
#' @return NULL (invisible)
#' @example inst/examples/machine-ex.R
#' @export
setPolicy<-function(mdp, policy) {
   if (!all(c("sId", "aIdx") %in% colnames(policy))) stop("You must specify `sId` and action index `aIdx`.")
   #if (dim(policy)[2]!=2) stop("You must specify two columns in policy.")
   mdp$ptr$setPolicy(as.integer(policy$sId),as.integer(policy$aIdx))
	return(invisible(NULL))
}


#' Calculate weights based on current policy. Normally run after an optimal policy has been found.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param wLbl The label of the weight we consider.
#' @param criterion The Bellman operator shortcut. If `expected` use expected weights, if `discount` use discounted expected weights, if `average` use average expected weights, if `min` use minimum-successor weights, if `max` use maximum-successor weights, if `secondMoment` use the second moment of total accumulated weight, and if `variance` use the law-of-total-variance recursion under the current policy.
#' @param durLbl The label of the duration/time such that discount rates can be calculated.
#' @param rate The interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param termValues The terminal values used (values of the last stage in the MDP).
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#' 
#' @return Nothing.
#' @example inst/examples/machine-ex.R
#' @export
runCalcWeights<-function(mdp, wLbl, criterion="expected", durLbl = NULL, rate = 0, rateBase = 1, 
                      discountFactor = NULL, termValues = NULL, discountMethod = "continuous") {
	iW<-getWIdx(mdp,wLbl)
	if (!is.null(durLbl)) iDur<-getWIdx(mdp,durLbl)
	.checkWIdx(iW,length(mdp$weightNames))
	if (is.null(discountFactor)) {
	   if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
	   if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
	}
	if (mdp$timeHorizon<Inf) {
		if (is.null(termValues)) stop("Terminal values must be specified under finite time-horizon!")
		if (criterion=="expected") mdp$ptr$calcPolicy(2,iW,0,1,discountFactor)
		if (criterion=="min") mdp$ptr$calcPolicy(5,iW,0,1,discountFactor)
		if (criterion=="max") mdp$ptr$calcPolicy(6,iW,0,1,discountFactor)
		if (criterion=="secondMoment") mdp$ptr$calcPolicy(7,iW,0,1,discountFactor)
		if (criterion=="variance") {
		   mdp$ptr$setTerminalW(as.numeric(termValues))
		   mdp$ptr$calcPolicy(8,iW,0,1,discountFactor)
		}
		if (criterion=="discount") mdp$ptr$calcPolicy(0,iW,0,iDur,discountFactor)
	} else {
		if (criterion=="discount") mdp$ptr$policyIteFixedPolicy(0,iW,iDur,discountFactor)
		if (criterion=="average") return( mdp$ptr$policyIteFixedPolicy(1,iW,iDur,discountFactor) )
		#if (criterion=="expected") .Call("MDP_CalcWeightsFinite", mdp$ptr, as.integer(iW), as.numeric(termValues), PACKAGE="MDP")
	}
	invisible(NULL)
}


#' Calculate the retention pay-off (RPO) or opportunity cost for some states.
#'
#' The RPO is defined as the difference between
#' the weight of the state when using action `iA` and the maximum
#' weight of the node when using another predecessor different from `iA`.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param w The label of the weight we calculate RPO for.
#' @param iA  The action index we calculate the RPO with respect to (same size as `sId`).
#' @param sId Vector of id's of the states we want to retrieve.
#' @param criterion The Bellman operator shortcut. If `expected` use expected weights, if `discount` use discounted expected weights, if `average` use average expected weights.
#' @param dur The label of the duration/time such that discount rates can be calculated.
#' @param rate The interest rate.
#' @param rateBase The time-horizon the rate is valid over.
#' @param discountFactor The discount rate for one time unit. If specified `rate` and `rateBase` are not used to calculate the discount rate.
#' @param g The optimal gain (g) calculated (used if `criterion = "average"`).
#' @param objective Optimize by maximizing (`"max"`) or minimizing (`"min"`) the Bellman value.
#' @param discountMethod Either 'continuous' or 'discrete', corresponding to discount factor `exp(-rate/rateBase)` or `1/(1 + rate/rateBase)`, respectively. Only used if `discountFactor` is `NULL`.
#' @param stateStr Output the state string. 
#' 
#' @return The RPO (matrix/data frame).
#' @importFrom magrittr %>%
#' @export
getRPO<-function(mdp, w, iA, sId = ifelse(mdp$timeHorizon>=Inf, mdp$founderStatesLast+1,1):
                    ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)-1, 
                  criterion="expected", dur = "", rate = 0, rateBase = 1, discountFactor = NULL, 
                  g = 0, objective = c("max", "min"), discountMethod="continuous", stateStr = TRUE) {
   iW<-getWIdx(mdp,w)
   sense <- .optSenseIdx(objective)
   if (criterion != "expected" && !nzchar(dur))
      stop("A duration weight must be specified for this Bellman operator.", call. = FALSE)
   iDur<-if (nzchar(dur)) getWIdx(mdp,dur) else 0
   .checkWIdx(iW,length(mdp$weightNames))
   if (is.null(discountFactor)) {
      if (discountMethod=="continuous") discountFactor<-exp(-rate/rateBase)
      if (discountMethod=="discrete") discountFactor<-1/(1 + rate/rateBase)
   }
   maxS<-ifelse(mdp$timeHorizon>=Inf, mdp$states + mdp$founderStatesLast,mdp$states)
   if (max(sId)>=maxS | min(sId)<0)
      stop("Out of range (sId). Need to be a subset of 0,...,",maxS-1,"!")
   if (length(sId)!=length(iA))
      stop("Vectors sId and iA must have same length!")
   rpo<-NA
   if (criterion=="expected") rpo<-mdp$ptr$calcRPO(2, sense, as.integer(sId), iW, as.integer(iA), g, iDur, discountFactor)
   if (criterion=="discount") rpo<-mdp$ptr$calcRPO(0, sense, as.integer(sId), iW, as.integer(iA), g, iDur, discountFactor)
   if (criterion=="average")  rpo<-mdp$ptr$calcRPO(1, sense, as.integer(sId), iW, as.integer(iA), g, iDur, discountFactor)
   rpo[rpo <= -1.8e+16]<-NA # less than 2 actions
   rpo <- dplyr::tibble(sId=sId, rpo=rpo)
   if (stateStr) {
      rpo <- rpo %>% 
         dplyr::transmute(sId, stateStr = mdp$ptr$getStateStr(sId), rpo)
   }
   return(rpo)
}

#' Save the MDP to binary files
#' 
#' Currently do not save external files.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param prefix A character string with the prefix added to `binNames`. Used to identify a specific model.
#' @param getLog Output the log as a message.
#' 
#' @return ???
#' @export
saveMDP<-function(mdp,prefix="", getLog=TRUE) {
   mdp$ptr$save2Binary(prefix)
   if (getLog) message(mdp$ptr$getLog())
}


#' Calculate the steady state transition probabilities for the founder process (level 0).
#'
#' Assume that we consider an ergodic/irreducible time-homogeneous Markov chain specified using a policy in the MDP.
#'
#' @param mdp The MDP loaded using [loadMDP()].
#' @param getLog Output log text.
#' 
#' @return A vector with steady state probabilities for all the states at the founder level.
#' @export
getSteadyStatePr<-function(mdp, getLog=FALSE) {
   pr<-mdp$ptr$steadyStatePr()
   if (getLog) message(mdp$ptr$getLog())
	return(pr)
}



# #' Set the weight of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param w The weight.
# #' @param sId The state id of the state.
# #' @param idxA The action index.
# #' @param wLbl The label of the weight we consider.
# #' @return Nothing.
# #' @example inst/examples/machine.R
# #' @export
# setActionWeight<-function(mdp, w, sId, iA, wLbl) {
# 	iW<-getWIdx(mdp,wLbl)
# 	
# 	.Call("MDP_SetActionW", mdp$ptr, as.numeric(w), as.integer(sId), as.integer(iA), as.integer(iW), PACKAGE="MDP")
# 	invisible(NULL)
# }
# 




#
#
#
# #' Fix the action of a state. That is, the other actions are removed from the HMDP.
# #'
# #' The actions can be reset using \code{resetActions}.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param sId The state id of the state we want to fix the action for.
# #' @param iA  The action index of the state.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @seealso \code{\link{resetActions}}, \code{\link{removeAction}}.
# #' @export
# fixAction<-function(mdp, sId, iA) {
# 	.Call("MDP_FixAction", mdp$ptr, as.integer(sId), as.integer(iA), PACKAGE="MDP")
# 	invisible(NULL)
# }
#
#
# #' Remove the action of a state from the HMDP.
# #'
# #' The actions can be reset using \code{resetActions}.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param sId The state id of the state we want to remove the action for.
# #' @param iA  The action index of the state.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @seealso \code{\link{resetActions}}, \code{\link{fixAction}}.
# #' @example inst/examples/machine.R
# #' @export
# removeAction<-function(mdp, sId, iA) {
# 	.Call("MDP_RemoveAction", mdp$ptr, as.integer(sId), as.integer(iA), PACKAGE="MDP")
# 	invisible(NULL)
# }
#
#
# #' Reset the actions of a state.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @seealso \code{\link{resetActions}}, \code{\link{fixAction}}.
# #' @example inst/examples/machine.R
# #' @export
# resetActions<-function(mdp) {
# 	.Call("MDP_ResetActions", mdp$ptr, PACKAGE="MDP")
# 	invisible(NULL)
# }
#

#
#
# #' Set the weight of a state.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param w The weight.
# #' @param sId The state id of the state.
# #' @param wLbl The label of the weight we consider.
# #' @return Nothing.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @export
# setStateWeight<-function(mdp, w, sId, wLbl) {
# 	iW<-getWIdx(mdp,wLbl)
# 	.Call("MDP_SetStateW", mdp$ptr, as.numeric(w), as.integer(sId), as.integer(iW), PACKAGE="MDP")
# 	invisible(NULL)
# }
#
#
# #' Return ids for states in a stage.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param stages A char vector of index in the form "n0,s0,a0,n1", i.e. 3*level+1 elements in the string.
# #' @return A vector of ids for the states.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getIdSStages<-function(mdp, stages) {
# 	v<-.Call("MDP_GetIdSStage", mdp$ptr, as.character(stages), PACKAGE="MDP")
# 	return(v)
# }
#
#
# #' Return the index strings for states having id idS.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS A vector of state ids.
# #' @return A vector of index for the states.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getStrIdxS<-function(mdp, idS) {
# 	n<- mdp$states + ifelse(mdp$timeHorizon>=Inf,mdp$founderStatesLast,0)
# 	idS <- idS[idS<n & idS>=0]
# 	v<-.Call("MDP_GetIdxS", mdp$ptr, as.integer(idS), PACKAGE="MDP")
# 	return(v)
# }
#
#
# #' Return the label of states having id idS.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS A vector of state ids.
# #' @return A vector of labels for the states.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getLabel<-function(mdp, idS) {
# 	n<- mdp$states + ifelse(mdp$timeHorizon>=Inf,mdp$founderStatesLast,0)
# 	idS <- idS[idS<n & idS>=0]
# 	v<-.Call("MDP_GetLabel", mdp$ptr, as.integer(idS), PACKAGE="MDP")
# 	return(v)
# }
#
#
# #' Get the weights of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS The state id.
# #' @param idxA The action index.
# #' @return A vector of weights for the action.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getActionW<-function(mdp, idS, idxA) {
# 	l<-info(mdp, idS[1])
# 	l<-l[[1]]$actions[idxA+1]
# 	l<-substring(l,regexpr("w",l)+3)
# 	l<-gsub(").*","",l)
# 	zz<-textConnection(l)
# 	l<-scan(zz, sep=",", quiet = TRUE)
# 	close(zz)
# 	return(l)
# }
#
#
# #' Get the ids of the transition states of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS The state id.
# #' @param idxA The action index.
# #' @return A vector of weights for the action.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getActionTransIdS<-function(mdp, idS, idxA) {
# 	l<-info(mdp, idS[1])
# 	l<-l[[1]]$actions[idxA+1]
# 	l<-substring(l,regexpr("trans",l)+7)
# 	l<-gsub(").*","",l)
# 	zz<-textConnection(l)
# 	l<-scan(zz, sep=",", quiet = TRUE)
# 	close(zz)
# 	return(l)
# }
#
#
# #' Get the transition probabilities of the transition states of an action.
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @param idS The state id.
# #' @param idxA The action index (c++ style starting from zero).
# #' @return A vector of weights for the action.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @example inst/examples/machine.R
# #' @export
# getActionTransPr<-function(mdp, idS, idxA) {
#   return(.Call("MDP_GetActionTransPr", mdp$ptr, as.integer(idS), as.integer(idxA), PACKAGE="MDP") )
# }
# # getActionTransPr<-function(mdp, idS, idxA) {
# # 	l<-info(mdp, idS[1])
# # 	l<-l[[1]]$actions[idxA+1]
# # 	l<-substring(l,regexpr("pr",l)+4)
# # 	l<-gsub(").*","",l)
# # 	zz<-textConnection(l)
# # 	l<-scan(zz, sep=",", quiet = TRUE)
# # 	close(zz)
# # 	return(l)
# # }
#
#

#
# #' Get the transition probability matrix P for the founder process (level 0).
# #'
# #' @param mdp The MDP loaded using \link{loadMDP}.
# #' @return The state probability matrix.
# #' @author Lars Relund \email{lars@@relund.dk}
# #' @export
# getTransPr<-function(mdp) {
# 	v<-.Call("MDP_GetTransPr", mdp$ptr, PACKAGE="MDP")
# 	v<-matrix(v,nrow=mdp$states)
# 	return(v)
# }
Any scripts or data that you put into this service are public.
MDP2 documentation built on June 13, 2026, 1:08 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
MDP2
Markov Decision Processes (MDPs)

R/mdp.R
In MDP2: Markov Decision Processes (MDPs)

Defines functions getSteadyStatePr saveMDP getRPO runCalcWeights setPolicy getInfo getPolicy runValueIte runPolicyIteDiscount runPolicyIteAve .bellmanOpIdx .optSenseIdx getWIdx .checkWIdx .checkWDurIdx .makeMDPList loadMDP

Documented in .checkWDurIdx .checkWIdx getInfo getPolicy getRPO getSteadyStatePr getWIdx loadMDP runCalcWeights runPolicyIteAve runPolicyIteDiscount runValueIte saveMDP setPolicy

Try the MDP2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MDP2 Markov Decision Processes (MDPs)

R/mdp.R In MDP2: Markov Decision Processes (MDPs)

Defines functions getSteadyStatePr saveMDP getRPO runCalcWeights setPolicy getInfo getPolicy runValueIte runPolicyIteDiscount runPolicyIteAve .bellmanOpIdx .optSenseIdx getWIdx .checkWIdx .checkWDurIdx .makeMDPList loadMDP

Documented in .checkWDurIdx .checkWIdx getInfo getPolicy getRPO getSteadyStatePr getWIdx loadMDP runCalcWeights runPolicyIteAve runPolicyIteDiscount runValueIte saveMDP setPolicy

Try the MDP2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MDP2
Markov Decision Processes (MDPs)

R/mdp.R
In MDP2: Markov Decision Processes (MDPs)