Description Usage Arguments Value Author(s) Examples
Set the weight of an action.
setActionWeight(mdp, w, sId, iA, wLbl)
|
mdp |
The MDP loaded using loadMDP. |
w |
The new weight value to assign to the action. |
sId |
The id (sId) of the state that contains the action. |
iA |
The action index. |
wLbl |
The label of the weight we consider. |
Nothing.
Lars Relund lars@relund.dk
# Create the small machine replacement problem used as an example in L.R.
# Nielsen and A.R. Kristensen. Finding the K best policies in a finite-horizon
# Markov decision process. European Journal of Operational Research,
# 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011.
## Build the first model (file prefix "machine1_"). Replacement is modelled
## with an extra dummy state "replaced" in stages 3 and 4.
## Indentation below mirrors the process > stage > state > action nesting.
## NOTE(review): each `prob` vector appears to hold (scope, index, probability)
## triples - confirm against the binaryMDPWriter documentation.
prefix <- "machine1_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))   # a single weight per action
w$process()
  w$stage()   # stage n=0
    w$state(label = "Dummy")   # v=(0,0)
      w$action(label = "buy", weights = -100, prob = c(1, 0, 0.7, 1, 1, 0.3), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=1
    w$state(label = "good")   # v=(1,0)
      w$action(label = "mt", weights = 55, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 70, prob = c(1, 0, 0.6, 1, 1, 0.4), end = TRUE)
    w$endState()
    w$state(label = "average")   # v=(1,1)
      w$action(label = "mt", weights = 40, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 50, prob = c(1, 1, 0.6, 1, 2, 0.4), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=2
    w$state(label = "good")   # v=(2,0)
      w$action(label = "mt", weights = 55, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 70, prob = c(1, 0, 0.5, 1, 1, 0.5), end = TRUE)
    w$endState()
    w$state(label = "average")   # v=(2,1)
      w$action(label = "mt", weights = 40, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 50, prob = c(1, 1, 0.5, 1, 2, 0.5), end = TRUE)
    w$endState()
    w$state(label = "not working")   # v=(2,2)
      w$action(label = "mt", weights = 30, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "rep", weights = 5, prob = c(1, 3, 1), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=3
    w$state(label = "good")   # v=(3,0)
      w$action(label = "mt", weights = 55, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 70, prob = c(1, 0, 0.2, 1, 1, 0.8), end = TRUE)
    w$endState()
    w$state(label = "average")   # v=(3,1)
      w$action(label = "mt", weights = 40, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 50, prob = c(1, 1, 0.2, 1, 2, 0.8), end = TRUE)
    w$endState()
    w$state(label = "not working")   # v=(3,2)
      w$action(label = "mt", weights = 30, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "rep", weights = 5, prob = c(1, 3, 1), end = TRUE)
    w$endState()
    w$state(label = "replaced")   # v=(3,3)
      w$action(label = "Dummy", weights = 0, prob = c(1, 3, 1), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=4: states without actions
    w$state(label = "good", end = TRUE)          # v=(4,0)
    w$state(label = "average", end = TRUE)       # v=(4,1)
    w$state(label = "not working", end = TRUE)   # v=(4,2)
    w$state(label = "replaced", end = TRUE)      # v=(4,3)
  w$endStage()
w$endProcess()
w$closeWriter()
## Inspect the model files written under `prefix`
stateIdxDf(prefix)   # data frame of the states of the MDP, with labels
actionInfo(prefix)   # single data frame with all action information
## Load the model into memory and print it
mdp <- loadMDP(prefix)
mdp
## Run value iteration
w <- "Net reward"   # label of the weight to optimize
scrapValues <- c(30, 10, 5, 0)   # scrap values of the 4 states at stage 4
valueIte(mdp, w, termValues = scrapValues)
## Show the optimal policy together with state info and weights
policy <- getPolicy(mdp, labels = TRUE)    # optimal action for each sId
states <- stateIdxDf(prefix)               # information about the states
policy <- merge(x = states, y = policy)    # join state info and policy
policyW <- getPolicyW(mdp, w)              # weights (rewards) of the policy
policy <- merge(x = policy, y = policyW)   # append the rewards
policy
## Evaluate a fixed policy: always pick action index 0 ("always maintain")
policy <- data.frame(sId = states$sId, iA = 0)   # action index 0 for every sId
setPolicy(mdp, policy)
calcWeights(mdp, w, termValues = scrapValues)
## Show the fixed policy together with state info and its weights
policy <- getPolicy(mdp, labels = TRUE)    # the policy set above, per sId
states <- stateIdxDf(prefix)               # information about the states
policy <- merge(x = states, y = policy)    # join state info and policy
policyW <- getPolicyW(mdp, w)              # weights (rewards) of this policy
policy <- merge(x = policy, y = policyW)   # append the rewards
policy
## Modify the in-memory MDP: drop the maintain action in the stage 1 states
removeAction(mdp, sId = 1, iA = 0)   # remove action 0 of the state with sId=1
removeAction(mdp, sId = 2, iA = 0)   # remove action 0 of the state with sId=2
## Re-run value iteration on the modified MDP and show the result
valueIte(mdp, w, termValues = scrapValues)
policy <- getPolicy(mdp, labels = TRUE)    # optimal action for each sId
states <- stateIdxDf(prefix)               # information about the states
policy <- merge(x = states, y = policy)    # join state info and policy
policyW <- getPolicyW(mdp, w)              # weights (rewards) of the policy
policy <- merge(x = policy, y = policyW)   # append the rewards
policy
resetActions(mdp)   # restore the MDP so that all actions are used again
## Change the weight of action 'buy' (action 0 of the state with sId=0).
## Argument `w` is the new weight value; `wLbl = w` passes the global
## variable `w`, i.e. the weight label.
setActionWeight(mdp, w = -50, sId = 0, iA = 0, wLbl = w)
## Re-run value iteration on the modified MDP and show the result
valueIte(mdp, w, termValues = scrapValues)
policy <- getPolicy(mdp, labels = TRUE)    # optimal action for each sId
states <- stateIdxDf(prefix)               # information about the states
policy <- merge(x = states, y = policy)    # join state info and policy
policyW <- getPolicyW(mdp, w)              # weights (rewards) of the policy
policy <- merge(x = policy, y = policyW)   # append the rewards
policy
# The example given in L.R. Nielsen and A.R. Kristensen. Finding the K best
# policies in a finite-horizon Markov decision process. European Journal of
# Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011,
# does not actually have a dummy replacement node as in the MDP above. The same
# model can be created using a single dummy node at the end of the process.
## Build the second model (file prefix "machine2_") with a single dummy node
## at the end of the process (stage 5) instead of a "replaced" state.
## Indentation below mirrors the process > stage > state > action nesting.
## NOTE(review): each `prob` vector appears to hold (scope, index, probability)
## triples - confirm against the binaryMDPWriter documentation.
prefix <- "machine2_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))   # a single weight per action
w$process()
  w$stage()   # stage n=0
    w$state(label = "Dummy")   # v=(0,0)
      w$action(label = "buy", weights = -100, prob = c(1, 0, 0.7, 1, 1, 0.3), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=1
    w$state(label = "good")   # v=(1,0)
      w$action(label = "mt", weights = 55, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 70, prob = c(1, 0, 0.6, 1, 1, 0.4), end = TRUE)
    w$endState()
    w$state(label = "average")   # v=(1,1)
      w$action(label = "mt", weights = 40, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 50, prob = c(1, 1, 0.6, 1, 2, 0.4), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=2
    w$state(label = "good")   # v=(2,0)
      w$action(label = "mt", weights = 55, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 70, prob = c(1, 0, 0.5, 1, 1, 0.5), end = TRUE)
    w$endState()
    w$state(label = "average")   # v=(2,1)
      w$action(label = "mt", weights = 40, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 50, prob = c(1, 1, 0.5, 1, 2, 0.5), end = TRUE)
    w$endState()
    w$state(label = "not working")   # v=(2,2)
      w$action(label = "mt", weights = 30, prob = c(1, 0, 1), end = TRUE)
      # replacing jumps straight to the node with sId=12 (Dummy)
      w$action(label = "rep", weights = 5, prob = c(3, 12, 1), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=3
    w$state(label = "good")   # v=(3,0)
      w$action(label = "mt", weights = 55, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 70, prob = c(1, 0, 0.2, 1, 1, 0.8), end = TRUE)
    w$endState()
    w$state(label = "average")   # v=(3,1)
      w$action(label = "mt", weights = 40, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "nmt", weights = 50, prob = c(1, 1, 0.2, 1, 2, 0.8), end = TRUE)
    w$endState()
    w$state(label = "not working")   # v=(3,2)
      w$action(label = "mt", weights = 30, prob = c(1, 0, 1), end = TRUE)
      w$action(label = "rep", weights = 5, prob = c(3, 12, 1), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=4: every state has a single "rep" action
    w$state(label = "good")   # v=(4,0)
      w$action(label = "rep", weights = 30, prob = c(1, 0, 1), end = TRUE)
    w$endState()
    w$state(label = "average")   # v=(4,1)
      w$action(label = "rep", weights = 10, prob = c(1, 0, 1), end = TRUE)
    w$endState()
    w$state(label = "not working")   # v=(4,2)
      w$action(label = "rep", weights = 5, prob = c(1, 0, 1), end = TRUE)
    w$endState()
  w$endStage()
  w$stage()   # stage n=5: the single dummy end node
    w$state(label = "Dummy", end = TRUE)   # v=(5,0)
  w$endStage()
w$endProcess()
w$closeWriter()
## Inspect the model files written under `prefix`
stateIdxDf(prefix)   # data frame of the states of the MDP, with labels
actionInfo(prefix)   # single data frame with all action information
## Load the model and look at the state-expanded hypergraph
mdp <- loadMDP(prefix)
hypergf(mdp)
## Find the optimal policy (terminal value 0) and show it
w <- "Net reward"   # label of the weight to optimize
valueIte(mdp, w, termValues = 0)
policy <- getPolicy(mdp, labels = TRUE)    # optimal action for each sId
states <- stateIdxDf(prefix)               # information about the states
policy <- merge(x = states, y = policy)    # join state info and policy
policyW <- getPolicyW(mdp, w)              # weights (rewards) of the policy
policy <- merge(x = policy, y = policyW)   # append the rewards
policy
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.