In cboettig/multiple_uncertainty: Multiple Uncertainty

profvis::profvis({

library("MDPtoolbox")
devtools::load_all()

f = logistic
x_grid = seq(0,200,by=0.5)
h_grid = x_grid 
Tmax = 50
sigma_g = 0.5 
sigma_m = 0.1 
sigma_i = 0.1
delta = 0.05
noise_dist = "uniform"
y_grid = x_grid 
q_grid = x_grid
profit_fn = function(x,h) pmin(x, h)
assume = "Bayes"
  ## 'assume' determines the rule on how we compute the inverse

  ## allow f to be a function or a character string
  if(is.character(f))
    f <- get(f)

  ## Handle choice of noise distribution. Needs to define pdf, cdf, invpdf, invcdf
  #noise_dist <- match.arg(noise_dist)
  if(noise_dist == "uniform"){
    pdf = function(p,mu,s) dunif(p, mu * (1 - s), mu * (1 + s)) 
    cdf = function(p,mu,s) punif(p, mu * (1 - s), mu * (1 + s))
    invpdf = function(p,mu,s) iunifpdf(p, mu, s, assume = assume) 
    invcdf = function(p,mu,s) iunifcdf(p, mu, s, assume = assume)
  } else if (noise_dist == "lognormal"){
    pdf = function(p,mu,s) dlnorm(p, log(mu), s)
    cdf = function(p,mu,s) plnorm(p, log(mu), s)
    invpdf = function(p,mu,s) ilognpdf(p, log(mu), s)
    invcdf = function(p,mu,s) ilogncdf(p, log(mu), s)
    ## Confirm cdf is integral pdf
  } else {
    stop("Noise distribution not recognized")
  }

  ## Store constants 
  n_x <- length(x_grid)
  n_h <- length(h_grid)
  n_y <- length(y_grid)
  n_q <- length(q_grid)

  ## Define a profit function.    

  P <- outer(x_grid, h_grid, profit_fn)
  Minv <- pdf_matrix(y_grid, x_grid, sigma_m, invpdf, invcdf) 
  M <- pdf_matrix(x_grid, y_grid, sigma_m, pdf, cdf) 
  I <- pdf_matrix(q_grid, h_grid, sigma_i, pdf, cdf)

  F <- array(0, c(n_x, n_x, n_h))
  for(h in 1:n_h){
    for(x in 1:n_x){
      mu <- f(x_grid[x], h_grid[h]) 
      F[x, , h] <- pdf_matrix(mu, x_grid, sigma_g, pdf, cdf) ## 12% of runtime spent here
    }
  }

  Phi <- array(0, c(n_y, n_y, n_h))
  for(k in 1:n_h){
    Phi[,,k] <- Minv %*% F[, , k] %*% M  ## 18% of runtime here
  }

  T <- array(0, dim = c(n_y, n_y, n_q))
  for(i in 1:n_y){  
    T[i, , ] <- Phi[i, , ] %*% t(I)  ## 23% of runtime here
  }

  Ep <- Minv %*% P %*% t(I)


  out <- MDPtoolbox::mdp_policy_iteration(P = T, R = Ep, discount = 1/(1+delta) )
  S <- y_grid - q_grid[out$policy]

  ## Initialize 
  #V <- Ep 
  #v_t <- numeric(n_y)
  #D <- array(0, c(n_y, Tmax))

  ## main SDP recursion loop (now done by MDPtoolbox, which is faster:
  # for(t in 1:Tmax){
  #  for(j in 1:n_q){
  #    V[,j] <- Ep[,j] + 1 / (1 + delta) * T[, , j] %*% v_t  ## 43% of runtime here
  #  }
  #  v_t <- apply(V, 1, max)
  #  v_index <- apply(V, 1, which.max) 
  #  D[, (Tmax - t + 1)] = t(v_index)
  #}
  #S = y_grid - q_grid[D[,1]]



})