R/control_traj.R

#' Discrete Linear Time-Invariant Free Final State Classic Control Scheme
#'
#' Given a system dynamics \eqn{A}, control input matrix \eqn{B}, final state weighting matrix \eqn{S},
#'  intermediate state weighting matrix sequence \eqn{Q_seq}, and cost matrix sequence \eqn{R_seq}, 
#'  calculates the Kalman gain sequence that minimizes the LQR cost function by time \eqn{t_max}.
#'   See section 2.2 of \insertCite{lewisOptimalControl2012}{netcontrol} for details.
#'  
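#'  The gain sequence is computed from the backward Riccati recursion
#'  \deqn{S_t = A^T (S_{t+1} - S_{t+1} B (B^T S_{t+1} B + R_t)^{-1} B^T S_{t+1}) A + Q_t}
#'  initialized with \eqn{S} at the final time point, together with the feedback gains
#'  \deqn{G_t = -(B^T S_{t+1} B + R_t)^{-1} B^T S_{t+1} A.}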
#'
#' @param t_max Required. An integer total number of time points to determine the trajectory over
#' @param A Required. A \eqn{p x p} matrix of system coefficients
#' @param B Required. A \eqn{p x q} matrix of control weights
#' @param S A \eqn{p x p} final state weighting matrix
#' @param Q_seq A list of \eqn{t} \eqn{p x p} intermediate state weighting matrices or a single \eqn{p x p} intermediate state weighting matrix
#' @param R_seq A list of \eqn{t} \eqn{q x q} intermediate cost matrices or a single \eqn{q x q} cost matrix
#' @references
#' \insertRef{lewisOptimalControl2012}{netcontrol}
#' @return A list containing an entry labeled \code{gain_seq} containing either 1 or \code{t_max - 1} Kalman gain matrices and an entry labeled \code{cost_func} which contains an LQR cost function.
#' @export
#'
#' @examples
#' 
#' A = matrix(c(0,-3,-2,2,-2,1,-1,2,-1), 3,3)
#' 
#' #Normalize rows to sum to 1
#' A = solve(diag(rowSums(A))) %*% A
#' 
#' B = S = Q_seq = R_seq = diag(3)
#' 
#' CS = control_scheme_DLI_freestate(100, A, B, S, Q_seq, R_seq)
#' 
control_scheme_DLI_freestate <- function(t_max, A, B, S, Q_seq, R_seq){
  
  S_seq = .S_seq_calc(A, S, B, Q_seq, R_seq, t_max)
  G_seq = .G_seq_calc(S_seq, R_seq, A, B, t_max)
  
  S_n = S
  Q_seq_n = Q_seq
  R_seq_n = R_seq
  
  return(list("gain_seq" =G_seq,"cost_func" = LQR(S = S_n, Q_seq = Q_seq_n, R_seq = R_seq_n, eval = F)))
  
}



#' Calculate the trajectory of a discrete linear time invariant system under a given control scheme
#'
#' This function is designed to work with control_scheme objects generated by \code{control_scheme_DLI_freestate}.
#' In future versions of \code{netcontrol} this function will be used to simulate any control trajectory. 
#' For general details on control theory trajectories, see \insertCite{lewisOptimalControl2012}{netcontrol}.
#'
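#' At each time point the state evolves according to
#' \deqn{x_{t+1} = A x_t + B u_t + w_t, \quad y_t = x_t + v_t,}
#' where \eqn{w_t} and \eqn{v_t} are mean-zero Gaussian errors with covariances \code{theta} and \code{gamma}
#' (each omitted when the corresponding argument is \code{NA}), and the control input is \eqn{u_t = G_t y_t},
#' using the gain matrices stored in \code{control_scheme}.
#'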
#' CAUTION: Use of the saturation and/or bound parameters \code{delta, d_nosign, d_toggle, upper_bounds, lower_bounds, u_pos} 
#' leads to sub-optimal trajectories, as the Kalman gain calculations do not take any of those restrictions into account.
#' This functionality will be added later, and this caution statement removed at that time.
#'
#' @param t_max Required. An integer total number of time points to determine the trajectory over
#' @param x_0 Required. A \eqn{p} length numeric vector of starting values
#' @param A Required. A \eqn{p x p} matrix of system coefficients
#' @param B Required. A \eqn{p x q} matrix of control weights
#' @param theta Optional. A \eqn{p x p} covariance matrix for state errors. If \code{NA}, state mechanics will be deterministic
#' @param gamma Optional. A \eqn{p x p} covariance matrix for observation errors. If \code{NA}, no observation/measurement error will be modelled.
#' @param control_scheme Required. A list containing an entry labeled \code{gain_seq} containing either 1 or \code{t_max - 1} Kalman gain matrices and an entry labeled \code{cost_func} which contains an appropriately constructed cost function 
#' @param delta Optional. A vector of length 2, where the first entry contains the point of saturation for control inputs, and the second entry contains the saturation value for control inputs.
#' @param d_nosign Optional. Boolean. If \code{TRUE} and \code{delta} is not \code{NA}, negative control inputs are set to 0. 
#' @param d_toggle Optional. Boolean. If \code{TRUE} and \code{delta} is not \code{NA}, control inputs are either 0 or the saturation value. 
#' @param upper_bounds Optional. A \eqn{p} length vector of upper bounds on state values.
#' @param lower_bounds Optional. A \eqn{p} length vector of lower bounds on state values.
#' @param u_pos Optional. Boolean. If \code{TRUE}, restricts control inputs to be non-negative.
#' @references 
#' 
#' \insertRef{lewisOptimalControl2012}{netcontrol}.
#' 
#' @return A list containing 4 entries: a \eqn{t_max x p} state value matrix, a \eqn{t_max x p} observation matrix, a \eqn{t_max - 1 x q} matrix of control inputs, and a \eqn{t_max} length vector of cost function values.
#' @export
#'
#' @examples
#' 
#' A = matrix(c(0,-3,-2,2,-2,1,-1,2,-1), 3,3)
#' 
#' #Normalize rows to sum to 1
#' A = solve(diag(rowSums(A))) %*% A
#' 
#' B = S = Q_seq = R_seq = diag(3)
#' 
#' CS = control_scheme_DLI_freestate(100, A, B, S, Q_seq, R_seq)
#' 
#' traj = control_traj(100, rep(100,3), A, B, control_scheme = CS)
#' 
#' #First 6 control inputs
#' print(head(traj[[3]]))
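#' 
#' #Illustrative only: a saturated, non-negative control variant. The gain
#' #calculations do not account for these restrictions (see CAUTION above).
#' traj_sat = control_traj(100, rep(100,3), A, B, control_scheme = CS,
#'                         delta = c(1, 1), u_pos = TRUE)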
control_traj <- function(t_max, x_0, A, B, theta = NA, gamma = NA, control_scheme, delta = NA, d_nosign = FALSE, d_toggle = FALSE, upper_bounds = NA, lower_bounds = NA, u_pos = FALSE){
  
  
  G_seq = control_scheme[["gain_seq"]]
  cost_func = control_scheme[["cost_func"]]
  j_vec = vector(length =t_max-1)
  
  x_mat = matrix(0, t_max, length(x_0))
  x_mat[1,] = x_0
  y_mat = matrix(0, t_max, length(x_0))
  
  if(is.matrix(gamma)){
    
    y_mat[1,] = MASS::mvrnorm(mu = x_0, Sigma = gamma)
  }else{
    y_mat[1,] = x_0
  }
  
  
  if(is.vector(B)){
    B_len = 1
  }else{
    B_len = dim(B)[[2]]
  }
  
  u_mat = matrix(0,t_max-1 ,B_len)
  for(i in 1:(t_max-1)){
    
    if(names(control_scheme)[1] == "control_inputs"){
      u = control_scheme[["control_inputs"]][i,]
    }else{
      if(is.matrix(G_seq)){G = G_seq}else{G = G_seq[[i]]}
      
      u = G %*% y_mat[i,]
    }
    if(!anyNA(delta)){
      u = sign(u)*(abs(u) >= delta[1])*delta[2] + u*(abs(u) < delta[1])
      
      if(d_toggle){
        u =sign(u)*(abs(u) >= delta[1])*delta[2]
      }
      if(d_nosign){
        u = (u > delta[1])*delta[2]+ u*(u <= delta[1])*(u > 0)
      }
      
      
    }
    if(u_pos){
      u[which(u < 0)] = 0
    }
    
    
    temp = .control_step(x_mat[i,], A, B, u, theta, gamma, u.bounds = upper_bounds, l.bounds=lower_bounds)
    x_mat[i+1,] = t(temp$x)
    y_mat[i+1,] = t(temp$y)
    u_mat[i,] = t(u)
  }
  
  LQR_seq = cost_func(X_2 = x_mat, U_2 = u_mat)
  
  return(list(x_mat,y_mat,u_mat, LQR_seq))
  
}



#' Linear Quadratic Regulator
#' 
#' Creates a function that can be used to calculate the cumulative value of the LQR
#'  cost function for any set of states and control inputs. By setting \code{eval} to \code{TRUE}, 
#'  the LQR is immediately calculated. See \insertCite{lewisOptimalControl2012}{netcontrol} for details.
#'  
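#' The returned values are cost-to-go sums of the form
#' \deqn{J_k = x_{t_max}^T S x_{t_max} + \sum_{i=k}^{t_max - 1} (x_i^T Q_i x_i + u_i^T R_i u_i).}
#'  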
#' NOTE: LQR values are cost-to-go sums, so they shrink toward the final time point regardless of input. 
#' This is expected behavior, but it does make using the LQR value to evaluate control efficacy somewhat difficult. 
#'  
#'
#' @param X A \eqn{t x p} matrix of state values
#' @param U A \eqn{t-1 x q} matrix of control inputs
#' @param S A \eqn{p x p} final state weighting matrix
#' @param Q_seq A list of \eqn{t} \eqn{p x p} intermediate state weighting matrices or a single \eqn{p x p} intermediate state weighting matrix
#' @param R_seq A list of \eqn{t} \eqn{q x q} intermediate cost matrices or a single \eqn{q x q} cost matrix
#' @param eval Boolean, if \code{FALSE} returns a function, if \code{TRUE} calculates the LQR series
#'
#' @return A function or a \eqn{t} length numeric vector
#' @export
#'
#' @references 
#' 
#' \insertRef{lewisOptimalControl2012}{netcontrol}
#'
#' @examples
#' 
#' X = matrix(1, 100, 3)
#' U = matrix(-1, 99, 3)
#' S = Q_seq = R_seq = diag(3)
#' 
#' print(LQR(X,U, S, Q_seq, R_seq)[1:5])
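#' 
#' #Return the cost function itself for later use (as control_scheme_DLI_freestate does)
#' cost_fn = LQR(S = S, Q_seq = Q_seq, R_seq = R_seq, eval = FALSE)
#' print(cost_fn(X_2 = X, U_2 = U)[1:5])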
#' 
LQR <- function(X, U, S, Q_seq, R_seq, eval = TRUE){
  
  
  temp_function <- function(X_2, U_2, S_n= S, Q_seq_n = Q_seq, R_seq_n=R_seq){
    j = vector(length = nrow(X_2))
    j_n = t(X_2[nrow(X_2),]) %*% S_n %*% X_2[nrow(X_2),]
    
    j_i = j
    
    if(is.matrix(Q_seq_n)){Q_seq_n = rep(list(Q_seq_n), nrow(X_2)-1)}
    if(is.matrix(R_seq_n)){R_seq_n = rep(list(R_seq_n), nrow(X_2)-1)}
    for(i in 1:(nrow(X_2)-1)){
      j_i[i] = t(X_2[i,]) %*% Q_seq_n[[i]] %*% X_2[i,] + t(U_2[i,]) %*% R_seq_n[[i]] %*% U_2[i,] 
      
    }
    j = rev(cumsum(rev(j_i))) +c(j_n)
    
    return(j)
  }
  if(!eval){
    return(temp_function)
  }else{
    return(temp_function(X_2 = X, U_2 = U))
  }
}



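#Internal helper: advances the system one time step. The control input B %*% u is optionally
#limited using u.bounds/l.bounds, process noise with covariance theta is added to the
#deterministic update A %*% x + B %*% u (only for states with nonzero variance in theta),
#and the observation y is drawn with measurement error covariance gamma (when gamma is NA,
#y equals the new state, which is then clipped to the bounds).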
.control_step <- function(x, A, B, u, theta = NA, gamma = NA, u.bounds = NA, l.bounds = NA){
  
  cont_input = B %*% u
  
  if(!anyNA(u.bounds)){
    cont_input = pmin(u.bounds - x, cont_input)
    if(!anyNA(l.bounds)){
      cont_input =  pmax(l.bounds-x, cont_input)
    }
  }
  
  
  
  x_t1 = A %*% x + cont_input
  
  if(is.matrix(theta)){
    targets = which(!diag(theta) == 0)
    theta = theta[targets, targets, drop = FALSE]
    noise = rep(0, ncol(A))
    
    noise[targets] = MASS::mvrnorm(mu = rep(0, length(targets)), Sigma = theta)
    x_t1 = x_t1 + noise
  }
  
  if(is.matrix(gamma)){
    
    y_t1 = MASS::mvrnorm(mu = x_t1, Sigma = gamma)
    return(list(x = x_t1, y = y_t1))
  }else{
    if(!anyNA(u.bounds)){
      x_t1 = pmin(u.bounds,x_t1)
      if(!anyNA(l.bounds)){
        x_t1 =  pmax(l.bounds, x_t1)
      }
    }
    return(list(x = x_t1, y = x_t1))
    
  }
  
}

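#Internal helper: backward Riccati recursion. Starting from S_seq[[tmax]] = S, computes
#S_t = t(A) %*% (S_{t+1} - S_{t+1} B (t(B) S_{t+1} B + R_t)^{-1} t(B) S_{t+1}) %*% A + Q_t
#for t = tmax - 1, ..., 1.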
.S_seq_calc <- function(A, S, B, Q_seq, R_seq, tmax){
  
  S_seq = list()
  S_seq[[tmax]] = S
  
  for(i in (tmax-1):1){
    
    if(is.matrix(Q_seq)){Q = Q_seq}else{Q = Q_seq[[i]]}
    if(is.matrix(R_seq) | is.numeric(R_seq)){R = R_seq}else{R = R_seq[[i]]}
    S_seq[[i]] = t(A)%*%(S_seq[[i+1]] - S_seq[[i+1]] %*% B %*% solve(t(B) %*% S_seq[[i+1]] %*% B + R) %*% t(B) %*% S_seq[[i+1]]) %*% A + Q
  }
  
  return(S_seq)
  
}

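#Internal helper: feedback (Kalman) gain sequence,
#G_t = -(t(B) S_{t+1} B + R_t)^{-1} t(B) S_{t+1} A, for t = t_max - 1, ..., 1.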
.G_seq_calc <- function(S_seq, R_seq, A, B, t_max){
  G_seq = list()
  for(i in (t_max-1):1){
    if(is.matrix(R_seq)){R = R_seq}else{R = R_seq[[i]]}
    G_seq[[i]] = -solve(t(B) %*% S_seq[[i+1]] %*% B + R) %*% t(B) %*% S_seq[[i+1]] %*% A
  }
  return(G_seq)
}
