# R/HigherOrderRiskPreferences.R


# Authors
# Sebastian Schneider, sschneider@coll.mpg.de; sebastian@sebastianschneider.eu
# Giulia Baldini, giulia.baldini@uni-bonn.de

# Copyright (C) 2020 Sebastian O. Schneider & Giulia Baldini

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


##########################################################
#### Utility Tools for Higher Order Risk Preferences #####
##########################################################

#' Truncated p-th power function. Helper function for creating the B-spline basis (code by Paul Eilers, package JOPS, http://statweb.lsu.edu/faculty/marx/JOPS_0.1.0.tar.gz).
#' @param x value(s) at which to evaluate the function.
#' @param t point of truncation.
#' @param p degree of the truncated polynomial function.
#' @return Returns the truncated p-th power of x, i.e., (x - t)^p for x > t and 0 otherwise.
#' @examples 
#' tpower(1, 2, 3)
#' @export
tpower <- function(x, t, p) {
  return ((x - t) ^ p * (x > t))
}
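
# Illustrative sketch (comments only, not package code): tpower() is the
# building block that bbase() differences into B-splines.
#   tpower(0.5, 0.25, 2)          # (0.5 - 0.25)^2 = 0.0625, since x > t
#   tpower(0.1, 0.25, 2)          # 0, since x <= t
#   tpower(c(0.1, 0.5), 0.25, 2)  # vectorized: c(0, 0.0625)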


#' Constructs a B-spline basis of degree 'deg' (Code by Paul Eilers, Package JOPS, http://statweb.lsu.edu/faculty/marx/JOPS_0.1.0.tar.gz).
#'
#' @param x values for the x axis.
#' @param xl minimum value, default is the minimum value of the x-values.
#' @param xr maximum value, default is maximum value of the x-values.
#' @param ndx number of intervals to partition the distance between xl and xr.
#' @param deg degree of the B-spline basis.
#' @return A B-spline basis of degree deg with ndx + 1 internal knots (ndx + deg basis functions).
#' @examples
#' x_finegrid <- seq(0.001, 1.0, (1.0 - 0.001) / 1000)
#' bbase(x_finegrid)
#' @export
bbase <- function(x,
                  xl = min(x),
                  xr = max(x),
                  ndx = 20,
                  deg = 6) {
  dx <- (xr - xl) / ndx
  knots <- seq(xl - deg * dx, xr + deg * dx, by = dx)
  P <- outer(x, knots, tpower, deg)
  n <- dim(P)[2]
  D <- diff(diag(n), diff = deg + 1) / (gamma(deg + 1) * dx ^ deg)
  B <- (-1) ^ (deg + 1) * P %*% t(D)
  return(B)
}
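
# A minimal sanity check for bbase(), kept as an inert sketch so it does
# not run at build time: the basis has ndx + deg columns, and B-splines
# form a partition of unity on [xl, xr], so every row sums to about 1.
if (FALSE) {
  x <- seq(0, 1, length.out = 11)
  B <- bbase(x, ndx = 5, deg = 3)
  dim(B)            # 11 x 8, since ndx + deg = 8
  range(rowSums(B)) # both values close to 1
}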

#' Estimates the model
#'
#' @param xi a vector containing the certainty equivalents (x-values of utility points) for a given participant in each use case.
#' @param yi can be a vector or a matrix representing the corresponding utility values (y-values of utility points).
#' @param lambda the penalization weight used to compute the initial estimate. The default value is 1.
#' @param n_penalty_dimensions number of dimensions (i.e., derivatives) to penalize. Possible values are 1 or 2. The default value is 1.
#' @param penalty_order highest dimension (i.e., derivative) to penalize. Must be lower than deg.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param cross_validation_mode determines which cross validation mode should be used. If 0, then the cross validation method is leave-one-third-out. If 1, then the cross validation method is a theoretical leave-one-out, i.e., based on a formula. The default value is 0.
#' @param return_estimate indicates whether or not to return the (initially) estimated coefficients. The default value is 0 (false).
#' @param left_out_xi needed for cross validation: the x-values of the points that are left out for fitting the model, so that they can be predicted
#' @param left_out_yi needed for cross validation: the y-values of the points that are left out for fitting the model, so that they can be predicted
#' @return Returns the cross-validation prediction error for the left-out points: the sum of squared residuals under leave-one-third-out, or the root of the sum of squared leave-one-out residuals. If return_estimate is set, instead returns the estimated coefficients of the utility function (in the B-spline basis).
#' @examples
#' x <- c(0.0000000, 0.2819824, 0.3007812, 0.4375000, 0.5231934, 0.7784882, 0.8945312, 1.0000000)
#' y <- c(0.0000, 0.1250, 0.2500, 0.5000, 0.6250, 0.6875, 0.7500, 1.0000)
#' estimate_model(x, y, .5)
#' @importFrom stats lsfit
#' @export
estimate_model <- function(xi,
                           yi,
                           lambda = 1,
                           n_penalty_dimensions = 1,
                           penalty_order = 4,
                           ndx = 20,
                           deg = 6,
                           cross_validation_mode = 0,
                           return_estimate = 0,
                           left_out_xi = c(),
                           left_out_yi = c()) {
  if (n_penalty_dimensions > penalty_order) {
    n_penalty_dimensions = penalty_order
  }
  
  lambda <- lambda * 1000 ^ -(seq(0, n_penalty_dimensions - 1))
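  # With n_penalty_dimensions = 2 this turns the scalar lambda into the
  # geometrically decaying vector c(lambda, lambda / 1000), i.e., one
  # penalty weight per penalized derivative order; with 1 dimension the
  # transformation leaves lambda unchanged.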
  
  # Enforce U(0) = 0 and U(1) = 1
  w1 <- 0
  w2 <- 0
  repeat {
    xi <- c(rep(0, w1) , xi, rep(1, w2))
    yi <- c(rep(0, w1) , yi, rep(1, w2))
    
    # Generate base and penalty
    B = bbase(xi, ndx = ndx, deg = deg)
    n = ncol(B)
    
    # Fit the model by augmented penalized least squares: stack one scaled
    # difference-penalty block per penalized derivative order, then solve
    # with lsfit
    P <- NULL
    sumP <- matrix(rep(0, n ^ 2), nrow = n)
    nix <- NULL
    for (p in 1:n_penalty_dimensions) {
      D = diff(diag(n), diff = penalty_order - (p - 1))
      P = rbind(P, sqrt(lambda[p]) * D)
      sumP = sumP + (lambda[p] * t(D) %*% D)
      nix = c(nix, rep(0, n - (penalty_order - (p - 1))))
    }
    
    f = lsfit(rbind(B, P), c(yi, nix), intercept = F)
    
    a <- f$coef
    
    if (n_penalty_dimensions == 2) {
      # Re-adjust lambda
      lambda_ratio <-
        sum(abs(diff(a, diff = penalty_order))) / sum(abs(diff(a, diff = penalty_order - 1)))
      if (length(lambda) > 1) {
        lambda <- lambda[1]
      }
      lambda <- c(lambda, lambda * lambda_ratio / 5)
      
      P <- NULL
      sumP <- matrix(rep(0, n ^ 2), nrow = n)
      nix <- NULL
      for (p in 1:n_penalty_dimensions) {
        D = diff(diag(n), diff = penalty_order - (p - 1))
        P = rbind(P, sqrt(lambda[p]) * D)
        sumP = sumP + (lambda[p] * t(D) %*% D)
        nix = c(nix, rep(0, n - (penalty_order - (p - 1))))
      }
    }
    
    # Enforce monotonicity
    kappa <- 100000000
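    # Asymmetric penalty: W flags only the coefficient differences that
    # currently violate monotonicity (D1 %*% a < 0); giving them the huge
    # weight kappa effectively acts as a hard constraint. The loop stops
    # as soon as the set of violations, and hence the fit, is unchanged.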
    for (i in 1:10) {
      D1 <- diff(diag(n), diff = 1)
      W <- as.vector(1 * (D1 %*% a < 0))
      P2 <- sqrt(kappa) * diag(W) %*% D1
      nix2 <- rep(0, n - 1)
      f_new <- lsfit(rbind(B, P, P2), c(yi, nix, nix2), intercept = F)
      a_new <- f_new$coef
      if (identical(a, a_new)) {
        break
      }
      a = a_new
    }
    
    # Predict at observed points
    z = B %*% a
    
    if (cross_validation_mode) {
      # == 1, so leave one out
      # Predict at missing points
      x_finegrid <- seq(min(xi), max(xi), (max(xi) - min(xi)) / 1000)
      y_hat <- bbase(x_finegrid, ndx = ndx, deg = deg) %*% a
    }
    
    # Increase weight at (0,0) and (1,1) if not predicted with the desired precision
    if ((round(z[1], 2) == 0) & round(z[length(z)], 2) == 1) {
      break
    } else {
      if (round(z[1], 2) != 0) {
        w1 <- w1 + 1
      }
      if (round(z[length(z)], 2) != 1) {
        w2 <- w2 + 1
      }
    }
  }
  
  if (return_estimate) {
    return (a)
  }
  
  # Model choice using Cross Validation
  if (cross_validation_mode) {
    # == 1, so leave one out
    lhs <- t(B) %*% B + sumP + kappa * t(D1) %*% diag(W) %*% D1
    H <- lsfit(lhs, diag(nrow = nrow(lhs)), intercept = F)
    h_new <- diag(B %*% H$coeff %*% t(B))
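    # h_new is the diagonal of the hat matrix B %*% solve(lhs) %*% t(B);
    # the PRESS identity r_i = (y_i - z_i) / (1 - h_ii) then yields the
    # leave-one-out residuals without refitting, restricted below to the
    # interior (non-endpoint) points.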
    keep <- abs(xi) > 0 & abs(xi) < 1
    r <- (yi[keep] - z[keep]) / (1 - h_new[keep])
    res <- sqrt(sum(r ^ 2))
  } else {
    # == 0, so 1/3
    # Predict at left-out points
    z <- bbase(c(left_out_xi, xi), ndx = ndx, deg = deg) %*% a
    res <- sum((left_out_yi - z[1:length(left_out_yi)]) ^ 2)
  }
  return(res)
}
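
# Illustrative usage sketch (inert; data taken from the roxygen example
# above, all other values are package defaults): fit once with
# return_estimate = 1 to obtain the B-spline coefficients, then evaluate
# the estimated utility on a fine grid via bbase().
if (FALSE) {
  x <- c(0.0000000, 0.2819824, 0.3007812, 0.4375000,
         0.5231934, 0.7784882, 0.8945312, 1.0000000)
  y <- c(0.0000, 0.1250, 0.2500, 0.5000, 0.6250, 0.6875, 0.7500, 1.0000)
  a <- estimate_model(x, y, lambda = 0.5, return_estimate = 1)
  grid <- seq(min(x), max(x), length.out = 1001)
  u_hat <- bbase(grid, ndx = 20, deg = 6) %*% a
  plot(grid, u_hat, type = "l")
  points(x, y)
}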

#' Evaluates the cross validation function.
#'
#' @param xi a vector containing the certainty equivalents (x-values of utility points) for a given participant in each use case.
#' @param yi can be a vector or a matrix representing the corresponding utility values (y-values of utility points).
#' @param lambda the penalization weight used to compute the initial estimate. The default value is 1.
#' @param n_penalty_dimensions number of dimensions (i.e., derivatives) to penalize. Possible values are 1 or 2. The default value is 1.
#' @param penalty_order highest dimension (i.e., derivative) to penalize. Must be lower than deg.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param cross_validation_mode determines which cross validation mode should be used. If 0, then the cross validation method is leave-one-third-out. If 1, then the cross validation method is a theoretical leave-one-out, i.e., based on a formula. The default value is 0.
#' @return Returns, for the given utility points and (possibly default) settings, the predictive quality of the estimated utility function according to cross validation as a function of a specified penalty weight lambda.
#' @examples 
#' x <- c(0.0000000, 0.2819824, 0.3007812, 0.4375000, 0.5231934, 0.7784882, 0.8945312, 1.0000000)
#' y <- c(0.0000, 0.1250, 0.2500, 0.5000, 0.6250, 0.6875, 0.7500, 1.0000)
#' evaluate_cross_validation(x, y, .5)
#' @export
evaluate_cross_validation <- function(xi,
                                      yi,
                                      lambda = 1,
                                      n_penalty_dimensions = 1,
                                      penalty_order = 4,
                                      ndx = 20,
                                      deg = 6,
                                      cross_validation_mode = 0) {
  if (!cross_validation_mode) {
    # == 0, so 1/3 out
    var_xi <- xi[abs(xi) > 0 & abs(xi) < 1]
    var_yi <- yi[abs(xi) > 0 & abs(xi) < 1]
    number_xi <- length(var_xi)
    # Leave 1/3 out CV
    combinations <-
      combn(1:number_xi, number_xi - ceiling(1 * number_xi / 3))
    number_combn <- dim(combinations)[[2]]
    sum_ssr <- 0
    for (c in 1:number_combn) {
      sum_ssr <- sum_ssr + estimate_model(
        xi = c(xi[which(abs(xi) == 0 |
                          abs(xi) == 1)], var_xi[combinations[, c]]),
        yi = c(yi[which(abs(xi) == 0 |
                          abs(xi) == 1)], var_yi[combinations[, c]]),
        lambda = lambda,
        n_penalty_dimensions = n_penalty_dimensions,
        penalty_order = penalty_order,
        ndx = ndx,
        deg = deg,
        cross_validation_mode = 0,
        left_out_xi = var_xi[-combinations[, c]],
        left_out_yi = var_yi[-combinations[, c]]
      )
    }
    avg_ssr = sum_ssr / number_combn
    return (avg_ssr)
  } else {
    return(
      estimate_model(
        xi,
        yi,
        lambda = lambda,
        n_penalty_dimensions = n_penalty_dimensions,
        penalty_order = penalty_order,
        ndx = ndx,
        deg = deg,
        cross_validation_mode = 1
      )
    )
  }
}
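
# Illustrative sketch (inert): tracing the CV score over a small lambda
# grid shows which penalty weight the data favor. The grid values here
# are arbitrary assumptions, not package defaults.
if (FALSE) {
  x <- c(0.0000000, 0.2819824, 0.3007812, 0.4375000,
         0.5231934, 0.7784882, 0.8945312, 1.0000000)
  y <- c(0.0000, 0.1250, 0.2500, 0.5000, 0.6250, 0.6875, 0.7500, 1.0000)
  sapply(c(0.1, 1, 10, 100),
         function(l) evaluate_cross_validation(x, y, lambda = l))
}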
#' Finds an optimal penalty weight lambda given the parameters
#' @param xi a vector containing the certainty equivalents (x-values of utility points) for a given participant in each use case.
#' @param yi can be a vector or a matrix representing the corresponding utility values (y-values of utility points).
#' @param lambda_max maximum lambda used for computing the optimal lambda. The default value is 10000.
#' @param n_penalty_dimensions number of dimensions (i.e., derivatives) to penalize. Possible values are 1 or 2. The default value is 1.
#' @param penalty_order highest dimension (i.e., derivative) to penalize. Must be lower than deg.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param cross_validation_mode determines which cross validation mode should be used. If 0, then the cross validation method is leave-one-third-out. If 1, then the cross validation method is a theoretical leave-one-out, i.e., based on a formula. The default value is 0.
#' @param grid_dim dimension of the search grid for the initial grid search before the actual optimization. Default value is 5.
#' @return the optimal lambda for the given set of utility points and (possibly default) settings according to the specified cross validation method. 
#' @examples 
#' x <- c(0.0000000, 0.2819824, 0.3007812, 0.4375000, 0.5231934, 0.7784882, 0.8945312, 1.0000000)
#' y <- c(0.0000, 0.1250, 0.2500, 0.5000, 0.6250, 0.6875, 0.7500, 1.0000)
#' find_optimal_lambda(x, y)
#' @importFrom stats optim
#' @importFrom utils combn
#' @importFrom spatstat.geom ppp pairdist
#' @export 
find_optimal_lambda <- function(xi,
                                yi,
                                lambda_max = 10000,
                                n_penalty_dimensions = 1,
                                penalty_order = 4,
                                ndx = 20,
                                deg = 6,
                                cross_validation_mode = 0,
                                grid_dim = 5) {
  # Flag pairs of points within distance 0.1 of each other
  dist <- (pairdist(ppp(xi, yi)) <= .1) * 1
  colsums <- rowSums(dist) # symmetric matrix, so row sums equal column sums
  skip <- 0
  num_balls <- 0
  for (p in 1:length(colsums)) {
    if (skip == 0) {
      skip <- colsums[p]
      num_balls <- num_balls + 1
    }
    skip <- max(0, skip - 1)
  }
  lambda_min = max(0.01, (num_balls * (9 / length(xi)) - 1) ^ 2.5)
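  # Heuristic lower bound: num_balls approximates how many balls of radius
  # 0.1 are needed to cover the (xi, yi) points, so widely scattered data
  # (many balls relative to the number of points) start the lambda search
  # at a stronger minimum penalty.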
  
  interval <- c(lambda_min, lambda_max + 1)
  argmin <- 1
  lower.lim <- lambda_min
  upper.lim <- lambda_max
  
  # Do a grid search first
  for (i in 1:grid_dim) {
    vals <-
      exp(seq(log(interval[1]), log(interval[2]), length.out = grid_dim))
    min_grid <- Inf # sentinel for the grid-search minimum
    min_index <- 1
    for (j in 1:length(vals)) {
      est <- evaluate_cross_validation(
        xi = xi,
        yi = yi,
        lambda = vals[j],
        n_penalty_dimensions = n_penalty_dimensions,
        penalty_order = penalty_order,
        ndx = ndx,
        deg = deg,
        cross_validation_mode = cross_validation_mode
      )
      if (est < min_grid) {
        min_grid <- est
        min_index <- j
      }
    }
    argmin <- vals[min_index]
    lower.lim <- pmax(argmin - lambda_max / 10 ^ i, lambda_min)
    upper.lim <- pmin(argmin + lambda_max / 10 ^ i, lambda_max)
    interval <- c(lower.lim[1], upper.lim[1])
  }
  
  
  optim_argmin <-
    optim(
      argmin,
      evaluate_cross_validation,
      xi = xi,
      yi = yi,
      n_penalty_dimensions = n_penalty_dimensions,
      penalty_order = penalty_order,
      ndx = ndx,
      deg = deg,
      cross_validation_mode = cross_validation_mode,
      method = "L-BFGS-B",
      lower = lower.lim,
      upper = upper.lim,
      control = list(
        fnscale = 1,
        maxit = 30,
        trace = T
      )
    )
  
  return (optim_argmin$par)
}
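
# Illustrative sketch (inert): the search is a log-spaced grid search that
# shrinks the interval grid_dim times, followed by an L-BFGS-B refinement.
# cross_validation_mode = 1 uses the closed-form leave-one-out score and is
# much cheaper than refitting every leave-one-third-out combination.
if (FALSE) {
  x <- c(0.0000000, 0.2819824, 0.3007812, 0.4375000,
         0.5231934, 0.7784882, 0.8945312, 1.0000000)
  y <- c(0.0000, 0.1250, 0.2500, 0.5000, 0.6250, 0.6875, 0.7500, 1.0000)
  find_optimal_lambda(x, y, cross_validation_mode = 1, grid_dim = 3)
}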

#' Computes a continuous and smooth utility function from the given utility points
#'
#' @param x a matrix or dataframe containing the certainty equivalents (x-values of utility points) for a given participant in each use case.
#' @param y can be a vector or a matrix representing the corresponding utility values (y-values of utility points).
#' @param ids a list containing the IDs of the participants. If not given, a list with IDs from 1 to n_observations will be created.
#' @param mode an integer (0, 1, or 2) selecting the estimation mode: multiple imputation (0), optimal classification (1), or 'weak' classification (2). Default is optimal classification (1).
#' @param penalty_order highest dimension (i.e., derivative) to penalize. Must be lower than deg.
#' @param lambda_max maximum lambda used for computing the optimal lambda. It is used only in multiple imputation (mode = 0) and optimal (mode = 1). The default value is 10000.
#' @param current_lambda lambda considered in the current iteration. Only used in multiple imputation (mode = 0) to create the combinations and as actual lambda value in 'weak' classification mode (mode = 2). The default value is 1.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param verbose shows some information while the program is running.
#' @return A smooth and continuous utility function.
#' @examples
#' \donttest{
#' x <- matrix(c(24.60938,34.76074,78.75,81.86035,128.5156, 
#'               7.109375,80.4248,113.75,115.083,135.0781, 
#'               3.828125,7.211914,8.75,124.1064,131.7969, 
#'               1.640625,2.084961,8.75,36.94824,98.98438), nrow = 4, ncol = 5, byrow = TRUE)
#' y <- c(0.25, 0.375, 0.5, 0.625, 0.75)
#' compute_function(x, y, verbose = 1)
#' }
#' @export
compute_function <- function(x,
                             y,
                             ids = NULL,
                             mode = 1,
                             penalty_order = 4,
                             lambda_max = 10000,
                             current_lambda = 1,
                             ndx = 20,
                             deg = 6,
                             verbose = 0) {
  if (is.data.frame(x)) {
    # If the data is a dataframe
    x <- as.matrix(sapply(x, as.numeric))   # convert to matrix
  } else if (!is.matrix(x)) {
    stop("Please convert x to a dataframe or to a matrix before calling this function.")
  }
  
  if (is.data.frame(y)) {
    # If y is a dataframe
    if (nrow(y) == 1) {
      y <- as.vector(sapply(y, as.numeric))   # convert to vector
    } else {
      y <- as.matrix(sapply(y, as.numeric))   # convert to matrix
    }
  }
  
  if (!is.matrix(y) & !is.vector(y)) {
    stop("The accepted values for y are: matrix or vector.")
  }
  
  if ((is.vector(y) && ncol(x) != length(y)) || (is.matrix(y) && ncol(x) != ncol(y))) {
    stop("The y values do not have the same number of columns as the x values.")
  }
  
  if (length(penalty_order) > 1) {
    stop("The order of penalty should be a single integer.")
  }
  
  if (!is.null(ids) & !is.vector(ids) & !is.list(ids)) {
    stop("Please convert the ids field to a list or a vector.")
  }
  
  if (!verbose) {
    if (.Platform$OS.type == "unix") {
      sink(file = "/dev/null") # discard output on UNIX
    } else {
      sink(file = "NUL") # discard output on Windows
    }
    on.exit(sink(), add = TRUE) # restore normal output when the function exits
  }
  
  if (is.null(ids)) {
    ids = seq(from = 1,
              to = nrow(x),
              by = 1)
  }

  if (deg <= penalty_order) {
    stop(
      paste(
        "A degree value of ",
        deg,
        " with penalty order",
        penalty_order,
        " is not valid. The degree must always be greater than the order of penalty.",
        sep = ""
      )
    )
  }
  
  if (penalty_order > 4) {
    message(
      "This program has not been tested with order of penalties higher than 4 and it might produce wrong or unexpected results."
    )
    warning(
      "This program has not been tested with order of penalties higher than 4 and it might produce wrong or unexpected results."
    )
  }
  if (verbose){
    if (mode == 0) {
      message("Using multiple imputation mode.")
    } else if (mode == 1) {
      message("Using optimal classification mode.")
    } else {
      message("Using weak classification mode.")
    }
  }
  
  n_penalty_dimensions = 2
  
  if (mode == 1) {
    if (penalty_order != 3 & penalty_order != 4) {
      stop("The orders of penalty for optimal classification can either be 3 or 4.")
    }
  } else if (mode == 2) {
    n_penalty_dimensions = 1
  }
  
  xi <- x[1, ]
  xi <- xi[!is.na(xi)]
  min_length_xi <- length(xi)
  max_length_xi <- length(xi)
  for (i in 2:nrow(x)) {
    # Find the xi of minimum length
    xi <- x[i, ]
    xi <- xi[!is.na(xi)]
    min_length_xi <- min(min_length_xi, length(xi))
    max_length_xi <- max(max_length_xi, length(xi))
  }
  
  
  if (ndx < min_length_xi | ndx >= 4 * (max_length_xi+2)) {
    message(
      paste(
        "The value of ndx should be larger than or equal to the minimum length of xi existing in the dataset",
        " and not too large compared to the the maximum length of xi existing in the dataset (say, less than 4 times this number).",
        "Other values have not been tested to yield reliable results.",
        "Consider allowing a higher (lower) degree of flexibility and increase (decrease) the value of ndx."
      )
    )
    warning(
      paste(
        "The value of ndx should be larger than or equal to the minimum length of xi existing in the dataset",
        " and not too large compared to the the maximum length of xi existing in the dataset (say, less than 4 times this number).",
        "Other values have not been tested to yield reliable results.",
        "Consider allowing a higher (lower) degree of flexibility and increase (decrease) the value of ndx."
      )
    )
  }
  
  # deg should be at least 1, but still lower than 1.2 * min(length(xi))
  if (deg < 1 | deg > 1.2 * min_length_xi)  {
    message(
      paste(
        "The value of deg should be larger than or equal to 1 and not exceed 1.2 times the minimum length of xi existing in the dataset.",
        "The amount of data is likely too small to fit such a flexible model, consider lowering deg."
      )
    )
    warning(
      paste(
        "The value of deg should be larger than or equal to 1 and not exceed 1.2 times the minimum length of xi existing in the dataset.",
        "The amount of data is likely too small to fit such a flexible model, consider lowering deg."
      )
    )
  }
  return (
    compute_function_aux(
      x,
      y,
      ids,
      mode,
      penalty_order,
      lambda_max,
      current_lambda,
      n_penalty_dimensions,
      ndx,
      deg,
      verbose
    )
  )
}
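
# Illustrative sketch of consuming the return value (inert; data as in
# the roxygen example above): compute_function() returns
# list(x_finegrids, coeffs), where row i of each belongs to participant
# ids[i]; a participant's utility curve is recovered through bbase().
if (FALSE) {
  x <- matrix(c(24.60938, 34.76074, 78.75, 81.86035, 128.5156,
                7.109375, 80.4248, 113.75, 115.083, 135.0781,
                3.828125, 7.211914, 8.75, 124.1064, 131.7969,
                1.640625, 2.084961, 8.75, 36.94824, 98.98438),
              nrow = 4, ncol = 5, byrow = TRUE)
  y <- c(0.25, 0.375, 0.5, 0.625, 0.75)
  res <- compute_function(x, y)
  grid1 <- as.numeric(res[[1]][1, ])
  u1 <- bbase(grid1, ndx = 20, deg = 6) %*% as.numeric(res[[2]][1, ])
}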

#' Computes a continuous and smooth function according to the given utility points
#' @keywords internal
#' 
#' @param x a matrix or dataframe containing the certainty equivalents (x-values of utility points) for a given participant in each use case.
#' @param y can be a vector or a matrix representing the corresponding utility values (y-values of utility points).
#' @param ids a list containing the IDs of the participants. If not given, a list with IDs from 1 to n_observations will be created.
#' @param mode an integer (0, 1, or 2) selecting the estimation mode: multiple imputation (0), optimal classification (1), or 'weak' classification (2). Default is optimal classification (1).
#' @param penalty_order highest dimension (i.e., derivative) to penalize. Must be lower than deg.
#' @param lambda_max maximum lambda used for computing the optimal lambda. It is used only in multiple imputation (mode = 0) and optimal (mode = 1). The default value is 10000.
#' @param current_lambda lambda considered in the current iteration. Only used in multiple imputation (mode = 0) to create the combinations and as actual lambda value in 'weak' classification mode (mode = 2). The default value is 1.
#' @param n_penalty_dimensions number of dimensions (i.e., derivatives) to penalize. Possible values are 1 or 2. The default value is 1.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param verbose shows some information while the program is running.
#' @return A smooth and continuous utility function.
compute_function_aux <- function(x,
                                 y,
                                 ids,
                                 mode = 1,
                                 penalty_order = 4,
                                 lambda_max = 10000,
                                 current_lambda = 1,
                                 n_penalty_dimensions = 1,
                                 ndx = 20,
                                 deg = 6,
                                 verbose = 0) {
  nval_grid <- 1000
  coeffs <- data.frame(row.names = ids, matrix(NA, nrow = nrow(x), ncol = (ndx + deg)))
  x_finegrids <- data.frame(row.names = ids, matrix(0, nrow = nrow(x), ncol = (nval_grid + 1)))
  for (i in 1:nrow(x)) {
    cross_validation_mode <- 0 # means leave 1/3 out
    # Collect the needed observation
    xi <- x[i, ]
    yi <- y
    if (is.matrix(y)) {
      yi <- y[i, ]
    }
    id <- ids[i]
    
    # Consider only the columns which are not NA
    yi <- yi[!is.na(xi)]
    xi <- xi[!is.na(xi)]
    if (verbose){
      message(paste("Considering participant with id", id))
    }
  
    # Rescale
    xi <- xi / max(abs(xi))
    yi <- yi / max(abs(yi[length(yi)]), abs(yi[1]))
    
    # Estimate utility curve if there is at least one point between (0,0) and (-1,-1) or (1,1), respectively
    if (length(xi[abs(xi) > 0 &
                  abs(xi) < 1]) < 1 || sum(is.na(xi))) {
      message(paste("The participant with id", id, "cannot be analysed."))
      next
    }
    # If total sum of points to combine is less than or equal to penalty order,
    # only leave-one-out CV possible according to formula
    if (length(xi) <= penalty_order) {
      if (mode == 0) {
        message(
          paste(
            "The participant with id ",
            id,
            " cannot be analysed with multiple imputation mode, and penalty order ",
            penalty_order,
            ".",
            sep = ""
          )
        )
        next
      }
      cross_validation_mode <- 1 # leave one out
    }
    
    if (penalty_order == 4 &
        n_penalty_dimensions == 1 & length(xi) == 3) {
      # In that case, cannot estimate the model. Solution: Duplicate the end-points
      xi <- c(0, xi, 1)
      yi <- c(0, yi, 1)
    }
    
    # If we are in optimal classification or in MI, we want to find the optimal lambda
    if (mode == 0 || mode == 1) {
      new_lambda = find_optimal_lambda(
        xi,
        yi,
        lambda_max,
        n_penalty_dimensions,
        penalty_order,
        ndx,
        deg,
        cross_validation_mode
      )
    } else {
      new_lambda = current_lambda
    }
    
    if (mode == 0) {
      # MI
      # Leave 1/3 out CV
      var_xi <- xi[abs(xi) > 0 & abs(xi) < 1]
      var_yi <- yi[abs(xi) > 0 & abs(xi) < 1]
      number_xi <- length(var_xi)
      combinations <-
        combn(1:number_xi, number_xi - ceiling(1 * number_xi / 3))
      number_combn <- dim(combinations)[[2]]
      if (number_combn < current_lambda) {
        current_lambda <- current_lambda %% number_combn
        current_lambda <- current_lambda + 1
      }
      coeff <-
        estimate_model(
          xi = c(xi[which(abs(xi) == 0 | abs(xi) == 1)], var_xi[combinations[, current_lambda]]),
          yi = c(yi[which(abs(xi) == 0 | abs(xi) == 1)], var_yi[combinations[, current_lambda]]),
          lambda = new_lambda,
          n_penalty_dimensions = n_penalty_dimensions,
          penalty_order = penalty_order,
          ndx = ndx,
          deg = deg,
          return_estimate = 1
        )
    } else {
      coeff <-
        estimate_model(
          xi = xi,
          yi = yi,
          lambda = new_lambda,
          n_penalty_dimensions = n_penalty_dimensions,
          penalty_order = penalty_order,
          ndx = ndx,
          deg = deg,
          return_estimate = 1
        )
    }
    
    x_finegrid <- seq(min(xi), max(xi), (max(xi) - min(xi)) / nval_grid)
    coeffs[i, ] <- coeff
    x_finegrids[i, ] <- x_finegrid
  }
  
  return(list(x_finegrids, coeffs))
}

#' Given a set of smooth and continuous functions, computes predefined and user-defined measures.
#' @keywords internal
#' @param x_grids a dataframe of vectors of x-values for a smooth and continuous function.
#' @param coeffs a dataframe of coefficients for a smooth and continuous function for each participant.
#' @param ids a list containing the IDs of the participants. If not given, a list with IDs from 1 to n_observations will be created.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param measures a vector of measures to be computed.
#' @param ... additional parameters for user-defined measures.
#' @return A set of measurements.
compute_measures_aux <- function(x_grids,
                                  coeffs,
                                  ids,
                                  ndx = 20,
                                  deg = 6,
                                  measures = c("risk-arrow-pratt", "crainich-eeckhoudt", "denuit-eeckhoudt"),
                                  ...)  {
  output_measures <- data.frame(row.names = ids, matrix(0, nrow = length(ids), ncol = length(measures)))
  for (i in 1:nrow(coeffs)) {
    coeff <- as.numeric(coeffs[i,])
    x_finegrid <- as.numeric(x_grids[i,])
      
    if (all(is.na(coeff))){
      message(paste("The participant with id", ids[i], "cannot be analysed because all the coefficients are NAs."))
      next
    }
    
    # Computation of first derivative
    dy_rd <- derivative(x_finegrid, coeff, 1, ndx, deg)
    for (j in 1:length(measures)) {
      measure <- measures[[j]]
      if (mode(measure) == "function") {
        mes <- measure(x_finegrid, coeff, ndx, deg, ...)
        colnames(output_measures)[j] <- paste("custom-", j, sep="")
      } else {
        colnames(output_measures)[j] <- measure
        if (measure == "risk-arrow-pratt") {
          # Computation of second derivative
          ddy_rd <- derivative(x_finegrid, coeff, 2, ndx, deg)
          # Compute Risk Aversion measures by Pratt / Arrow
          mes <- -mean(ddy_rd, na.rm = T) / mean(dy_rd, na.rm = T)
        } else if (measure == "crainich-eeckhoudt") {
          # Computation of third derivative
          dddy_rd <- derivative(x_finegrid, coeff, 3, ndx, deg)
          # Compute Prudence intensity
          mes <- mean(dddy_rd, na.rm = T) / mean(dy_rd, na.rm = T)
        } else if (measure == "denuit-eeckhoudt") {
          # Computation of fourth derivative
          ddddy_rd <- derivative(x_finegrid, coeff, 4, ndx, deg)
          # Compute Temperance intensity
          mes <- -mean(ddddy_rd, na.rm = T) / mean(dy_rd, na.rm = T)
        } else {
          stop("The desired measure does not exist. Please use another measure.")
        }
      }
      output_measures[i, j] <- mes
    }
  }
  return (output_measures)
}

#' Given a set of smooth and continuous functions, computes predefined and user-defined measures.
#'
#' @param x_grids a dataframe of vectors of x values for a smooth and continuous function.
#' @param coeffs a dataframe of coefficients for a smooth and continuous function for each participant.
#' @param ids a list containing the IDs of the participants. If not given, a list with IDs from 1 to n_observations will be created.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param measures a vector of measures to be computed.
#' @param ... additional parameters for user-defined measures.
#' @return A set of measurements.
#' @examples 
#' x <- rbind(seq(0.000002, 1.0, (1.0 - 0.000002) / 1000),
#'            seq(0.001, 1.0, (1.0 - 0.001) / 1000),
#'            seq(0.0004, 1.0, (1.0 - 0.0004) / 1000))
#' y <- rbind(seq(0.000002, 1.0, (1.0 - 0.000002) / 15),
#'            seq(0.001, 1.0, (1.0 - 0.001) / 15),
#'            seq(0.0004, 1.0, (1.0 - 0.0004) / 15))
#' compute_measures(x, y, ndx = 10, deg = 6)
#'
#' # A user-defined measure always receives x_finegrid, coeff, ndx and deg;
#' # it should accept dots (...) so that unused parameters can be ignored.
#' risk_arrow_pratt <- function(x_finegrid, coeff, ndx, deg){ 
#'   dy_rd <- derivative(x_finegrid, coeff, 1, ndx, deg)
#'   ddy_rd <- derivative(x_finegrid, coeff, 2, ndx, deg)
#'   return (-mean(ddy_rd, na.rm = TRUE) / mean(dy_rd, na.rm = TRUE))
#' }
#' measures = c("crainich-eeckhoudt", "denuit-eeckhoudt", risk_arrow_pratt)
#' compute_measures(x, y, ndx = 10, deg = 6, measures=measures)
#' @export
compute_measures <- function(x_grids,
                             coeffs,
                             ids = NULL,
                             ndx = 20,
                             deg = 6,
                             measures = c("risk-arrow-pratt", "crainich-eeckhoudt", "denuit-eeckhoudt"),
                             ...) {
  if (!is.data.frame(x_grids) & !is.matrix(x_grids)) {
    stop("Please convert x_grids to a dataframe or to a matrix before calling this function.")
  }
  
  if (!is.data.frame(coeffs) & !is.matrix(coeffs)) {
    stop("Please convert coeffs to a dataframe or to a matrix before calling this function.")
  }
  
  if (nrow(x_grids) != nrow(coeffs)){
    stop("The number of participants in the coefficients matrix and in the x matrix do not correspond.")
  }
  
  if (!is.null(ids) & !is.vector(ids) & !is.list(ids)) {
    stop("Please convert the ids field to a list or a vector.")
  }
  
  if (!is.null(ids) & length(ids) != ncol(x_grids)) {
    stop("The number of participants in the ids vector and in the x matrix do not correspond.")
  }
  
  if (ncol(coeffs) != (ndx + deg)){
    stop("The number of coefficients is not the same as ndx + deg.")
  }
  
  if (is.null(ids)) {
    ids = seq(from = 1,
              to = nrow(x_grids),
              by = 1)
  }
  
  return(compute_measures_aux(x_grids, coeffs, ids, ndx, deg, measures, ...))
}

#' Computes the derivative of a function
#'
#' @param x the x values for which the derivative should be computed.
#' @param coeffs the coefficients of the function in the B-spline basis.
#' @param degree the degree of the derivative.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @return the derivative of the specified degree, rounded to a grid of one thousandth of its range.
#' @examples 
#' coeffs <- seq(0.000002, 1.0, (1.0 - 0.000002) / 25)
#' x <- seq(0.01, 1.0, (1.0 - 0.01) / 5)
#' derivative(x, coeffs)
#' @export
derivative <- function(x,
                       coeffs,
                       degree = 1,
                       ndx = 20,
                       deg = 6) {
  dB = bbase(x = x, ndx = ndx, deg = deg - degree)
  for (i in 1:degree) {
    coeffs <- (diff(coeffs) / ndx)
  }
  dy = dB %*% coeffs
  range = abs(min(dy) - max(dy))
  dy_rd <- round(dy / (range / 1000)) * (range / 1000)
  return (dy_rd)
}
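
# Illustrative sanity check (inert): for roughly linear utility points the
# first derivative of the fitted curve should be nearly constant.
# estimate_model() is the fitting routine from this file; all other values
# are package defaults.
if (FALSE) {
  x <- seq(0, 1, length.out = 9)
  a <- estimate_model(x, x, lambda = 1, return_estimate = 1)
  d1 <- derivative(seq(0, 1, length.out = 100), a)
  range(d1) # a narrow interval for a near-linear fit
}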

#' Computes a continuous and smooth function according to the given utility points
#'
#' @param x a matrix or dataframe containing the certainty equivalents (x-values of utility points) for a given participant in each use case.
#' @param y can be a vector or a matrix representing the corresponding utility values (y-values of utility points).
#' @param ids a list containing the IDs of the participants. If not given, a list with IDs from 1 to n_observations will be created.
#' @param mode an integer (0, 1, or 2) selecting the estimation mode: multiple imputation (0), optimal classification (1), or 'weak' classification (2). Default is optimal classification (1).
#' @param penalty_orders vector or constant containing the derivative orders to be penalized (smoothed). The values in this vector should not be larger than 4.
#' @param ndx number of intervals to partition the distance between the lowest and highest x-values of the utility points.
#' @param deg degree of the B-spline basis. Determines the degree of the function to be estimated. If deg = 2, the estimated utility function will consist of quadratic functions. 
#' @param measures the utility based (intensity) measures to be computed.
#' @param ... additional parameters for user-defined measures.
#' @param root_filename filename containing the location of where the output files are going to be saved.
#' @param verbose shows some information while the program is running.
#' @return A smooth and continuous function.
#' @examples
#' \donttest{
#' x <- matrix(c(24.60938,34.76074,78.75,81.86035,128.5156, 
#'               7.109375,80.4248,113.75,115.083,135.0781, 
#'               3.828125,7.211914,8.75,124.1064,131.7969, 
#'               1.640625,2.084961,8.75,36.94824,98.98438), nrow = 4, ncol = 5, byrow = TRUE)
#' y <- c(0.25, 0.375, 0.5, 0.625, 0.75)
#' compute_higher_order_risk_preferences(x, y, mode = 1)
#' 
#' # could be used with root_filename argument: 
#' # Linux
#' # outfile <- paste(dirname(getwd()), "/out", sep="")
#' # Win
#' # outfile <- paste(dirname(getwd()), "\\out", sep="")
#' compute_higher_order_risk_preferences(x, y, mode = 2, verbose = 1)
#' }
#' @importFrom utils write.csv
#' @export
compute_higher_order_risk_preferences <- function(x,
                                                  y,
                                                  ids = NULL,
                                                  mode = 1,
                                                  penalty_orders = c(4),
                                                  ndx = 20,
                                                  deg = 6,
                                                  measures = c("risk-arrow-pratt", "crainich-eeckhoudt", "denuit-eeckhoudt"),
                                                  ...,
                                                  root_filename = NULL,
                                                  verbose = 0) {
  if (is.data.frame(x)) {
    # If the data is a dataframe
    x <- as.matrix(sapply(x, as.numeric))   # convert to matrix
  } else if (!is.matrix(x)) {
    stop("Please convert x to a dataframe or to a matrix before calling this function.")
  }
  
  if (is.data.frame(y)) {
    # If y is a dataframe
    if (nrow(y) == 1) {
      y <- as.vector(sapply(y, as.numeric))   # convert to vector
    } else {
      y <- as.matrix(sapply(y, as.numeric))   # convert to matrix
    }
  }
  
  if (!is.matrix(y) & !is.vector(y)) {
    stop("The accepted values for y are: matrix or vector.")
  }
  
  if ((is.vector(y) && ncol(x) != length(y)) || (is.matrix(y) && ncol(x) != ncol(y))) {
    stop("The y values do not have the same number of columns as the x values.")
  }
  
  if (!is.vector(penalty_orders) ||
      !is.numeric(penalty_orders)) {
    stop("The accepted values for the order of penalty are either a constant or a vector.")
  }
  
  if (!is.null(ids) & !is.vector(ids) & !is.list(ids)) {
    stop("Please convert the ids field to a list or a vector.")
  }
  
  if (is.null(ids)) {
    ids = seq(from = 1,
              to = nrow(x),
              by = 1)
  }
  
  for (order in penalty_orders) {
    if (deg <= order) {
      stop(
        paste(
          "A degree value of ",
          deg,
          " with penalty order",
          order,
          " is not valid. The degree must always be greater than the order of penalty.",
          sep = ""
        )
      )
    }
    if (order > 4) {
      message(
        "This program has not been tested with order of penalties higher than 4 and it might produce wrong or unexpected results."
      )
      warning(
        "This program has not been tested with order of penalties higher than 4 and it might produce wrong or unexpected results."
      )
    }
  }
  
  xi <- x[1, ]
  xi <- xi[!is.na(xi)]
  min_length_xi <- length(xi)
  max_length_xi <- length(xi)
  for (i in 2:nrow(x)) {
    # Find the xi of minimum length
    xi <- x[i, ]
    xi <- xi[!is.na(xi)]
    min_length_xi <- min(min_length_xi, length(xi))
    max_length_xi <- max(max_length_xi, length(xi))
  }
  
  if (ndx < min_length_xi | ndx >= 4 * (max_length_xi+2)) {
    message(
      paste(
        "The value of ndx should be larger than or equal to the minimum length of xi existing in the dataset",
        " and not too large compared to the the maximum length of xi existing in the dataset (say, less than 4 times this number).",
        "Other values have not been tested to yield reliable results.",
        "Consider allowing a higher (lower) degree of flexibility and increase (decrease) the value of ndx."
      )
    )
    warning(
      paste(
        "The value of ndx should be larger than or equal to the minimum length of xi existing in the dataset",
        " and not too large compared to the the maximum length of xi existing in the dataset (say, less than 4 times this number).",
        "Other values have not been tested to yield reliable results.",
        "Consider allowing a higher (lower) degree of flexibility and increase (decrease) the value of ndx."
      )
    )
  }
  
  # deg should be at least 1, but still lower than 1.2 * min(length(xi))
  if (deg < 1 | deg > 1.2 * min_length_xi)  {
    message(
      paste(
        "The value of deg should be larger than or equal to 1 and not exceed 1.2 times the minimum length of xi existing in the dataset.",
        "The amount of data is likely too small to fit such a flexible model, consider lowering deg."
      )
    )
    warning(
      paste(
        "The value of deg should be larger than or equal to 1 and not exceed 1.2 times the minimum length of xi existing in the dataset.",
        "The amount of data is likely too small to fit such a flexible model, consider lowering deg."
      )
    )
  }
  
  mode_txt <- "weak"
  if (mode == 0 || mode == 1) {
    # Set range for lambda (minimum is determined by the data)
    lambda_max = 10000
    if (mode == 0) {
      mode_txt <- "MI"
      lambda_fix_loop_lambdas <- 1:15
    } else {
      mode_txt <- "opt"
      for (order in penalty_orders) {
        if (order != 3 & order != 4) {
          stop("The orders of penalty for optimal classification can either be 3 or 4.")
        }
      }
      lambda_fix_loop_lambdas <- 1
    }
    n_penalty_dimensions = 2
  } else {
    lambda_fix_loop_lambdas <- c(.1, 1, 10, 20, 50, 100, 500, 750, 1000, 2000, 5000, 10000)
    n_penalty_dimensions = 1
  }
  
  if (verbose){
    if (mode == 0) {
      message("Using multiple imputation mode.")
    } else if (mode == 1) {
      message("Using optimal classification mode.")
    } else {
      message("Using weak classification mode.")
    }
  }
  
  time_begin <- format(Sys.time(), "%y.%m.%d_%H.%M.%OS")
  return_val = NULL
  # Loop over the penalization order
  for (penalty_order in penalty_orders) {
    # Start smoothing & classification
    if (verbose){
      message(paste("Smoothing over the", penalty_order, "order of penalty."))
    }
    
    # Iterate over the lambdas, only once for optimization
    for (lambda_fix_loop in lambda_fix_loop_lambdas) {
      # Start smoothing & classification
      if (verbose){
        message("Computing the function for all individuals.")
      }
      fct <-
        compute_function_aux(
          x,
          y,
          ids,
          mode,
          penalty_order,
          lambda_max,
          lambda_fix_loop,
          n_penalty_dimensions,
          ndx,
          deg,
          verbose
        )
      
      x_grids <- fct[[1]]
      coeffs <- fct[[2]]
      if (verbose){
        message("Computing the measures for all individuals.")
      }
      out_measures <- compute_measures_aux(x_grids, coeffs, row.names(coeffs), ndx, deg, measures)
      if (verbose) {
        message("The measures have been computed.")
      }
      if (!is.null(root_filename)) {
        if (verbose) {
          message("Printing current run...")
        }
        specific_folder <-
          paste(
            mode_txt,
            "_",
            time_begin,
            sep = ""
          )
        
        addition_folder <-
          paste(
            "pord",
            penalty_order,
            "_lambda",
            lambda_fix_loop,
            sep = ""
          )
        
        folder_save <- file.path(root_filename, specific_folder, addition_folder, "")
        dir.create(folder_save, recursive = TRUE, showWarnings = FALSE)
        write.csv(x_grids, file = paste(folder_save, "x_grids.csv", sep=""))
        write.csv(coeffs, file = paste(folder_save, "coeffs.csv", sep=""))
        write.csv(out_measures, file = paste(folder_save, "measures.csv", sep=""))
        if (verbose) {
          message(paste("Saved in", folder_save))
        }
      }
    
      if (mode == 1){
        return_val = fct
      }
    }
  }
  
  return(return_val)
}
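
# Illustrative end-to-end sketch (inert; data as in the roxygen example
# above): estimate the utility curves in optimal classification mode, then
# turn them into higher-order risk-preference intensity measures.
if (FALSE) {
  x <- matrix(c(24.60938, 34.76074, 78.75, 81.86035, 128.5156,
                7.109375, 80.4248, 113.75, 115.083, 135.0781,
                3.828125, 7.211914, 8.75, 124.1064, 131.7969,
                1.640625, 2.084961, 8.75, 36.94824, 98.98438),
              nrow = 4, ncol = 5, byrow = TRUE)
  y <- c(0.25, 0.375, 0.5, 0.625, 0.75)
  res <- compute_higher_order_risk_preferences(x, y, mode = 1)
  compute_measures(res[[1]], res[[2]], ndx = 20, deg = 6)
}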
