R/utils.R
In copulaboost: Fitting Additive Copula Regression Models for Binary Outcome Regression

.make_hfunc_key <- function(i, cond_set) {

  #
  # This function creates a string key to be used when organising the evaluated
  # h-functions of every level of an R-vine pair copula model in a hash table,
  # using R's built-in enviroments.
  #

  cond_set <- sort(cond_set)
  paste0(c("(", i, "| ",
           paste0(c(sapply(cond_set[-length(cond_set)],
                           function(cond_var) paste0(c(cond_var, ", "),
                                                     collapse = "")),
                    cond_set[length(cond_set)]),
                  collapse = ""),
           ")"), collapse = "")
}

.weave_transformed <- function(u1, u2) {

  # This function weaves two transformed variables (so that the resulting
  # variable has the form [u+, u-], if any of the two margins are discrete,
  # u- only contains columns from discrete variables),
  # where u_j will be a 2xn matrix if the conditioned variable
  # in the j-th transformed variable is discrete, and a numeric vector
  # of length n if continuous.

  u <- cbind(u1, u2)

  if (is.null(ncol(u1)) || ncol(u1) == 1) {
    u
  } else if (is.null(ncol(u2)) || ncol(u2) == 1) {
    u[, c(1, 3, 2)]
  } else {
    u[, c(1, 3, 2, 4)]
  }
}

.bicop_2_hbicop <- function(u, bicop_obj, cond_var=2, return_u_minus=FALSE) {

  # Function that lets you compute h-functions, without having to
  # specify the u_1^- when computing C(u_1 | u_2), when u_1 is
  # discrete. This is because u_1^- is redundant in that case, but rvinecopulibs
  # hbicop() function demands that it is provided. In addition, the specifying
  # return_u_minus = TRUE, will make the function output the n x 2 matrix
  # [C(u_2 | u_1), C(u_2^- | u_1)] if cond_var = 1, or
  # [C(u_1 | u_2), C(u_1^- | u_2)] if cond_var = 2.

  if (!bicop_obj$var_types[c(2, 1)[cond_var]] == "d") {
    # In this case (that the conditioned variable is continuous), the
    # h-function in rvinecopulib can be called directly, as it then behaves as
    # expected

    if (return_u_minus) {
      cbind(rvinecopulib::hbicop(u, cond_var = cond_var,
                                 family = bicop_obj$family,
                                 rotation = bicop_obj$rotation,
                                 parameters = bicop_obj$parameters,
                                 var_types = bicop_obj$var_types),
            rvinecopulib::hbicop(u, cond_var = cond_var,
                                 family = bicop_obj$family,
                                 rotation = bicop_obj$rotation,
                                 parameters = bicop_obj$parameters,
                                 var_types = bicop_obj$var_types))
    } else {
      rvinecopulib::hbicop(u, cond_var = cond_var, family = bicop_obj$family,
                           rotation = bicop_obj$rotation,
                           parameters = bicop_obj$parameters,
                           var_types = bicop_obj$var_types)
    }
  } else {

    # This is more complicated. There are four_cases.

    u_columns_1 <- switch(1 + 2 * (cond_var - 1) + 1 * (ncol(u) == 4),
                          c(1, 2, 1, 3),
                          c(1, 2, 3, 4),
                          c(1, 2, 3, 2),
                          c(1, 2, 3, 4))

    if (return_u_minus) {

      u_columns_2 <- switch(1 + 2 * (cond_var - 1) + 1 * (ncol(u) == 4),
                            c(1, 3, 1, 3),
                            c(1, 4, 3, 4),
                            c(3, 2, 3, 2),
                            c(3, 2, 3, 4))

      cbind(rvinecopulib::hbicop(u[, u_columns_1], cond_var = cond_var,
                                 family = bicop_obj$family,
                                 rotation = bicop_obj$rotation,
                                 parameters = bicop_obj$parameters,
                                 var_types = bicop_obj$var_types),
            rvinecopulib::hbicop(u[, u_columns_2], cond_var = cond_var,
                                 family = bicop_obj$family,
                                 rotation = bicop_obj$rotation,
                                 parameters = bicop_obj$parameters,
                                 var_types = bicop_obj$var_types))

    } else {
      rvinecopulib::hbicop(u[, u_columns_1], cond_var = cond_var,
                           family = bicop_obj$family,
                           rotation = bicop_obj$rotation,
                           parameters = bicop_obj$parameters,
                           var_types = bicop_obj$var_types)
    }
  }
}

.approx_cond_exp <- function(y, u, v, m, r, truncate=TRUE) {

  first_six_moments <- function(mu, sigma) {
    cbind(mu,
          mu**2 + sigma**2,
          mu**3 + 3 * mu * sigma**2,
          mu**4 + 6 * mu**2 * sigma**2 + 3 * sigma**4,
          mu**5 + 10 * mu**3 * sigma**2 + 15 * mu * sigma**4,
          mu**6 + 15 * mu**4 * sigma**2 + 45 * mu**2 + sigma**4 +
            15 * sigma**6)
  }

  if (! m %in% 1:6) {
    stop("m must be one of 1, 2, 3, 4, 5, or 6")
  }

  n <- length(y)

  gammas <- stats::lm(
    y~. - 1,
    data = as.data.frame(
      sapply(seq_len(m + 1), function(j) stats::qnorm(u) ** (j - 1))
    )
  )$coef

  means <- c(r) * stats::qnorm(v)
  sd <- rep(sqrt(1 - r**2), n)

  if (m == 1) {
    est <- (
      gammas[1] +
        as.numeric(first_six_moments(means, sd)[, 1] * gammas[-1])
    )
  } else {
    est <- (
      gammas[1] +
        as.numeric(first_six_moments(means, sd)[, 1:m] %*%  gammas[-1])
    )
  }
  if (truncate) {
    est[est > 1] <- 1
    est[est < -1] <- -1
    est
  } else {
    est
  }
}

.get_hfunc <- function(j, cond_set, conditionals) {

  u <- get(.make_hfunc_key(j, cond_set),
           envir = conditionals)
  if (is.null(dim(u))) {
    u
  } else {
    u[, 1]
  }
}

.get_valid_edges <- function(t, rvine_mat, flipped_mat=FALSE) {

  # This function finds all valid values that can
  # be entered into m_{d + 1 -t, 1} for t from 2 to d-1,
  # given an R-vine matrix

  if (t == 1) {
    seq(ncol(rvine_mat) - 1)
  } else {

    # Todo: This will always be false, so I should perhaps
    # rewrite the code below.
    if (!flipped_mat) {
      rvine_mat <- rvine_mat[seq(nrow(rvine_mat), 1), ]
    }

    k <- nrow(rvine_mat) + 2 - t
    match_vec <- rvine_mat[k:nrow(rvine_mat), 1]

    valid_values <- c()

    for (l in 2:(nrow(rvine_mat) + 1 - t)) {
      set_l_k <- rvine_mat[k:nrow(rvine_mat), l]
      if (setequal(set_l_k, match_vec)) {
        valid_values <- c(valid_values, rvine_mat[l, l])
      }
      if (k == nrow(rvine_mat)) {
        tilde_set_l_k <- rvine_mat[l, l]
      } else {
        tilde_set_l_k <- c(rvine_mat[l, l],
                           rvine_mat[(k + 1):nrow(rvine_mat), l])
      }
      if (setequal(tilde_set_l_k, match_vec)) {
        valid_values <- c(valid_values, rvine_mat[k, l])
      }
    }

    valid_values
  }
}

.compute_distrbs <- function(x, x_type, parametric=FALSE) {

  #
  # This function takes a matrix x, and returns a list containing empirical
  # cdf functions for each column, as well as a function that can transform
  # a new matrix that has the same number of columns, by these empirical cdf
  # functions. If parametric = TRUE, the function will fit and return marginal
  # cdfs that are gaussian or bernoulli distributed.
  #

  # Check, in case x is 1d
  if (is.null(ncol(x))) {
    x <- as.matrix(x)
  }

  ecdf_np1 <- function(x, x_type) {
    #
    # copy of stats::ecdf, but with a different
    # dividing constant: (1/n) is replaced by (1/(n+1)), if x is continuous
    #
    x <- sort(x)
    n <- length(x)

    if (n < 1) {
      stop("'x' must have 1 or more non-missing values")
    }

    weight <- switch(x_type, c = 1 / (n + 1), d = 1 / n)
    vals <- unique(x)
    rval <- stats::approxfun(
      vals, cumsum(tabulate(match(x, vals))) * weight, method = "constant",
      yleft = 0, yright = 1, f = 0, ties = "ordered"
    )
    class(rval) <- c("ecdf", "stepfun", class(rval))
    assign("nobs", n, envir = environment(rval))
    attr(rval, "call") <- sys.call()
    rval
  }

  margins <- sapply(
    seq(ncol(x)), function(j) {
      if (x_type[j] == "d") {
        if (all(sort(unique(x[, j])) == c(0, 1)) & parametric) {
          bernoulli_marg(x[, j])
        } else {
          ecdf_np1(x[, j], x_type[j])
        }
      } else {
        if (parametric) {
          normal_marg(x[, j])
        } else {
          ecdf_np1(x[, j], x_type[j])
        }
      }
    }
  )

  transform <- function(x, var_types_x) {
    # Check, in case x is 1d
    if (is.null(ncol(x))) {
      x <- as.matrix(x)
    }
    distr_plus <- sapply(seq_len(ncol(x)), function(i) margins[[i]](x[, i]))
    if (any(var_types_x == "d")) {
      distr_min <- sapply(which(var_types_x == "d"),
                          function(i) margins[[i]](x[, i] - 1))
      return(cbind(distr_plus, distr_min))
    } else{
      return(distr_plus)
    }
  }

  list(margins = margins, transform = transform)

}

.extract_margs <- function(f, mrgs) {
  margins <- sapply(mrgs, function(j) f$margins[[j]])

  transform <- function(x, var_types_x) {
    # Check, in case x is 1d
    if (is.null(ncol(x))) {
      x <- as.matrix(x)
    }
    distr_plus <- sapply(seq_len(ncol(x)), function(i) margins[[i]](x[, i]))
    if (any(var_types_x == "d")) {
      distr_min <- sapply(which(var_types_x == "d"),
                          function(i) margins[[i]](x[, i] - 1))
      return(cbind(distr_plus, distr_min))
    } else{
      return(distr_plus)
    }
  }

  list(margins = margins, transform = transform)
}

.pick_u_cols <- function(var_inds, u, var_types) {
  # Small function that just selects the relevant columns of U for a specific
  # variable pair, used when computing the transformed variables for the
  # first tree, to avoid that the code in the loop below becomes too messy.
  if (any(var_types[var_inds] == "d")) {
    u[, c(var_inds, length(var_types) + cumsum(var_types == "d")[var_inds])]
  } else {
    u[, var_inds]
  }
}

normal_marg <- function(x) {
  f <- function(z) {
    mu <- attr(sys.function(), "mu")
    sigma <- attr(sys.function(), "sigma")
    stats::pnorm(z, mu, sigma)
  }
  attr(f, "mu") <- mean(x)
  attr(f, "sigma") <- stats::sd(x)
  class(f) <- c("normal_marg")
  attr(f, "call") <- sys.call()
  f
}

quantile.normal_marg <- function(f, probs) {
  stats::qnorm(probs, attr(f, "mu"), attr(f, "sigma"))
}

bernoulli_marg <- function(x) {
  f <- function(z) {
    prob <- attr(sys.function(), "prob")
    stats::pbinom(z, 1, prob)
  }
  attr(f, "prob") <- mean(x)
  class(f) <- c("bernoulli_marg")
  attr(f, "call") <- sys.call()
  f
}

quantile.bernoulli_marg <- function(f, probs) {
  stats::qbinom(probs, 1, attr(f, "prob"))
}

Any scripts or data that you put into this service are public.

copulaboost documentation built on Aug. 23, 2022, 9:05 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

copulaboost
Fitting Additive Copula Regression Models for Binary Outcome Regression

R/utils.R
In copulaboost: Fitting Additive Copula Regression Models for Binary Outcome Regression

Defines functions quantile.bernoulli_marg bernoulli_marg quantile.normal_marg normal_marg .pick_u_cols .extract_margs .compute_distrbs .get_valid_edges .get_hfunc .approx_cond_exp .bicop_2_hbicop .weave_transformed .make_hfunc_key

Try the copulaboost package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

copulaboost Fitting Additive Copula Regression Models for Binary Outcome Regression

R/utils.R In copulaboost: Fitting Additive Copula Regression Models for Binary Outcome Regression

Defines functions quantile.bernoulli_marg bernoulli_marg quantile.normal_marg normal_marg .pick_u_cols .extract_margs .compute_distrbs .get_valid_edges .get_hfunc .approx_cond_exp .bicop_2_hbicop .weave_transformed .make_hfunc_key

Try the copulaboost package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

copulaboost
Fitting Additive Copula Regression Models for Binary Outcome Regression

R/utils.R
In copulaboost: Fitting Additive Copula Regression Models for Binary Outcome Regression