R/fit_cop_corr.R
In cylcop: Circular-Linear Copulas with Angular Symmetry for Movement Data

#' Estimate Copula Parameters from Correlation Measures
#'
#' This function implements a simple search of the parameter space of a
#' '\code{\linkS4class{cyl_copula}}' object to find the
#' parameter values that lead to a correlation that is closest to the correlation
#' in the data (\code{theta} and \code{x}). In some special cases of
#' '\code{\linkS4class{cyl_rect_combine}}' copulas, the parameter can be
#' obtained analytically from Kendall's tau of the data.
#'
#' @param copula \R object of class '\code{\linkS4class{cyl_copula}}'.
#' @param theta \link[base]{numeric} \link[base]{vector} of angles
#' (measurements of a circular variable).
#' @param x \link[base]{numeric} \link[base]{vector} of step lengths
#' (measurements of a linear variable).
#' @param acc \link[base]{numeric} value, the interval of the copula parameter
#' at which to evaluate the correlation.
#' @param n \link[base]{numeric} value, the number of sample points at each
#' optimization step.
#' @param method \link[base]{character} string describing what correlation metric
#'  to use. Either a rank-based circular-linear correlation coefficient (\code{"cor_cyl"}),
#' mutual information (\code{"mi_cyl"}), or Kendall's tau (\code{"tau"}).
#'
#' @param ... Additional parameters (see individual methods).
#'
#' @details The code assumes that the correlation captured by the copula increases
#' monotonously with the copula parameter values. It starts with a parameter value close
#' to the minimum for that copula and calculates the correlation for a sample of size \code{n}
#' from that copula. Next, the parameter is doubled and again the correlation for a sample
#' of size \code{n} calculated. After this exponential search pattern, a binary search
#' is implemented similarly between the bounds found with the exponential search. For this
#' binary search, the interval between those bounds is split into small intervals of length
#' \code{acc}. Thus, smaller values of \code{acc} lead to higher accuracy.
#'
#' If a '\code{\linkS4class{cyl_rect_combine}}' copula has rectangles spanning
#' the entire unit square and as background the independence copula, Kendall's tau can be used
#' to analytically calculate the parameter value leading to the correlation of the data.
#' No search is necessary in this case. This makes it the recommended method to use
#' for those '\code{\linkS4class{cyl_rect_combine}}' copulas.
#' \code{optCor()} is an alias for \code{fit_cylcop_cor}.
#'
#' See also individual methods (below) for more detailed explanations.
#'
#' @return \link[base]{numeric} \link[base]{vector} containing the estimated
#' parameter value(s).
#'
#' @examples set.seed(123)
#'
#' sample <- rcylcop(100, cyl_rect_combine(copula::frankCopula(2)))
#' fit_cylcop_cor(cyl_rect_combine(copula::frankCopula()),
#'   theta = sample[,1],
#'   x = sample[,2],
#'   method = "tau"
#' )
#'
#' fit_cylcop_cor(cyl_rect_combine(copula::frankCopula()),
#'   theta = sample[,1],
#'   x = sample[,2],
#'   method = "mi_cyl",
#'   n = 100
#' )
#'
#' fit_cylcop_cor(cyl_rect_combine(copula::claytonCopula()),
#'   theta = sample[,1],
#'   x = sample[,2],
#'   method = "tau"
#' )
#'
#' fit_cylcop_cor(cyl_quadsec(), theta = sample[,1], x = sample[,2], method = "mi_cyl")
#' fit_cylcop_cor(cyl_quadsec(), theta = sample[,1], x = sample[,2], method = "cor_cyl")
#' fit_cylcop_cor(cyl_quadsec(),
#'   theta = sample[,1],
#'   x = sample[,2],
#'   method = "cor_cyl",
#'   n = 100,
#'   acc = 0.001
#' )
#'
#' @references \insertRef{Hodelappl}{cylcop}
#'
#' \insertRef{Hodelmethod}{cylcop}
#'
#' @seealso \code{\link{mi_cyl}()}, \code{\link{cor_cyl}()}, \code{\link{fit_cylcop_ml}()},
#' \code{\link{opt_auto}()}, \code{copula::\link[copula]{fitCopula}()}.
#'
#' @export
#'
setGeneric("fit_cylcop_cor",
           function(copula,
                    theta,
                    x,
                    acc = NULL,
                    n = 10000,
                    method,
                    ...){
             missing_flag <- FALSE
             if(rlang::is_missing(method)){
               method <- "tau"
               missing_flag <- TRUE
                 }
             #validate input
             tryCatch({
               check_arg_all(check_argument_type(copula,
                                                 type="cyl_copula")
               ,1)
               check_arg_all(check_argument_type(theta,
                                                 type="numeric")
                             ,1)
               check_arg_all(check_argument_type(x,
                                                 type="numeric")
                             ,1)

               check_arg_all(list(check_argument_type(acc,
                                                      type="NULL"),
                                  check_argument_type(acc,
                                                      type="numeric",
                                                      length=1,
                                                      lower=0))
               ,2)
               check_arg_all(check_argument_type(n,
                                                 type="numeric",
                                                 length = 1,
                                                 integer=T,
                                                 lower=1)
                             ,1)
               check_arg_all(check_argument_type(method,
                                                 type="character",
                                                 values=c("cor_cyl", "mi_cyl", "tau"),
                                                 length=1)
                             ,1)
             },
             error = function(e) {
               error_sound()
               rlang::abort(conditionMessage(e))
             }
             )
             if(length(theta)!=length(x)){
               stop(
                 error_sound(),
                 "theta and x must have the same length."
               )
             }

             if(missing_flag) method <- "missing"

             standardGeneric("fit_cylcop_cor")},
           signature = "copula")



# Roughly estimate cyl_vonmises parameter from some correlation metric
#
# It only makes sense to optimize kappa, mu does not influence correlation
#
#' @describeIn  fit_cylcop_cor only parameter \code{"kappa"} can be optimized, since parameter
#' \code{"mu"} does not influence the correlation.
#
# @param copula A \code{cyl_vonmises} object
# @param theta A numeric vector of angles (measurements of a circular
#   variable).
# @param x A numeric vector of steplengths (measurements of a linear variable).
# @param acc A numeric value, the interval of kappa at which to evaluate the
#   correlation. DEFAULT: 0.5
# @param n The number of sample points at each optimization step (kappa value). DEFAULT: 10000
# @param method A string of characters, describing what correlation metric to use.
# Either a rank-based circular-linear coefficient ("cor_cyl") or
# mutual information ("mi_cyl").
#
# @return The estimated value of kappa.
#' @export
#'
setMethod("fit_cylcop_cor", "cyl_vonmises", function(copula,
                                             theta,
                                             x,
                                             acc,
                                             n,
                                             method = "cor_cyl") {

  if(method=="missing") method <- "cor_cyl"

  data <- data.frame(theta, x)
  if (method != "cor_cyl" &&
      method != "mi_cyl"
  )
    stop(error_sound(),
         "only methods 'cor_cyl' and 'mi_cyl' are implemented for this copula")
  if (is.null(acc))
    acc <- 0.5
  min <- 0
  max <- 1000
  param_name <- "kappa"
  if(cylcop.env$silent==F){
    message(
    " find parameter kappa\nmethod: ",
    method,
    "\naccuracy: ",
    acc,
    "\nnumber of samples, n, in each step: ",
    n,
    "\n"
  )}
  kappa <-
    search_cor(copula, data, method, acc, n, min, max, param_name)
  if(cylcop.env$silent==F){
    message("kappa approx. ", kappa, "\n")}
  return(kappa)
})



#' Roughly estimate cyl_quadsec parameter from some correlation metric
#'
#' @describeIn  fit_cylcop_cor the absolute value of the parameter is optimized, positive
#' and negative values give the same correlation.
#'
# @param copula A \code{cyl_quadsec} object
# @param theta A numeric vector of angles (measurements of a circular
#   variable).
# @param x A numeric vector of steplengths (measurements of a linear variable).
# @param acc A numeric value, the interval of a at which to evaluate the
#   correlation. DEFAULT: 0.01
# @param n The number of sample points at each optimization step (value of a). DEFAULT: 10000
# @param method A string of characters, describing what correlation metric to use.
# Either a rank-based circular-linear coefficient ("cor_cyl") or
# mutual information ("mi_cyl").
#
# @return The estimated value of a.
#' @export
#'
setMethod("fit_cylcop_cor", "cyl_quadsec", function(copula,
                                            theta,
                                            x,
                                            acc,
                                            n,
                                            method = "cor_cyl") {
  if(method=="missing") method <- "cor_cyl"

  data <- data.frame(theta, x)
  if (method != "cor_cyl" &&
      method != "mi_cyl"
      )
    stop(error_sound(),
         "only methods 'cor_cyl' and 'mi_cyl' are impelemented for this copula")
  if (is.null(acc))
    acc <- 0.01
  min <- 0
  max <- 1 / (2 * pi)
  param_name <- "a"
  if (cylcop.env$silent==FALSE){
    message(
    "find parameter ",
    param_name,
    "\nmethod: ",
    method,
    "\naccuracy: ",
    acc,
    "\nnumber of samples, n, in each step: ",
    n,
    "\n"
  )}
  a <-
    search_cor(copula, data, method, acc, n, min, max, param_name)
  if(cylcop.env$silent==F){
    message("a approx. ", a, " or ", -a, "\n")}
  return(a)
})



# Roughly estimate cyl_cubsec parameters from some correlation metric
#
#
# @param copula A \code{cyl_cubsec} object
# @param theta A numeric vector of angles (measurements of a circular
#   variable).
# @param x A numeric vector of steplengths (measurements of a linear variable).
# @param acc A numeric value, the interval of a and/or b at which to evaluate
#   the correlation. DEFAULT: 0.01
# @param n The number of sample points at each optimization step (value of a
#   and/or b). DEFAULT: 10000
# @param method A string of characters, describing what correlation metric to
#   use. Either a rank-based circular-linear coefficient ("cor_cyl") or mutual
#   information ("mi_cyl").
#' @describeIn  fit_cylcop_cor optimization of parameters, \code{"a"} and \code{"b"},
#' can be done separately or simultaneously.
#' @param parameter For '\code{\linkS4class{cyl_cubsec}}' copulas: A character
#' string specifying which parameter of the copula to optimize,
#'   \code{"a"}, \code{"b"}, or \code{"both"}
#'
# @return The estimated value of \code("a") or \code("b"), or a \link[base]{numeric} \link[base]{vector} holding the
#   estimated values of a and b.
#'
#'
#' @export
#'
setMethod("fit_cylcop_cor", "cyl_cubsec", function(copula,
                                           theta,
                                           x,
                                           acc,
                                           n,
                                           method = "cor_cyl",
                                           parameter = "both") {

  if(method=="missing") method <- "cor_cyl"

  data <- data.frame(theta, x)
  if (method != "cor_cyl" &&
      method != "mi_cyl"
  )
    stop(error_sound(),
         "only methods 'cor_cyl' and 'mi_cyl' are impelemented for this copula")
  if (is.null(acc))
    acc <- 0.01
  min <- -1 / (2 * pi)
  max <- 1 / (2 * pi)

  if (parameter == "a" || parameter =="b") {
    param_name <- parameter
    if(cylcop.env$silent==F){message(
      "find parameter ",
      param_name,
      "\nmethod: ",
      method,
      "\naccuracy: ",
      acc,
      "\nnumber of samples, n, in each step: ",
      n,
      "\n"
    )}
    param <-
      search_cor(copula,
                 data,
                 method,
                 acc,
                 n,
                 min,
                 max,
                 param_name
      )
    if(cylcop.env$silent==F){message(param_name ," approx. ", param, "\n")}
    return(param)
  }

  else if (parameter == "both") {
    if(cylcop.env$silent==F){
    message(
      "find parameters a and b\nmethod: ",
      method,
      "\naccuracy: ",
      acc,
      "\nnumber of samples, n, in each step: ",
      n,
      "\n"
    )}

    if (method == "cor_cyl") {
      fun <- cor_cyl
    }
    else if (method == "mi_cyl" ) {
      fun <- mi_cyl
    }

    #calculate correlation values for all possible combinations of a and b, find the minimum
    cor <- fun(theta = theta, x = x)
    a <- seq(min, max, by = acc)
    b <- seq(min, max, by = acc)
    input <- expand.grid(a, b)
    diff <- Inf
    if(cylcop.env$silent==F){
      message("total steps: ", nrow(input))}
    for (i in seq_len(nrow(input))) {
      if (i %% 10 == 0)
        if(cylcop.env$silent==F){
          message("\ncurrent step: ", i)}
      copula <- set_cop_param(copula, param_val=as.matrix(input[i,]), param_name=c("a","b"))
      sample <- rcylcop(n, copula)
      test <- fun(c(sample[, 1]), c(sample[, 2]))
      if (abs(cor - test) < diff) {
        diff <- abs(cor - test)
        final_a <- input[i, 1]
        final_b <- input[i, 2]
      }
    }
    if(cylcop.env$silent==F){
      message("\nthe optimal parameter set is: a= ",
        round(final_a, 2),
        ", b= ",
        round(final_b, 2),
        "\n")}
    return(c(final_a,final_b))
  }
  else
    stop(error_sound(),
         "argument 'parameter' must be from c(\"a\",\"b\",\"both\")")
})



# Roughly estimate cyl_rot_combine parameter from some correlation metric
#
#
# @param copula A \code{cyl_rot_combine} object
# @param theta A numeric vector of angles (measurements of a circular
#   variable).
# @param x A numeric vector of steplengths (measurements of a linear variable).
# @param acc A numeric value, the interval of the copula parameter at which to evaluate the
#   correlation. DEFAULT: 0.5
# @param n The number of sample points at each optimization step (parameter value). DEFAULT: 10000
# @param method A string of characters, describing what correlation metric to use.
#   Only mutual information ("mi_cyl") makes sense
#
# @return The estimated value of the copula parameter.
#' @describeIn  fit_cylcop_cor the circular-linear correlation coefficient will give a
#' value close to 0 for any parameter value. It therefore only makes sense to
#' use \code{method = "mi_cyl"} for the optimization.
#' @export
#'
setMethod("fit_cylcop_cor", "cyl_rot_combine", function(copula, theta, x, acc, n, method ="mi_cyl") {

  if(method=="missing") method <- "mi_cyl"

  data <- data.frame(theta, x)
  if (method == "cor_cyl")
    stop(
      error_sound(),
      "correlation will always be approximately 0 for these types of copulas.\n",
      "Try optimization with method=mi_cyl"
    )
  if (method != "cor_cyl" &&
      method != "mi_cyl"
  )
    stop(error_sound(),
         "only method 'mi_cyl' is impelemented for this copula")

  #adhoc method of only testing positive parameter values.
  #in the future also implement negative ones.
  min <- max(0,copula@param.lowbnd)
  max <- copula@param.upbnd
  if (is.null(acc))
    acc <- 0.5
  param_name <- copula@param.names[1]
  if(cylcop.env$silent==F){
    message(
    "find parameter ",
    param_name,
    "\nmethod: ",
    method,
    "\naccuracy: ",
    acc,
    "\nnumber of samples, n, in each step: ",
    n,
    "\n"
  )}
  a <- search_cor(copula, data, "mi_cyl", acc, n, min, max, param_name)
  if(cylcop.env$silent==F){
    message(param_name, "approx. ", a, "\n")}
  return(a)
})



# Roughly estimate cyl_rect_combine parameters from some correlation metric
#
#
# @param copula A \code{cyl_rect_combine} object
# @param theta A numeric vector of angles (measurements of a circular
#   variable).
# @param x A numeric vector of steplengths (measurements of a linear variable).
# @param acc A numeric value, the interval of the copula parameter at which to evaluate the
#   correlation. DEFAULT: 0.5
# @param n The number of sample points at each optimization step (parameter value). DEFAULT: 10000
# @param method A string of characters, describing what correlation metric to
#   use. Either Kendall's tau ("tau", recommended), a rank-based circular-linear coefficient
#   ("cor_cyl"), or mutual information ("mi_cyl").
#' @param background For '\code{\linkS4class{cyl_rect_combine}}' copulas :
#' A \link[base]{logical} value describing whether to optimize
#' the parameter of the background copula, (\code{background = TRUE}) or
#' the one of the copula in the rectangles (\code{background = FALSE}).
#'
#' @describeIn  fit_cylcop_cor if the rectangles span the entire unit square and the background
#' is the independence copula, it is recommended to use \code{method = "tau"}, since this
#' calculates the copula parameter analytically. If there is a background copula,
#' other than the independence copula, its parameter can be optimized by setting
#' \code{background=TRUE}.
#
# @return The estimated value of the copula parameter.
#' @export
#'
setMethod("fit_cylcop_cor", "cyl_rect_combine", function(copula,
                                               theta,
                                               x,
                                               acc,
                                               n,
                                               method = "tau",
                                               background = FALSE) {

  if(method=="missing") method <- "tau"

  data <- data.frame(theta, x)

  low_rect <- copula@parameters[match(c("low_rect1", "low_rect2"),copula@param.names)]
  up_rect <- copula@parameters[match(c("up_rect1", "up_rect2"),copula@param.names)]
  if ((isTRUE(all.equal(low_rect,c(0, 0.5))) &&
      isTRUE(all.equal(up_rect, c(0.5, 1))) )||
      any(is(copula@background.cop) == "indepCopula")) {
    if(background)
    stop(
      error_sound(),
      "background = TRUE only makes sense when there is a background parameter to optimize."
      )
  }

  if(method=="tau"){
    if(any(is(copula@sym.cop)=="cyl_copula")) stop(
      error_sound(),
      "method = tau is only implemented for linear-linear copulae in the rectangles"
    )

    a <- optTau(copula,theta,x)
  }
  else{
    ind <- 1
    if(any(is(copula@sym.cop)=="cyl_vonmises")) ind <- 2
  ind <- intersect(which(stringr::str_starts(copula@param.names,"bg_",negate = TRUE)),
              which(stringr::str_ends(copula@param.names,"rect.",negate = TRUE)))[ind]
  min <- max(0,copula@param.lowbnd[ind])
  if (is.null(acc))
    acc <- 0.5
  if (!background) {
    max <- copula@param.upbnd[ind]
    param_name <- copula@param.names[ind]
    if(cylcop.env$silent==F){
      message(
      "find parameter ",
      param_name,
      "\nmethod: ",
      method,
      "\naccuracy: ",
      acc,
      "\nnumber of samples, n, in each step: ",
      n,
      "\n"
    )}
  }
  else {
    max <- copula@param.upbnd[which(stringr::str_starts(copula@param.names,"bg_"))[1]]
    param_name <- copula@param.names[which(stringr::str_starts(copula@param.names,"bg_"))[1]]
    if(cylcop.env$silent==F){
      message(
      "find background parameter ",
      param_name,
      "\nmethod: ",
      method,
      "\naccuracy: ",
      acc,
      "\nnumber of samples, n, in each step: ",
      n,
      "\n"
    )}
  }

  a <-
    search_cor(copula, data, method, acc, n, min, max, param_name)
  if(cylcop.env$silent==FALSE){
    message(param_name, "approx. ", a,"\n")}
  }

  return(a)
})



# Finds the value of a specified parameter which gives correlation closest to empirical copula
#
# This function assumes that correlation and MI increase monotonously with the copula parameter
#
# @param copula A \code{cyl_copula} object.
# @param data A data.frame containing circular and linear measurements.
# @param method A string of characters, describing what correlation metric to
#   use. Either a rank-based circular-linear coefficient ("cor_cyl") or mutual
#   information ("mi_cyl").
# @param acc A numeric value, the interval of the copula parameter at which to evaluate the
#   correlation.
# @param n The number of sample points at each optimization step (parameter value).
# @param min The minimum value of parameter to test.
# @param max The maximum value of parameter to test.
# @param param_name A character string, the name of the parameter to be varied.
#
# @return The value of parameter \code{param_name} that gives correlation closest to the empirical copula
# of the data in \code{data}.
# @export
#
search_cor <-
  function(copula,
           data,
           method = c("cor_cyl", "mi_cyl"),
           acc,
           n,
           min,
           max,
           param_name) {

    if (method == "cor_cyl") {
      fun <- cor_cyl
    }
    else if (method == "mi_cyl") {
      fun <- mi_cyl
    }
    else
      stop(error_sound(), "provide method, 'cor_cyl' or 'mi_cyl'")

    #get empirical correlation or MI of the data
    cor <- fun(theta = c(data[, 1]), x = c(data[, 2]))

    bound <- min + acc
    if(bound >= max){
      stop(error_sound(),
           paste0("The interval of the copula parameter at which the correlation is calculated\n",
                 "(acc = ",acc,") is larger than the search range of the parameter (",
                 min, " - ",max,". Reduce acc.")
           )
    }
    copula <- set_cop_param(copula,  param_val=bound, param_name=param_name)
    sample <- rcylcop(n, copula)
    current <- fun(c(sample[, 1]), c(sample[, 2]))


#exponential search for upper and lower bounds

    if(cylcop.env$silent==F){
      cat("Starting exponential search: \n")}
    while ((current < cor) && (bound < max)) {
      if(cylcop.env$silent==F){
        cat(param_name, ">", bound, "\n")}
      bound <- bound * 2
      if(bound>=max) break
      copula <- set_cop_param(copula, param_val=bound, param_name=param_name)
      sample <- rcylcop(n, copula)
      current <- fun(c(sample[, 1]), c(sample[, 2]))
    }


#binary search between bounds, the possible parameter values in the search "grid" vals will have a spacing of acc

    vals <- seq(from = bound / 2,
                to = min(bound + 1, max),
                by = acc)
    l <- 1
    r <- length(vals)
    m <- floor((l + r) / 2)
    if(cylcop.env$silent==F){
      cat("Starting binary search :\n")}
    while (m != l & m != r) {
      if(cylcop.env$silent==F){
        cat(vals[l], " < ", param_name, " < ",  vals[r], "\n")}
      copula <- set_cop_param(copula, param_val=vals[m], param_name=param_name)
      sample <- rcylcop(n, copula)
      current <- fun(c(sample[, 1]), c(sample[, 2]))
      if (current < cor)
        l <- m
      else if (current > cor)
        r <- m
      m <- floor((l + r) / 2)
    }
    return(vals[m])
  }



# Estimate copula parameters based on Kendall's tau of the empirical copula
#
# @param copula A \code{Copula} object (from the copula package) or a
#   \code{cyl_rect_combine} object.
# @param theta A numeric vector of angles (measurements of a circular
#   variable).
# @param x A numeric vector of steplengths (measurements of a linear variable).
#
# @return A vector holding the parameter estimates.
# @export
#
setGeneric("optTau",
           function(copula, theta, x)
             standardGeneric("optTau"),
           signature = "copula")



# @describeIn optTau Works only for rectangles spanning the entire unit square, see text.
#
setMethod("optTau", "cyl_rect_combine", function(copula, theta, x) {
  low_rect <- copula@parameters[match(c("low_rect1", "low_rect2"),copula@param.names)]
  up_rect <- copula@parameters[match(c("up_rect1", "up_rect2"),copula@param.names)]
  if (!isTRUE(all.equal(low_rect,c(0, 0.5))) ||
      !isTRUE(all.equal(up_rect, c(0.5, 1))) ||
      !any(is(copula@background.cop) == "indepCopula")) {
    stop(
      error_sound(),
      "method = tau is only implemented for rectangles spanning the entire unit square and independent background copula"    )
  }

  data <- data.frame(theta, x) %>% na.omit()
  #calculate empirical copula
  emp_cop <- as.data.frame(pobs(data, ties.method = "average"))
  #flip  upper or lower part of empirical copula , recalculate ranks and optimize
  if (copula@flip_up)
    emp_cop$theta <- modify_if(emp_cop$theta, ~ .x > 0.5, ~ 1 - .x)
  else
    emp_cop$theta <- modify_if(emp_cop$theta, ~ .x < 0.5, ~ 1 - .x)
  emp_cop <- pobs(emp_cop, ties.method = "average")
  result <- fitCopula(copula@sym.cop, emp_cop, method = "itau")
  return(result@estimate)
})



# @describeIn optTau Only here for consistency, just calls function from copula package.
#
setMethod("optTau", "Copula", function(copula, theta, x) {
  data <- data.frame(theta, x) %>% na.omit()
  #calculate empirical copula
  emp_cop <- pobs(data, ties.method = "average")
  result <- fitCopula(copula, emp_cop, method = "itau")
  return(result@estimate)
})

#' @rdname fit_cylcop_cor
#' @examples
#' optCor(cyl_quadsec(),
#'  theta = sample[,1],
#'  x = sample[,2],
#'  method = "mi_cyl")
#'
#' @export
optCor <- fit_cylcop_cor