algstat: Algebraic Statistics

Documented in project_onto_variety project_onto_variety_lagrange

#' Projection onto a variety
#'
#' A R-based implementation of the gradient descent homotopies. The lagrange
#' version uses Newton's method on the Lagrangian system.
#'
#'
#' @param x0 Atomic vector, the point to be projected.
#' @param poly An mpoly object, typically created with [mp()].
#' @param dt The t-mesh size for the homotopy.
#' @param varorder A character vector specifying the variable order to pass to
#'   [mpoly::as.function.mpoly()].
#' @param n_correct The number of Newton correction iterations to use.
#' @param al A numeric vector of length 2; the patch to do projective
#'   calculations over.
#' @param tol A tolerance on the residual; a warning is issued if the magnitude
#'   of the residual is larger than \code{tol}.
#' @param tol_x A tolerance on subsequent step sizes.
#' @param method Used in [project_onto_variety_lagrange()] and
#'   [project_onto_variety_gradient_descent()]. In the former, if
#'   \code{"newton"}, a simple R implementation of Newton's method to solve the
#'   nonlinear algebraic system generated by the Lagrangian; otherwise, a
#'   character string to pass to [optim()] to minimize the sum of the squares of
#'   the Lagrangian. In the latter, the method of selecting the learning rate,
#'   \code{"line"} (line search) or \code{"fixed"}.
#' @param maxit Number of maximum iterations in solving Newton's method.
#' @param ga Learning rate for gradient descent.
#' @param max_ga Maximum learning rate for gradient descent line search.
#' @param message If \code{TRUE}, the user is issued messages on the algorithm's
#'   progress.
#' @param ... Additional arguments to pass to [optim()] when \code{method} is
#'   not \code{"newton"}.
#' @param gfunc,dgfunc,ddgfunc The polynomial [poly], its gradient, and its
#'   Hessian as functions. Only used in [project_onto_variety()], and computed
#'   internally if not provided.
#' @param bias A multiple to add to the identity to make the Jacobian
#'   invertible.
#' @return A numeric vector the same length as \code{x0}.
#' @references Griffin, Z. and J. Hauenstein (2015). Real solutions to systems
#'   of polynomial equations and parameter continuation. \emph{Advances in
#'   Geometry} 15(2), pp.173--187.
#' @references Bates, D., J. Hauenstein, A. Sommese, and C. Wampler (2013).
#'   Numerically Solving Polynomial Systems with Bertini. SIAM. pp.34--35.
#' @author David Kahle
#' @name project-onto-variety
#' @examples
#'
#'
#' library("ggplot2")
#'
#'
#' ## basic usage
#' ########################################
#'
#' x0 <- c(1,1)
#' p <- mp("x^2 + y^2 - 1")
#' (x0_proj <- project_onto_variety(x0, p))
#'
#' as.function(p)(x0_proj)
#' sqrt(2)/2
#'
#' cbind(t(x0), t(x0_proj)) %>%
#'   as.data.frame() %>% tibble::as_tibble() %>%
#'   purrr::set_names(c("x", "y", "x_proj", "y_proj")) -> df
#'
#' ggvariety(p) + coord_equal() +
#'   geom_segment(
#'     aes(x, y, xend = x_proj, yend = y_proj),
#'     data = df, inherit.aes = FALSE
#'   )
#'
#'
#' # alternatives
#' 1 / sqrt(2)
#' project_onto_variety_lagrange(x0, p)
#' project_onto_variety_newton(x0, p)
#' project_onto_variety_gradient_descent(x0, p)
#' project_onto_variety_gradient_descent(x0, p, method = "line")
#'
#'
#' # number of variables > 2
#' x0 <- c(1,1,1)
#' p <- mp("x^2 + y^2 + z^2 - 1")
#' 1 / sqrt(3)
#' project_onto_variety(x0, p)
#' project_onto_variety_lagrange(x0, p)
#' project_onto_variety_newton(x0, p)
#' project_onto_variety_gradient_descent(x0, p)
#' project_onto_variety_gradient_descent(x0, p, method = "line")
#'
#'
#' ## options
#' ########################################
#'
#' x0 <- c(1,1)
#' p <- mp("x^2 + y^2 - 1")
#' project_onto_variety(x0, p, message = TRUE)
#' project_onto_variety(x0, p, dt = .25, message = TRUE)
#'
#'
#' # precomputing the function, gradient, and hessian
#' varorder <- c("x", "y")
#' gfunc <- as.function(p, varorder = varorder)
#'
#' dg <- deriv(p, var = varorder)
#' dgfunc <- as.function(dg, varorder = varorder)
#'
#' ddg <- lapply(dg, deriv, var = varorder)
#' ddgfunc_list <- lapply(ddg, as.function, varorder = varorder, silent = TRUE)
#' ddgfunc <- function(x) sapply(ddgfunc_list, function(f) f(x))
#'
#' project_onto_variety(x0, p, gfunc = gfunc, dgfunc = dgfunc, ddgfunc = ddgfunc)
#'
#'
#' ## more complex example
#' ########################################
#'
#' p <- mp("(x^2 + y^2)^2 - 2 (x^2 - y^2)")
#' ggvariety(p, c(-2, 2), n = 201) + coord_equal()
#'
#' x0 <- c(.025, .30)
#' (x0_proj <- project_onto_variety(x0, p))
#'
#' cbind(t(x0), t(x0_proj)) %>%
#'   as.data.frame() %>% tibble::as_tibble() %>%
#'   purrr::set_names(c("x", "y", "x_proj", "y_proj")) -> df
#'
#' ggvariety(p, c(-2, 2)) + coord_equal() +
#'   geom_segment(
#'     aes(x, y, xend = x_proj, yend = y_proj),
#'     data = df, inherit.aes = FALSE
#'   )
#'
#'
#'
#'
#' ## projecting a dataset - grid
#' ########################################
#'
#' library("ggplot2")
#' library("dplyr")
#'
#' (p <- lissajous(5, 5, 0, 0))
#' # (p <- lissajous(9, 9, 0, 0))
#' # p <- mp("x^2 + y^2 - 1")
#' ggvariety(p, n = 251) + coord_equal()
#'
#' set.seed(1)
#' (s <- seq(-1, 1, .25))
#' n <- length(s)
#' grid <- expand.grid(x = s, y = s)
#' grid$x <- jitter(grid$x)
#' grid$y <- jitter(grid$y)
#'
#' ggplot(grid, aes(x, y)) + geom_point() + coord_equal()
#'
#' grid_proj <- project_onto_variety(grid, p)
#' head(grid_proj)
#' names(grid_proj) <- c("x_proj", "y_proj")
#'
#' ggvariety(p, n = 251) + coord_equal() +
#'   geom_segment(
#'     aes(x, y, xend = x_proj, yend = y_proj),
#'     data = bind_cols(grid, grid_proj), inherit.aes = FALSE
#'   ) +
#'   geom_point(aes(x, y), data = grid, inherit.aes = FALSE)
#'
#'
#' # here's what happens when you use a naive implementation -
#' # gradient descent on g^2 with line search
#' grid_proj_gd <- project_onto_variety_gradient_descent(grid, p, method = "optimal")
#' names(grid_proj_gd) <- c("x_proj", "y_proj")
#'
#' grid_proj_lagrange <- project_onto_variety_lagrange(grid, p)
#' names(grid_proj_lagrange) <- c("x_proj", "y_proj")
#'
#' grid_proj_newton <- project_onto_variety_newton(grid, p)
#' names(grid_proj_newton) <- c("x_proj", "y_proj")
#'
#' df <- bind_rows(
#'   bind_cols(grid, grid_proj) %>% mutate(method = "gradient descent homotopy"),
#'   bind_cols(grid, grid_proj_gd) %>% mutate(method = "optimal gradient descent"),
#'   bind_cols(grid, grid_proj_newton) %>% mutate(method = "newton"),
#'   bind_cols(grid, grid_proj_lagrange) %>% mutate(method = "newton on lagrangian")
#' )
#'
#' ggvariety(p, n = 251) +
#'   geom_segment(
#'     aes(x, y, xend = x_proj, yend = y_proj),
#'     data = df, inherit.aes = FALSE
#'   ) +
#'   geom_point(aes(x, y), data = grid, inherit.aes = FALSE) +
#'   coord_equal() +
#'   facet_wrap(~ method) + xlim(-1.1, 1.1) + ylim(-1.1, 1.1)
#'
#'
#' ## projecting a dataset - rvnorm
#' ########################################
#'
#' library("ggplot2")
#' library("dplyr")
#'
#' \dontrun{ requires stan
#'
#' (p <- lissajous(5, 5, 0, 0))
#' ggvariety(p, n = 251) + coord_equal()
#'
#' set.seed(1)
#' (samps <- rvnorm(1e4, p, sd = .025, output = "tibble"))
#'
#' ggplot(samps, aes(x, y)) +
#'   geom_point(aes(color = chain)) +
#'   coord_equal() +
#'   facet_wrap(~ chain)
#'
#' ggplot(samps, aes(x, y)) +
#'   geom_bin2d(binwidth = .03*c(1,1)) +
#'   coord_equal()
#'
#' # cut down on draws for time
#' subsamps <- samps %>% sample_n(500)
#' ggplot(subsamps, aes(x, y)) + geom_point() + coord_equal()
#'
#' subsamps %>%
#'   select(x, y) %>%
#'   as.matrix() %>%
#'   #apply(1, function(x0) project_onto_variety(x0, p, dt = .025, n_correct = 3)) %>% t() %>%
#'   apply(1, function(x0) project_onto_variety_lagrange(x0, p)) %>% t() %>%
#'   as.data.frame() %>% tibble::as_tibble() %>%
#'   purrr::set_names(c("x_proj", "y_proj")) %>%
#'   bind_cols(subsamps, .) ->
#'   subsamps
#'
#' ggvariety(p, n = 251) + coord_equal() +
#'   geom_segment(
#'     aes(x, y, xend = x_proj, yend = y_proj),
#'     data = subsamps, inherit.aes = FALSE
#'   ) +
#'   geom_point(
#'     aes(x, y, color = factor(chain)),
#'     data = subsamps, inherit.aes = FALSE
#'   )
#'
#' ggplot(subsamps, aes(x_proj, y_proj)) + geom_point() + coord_equal()
#'
#' }
#' 


#' @rdname project-onto-variety
#' @export
project_onto_variety <- function(
  x0, poly, dt = .05, varorder = sort(vars(poly)), 
  n_correct = 2, al = rnorm(length(x0)), 
  message = FALSE, tol = .Machine$double.eps^(1/2), 
  gfunc, dgfunc, ddgfunc, bias = 0
) {
  
  if (missing(x0)) stop("`x0` must be supplied.")
  if (missing(poly)) stop("`poly` must be supplied.")
  
  
  # compute gfunc, dgfunc, and ddgfunc if not user specified
  if (missing(gfunc) || missing(dgfunc) || missing(ddgfunc)) {
    
    if (!missing(gfunc) || !missing(dgfunc) || !missing(ddgfunc)) {
      "If any of `gfunc`, `dgfunc`, or `ddgfunc` is not provided, they are all computed internally." 
    }
    
    # polynomial as a function
    g <- poly
    gfunc <- as.function(g, varorder = varorder, silent = TRUE)
    
    # jacobian
    dg <- deriv(g, var = varorder)
    dgfunc <- as.function(dg, varorder = varorder, silent = TRUE)
    
    # hessian
    ddg <- lapply(dg, deriv, var = varorder)
    ddgfunc_list <- lapply(ddg, as.function, varorder = varorder, silent = TRUE)
    ddgfunc <- function(x) sapply(ddgfunc_list, function(f) f(x))
    
  }
  
  
  # if x0 is a matrix, apply to rows and return. this is a hack for now,
  # but it's pretty useful
  if (is.matrix(x0) || is.data.frame(x0)) {
    out <- apply(
      x0, 1, project_onto_variety, 
      poly = poly, dt = dt, varorder = varorder, n_correct = n_correct,
      al = al, message = message, tol = tol,
      gfunc = gfunc, dgfunc = dgfunc, ddgfunc = ddgfunc
    )
    out <- t(out)
    if (is.data.frame(x0)) out <- as.data.frame(out)
    if (inherits(x0, "tbl_df")) class(out) <- c("tbl_df", "tbl", "data.frame")
    return(out)
  }
  
  # begin main computation for one vector x0
  n_vars <- length(varorder)
  
  ts <- seq(1, 0, -dt)
  vn <- c(x0, al[1], 0)
  
  Ha <- function(v, t) {
    # v = (x, y, la0, la1)
    x <- v[1:n_vars]; la <- v[-(1:n_vars)]
    c(
      gfunc(x) - t*gfunc(x0),
      as.numeric(cbind((x-x0), dgfunc(x)) %*% la),
      la[1] + la[2]*al[2] - al[1]
    )
  }
  # Ha(vn, .99)
  
  JHa <- function(v, t) {
    x <- v[1:n_vars]
    la0 <- v[n_vars+1] 
    la1 <- v[n_vars+2] 
    al1 <- al[2]
    rbind(
      c(dgfunc(x), 0, 0),
      cbind(
        la0*diag(n_vars) + la1*ddgfunc(x),
        x - x0,
        dgfunc(x)
      ),
      c(rep(0, n_vars), 1, al1)
    )
  }
  # JHa(vn, .99)
  
  # partial derivative of H w.r.t t:
  Ht <- function(v, t) c(-gfunc(x0), rep(0, n_vars + 2 - 1))
  # Ht(c(1,1), 1)
  
  
  for (i in 2:length(ts)) {
    
    # predict
    # vn <- vn + as.numeric(solve(JHa(vn, t = ts[i-1])) %*% Ht(vn, t = ts[i-1])) * dt
    vn <- vn + solve(JHa(vn, t = ts[i-1]), Ht(vn, t = ts[i-1])) * dt
    if (message) message(paste(round(vn, 5), collapse = " "))
    
    # correct
    for (. in 1:n_correct) {
      # vnp1 <- as.numeric( vn - solve(JHa(vn, t = ts[i])) %*% Ha(vn, t = ts[i]) )
      vnp1 <- vn - solve(JHa(vn, t = ts[i]) + bias*diag(n_vars+2), Ha(vn, t = ts[i]))
      vn <- vnp1
      if (message) message("  ", paste(round(vn, 5), collapse = " "))
    }
    
  }
  
  resid <- gfunc(vn[1:n_vars])
  if (abs(resid) >= tol) warning(sprintf("Tolerance not met (residual = %f).", resid), call. = FALSE)
  
  vn[1:n_vars]
}












#' @rdname project-onto-variety
#' @export
project_onto_variety_lagrange <- function(
  x0, poly, varorder = vars(poly), method = "newton", 
  maxit = 1e3, tol = .Machine$double.eps^(1/2), tol_x = .Machine$double.eps^(1/2),
  message = FALSE, ...
) {
  # this only projects R2 points
  
  # if x0 is a matrix, apply to rows and return. this is a hack for now,
  # but it's pretty useful
  if (is.matrix(x0) || is.data.frame(x0)) {
    out <- apply(
      x0, 1, project_onto_variety_lagrange, 
      poly = poly, varorder = varorder, method = method, maxit = maxit, 
      tol = tol, tol_x = tol_x, 
      message = message, ...
    )
    out <- t(out)
    if (is.data.frame(x0)) out <- as.data.frame(out)
    if (inherits(x0, "tbl_df")) class(out) <- c("tbl_df", "tbl", "data.frame")
    return(out)
  }

  # formulate lagrangian
  form <- paste0("(", varorder, " - ", x0, ")^2", collapse = " + ")
  L <- mp(form) + mp("la")*poly
  dL <- deriv(L, var = c(varorder, "la"))
  dLf <- as.function(dL, varorder = c(varorder, "la"), silent = TRUE)
  
  if (method == "newton") {
    
    ddL <- lapply(dL, deriv, var = c(varorder, "la"))
    ddLf <- lapply(ddL, as.function, varorder = c(varorder, "la"), silent = TRUE)
    H <- function(v) do.call(rbind, lapply(ddLf, function(f) f(v)))
    
    la0 <- optimize(function(.) sum(dLf(c(x0,.))^2), lower = 0, upper = 1e3)$minimum
    
    xn_1 <- c(x0, la0)
    xn <- xn_1 + solve( H(xn_1), -dLf(xn_1) )

    count <- 0L
    while(sum(abs(dLf(xn))) > tol && sum(abs(xn - xn_1)) > tol_x && count <= maxit) {
      xn_1 <- xn
      xn <- xn_1 + solve( H(xn_1), -dLf(xn_1) )
      if (message) message(paste(round(xn, 5), collapse = " "))
      count <- count + 1L
    }
    xn <- xn[seq_along(varorder)]
    
  } else {
 
    la0 <- optimize(function(.) sum(dLf(c(x0,.))^2), lower = 0, upper = 1e3)$minimum
    xn <- optim(c(x0, la0), function(.) sum(dLf(.))^2, method = method, ...)$par[1:2]
    xn <- xn[seq_along(varorder)]
    
  }
  
  pf  <- as.function(poly, varorder = varorder, silent = TRUE)
  resid <- pf(xn)
  if (abs(resid) >= tol) warning(sprintf("Tolerance not met (residual = %f).", resid), call. = FALSE)
  
  xn
  
}







#' @rdname project-onto-variety
#' @export
project_onto_variety_gradient_descent <- function(
  x0, poly, varorder = vars(poly), ga = .01, max_ga = .1, 
  method = c("line", "optimal", "fixed"), 
  tol = .Machine$double.eps^(1/2), tol_x = .Machine$double.eps^(1/2),
  maxit = 1e3, message = FALSE
) {
  
  # if x0 is a matrix, apply to rows and return. this is a hack for now,
  # but it's pretty useful
  if (is.matrix(x0) || is.data.frame(x0)) {
    out <- apply(
      x0, 1, project_onto_variety_gradient_descent, 
      poly = poly, varorder = varorder, ga = ga, max_ga = max_ga, method = method, 
      tol = tol, tol_x = tol_x, 
      maxit = maxit, message = message
    )
    out <- t(out)
    if (is.data.frame(x0)) out <- as.data.frame(out)
    if (inherits(x0, "tbl_df")) class(out) <- c("tbl_df", "tbl", "data.frame")
    return(out)
  }

  # arg check
  method <- match.arg(method)
  
  # create functions
  pf  <- as.function(poly, varorder = varorder, silent = TRUE)
  p2f <- as.function(poly^2, varorder = varorder, silent = TRUE)
  dp2 <- deriv(poly^2, var = varorder)
  dp2f <- as.function(dp2, varorder = varorder, silent = TRUE)
  ddp2 <- lapply(dp2, function(.) deriv(., var = varorder))
  ddp2f <- function(v) {
    sapply(
      lapply(ddp2, as.function, silent = TRUE),
      function(f) f(v)
    )
  }
  
  
  if (method == "optimal") {
    
    xn_2 <- x0
    
    direction_2 <- dp2f(xn_2)
    ga <- optimize(function(.) p2f(xn_2 - .*direction_2), lower = 0, upper = max_ga)$minimum
    xn_1 <- xn_2 - ga*direction_2
    if (message) message(paste(round(xn_1, 5), collapse = " "))
    
    direction_1 <- dp2f(xn_1)
    ga <- optimize(function(.) p2f(xn_1 - .*direction_1), lower = 0, upper = max_ga)$minimum
    xn <- xn_1 - ga*direction_1
    if (message) message(paste(round(xn, 5), collapse = " "))
    
    count <- 0
    while (abs(p2f(xn)) > tol && sum(abs(xn-xn_1)) > tol_x && count <= maxit) {
      xn_2 <- xn_1
      xn_1 <- xn
      direction_2 <- dp2f(xn_2)
      direction_1 <- dp2f(xn_1)
      ga   <- abs(sum((xn_1 - xn_2)*(direction_1 - direction_2))) / sum((direction_1 - direction_2)^2)
      xn   <- xn_1 - ga*direction_1
      count <- count + 1
      if (message) message(paste(round(xn, 5), collapse = " "))
    }
    
  } else if (method == "line") {
    
    xn_1 <- x0
    
    direction <- dp2f(xn_1)
    ga <- optimize(function(.) p2f(xn_1 - .*direction), lower = 0, upper = max_ga)$minimum
    xn <- xn_1 - ga*dp2f(xn_1)
    if (message) message(paste(round(xn, 5), collapse = " "))
    
    count <- 0
    while (abs(p2f(xn)) > tol && sum(abs(xn-xn_1)) > tol_x && count <= maxit) {
      xn_1 <- xn
      direction <- dp2f(xn_1)
      ga <- optimize(function(.) p2f(xn_1 - .*direction), lower = 0, upper = max_ga)$minimum
      xn   <- xn_1 - ga*direction
      count <- count + 1
      if (message) message(paste(round(xn, 5), collapse = " "))
    }
    
  } else if (method == "fixed") {
    
    xn_1 <- x0
    
    direction <- dp2f(xn_1)
    xn <- xn_1 - ga*direction
    if (message) message(paste(round(xn, 5), collapse = " "))
    
    count <- 0
    while (abs(p2f(xn)) > tol && sum(abs(xn-xn_1)) > tol_x && count <= maxit) {
      xn_1 <- xn
      direction <- dp2f(xn_1)
      xn   <- xn_1 - ga*direction
      count <- count + 1
      if (message) message(paste(round(xn, 5), collapse = " "))
    }
    
  }

  resid <- pf(xn)
  if (abs(resid) >= tol) warning(sprintf("Tolerance not met (residual = %f).", resid), call. = FALSE)
  
  xn
}








#' @rdname project-onto-variety
#' @export
project_onto_variety_newton <- function(
    x0, poly, varorder = vars(poly), ga = 1e-4, max_ga = 2, 
    method = c("line", "fixed"), 
    tol = .Machine$double.eps^(1/2), tol_x = .Machine$double.eps^(1/2),
    maxit = 1e3, message = FALSE
) {
  
  # if x0 is a matrix, apply to rows and return. this is a hack for now,
  # but it's pretty useful
  if (is.matrix(x0) || is.data.frame(x0)) {
    out <- apply(
      x0, 1, project_onto_variety_newton, 
      poly = poly, varorder = varorder, ga = ga, max_ga = max_ga, 
      method = method, tol = tol, tol_x = tol_x, 
      maxit = maxit, message = message
    )
    out <- t(out)
    if (is.data.frame(x0)) out <- as.data.frame(out)
    if (inherits(x0, "tbl_df")) class(out) <- c("tbl_df", "tbl", "data.frame")
    return(out)
  }
  
  # arg check
  method <- match.arg(method)
  
  # create functions
  pf  <- as.function(poly, varorder = varorder, silent = TRUE)
  p2f <- as.function(poly^2, varorder = varorder, silent = TRUE)
  dp2 <- deriv(poly^2, var = varorder)
  dp2f <- as.function(dp2, varorder = varorder, silent = TRUE)
  ddp2 <- lapply(dp2, function(.) deriv(., var = varorder))
  ddp2f <- function(v) {
    sapply(
      lapply(ddp2, as.function, silent = TRUE),
      function(f) f(v)
    )
  }
  
  # iterate 
  xn_1 <- x0
  
  direction <- solve(ddp2f(xn_1), -dp2f(xn_1))
  if (method == "line") ga <- optimize(function(.) p2f(xn_1 + .*direction), lower = 0, upper = max_ga)$minimum
  xn <- xn_1 + ga*direction
  if (message) message(paste(round(xn, 5), collapse = " "))
  
  count <- 0
  while (abs(p2f(xn)) > tol && sum(abs(xn-xn_1)) > tol_x && count <= maxit) {
    xn_1 <- xn
    direction <- solve(ddp2f(xn_1), -dp2f(xn_1))
    if (method == "line") ga <- optimize(function(.) p2f(xn_1 + .*direction), lower = 0, upper = max_ga)$minimum
    xn   <- xn_1 + ga*direction
    count <- count + 1
    if (message) message(paste(round(xn, 5), collapse = " "))
  }
   
  
  resid <- pf(xn)
  if (abs(resid) >= tol) warning(sprintf("Tolerance not met (residual = %f).", resid), call. = FALSE)
  
  xn
}