R/fit_elastic_regression.R

Defines functions make_reg_design get_reg_model_data fit_elastic_regression

Documented in fit_elastic_regression

#' Compute a elastic mean for a collection of curves
#' @name fit_elastic_regression
#' @description Computes a Fréchet mean for the curves stored in \code{data_curves} with respect
#' to the elastic distance. Constructor function for class \code{elastic_reg_model}.
#' @param formula an object of class "formula" of the form data_curves ~ ...".
#' @param data_curves list of \code{data.frame}s with observed points in each row. Each
#' variable is one coordinate direction. If there is a variable \code{t},
#' it is treated as the time parametrization, not as an additional coordinate.
#' @param x_data a \code{data.frame} with covariates.
#' @param knots set of knots for the parameter curves of the regression model
#' @param type if "smooth" linear srv-splines are used which results in a differentiable mean curve
#' if "polygon" the mean will be piecewise linear.
#' @param closed \code{TRUE} if the curves should be treated as closed.
#' @param eps the algorithm stops if L2 norm of coefficients changes less
#' @param max_iter maximal number of iterations
#' @param pre_align TRUE if curves should be pre aligned to the mean
#' @return an object of class \code{elastic_reg_model}, which is a \code{list}
#' with entries
#'   \item{type}{"smooth" if linear srv-splines or
#'   "polygon" if constant srv-splines were used}
#'   \item{coefs}{spline coeffiecients}
#'   \item{knots}{spline knots}
#'   \item{data_curves}{list of \code{data.frame}s with observed points in each row.
#'   First variable \code{t} gives the initial parametrization, second variable \code{t_optim}
#'   the optimal parametrization when the curve is aligned to the model prediction.}
#'   \item{closed}{\code{TRUE} if the regression model fitted closed curves.}
#' @export
#' @importFrom splines splineDesign
#' @examples
#' curve <- function(x_1, x_2, t){
#'   rbind(2*t*cos(6*t) - x_1*t , x_2*t*sin(6*t))
#' }
#' set.seed(18)
#' x_data <- data.frame(x_1 = runif(10,-1,1), x_2 = runif(10,-1,1))
#' data_curves <- apply(x_data, 1, function(x){
#'   m <- sample(10:15, 1)
#'   delta <- abs(rnorm(m, mean = 1, sd = 0.05))
#'   t <- cumsum(delta)/sum(delta)
#'   data.frame(t(curve((x[1] + 1), (x[2] + 2), t))
#'    + 0.07*t*matrix(cumsum(rnorm(2*length(delta))), ncol = 2))
#' })
#' reg_model <- fit_elastic_regression(data_curves ~ x_1 + x_2,
#'                                     data_curves = data_curves, x_data = x_data)
#' plot(reg_model)

fit_elastic_regression <- function(formula, data_curves, x_data, knots = seq(0,1,0.2), type = "smooth",
                                   closed = FALSE, max_iter = 10, eps = 0.001, pre_align = FALSE){
  #input checking
  formula <- as.formula(formula)
  if(formula[[2]] != "data_curves") stop("formula must be of form data_curves ~ ...")
  stopifnot(all(sapply(data_curves, is.data.frame)))
  # remove duplicated points
  data_curves <- lapply(data_curves, remove_duplicate, closed = closed)
  if(sum(sapply(data_curves, function(curve){attributes(curve)$points_rm}) > 0)){
    warning("Duplicated points in data curves have been removed!")
  }
  data_curves <- lapply(data_curves, function(curve){
    attr(curve, "points_rm") <- NULL
    curve
  })
  # input checking given parametrisation t
  lapply(data_curves, function(data_curve){
    if("t" %in% names(data_curve)) check_param(data_curve, closed)
  })
  # input checking for closed curves
  if(closed){
    data_curves <- lapply(data_curves, function(data_curve){
      check_closed(data_curve)
    })
  }
  # parametrisation with respect to arc length if not given,
  # after this, parametrisation is always in the first column
  data_curves <- lapply(data_curves, function(data_curve){
    if(!("t" %in% colnames(data_curve))){
      data.frame("t" = get_arc_length_param(data_curve), data_curve)
    } else {
      param <- data_curve$t
      data_curve$t <- NULL
      data.frame("t" = param, data_curve)
    }
  })
  ##############################################################################
  # create x dataframe
  x_data <- data.frame(x_data)

  # create x model matrix
  x_model_matrix <- model.matrix(formula, cbind("data_curves" = 1, x_data))

  #compute srv
  srv_data <- lapply(data_curves, get_srv_from_points)

  if(pre_align){
    #initial alignment as optimal alignment to the mean
    mean <- compute_elastic_mean(data_curves, knots = seq(0,1,0.01), max_iter = 100, type = "polygon",
                                 eps = 0.001, closed = closed)

    data_curves <- lapply(mean$data_curves, function(data_curve){
      if(data_curve$t_optim[nrow(data_curve)] == 0){
        data_curve$t_optim[nrow(data_curve)] <- 1
      }
      attr(data_curve, "dist_to_prediction") <- attributes(data_curve)$dist_to_mean
      attr(data_curve, "dist_to_mean") <- NULL
      data_curve
    })
  } else {
    data_curves <- lapply(data_curves, function(data_curve){
      data_curve <- data.frame("t" = data_curve$t, "t_optim" = data_curve$t, data_curve[,-1])
      attr(data_curve, "dist_to_prediction") <- Inf
      data_curve
    })
  }

  srv_data_initial <- lapply(1:length(srv_data), function(i){
    dat <- srv_data[[i]]
    idx_start <- which(data_curves[[i]]$t_optim == 0)[1]:nrow(dat)
    dat <- rbind(dat[idx_start,], dat[-idx_start,])
    dat$t <- dat$t - dat$t[1]
    dat$t <- sapply(dat$t, function(t) ifelse(t < 0, t + 1, t))
    dat
  })
  data_curves_t <- lapply(srv_data_initial, function(srv_data_initial){
    cbind("t" = c(srv_data_initial$t, 1), get_points_from_srv(srv_data_initial))
  })

  #initiate values
  t_optims <- lapply(data_curves, function(curve){
    c(sort(curve$t_optim[-length(curve$t_optim)]), 1)
  })
  coefs_list <- 0

  for(i in 1:max_iter){
    coefs_list_old <- coefs_list
    data_curves_t_now <- lapply(1:length(data_curves_t), function(i){
      idx <- c(TRUE, diff(t_optims[[i]]) != 0)
      data.frame("t" = t_optims[[i]][idx], data_curves_t[[i]][idx, -1])
    })

    model_data <- get_reg_model_data(data_curves_t_now, seq(0,1,0.01), x_model_matrix)
    design_mat <- make_reg_design(model_data[, 1:ncol(x_model_matrix)], model_data$m_long,
                                  knots = knots, type = type, closed = closed)

    coefs_long <- apply(model_data[,-(1:(ncol(x_model_matrix) + 1)), drop = FALSE], 2, function(q_m_x_long){
      q_m_x_long[!is.finite(q_m_x_long)] <- NA
      coef(lm(q_m_x_long ~ -1 + design_mat))
    })

    n_coefs <- nrow(coefs_long)/ncol(x_model_matrix)
    coefs_list <- lapply(1:ncol(x_model_matrix), function(i){
      coefs <- coefs_long[n_coefs*(i - 1) + 1:n_coefs, ]
      colnames(coefs) <- colnames(srv_data[[1]][,-1])
      rownames(coefs) <- paste0("coef_", 1:n_coefs)
      coefs
    })

    names(coefs_list) <- paste0("beta_", 1:ncol(x_model_matrix) - 1)

    #stop if coefficients don't change much anymore
    stop_crit <- sum((unlist(coefs_list) - unlist(coefs_list_old))^2)/sum(unlist(coefs_list)^2)
    if(stop_crit < eps | max_iter == 0){
      elastic_reg_model <- list("coefs_list" = coefs_list, "formula" = formula,
                                "data_curves" = data_curves, "x_data" = x_data,
                                "knots" = knots, "type" = type, "closed" = closed)
      class(elastic_reg_model) <- "elastic_reg_model"
      return(elastic_reg_model)
    }
    ############################################################################
    # update warping alignment
    t_optims <- lapply(1:length(data_curves), function(i){
      x_i <- x_model_matrix[i,]
      pred_coefs <- Reduce("+", lapply(1:length(x_i), function(j) x_i[j]*coefs_list[[j]]))

      if(type == "smooth"){
        pfun <- function(t){
          t(make_reg_design(x_design_mat = NULL, t = t, knots = knots,
                            type = type, closed = closed) %*% pred_coefs)
        }

        find_optimal_t(srv_curve = pfun, s = c(srv_data_initial[[i]][,1],1),
                       q = t(srv_data_initial[[i]][,-1]), initial_t = t_optims[[i]],
                       eps = 0.01)
      } else {
        find_optimal_t_discrete(r = knots, p = t(pred_coefs), s = c(srv_data_initial[[i]][,1],1),
                                q = t(srv_data_initial[[i]][,-1]), initial_t = t_optims[[i]],
                                eps = 0.01)
      }
    })
  }
  # add optimal time parametrisation to data curves
  for(i in 1:length(data_curves)){
    warping <- data_curves[[i]][-nrow(data_curves[[i]]),1:2]
    warping <- warping[order(warping$t_optim), ]
    warping$t_optim <- t_optims[[i]][-length(t_optims[[i]])]
    warping <- warping[order(warping$t), ]
    data_curves[[i]][,1:2] <- rbind(warping, c(1, ifelse(warping[1,2] == 0, 1, warping[1,2])))
    attr(data_curves[[i]], "dist_to_prediction") <- attributes(t_optims[[i]])$dist
  }

  elastic_reg_model <- list("coefs_list" = coefs_list, "formula" = formula,
                            "data_curves" = data_curves, "x_data" = x_data,
                            "knots" = knots, "type" = type, "closed" = closed)
  class(elastic_reg_model) <- "elastic_reg_model"
  return(elastic_reg_model)
}

get_reg_model_data <- function(data_curves_t, t_grid, x_model_data){
  q_m <- lapply(data_curves_t, function(data_curve){
    data_curve <- data.frame("t" = t_grid, get_evals(data_curve, t_grid = t_grid))
    get_srv_from_points(data_curve)[,-1]
  })

  #convert in long format
  x_long <- do.call("rbind", lapply(1:nrow(x_model_data), function(i){
    sapply(x_model_data[i,], function(x) rep(x, length(t_grid) - 1))
    }))
  m_long <- rep(t_grid[-length(t_grid)] + diff(t_grid)/2, length(q_m))
  q_m_long <- do.call(rbind, q_m)
  data.frame("x_long" = x_long, "m_long" = m_long, "q_m_long" = q_m_long)
}

# creating the design matrix
make_reg_design <- function(x_design_mat, t, knots, type = "smooth", closed = FALSE) {
  deg <- ifelse(type == "smooth", 1, 0)
  spline_design_mat <- splines::splineDesign(knots = c(rep(0, deg), knots, rep(1, deg)),
                                    x = t, outer.ok = TRUE, ord = deg + 1)
  if(closed == TRUE & type == "smooth"){
    spline_design_mat[,1] <- spline_design_mat[,1] + spline_design_mat[, ncol(spline_design_mat)]
    spline_design_mat <- spline_design_mat[,-ncol(spline_design_mat)]
  }

  if(is.null(x_design_mat)) return(spline_design_mat)
  x_design_mat <- as.matrix(x_design_mat)
  t(sapply(1:nrow(x_design_mat), function(i) kronecker(x_design_mat[i,], spline_design_mat[i,])))
}

Try the elasdics package in your browser

Any scripts or data that you put into this service are public.

elasdics documentation built on June 22, 2024, 12:27 p.m.