R/plr.R In psda: Polygonal Symbolic Data Analysis

Documented in plr

#' Polygonal linear regression
#' @description  plr is used to fit polygonal linear models.
#' @param formula an object of class "formula": a symbolic description of the model to be fitted.
#' @param data a environment that contains the variables of the study.
#' @param model logicals. If TRUE the corresponding components of the fit are returned.
#' @param ... additional arguments to be passed to the low level polygonal linear regression fitting functions.
#' @return residuals is calculated as the response variable minus the fitted values.
#' @return rank the numeric rank of the fitted polygonal linear model.
#' @return call the matched call.
#' @return fitted.values the fitted mean values.
#' @return coefficients a named vector of coefficients.
#' @return model the matrix model for center and radius.
#' @details Polygonal linear regression is the first model to explain the behavior of a symbolic polygonal
#' variable in furnction to other polygonal variables, dependent and regressors, respectively.
#' \href{https://www.sciencedirect.com/science/article/pii/S0950705118304052}{PLR} is based on the
#' least squares and uses the center and radius of polygons as representation them. The model is
#' given by \eqn{y = X\beta + \epsilon}, where \eqn{y, X, \beta}, and \eqn{\epsilon} is the dependent
#' variable, matrix model, unknown parameters, and non-observed errors. In the model, the vector
#' \eqn{y = (y_c^T, y_r)^T}, where \eqn{y_c} and \eqn{y_r} is the center and radius of center and radius.
#' The matrix model \eqn{X = diag(X_c, X_r)} for \eqn{X_c} and \eqn{X_r} describing the center and radius
#' of regressors variables and finally, \eqn{\beta = (\beta_c^T, \beta_r^T)^T}. A detailed study about the
#' model can be found in \href{https://www.sciencedirect.com/science/article/pii/S0950705118304052}{Silva et al.(2019)}.
#' @examples
#' yp <- psim(10, 10) #simulate 10 polygons of 10 sides
#' xp1 <- psim(10, 10) #simulate 10 polygons of 10 sides
#' xp2 <- psim(10, 10) #simulate 10 polygons of 10 sides
#' e <- new.env()
#' e$yp <- yp #' e$xp1 <- xp1
#' e$xp2 <- xp2 #' fit <- plr(yp~xp1+xp2, e) #' @references Silva, W.J.F, Souza, R.M.C.R, Cysneiros, F.J.A. (2019) \url{https://www.sciencedirect.com/science/article/pii/S0950705118304052}. #' @exportClass plr #' @export plr <- function(formula, data, model = TRUE, ...){ if(class(formula) != "formula"){ stop("Insert a valid formula!") } terms_formula <- terms(formula) intercept <- attr(terms_formula, 'intercept') nf <- all.vars(formula) n_variables <- length(nf) n_regressors <- n_variables - 1 n_observations <- length( get(nf[1], envir = data)) args <- list() cl <- match.call() for(i in 1:n_variables){ args[[i]] <- get(nf[i], envir = data) } names(args) <- nf missings_position <- unlist(sapply(args, function(x) which(is.na(x)))) if(length(missings_position) != 0){ args <- lapply(args, function(x) x[-c(missings_position)]) len_missings <- length(missings_position) } else{ args <- args[sapply(args, function(x) !is.null(x))] len_missings <- 0 } response_variable <- args[[1]] args <- args[nf[-1]] x_centers <- lapply(args, function(x) t(sapply(x, pmean_id))) y_centers <- t(sapply(response_variable, pmean_id)) n_observations <- nrow(x_centers[[1]]) mat_x <- lapply(x_centers, function(x) t(sapply(x, function(y) y))) x_radius <- lapply(args, function(x) lapply(x, function(y){ dist(matrix(c(pmean_id(y), y[1,]), ncol = 2, byrow = T)) } )) x_radius <- matrix(unlist(x_radius), ncol = (n_variables - 1)) colnames(x_radius) <- NULL y_radius <- sapply(response_variable, function(y) { dist(matrix(c(pmean_id(y), y[1,]), ncol = 2, byrow = T)) }) mat_xc <- list() for (j in 1:n_regressors){ for (i in 1:n_observations) { mat_xc[[j]] <- matrix(mat_x[[j]][1:n_observations], nrow = n_observations, byrow = T) } } mat_xc <- matrix(unlist(mat_xc), ncol = (n_variables - 1)) colnames(mat_xc) <- nf[-1] colnames(x_radius) <- nf[-1] yc <- y_centers[, 1] yr <- y_radius if(intercept == T){ xc <- cbind(rep(1, n_observations), mat_xc) xr <- cbind(rep(1, n_observations), x_radius) mat_zero <- matrix(0, nrow = nrow(xc), ncol = ncol(xc)) xc_zero <- cbind(xc, mat_zero) xr_zero <- cbind(mat_zero, xr) X <- rbind(xc_zero, xr_zero) } else{ xc <- mat_xc xr <- x_radius mat_zero <- matrix(0, nrow = nrow(xc), ncol = ncol(xc)) xc_zero <- cbind(xc, mat_zero) xr_zero <- cbind(mat_zero, xr) X <- rbind(xc_zero, xr_zero) } Y <- c(yc, yr) coefficients <- solve(t(X) %*% X) %*% t(X) %*% Y beta_pol_center <- coefficients[1:n_variables] beta_pol_radius <- coefficients[-(1:n_variables)] coefficients <- c(beta_pol_center, beta_pol_radius) Y_hat <- X %*% coefficients rownames(coefficients) <- NULL if(intercept == T){ names(coefficients) <- c('(center-intercept)', paste('center-', nf[-1], sep = ''), '(radius-intercept)', paste('radius-', nf[-1], sep = '')) } else{ names(coefficients) <- c(paste('center-', nf[-1], sep = ''), paste('radius-', nf[-1], sep = '')) } res <- new.env() res$coefficients <- coefficients

residuals <- Y - Y_hat

if (n_variables == 0L) {
return(list(coefficients = numeric(), residuals = Y,
fitted.values = 0 , rank = 0))
}

rank <- ncol(X)

res$rank <- rank res$call <- cl
res$fitted.values <- Y_hat res$residuals <- residuals
mf <- sum(is.na(response_variable))
class(len_missings) <- 'omit'
res$na.action <- missings_position if(model){ center_radius_data <- data.frame(yc = yc, yr = yr, mat_xc, x_radius) names_center_radius_data <- paste(nf[1], 1:2, paste('_center', sep = ''), sep = '') names(center_radius_data) <- c('yc', 'yr', paste0("xp", rep(1 : (2 * n_regressors), each = 2, length.out = (2 * n_regressors)), "", c('c', 'r'))) res$model <- center_radius_data
res$model <- X } res$terms <- terms_formula

class(res) <- 'plr'
res
}


Try the psda package in your browser

Any scripts or data that you put into this service are public.

psda documentation built on July 1, 2020, 6:10 p.m.