# R/FitLm.R In metR: Tools for Easier Analysis of Meteorological Fields

#### Documented in DetrendFitLmResidLm

#' Fast estimates of linear regression
#'
#' Computes a linear regression with [stats::.lm.fit] and returns the estimate
#' and, optionally, standard error for each regressor.
#'
#' @param y numeric vector of observations to model
#' @param ... numeric vectors of variables used in the modelling
#' @param intercept logical indicating whether to automatically add the intercept
#' @param se logical indicating whether to compute the standard error
#' @param r2 logical indicating whether to compute r squared
#' @param weights numerical vector of weights (which doesn't need to be normalised)
#' @param time time vector to use for detrending. Only necessary in the case of
#' irregularly sampled timeseries
#'
#' @return
#' FitLm returns a list with elements
#' \describe{
#'    \item{term}{the name of the regressor}
#'    \item{estimate}{estimate of the regression}
#'    \item{std.error}{standard error}
#'    \item{df}{degrees of freedom}
#'    \item{r.squared}{Percent of variance explained by the model (repeated in each term)}
#' }
#'
#' ResidLm and Detrend returns a vector of the same length
#'
#' If there's no complete cases in the regression, NAs are returned with no
#' warning.
#'
#' @examples
#' # Linear trend with "signficant" areas shaded with points
#' library(data.table)
#' library(ggplot2)
#' system.time({
#'   regr <- geopotential[, FitLm(gh, date, se = TRUE), by = .(lon, lat)]
#' })
#'
#' ggplot(regr[term != "(Intercept)"], aes(lon, lat)) +
#'     geom_contour(aes(z = estimate, color = after_stat(level))) +
#'     stat_subset(aes(subset = abs(estimate) > 2*std.error), size = 0.05)
#'
#' # Using stats::lm() is much slower and with no names.
#' \dontrun{
#' system.time({
#'   regr <- geopotential[, coef(lm(gh ~ date))[2], by = .(lon, lat)]
#' })
#' }
#'
#' @export
FitLm <- function(y, ..., intercept = TRUE, weights = NULL, se = FALSE, r2 = se) {
.FitLm(y = y, ..., intercept = intercept, weights = weights, se = se, r2 = r2, resid = FALSE)
}

#' @export
#' @rdname FitLm
ResidLm <- function(y, ..., intercept = TRUE, weights = NULL) {
.FitLm(y = y, ..., intercept = intercept, weights = weights, resid = TRUE)
}

#' @export
#' @rdname FitLm
Detrend <- function(y, time = seq_along(y)) {
m <- mean(y, na.rm = TRUE)
ResidLm(y, time) + m
}

.FitLm <- function(y, ..., intercept = TRUE, weights = NULL, se = FALSE, r2 = se, resid = FALSE) {
if (isTRUE(intercept)) {
X <- cbind((Intercept) = intercept, ...)
} else {
X <- cbind(...)
}

has_weights <- !is.null(weights)

term <- dimnames(X)[[2]]
missing <- term == ""
term[missing] <- paste0("V", seq_len(sum(missing)))

remove <- which(!stats::complete.cases(X) | is.na(y))
N <- length(y) - length(remove)
residuals <- rep(NA_real_, length(y))
# If empty, return NA
if (N < 2) {
if (resid) {
return(residuals)
}
estimate <- rep(NA_real_, length(term))
out <- list(term = term,
estimate = estimate)
if (se == TRUE) {
df <- N - ncol(X)
out$std.error <- estimate out$df <- rep(df, length(term))
}

if (r2 == TRUE) {
out$r.squared <- estimate out$adj.r.squared <- estimate
}

return(out)
} else {
if (length(remove) > 0) {
X <- X[-remove, ]
y <- y[-remove]
if (has_weights) {
weights <- weights[-remove]
}

}

if (has_weights) {
weights <- weights/sum(weights)
fit <- stats::lm.wfit(X, y, w = weights)
} else {
fit <- stats::.lm.fit(X, y)
}

estimate <- unname(fit$coefficients) } if (resid) { if (length(remove) > 0) { residuals[-remove] <- fit$residuals
}
residuals <- fit$residuals return(residuals) } out <- list(term = term, estimate = estimate) if (se == TRUE | r2 == TRUE) { df <- N - ncol(X) if (has_weights) { res_sum <- sum(weights*fit$residuals^2)
ss <- sum(weights*(y - stats::weighted.mean(y, w = weights))^2)
} else {
res_sum <- sum(fit$residuals^2) ss <- sum((y - mean(y))^2) } if (se == TRUE) { if (all(fit$residuals == 0)) {
out$std.error <- rep(NA_real_, length(term)) out$df <-  rep(NA_integer_, length(term))
} else {
p <- seq_len(fit$rank) if (is.qr(fit$qr)) fit$qr <- fit$qr$qr R <- chol2inv(fit$qr[p, p, drop = FALSE])
std.error <- sqrt(diag(R) * res_sum/df)
out$std.error <- std.error out$df <- rep(df, length(term))
}
}

if (r2 == TRUE) {
r_squared <- 1 - res_sum/ss
out$r.squared <- rep(r_squared, length(term)) out$adj.r.squared <- rep(adj_r_squared, length(term))
`