R/statistics.R

Defines functions get_lm_stats vector_stats vector_errors rmse

Documented in get_lm_stats rmse vector_errors vector_stats

#' Calculate RMSE
#'
#' Compute root mean square error (RMSE).
#'
#' @param x Numeric vector of data.
#' @param y Numeric vector of data to compare to x, same length.
#' @param dolog10 TRUE/FALSE, should x and y be logged (base 10)? (i.e. calculate RMSLE?)
#' @return Single numeric value.
#' @export
rmse <- function(x, y, dolog10=FALSE) {
    if (dolog10) {
        x <- log10(x)
        x[!is.finite(x)] <- NA
        y <- log10(y)
        y[!is.finite(y)] <- NA
    }
    return(sqrt(mean((x - y)^2, na.rm=TRUE)))
}


#' Calculate error vectors
#'
#' Given vectors x and y of the same length, compute various forms of error between the two.
#'
#' @param x Numeric vector of data.
#' @param y Numeric vector of data to compare to x, same length.
#' @return Dataframe with columns representing error for each point. Types of error include: error, magnitude of error, percent error, magnitude of percent error, error of logged values, and magnitude of error of logged values.
#' @export
vector_errors <- function(x, y) {

    # Compute model errors.
    error <- y - x
    error_mag <- abs(y - x)

    # Compute model percent errors.
    percent_error <- (y - x)/x * 100
    percent_error_mag <- abs((y - x)/x * 100)

    # Compute log error.
    log_error <- log10(y) - log10(x)
    log_error_mag <- abs(log10(y) - log10(x))

    # Remove non-finite values.
    error[!is.finite(error)] <- NA
    error_mag[!is.finite(error_mag)] <- NA
    percent_error[!is.finite(percent_error)] <- NA
    percent_error_mag[!is.finite(percent_error_mag)] <- NA
    log_error[!is.finite(log_error)] <- NA
    log_error_mag[!is.finite(log_error_mag)] <- NA

    return(data.frame(error = error,
                      error_mag = error_mag,
                      percent_error = percent_error,
                      percent_error_mag = percent_error_mag,
                      log_error = log_error,
                      log_error_mag = log_error_mag,
                      stringsAsFactors = FALSE))

}


#' Calculate statistics
#'
#' Calculate the statistics on a vector: Number of valid observations, mean, median, standard deviation, and coefficient of variation.
#'
#' @param x Numeric vector of data.
#' @return Named numeric vector of statistics, length 5.
#' @export
vector_stats <- function(x) {

    output <- rep(NA,5)
    output[1] <- sum(is.finite(x))
    output[2] <- mean(x,na.rm=T)
    output[3] <- median(x,na.rm=T)
    output[4] <- sd(x,na.rm=T)
    output[5] <- sd(x,na.rm=T)/mean(x,na.rm=T)
    names(output) <- c("nobs", "mean", "median", "sd", "cv")

    return(output)

}


#' Get p-value
#'
#' Calculate the F-test p-value from a linear model object generated by the lm() function.
#'
#' @param modelobject Object of class lm
#' @return F-test p-value (single numeric value)
#' @references
#' Source: Stephen Turner, Jan 2011
#' https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
#' @export
lmp <- function (modelobject) {
    if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
    f <- summary(modelobject)$fstatistic
    p <- pf(f[1],f[2],f[3],lower.tail=F)
    attributes(p) <- NULL
    return(p)
}


#' Get model object results
#'
#' Given a linear model object created with lm() or lmodel2(), retrieve the coefficients (intercept and slope) and a dataframe of simple statistics.
#'
#' @param modelobject Object of class lm or lmodel2
#' @param method For lmodel2 only, regression method: either "OLS", "MA", or "SMA", see ?lmodel2 for details
#' @return List containing coefficients in the first place and a dataframe of model statistics in the second place
#' @export
get_lm_stats <- function(modelobject, method="OLS") {
    if (class(modelobject)=="lm") {
        coefs <- summary(modelobject)$coefficients
        stats <- data.frame(Rsquared = summary(modelobject)$r.squared,
                            pvalue = lmp(modelobject),
                            num_obs = nobs(modelobject),
                            RMSE = sqrt(mean(modelobject$residuals^2)),
                            stringsAsFactors = FALSE)
    } else if (class(modelobject)=="lmodel2") {
        coefs <- modelobject$regression.results
        coefs <- coefs[coefs$Method==method,c("Intercept","Slope")]
        stats <- data.frame(Rsquared = modelobject$rsquare,
                            pvalue = ifelse(method=="SMA",NA,modelobject$P.param),
                            num_obs = modelobject$n,
                            stringsAsFactors = FALSE)
    }
    return(list(coefs=coefs, stats=stats))
}
BIO-RSG/oceancolouR documentation built on April 30, 2024, 7:54 a.m.