R/util.R
In itsadug: Interpreting Time Series and Autocorrelated Data Using GAMMs

Documented in convertNonAlphanumeric corfit diff_terms find_difference missing_est modeledf observations print_summary refLevels res_df summary_data timeBins

#' Prepare string for regular expressions 
#' (backslash for all non-letter and non-digit characters)
#' 
#' @export
#' @param text A text string (smooth term label) that needs to be converted 
#' to a regular expression. 
#' @return A regular expression string.
#' @author Jacolien van Rij
#' @examples
#' data(simdat)
#' # Model for illustrating coefficients:
#' m0 <- bam(Y ~ s(Time) + s(Subject, bs='re') 
#' + s(Time, Subject, bs='re'), data=simdat)
#' 
#' # get all coefficients:
#' coef(m0)
#' # to get only the Subject intercepts:
#' coef(m0)[grepl(convertNonAlphanumeric('s(Subject)'), names(coef(m0)))]
#' # to get only the Subject slopes:
#' coef(m0)[grepl(convertNonAlphanumeric('s(Time,Subject)'), names(coef(m0)))]
#'
#' @family Utility functions
convertNonAlphanumeric <- function(text) {
    return(gsub("([^a-zA-Z0-9])", "\\\\\\1", as.character(text)))
}





#' Calculate the correlation between the fitted model and data.
#' 
#' @export
#' @import mgcv
#' @param model A fitted regression model (using gam, or bam).
#' @return Numeric value: correlation between fitted model and data. 
#' @examples 
#' data(simdat)
#'
#' # Fit simple GAM model:
#' gam1 <- bam(Y ~ s(Time), data=simdat, discrete=TRUE)
#' corfit(gam1)
#' 
#' @family Utility functions
corfit = function(model) {
    if ("gamm" %in% class(model)) {
        stop(sprintf("Apply the function on the gam part of the model: modeledf(%s$gam)", deparse(substitute(model))))
    } else if ((!"gam" %in% class(model))) {
        stop("Model is not a gam object. Currently this function only works for models build with bam(), gam(), or gamm().")
    }
    return(cor(model$model[, 1], fitted(model)))
}





#' Compare the formulas of two models and return the difference(s). 
#' 
#' @export
#' @import mgcv
#' @param model1 A fitted regression model (using lm, glm, gam, or bam).
#' @param model2 A fitted regression model (using lm, glm, gam, or bam).
#' @return A list with model terms that are not shared by both models.
#' @author Jacolien van Rij
#' @examples 
#' data(simdat)
#'
#' # Fit simple GAM model:
#' gam1 <- bam(Y ~ s(Time), data=simdat)
#' gam2 <- bam(Y ~ Group+s(Time), data=simdat)
#' diff_terms(gam1, gam2)
#'
#' @family Utility functions
diff_terms <- function(model1, model2) {
    fa <- c()
    fb <- c()
    get_formula <- function(x) {
        if ("gam" %in% class(x)) {
            return(attr(terms(x$formula), "term.labels"))
        } else {
            stop(sprintf("This function does not work for model of class %s.", class(x)))
        }
    }
    fa <- get_formula(model1)
    fb <- get_formula(model2)
    d1 <- fa[!fa %in% fb]
    d2 <- fb[!fb %in% fa]
    out <- list()
    out[[deparse(substitute(model1))]] <- d1
    out[[deparse(substitute(model2))]] <- d2
    return(out)
}





#' Retrieve the degrees of freedom specified in the model.
#' 
#' @export
#' @import mgcv
#' @param model A fitted regression model (using gam, or bam).
#' @return Numeric value: degrees of freedom specified in the model.
#' @examples 
#' data(simdat)
#' 
#' \dontrun{
#' # models take somewhat longer time to run:
#' 
#' # Fit simple GAM model:
#' gam1 <- bam(Y ~ s(Time), data=simdat, discrete=TRUE)
#' modeledf(gam1)
#' gam2 <- bam(Y ~ s(Time)+s(Time, Subject, bs='fs', m=1), 
#'     data=simdat, discrete=TRUE)
#' modeledf(gam2)
#' gam3 <- bam(Y ~ Subject+s(Time, by=Subject), 
#'     data=simdat, discrete=TRUE)
#' modeledf(gam3)
#' gam4 <- bam(Y ~ Group+s(Time)+s(Time, Subject, bs='fs', m=1), 
#'     data=simdat, discrete=TRUE)
#' modeledf(gam4) 
#' gam5 <- bam(Y ~ Group+s(Time, by=Group)+s(Time, Subject, bs='fs', m=1), 
#'     data=simdat, discrete=TRUE)
#' modeledf(gam5) 
#' 
#' # Fit a gamm:
#' gam6 <- gamm(Y ~ Group+s(Time), random=list(Subject=~1) data=simdat, discrete=TRUE)
#' # this produces an error...
#' modeledf(gam6)
#' # ... but this works:
#' modeledf(gam6$gam)
#' }
#' 
#' @family Utility functions
modeledf = function(model) {
    if ("gamm" %in% class(model)) {
        stop(sprintf("Apply the function on the gam part of the model: modeledf(%s$gam)", deparse(substitute(model))))
    } else if ((!"gam" %in% class(model))) {
        stop("Model is not a gam object. Currently this function only works for models build with bam(), gam(), or gamm().")
    }
    null.space.dim <- 0
    if (length(model$smooth) > 0) {
        null.space.dim <- sum(sapply(model$smooth, FUN = function(x) {
            x$null.space.dim
        }, USE.NAMES = FALSE))
    }
    return(length(model$sp) + model$nsdf + null.space.dim)
}
#' Number of observations in the model.
#' 
#' @export
#' @import mgcv
#' @param model A fitted regression model (using gam, bam, (g)lm, (g)lmer).
#' @return Numeric value: number of observations that are considered by the 
#' model. 
#' @examples 
#' data(simdat)
#' # simulate some missing data:
#' simdat[sample(1:nrow(simdat), size=15),]$Y <- NA
#' simdat[sample(1:nrow(simdat), size=7),]$Group <- NA
#' 
#' # Fit simple GAM models:
#' gam1 <- bam(Y ~ s(Time), data=simdat, discrete=TRUE)
#' gam2 <- bam(Y ~ Group + s(Time, by=Group), data=simdat, discrete=TRUE)
#' 
#' # number of data points in data frame:
#' nrow(simdat)
#' 
#' # observations model gam1:
#' observations(gam1)
#' # observations model gam2:
#' observations(gam2)
#' 
#' @family Utility functions
observations = function(model) {
    if ("gamm" %in% class(model)) {
        stop(sprintf("Apply the function on the gam part of the model: observations(%s$gam)", deparse(substitute(model))))
    } else if ("lme" %in% class(model)) {
        return(nrow(model@frame))
    } else if ("lm" %in% class(model)) {
        return(nrow(model$model))
    } else if (("gam" %in% class(model))) {
        return(nrow(model$model))
    } else {
        stop("Currently this function only works for models build with bam(), gam(), glm(), lm(), lmer(), and glmer().")
    }
}
#' Retrieve the residual degrees of freedom from the model.
#' 
#' @export
#' @import mgcv
#' @param model A fitted regression model (using gam, or bam).
#' @return Numeric value: residual degrees of freedom from the model.
#' @examples 
#' data(simdat)
#'
#' # Fit simple GAM model:
#' gam1 <- bam(Y ~ s(Time), data=simdat, discrete=TRUE)
#' res_df(gam1)
#' # ... which is the same as:
#' 
#' modeledf(gam1)
#' 
#' @family Utility functions
res_df = function(model) {
    if ("gamm" %in% class(model)) {
        stop(sprintf("Apply the function on the gam part of the model: res_df(%s$gam)", deparse(substitute(model))))
    } else if ((!"gam" %in% class(model))) {
        stop("Model is not a gam object. Currently this function only works for models build with bam(), gam(), or gamm().")
    }
    return(nrow(model$model) - modeledf(model))
}





#' Return the regions in which the smooth is significantly different from zero.
#' 
#' @export
#' @import grDevices
#' @import graphics
#' @param mean A vector with smooth predictions.
#' @param se A vector with the standard error on the smooth predictions.
#' @param xVals Optional vector with x values for the smooth. 
#' When \code{xVals} is provided, the regions are returned in terms of x-
#' values, otherwise as indices.
#' @param f A number to multiply the \code{se} with, to convert the \code{se} 
#' into confidence intervals. Use 1.96 for 95\% CI and 2.58 for 99\%CI.
#' @param as.vector Logical: whether or not to return the data points as 
#' vector, or not. Default is FALSE, and a list with start and end points will
#'  be returned.
#' @return The function returns a list with start points of each region 
#' (\code{start}) and end points of each region (\code{end}). The logical 
#' \code{xVals} indicates whether the returned values are on the x-scale 
#' (TRUE) or indices (FALSE).
#' @examples
#' data(simdat)
#' 
#' # Use aggregate to calculate mean and standard deviation per timestamp:
#' avg <- aggregate(simdat$Y, by=list(Time=simdat$Time),
#'     function(x){c(mean=mean(x), sd=sd(x))})
#' head(avg)
#' # Note that column x has two values in each row:
#' head(avg$x)
#' head(avg$x[,1])
#' 
#' # Plot line and standard deviation:
#' emptyPlot(range(avg$Time), c(-20,20), h0=0)
#' plot_error(avg$Time, avg$x[,'mean'], avg$x[,'sd'], 
#'    shade=TRUE, lty=3, lwd=3)
#'
#' # Show difference with 0:
#' x <- find_difference(avg$x[,'mean'], avg$x[,'sd'], xVals=avg$Time)
#' # Add arrows:
#' abline(v=c(x$start, x$end), lty=3, col='red')
#' arrows(x0=x$start, x1=x$end, y0=-5, y1=-5, code=3, length=.1, col='red')
#' @author Jacolien van Rij
#'
#' @family Utility functions
find_difference <- function(mean, se, xVals = NULL, f = 1, as.vector = FALSE) {
    if (length(mean) != length(se)) {
        stop("The vectors mean and se are not equal in length.")
    } else {
        ub <- mean + f * se
        lb <- mean - f * se
        
        n <- which(!(ub >= 0 & lb <= 0))
        if (as.vector) {
            if (length(n) == 0) {
                return(rep(FALSE, length(mean)))
            } else {
                out <- rep(FALSE, length(mean))
                out[n] <- TRUE
                return(out)
            }
        } else {
            if (length(n) == 0) {
                return(NULL)
            } else {
                n_prev <- c(NA, n[1:(length(n) - 1)])
                n_next <- c(n[2:length(n)], NA)
                if (!is.null(xVals) & (length(xVals) == length(mean))) {
                  return(list(start = xVals[n[which(is.na(n - n_prev) | (n - n_prev) > 1)]], end = xVals[n[which(is.na(n_next - 
                    n) | (n_next - n) > 1)]], xVals = TRUE))
                } else {
                  return(list(start = n[which(is.na(n - n_prev) | (n - n_prev) > 1)], end = n[which(is.na(n_next - 
                    n) | (n_next - n) > 1)], xVals = FALSE))
                }
                
            }
        }
    }
}





#' Return indices of data that were not fitted by the model.
#' 
#' @export
#' @param model A fitted regression model (using lm, glm, gam, or bam).
#' @return The indices of the data that were not fitted by the model.
#' @author Jacolien van Rij
#' @examples 
#' data(simdat)
#'
#' # Add missing values:
#' set.seed(123)
#' simdat[sample(nrow(simdat), size=20),]$Y <- NA
#' # Fit simple linear model:
#' lm1 <- lm(Y ~ Time, data=simdat)
#' na.el <- missing_est(lm1)
#' length(na.el)
#'
#' @family Utility functions
missing_est <- function(model) {
    if ("lm" %in% class(model)) {
        el <- unique(model$na.action)
        if (length(el) > 0) {
            return(sort(el))
        } else {
            return(el)
        }
    } else {
        stop("This method is currently only implemented for lm, glm, gam, and bam models.")
    }
}





#' Return a list with reference levels for each factor.
#' 
#' @export
#' @description Function for retrieving all reference levels.
#' @param data Data
#' @return Named list with reference levels for each factor.
#' @author Jacolien van Rij
#' @family Utility functions
refLevels <- function(data) {
    var.factor <- names(data)[sapply(data, function(x){ inherits(x, 'factor')})]
    if(length(var.factor) > 0){
    	return( lapply(data[ ,var.factor], function(x){
    		return(list(ref=levels(x)[1], contr=contrasts(x)))
    	}) )
    }else{
    	return(list())
    }
}





#' Print a descriptive summary of a data frame.
#' 
#' @export
#' @description The function prints a summary of the data. 
#' Similar to the function \code{\link[utils]{str}}, but easier readable.
#' @param data A data frame.
#' @param print Logical: whether or not to print the summary.
#' @param n Number: maximum number of values being mentioned in the summary.
#' If NULL all values are being mentioned. Defaults to 10.
#' @return Optionally returns a named list with info.
#' @author Jacolien van Rij
#' @examples
#' data(simdat)
#' summary_data(simdat)
#' @family Utility functions
summary_data <- function(data, print = TRUE, n = 10) {
    
    labelColumns <- function(x, data) {
        out <- NULL
        cn <- ifelse(is.numeric(x), colnames(data)[x], x)
        cl <- class(data[, cn])
        if (inherits(data[, cn], "factor")) {
            vals <- sort(unique(as.character(data[, x])))
            n.cur <- length(vals) + 1
            if (!is.null(n)) {
                n.cur <- n
            }
            if (length(vals) > n.cur) {
                out <- sprintf("factor with %d values; set to the value(s): %s, ...", length(vals), paste(vals[1:n.cur], 
                  collapse = ", "))
            } else {
                out <- sprintf("factor; set to the value(s): %s.", paste(vals, collapse = ", "))
            }
        } else if (inherits(data[, cn], "numeric")) {
            if (length(unique(data[, x])) > 2) {
                out <- sprintf("numeric predictor; with %d values ranging from %f to %f.", length(unique(data[, 
                  x])), min(data[, x], na.rm = TRUE), max(data[, x], na.rm = TRUE))
            } else {
                out <- sprintf("numeric predictor; set to the value(s): %s.", paste(unique(data[, x]), collapse = ", "))
            }
        } else if (inherits(data[, cn], "matrix")) {
            if (length(unique(data[, x])) > 2) {
                out <- sprintf("a matrix predictor; with %d values ranging from %f to %f.", length(unique(data[, 
                  x])), min(data[, x], na.rm = TRUE), max(data[, x], na.rm = TRUE))
            } else {
                out <- sprintf("matrix predictor; set to the value(s): %s.", paste(unique(data[, x]), collapse = ", "))
            }
        } else {
            vals <- sort(unique(data[, x]))
            n.cur <- length(vals) + 1
            if (!is.null(n)) {
                n.cur <- n
            }
            if (length(vals) > n.cur) {
                out <- sprintf("%s vector with %d values; set to the value(s): %s, ...", class(data[, cn])[1], 
                  length(vals), paste(vals[1:n.cur], collapse = ", "))
            } else {
                out <- sprintf("%s vector; set to the value(s): %s.", class(data[, cn])[1], paste(vals, collapse = ", "))
            }
        }
        return(out)
    }
    mysummary <- sapply(colnames(data), function(x) {
        labelColumns(x, data)
    })
    if (print) {
        print_summary(mysummary)
    }
    invisible(mysummary)
}
#' Print a named list of strings, output from \code{\link{summary_data}}.
#' 
#' @export
#' @param sumlist Named list, output of \code{\link{summary_data}}.
#' @param title Optional, text string that will be printed as title.
#' @author Jacolien van Rij
#' @family Utility functions
print_summary <- function(sumlist, title = NULL) {
    if (is.null(title)) {
        cat("Summary:\n")
    } else {
        cat(title, "\n")
    }
    for (x in names(sumlist)) {
        cat("\t*", x, ":", sumlist[[x]], "\n")
    }
}





#' Label timestamps as timebins of a given binsize.
#' 
#' @export
#' @description Function for calculating timebins.
#' @param x Numerical vector with timestamp information.
#' @param binsize Size of the timebin, measured in the same units (often ms) 
#' as \code{x}.
#' @param pos Numerical value that determines the label of the binsize 
#' as proportion of the binsize. A value of 0 will provide the minimum 
#' timestamp within the bin as label, a value of 1 will provide the maximum 
#' value within the bin as label. Defaults to 0.5, the center of the bin.
#' @return Anumerical vector of the same size as \code{x} with timebin 
#' information.
#' @author Jacolien van Rij
#' @examples
#' data(simdat)
#' # grouping Time values in bins:
#' simdat$Timebin <- timeBins(simdat$Time, 200)
#' head(simdat)
#' 
#' # different labels:
#' simdat$Timebin2 <- timeBins(simdat$Time, 200, pos=0)
#' head(simdat)
#' @family Utility functions
timeBins <- function(x, binsize, pos = 0.5) {
    return((floor(x/binsize) + pos) * binsize)
}

Any scripts or data that you put into this service are public.

itsadug documentation built on June 17, 2022, 5:05 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

itsadug
Interpreting Time Series and Autocorrelated Data Using GAMMs

R/util.R
In itsadug: Interpreting Time Series and Autocorrelated Data Using GAMMs

Defines functions timeBins print_summary summary_data refLevels missing_est find_difference res_df observations modeledf diff_terms corfit convertNonAlphanumeric

Documented in convertNonAlphanumeric corfit diff_terms find_difference missing_est modeledf observations print_summary refLevels res_df summary_data timeBins

Try the itsadug package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

itsadug Interpreting Time Series and Autocorrelated Data Using GAMMs

R/util.R In itsadug: Interpreting Time Series and Autocorrelated Data Using GAMMs

Defines functions timeBins print_summary summary_data refLevels missing_est find_difference res_df observations modeledf diff_terms corfit convertNonAlphanumeric

Documented in convertNonAlphanumeric corfit diff_terms find_difference missing_est modeledf observations print_summary refLevels res_df summary_data timeBins

Try the itsadug package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

itsadug
Interpreting Time Series and Autocorrelated Data Using GAMMs

R/util.R
In itsadug: Interpreting Time Series and Autocorrelated Data Using GAMMs