#' Distribute mean values
#'
#' @description Distributes mean values to a shorter time interval. The missing datetimes are inserted
#' and then the values are repeated. This function is typically used to downscale obs values such as
#' t, ea, and u.
#' @param obs Required. The \pkg{CRHMr} data frame of obs values.
#' @param obsCols Optional. A vector containing the columns to be imputed in the obs data frame, not
#' including the datetime. The default \option{all} specifies all columns.
#' @param timeStep Required. The time step (in hours) for the interpolated values. This value must be
#' smaller than the time step in the original time series.
#' @param maxLength Optional. The maximum gap length to be interpolated. Defaults to 5 time steps.
#' @param quiet Optional. Suppresses display of messages, except for errors. If you are calling this
#' function in an \R script, you will usually leave \code{quiet=TRUE} (i.e. the default). If you are
#' working interactively, you will probably want to set \code{quiet=FALSE}.
#' @param logfile Optional. Name of the file to be used for logging the action. Normally not use
#'
#' @return If successful, returns a dataframe of the selected columns interpolated to the specified time
#' step. If unsuccessful, returns an error.
#' @author Kevin Shook
#' @seealso \code{\link{distributeInst}}
#' @importFrom stringr str_c
#' @export
#'
#' @examples \dontrun{
#' hourly_vals <- distributeMean(vegreville, c(1,2,3), timeStep = 1)}
distributeMean <- function(obs, obsCols='all', timeStep=0, maxLength=5,
quiet=TRUE, logfile=''){
if (nrow(obs) == 0) {
stop('Missing any values')
}
obs_name <- deparse(substitute(obs))
# subset dataframe
if (length(obsCols) == 1) {
if (obsCols == 'all') {
var_cols <- seq(2, ncol(obs))
obs <- obs
} else {
var_cols <- obsCols + 1
obs <- obs[,c(1, var_cols)]
}
}
else{
var_cols <- obsCols + 1
obs <- obs[,c(1,var_cols)]
}
if (timeStep <= 0) {
stop('Time step must be specified and > 0')
}
dt <- as.numeric(difftime(obs$datetime[2], obs$datetime[1], units = 'hours'))
if (timeStep >= dt) {
stop("Time step must be smaller than in the original data")
}
# create dataframe and merge values into it
clean <- na.omit(obs)
first.clean.datetime <- clean[1,1]
last.clean.datetime <- clean[nrow(clean),1]
cleanDatetime <- seq(from = first.clean.datetime,
to = last.clean.datetime, by = timeStep*3600)
cleanDatetime <- data.frame(cleanDatetime)
names(cleanDatetime) <- 'datetime'
cleanVals <- merge(cleanDatetime, obs, by = 'datetime', all.x = TRUE)
clean_rows <- nrow(cleanVals)
clean_cols <- ncol(cleanVals)
# now have a dataframe with all datetimes and NA for missing values
# now interpolate values
for (col in 2:clean_cols) {
vals <- cleanVals[,col]
vals_clean <- na.omit(as.vector(vals))
num_original <- length(vals_clean)
reps <- ceiling(clean_rows / num_original)
# do reps
reps <- rep(vals_clean, each = reps)
cleanVals[,col] <- reps[1:clean_rows]
}
distrib <- cleanVals
# output log files
obs.info <- CRHM_summary(distrib)
if (!quiet)
print(obs.info)
comment <- paste('distributeMean dataframe:', obs_name,
' Variables:', str_c(names(distrib)[-1],
collapse = ','), sep = '')
result <- logAction(comment, logfile)
if (result)
return(distrib)
else
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.