#' Fits loess curve to each station in a data set
#'
#' Takes a data set with \code{ID}, \code{DATE} and \code{SNWD} (use
#' \code{\link{create_model_variables}}), and adds a column with a loess curve
#' upper estimate, and a column with 1 for observations which are more than 3.5
#' SD above the loess estimate.
#'
#' The data set can have an arbitrary number of stations, and a loess curve will
#' be fit for each station. For each station, each observation's \code{DATE} is
#' converted to a day of the year. So '1999-1-1' will be \code{DOY}=0,
#' '2004-1-1' will also be \code{DOY}=0. This effectively collapses each
#' stations data into one year where each day will have every years observation.
#' \code{msir::loess} is then fit with the given \code{span}, \code{nsigma}, and
#' \code{family} parameters. Some stations don't have enough data, or loess
#' simply won't converge, in these cases the default value returned for
#' \code{loess_outlier} is 1, and for \code{loess_upper} \code{NA} is returned.
#'
#'
#' @param data A data set with at least ID, DATE, and SNWD (use
#' \code{\link{create_model_variables}}).
#' @param span Default is .1, passed to \code{msir::loess_sd}.
#' @param nsigma Default is 3.5, is the number of standard deviations to use
#' when finding the upper bound from the loess curve.
#' @param family Default symmetric, passed to \code{msir::loess_sd}.
#' @param progress True if you want a progress bar.
#' @param ... extra parameters are passed to msir::loess_sd.
#'
#' @return A copy of the data set with \code{loess_upper} (for plotting) and
#' \code{loess_outlier} (1 for outlier, 0 for not) added.
#' @export
#'
#' @importFrom dplyr .data
#'
#' @examples
#' \dontrun{
#' # Some test station id's
#'
#' test_ids <- c("US1COBO0074", "US1COBO0024",
#' "US1COAR0114", "US1COLR0758",
#' "USC00050437", "USC00053038",
#' "US1COAD0098", "USW00023066",
#' "USC00051179", "US1COLR0187",
#' "US1CODG0079", "US1CODG0033",
#' "US1COBO0266", "USC00052501",
#' "US1COPU0080", "USC00056410",
#' "US1COLR0520", "US1COLR0157",
#' "US1COLR0142", "USC00056012")
#'
#' # creates a testing data set of colorado data
#' test_data <- get_weather_data(test_ids)
#' test_data <- create_flagged_dataset(test_data)
#' test_data <- create_model_variables(test_data)
#' test_data$ID <- as.character(test_data$ID)
#'
#' # fits loess models on all the testing data from colorado
#' test_data <- loess_model(test_data)
#'
#' # creates a confusion matrix to assess performance
#' caret::confusionMatrix(test_data$loess_outlier,
#' test_data$OUTLIER_FINAL,
#' positive="1")
#'
#' }
#'
loess_model <- function(data, span = .1, nsigma = 3.5,
family = "symmetric", progress = TRUE, ...) {
# Makes sure that the data only has SNWD
DOY <- NULL
DOY_RAW <- NULL
data <- data %>% dplyr::mutate(DOY_RAW = lubridate::yday(.data$DATE) - 1,
DOY = base::ifelse(DOY_RAW >= 182, # 182 is July 1st
DOY_RAW - 366,
DOY_RAW)) %>%
dplyr::select(-DOY_RAW)
# Loops through all the stations
stations <- base::unique(data$ID)
results <- NULL;
i <- 0;
for (station in 1:length(stations)) {
if (progress) {
svMisc::progress(station,
max.value = length(stations),
progress.bar = TRUE)
}
new_data <- dplyr::filter(data, .data$ID == stations[station])
base::tryCatch({
# train loes model
model_full <- msir::loess.sd(new_data$DOY, new_data$SNWD,
span = span, nsigma = nsigma,
family = family, ...)
# find prediction upper limit
loess_upper <- NULL
loess_outlier <- NULL
prediction <- base::data.frame(DOY = model_full$x,
loess_upper = model_full$upper) %>%
dplyr::distinct(.keep_all = TRUE)
# create the outlier flag
new_data <- dplyr::left_join(new_data, prediction, by = "DOY")
},
error = function(c) {
new_data <- dplyr::mutate(new_data,
loess_upper = -1)
}
)
if (!base::is.null(results)) {
results <- dplyr::bind_rows(results, new_data)
}
else {
results <- new_data
}
}
results$loess_upper <- base::ifelse(base::is.na(results$loess_upper),
-1,
results$loess_upper)
results$loess_outlier <- base::factor((results$SNWD > results$loess_upper),
levels = c(FALSE, TRUE),
labels = c("0", "1"))
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.