#' Add an outlier flag to raw GHCN-D data
#'
#' Takes a data set generated by `get_weather_data()` (or any of the
#' `snowload2::get_x_data` functions) and flags the observations that were
#' found to be outliers during the national snow load project in 2020, or that
#' have a QFLAG problem.
#'
#' The function performs the following steps:
#' \enumerate{
#'   \item Drops observations dated after `date_max`.
#'   \item Drops every `WESD` observation recorded in 1952.
#'   \item Left-joins `HTSoutliers::outliers_ghcnd` on `ID`, `ELEMENT`, `DATE`
#'     and `VALUE`.
#'   \item Sets `OUTLIER_FINAL` to 0 and `TYPE` to `""` for observations that
#'     have no match in the outlier table.
#' }
#'
#' Outliers were only flagged up to 2020-8-1, so `date_max` defaults to that
#' date. This way, observations after that point won't be marked as
#' non-outliers when they have never been checked. It is advised to leave this
#' date as it is.
#'
#' In `OUTLIER_FINAL`, 0 means not an outlier and 1 means outlier.
#'
#' @param data The data set to be flagged. Must contain the columns `ID`,
#'   `DATE`, `ELEMENT` and `VALUE`.
#' @param date_max Date object; observations after this date are removed.
#'
#' @return A data frame with the columns `ID`, `DATE`, `ELEMENT`, `VALUE`,
#'   `TYPE` and `OUTLIER_FINAL` (a factor). All other columns of `data` are
#'   dropped.
#' @export
#'
#' @importFrom dplyr .data
#'
#' @examples
#' \dontrun{
#' UT <- get_weather_data("UT")
#' UT_flagged <- create_flagged_dataset(UT)
#' }
create_flagged_dataset <- function(data,
                                   date_max = base::as.Date("2020-8-1")) {
  # Fail early with a readable message instead of an opaque error from deep
  # inside the dplyr pipeline.
  required_cols <- c("ID", "DATE", "ELEMENT", "VALUE")
  missing_cols <- base::setdiff(required_cols, base::names(data))
  if (base::length(missing_cols) > 0) {
    base::stop(
      "`data` is missing required column(s): ",
      base::paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  data %>%
    # Observations after date_max were never checked for outliers.
    dplyr::filter(.data$DATE <= date_max) %>%
    # Drop all WESD observations recorded in 1952.
    dplyr::filter(
      !(lubridate::year(.data$DATE) == 1952 & .data$ELEMENT == "WESD")
    ) %>%
    # Mark the observations using the outlier table; unmatched rows get NA in
    # OUTLIER and TYPE.
    dplyr::left_join(
      HTSoutliers::outliers_ghcnd,
      by = c("ID", "ELEMENT", "DATE", "VALUE")
    ) %>%
    # Keep only the needed variables. Column names are given as strings
    # because `.data$col` inside select() is deprecated by tidyselect.
    dplyr::select("ID", "DATE", "ELEMENT", "VALUE", "OUTLIER", "TYPE") %>%
    dplyr::mutate(
      # Rows absent from the outlier table are treated as non-outliers (0).
      OUTLIER_FINAL = base::as.factor(
        base::ifelse(base::is.na(.data$OUTLIER), 0, .data$OUTLIER)
      ),
      TYPE = base::ifelse(base::is.na(.data$TYPE), "", .data$TYPE)
    ) %>%
    # OUTLIER has been replaced by OUTLIER_FINAL.
    dplyr::select("ID", "DATE", "ELEMENT", "VALUE", "TYPE", "OUTLIER_FINAL")
}
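# NOTE(review): the two lines below appear to be web-page boilerplate unrelated
# to this function; they are kept as comments so the file remains valid R.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.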