#' Quality control on daily weather data
#'
#' @description
#' This function checks range of values for \code{METData} and implements
#' various test on daily weather data (persistence tests, internal
#' consistency tests) provided by the user.
#'
#' @param info_environments \code{data.frame} object with at least the 4 first
#' columns. \cr
#' \enumerate{
#' \item year: \code{numeric} Year label of the environment
#' \item location: \code{character} Name of the location
#' \item longitude: \code{numeric} longitude of the environment
#' \item latitude: \code{numeric} latitude of the environment
#' \item planting.date: (optional) \code{Date} YYYY-MM-DD
#' \item harvest.date: (optional) \code{Date} YYYY-MM-DD
#' \item elevation: (optional) \code{numeric}
#' \item IDenv: \code{character} ID of the environment (location x year)\cr
#' }
#' \strong{The data.frame should contain as many rows as Year x Location
#' combinations. Example: if only one location evaluated across four years, 4
#' rows should be present.}
#'
#' @param daily_weather_data a \code{data.frame} which contains the following
#' mandatory columns:
#' \enumerate{
#' \item longitude \code{numeric}
#' \item latitude \code{numeric}
#' \item year \code{numeric}
#' \item location \code{character}
#' \item YYYYMMDD \code{Date} Date of the daily observation written as
#' YYYY-MM-DD
#' \item IDenv \code{character} Environment ID written Location_Year
#' \item T2M \code{numeric} Average mean temperature (degree Celsius)
#' \item T2M_MIN \code{numeric} Min. temperature (degree Celsius)
#' \item T2M_MAX \code{numeric} Max. temperature (degree Celsius)
#' \item PRECTOTCORR \code{numeric} Total daily precipitation (mm)
#' \item SG_DAY_HOUR_AVG \code{numeric} Daylight Hrs (hrs)
#' }
#' Additional weather data provided by user must be a subset of the following
#' weather variable names (= next columns):
#' (\strong{Any imputation step should be performed before providing
#' this daily weather dataset to the package. }):
#' \enumerate{
#' \item RH2M \code{numeric} Daily mean relative humidity (%)
#' \item RH2M_MIN \code{numeric} Daily minimum relative humidity (%)
#' \item RH2M_MAX \code{numeric} Daily maximum relative humidity (%)
#' \item daily_solar_radiation \code{numeric} daily solar radiation
#' (MJ/m^2/day)
#' \item T2MDEW \code{numeric} Dew Point (°C)
#' }
#' Default is `NULL`.
#'
#'
#' @param et0 `logical` whether evapotranspiration should be calculated.
#' False by default.
#'
#' @param get_solar_radiation `logical` whether to retrieve solar data if lon
#' and lat values are available
#' @param path_flagged_values where to save the file with flagged values to
#' check on (they are not removed from the data, only indicated in the output
#' file)
#'
#' @return daily_weather_data a \code{data.frame} after quality check with the
#' same columns as before the QC. \cr
#' Vapor pressure deficit is calculated if T2M_MIN, T2M_MAX, and either
#' RH2M_MIN + RH2M_MAX or only RH2M are provided. \cr
#' et0 calculated if indicated (et0 = TRUE) . \cr
#' \strong{
#' The function checks for multiple daily observations at the same EnvID.\cr
#' Warning messages are also thrown if some observations do not pass either
#' the range test, persistence test or the internal consistency test. A
#' data.frame, with dubious values signaled by a column flagged and with the
#' corresponding explanation in the column "reason", is provided as output.
#' None of the flagged values is assigned as missing values or transformed;
#' therefore we strongly recommend the user to have a second look at the daily
#' weather data provided and to correct potential dubious values indicated by
#' the output of the present function.}
#' \cr
#' \strong{
#' Solar radiation or wind data are automatically retrieved from NASA, if they
#' are not provided without any missing data by the user. As for any other
#' weather variable used in this function, these data cannot be only partially
#' provided (no missing values accepted).}
#'
#' @references
#' \insertRef{zotarelli2010step}{learnMET}
#'
#' @author Cathy C. Westhues \email{cathy.jubin@@hotmail.com}
#' @export
qc_raw_weather_data <-
function(daily_weather_data,
info_environments,
path_flagged_values,
et0 = F,
get_solar_radiation = FALSE) {
cat("QC on daily weather data starts...\n")
checkmate::assert_data_frame(daily_weather_data, any.missing = FALSE)
checkmate::assert_names(
colnames(daily_weather_data),
must.include = c("IDenv",
"location",
"year",
"YYYYMMDD"),
subset.of = c(
"IDenv",
"location",
"year",
"longitude",
"latitude",
"YYYYMMDD",
"MM",
"DD",
"YEAR",
"DOY",
"RH2M",
"RH2M_MIN",
"RH2M_MAX",
"T2M",
"T2M_MIN",
"T2M_MAX",
"PRECTOTCORR",
"SG_DAY_HOUR_AVG",
"daily_solar_radiation",
"T2MDEW",
"WS2M",
"length.gs",
"vapr_deficit",
"et0"
)
)
# Check YYYYMMDD class
checkmate::assert_date(daily_weather_data$YYYYMMDD)
# Get DOY if not provided
if (!("DOY" %in% colnames(daily_weather_data))) {
daily_weather_data$DOY <-
lubridate::yday(daily_weather_data$YYYYMMDD)
}
daily_weather_data$multiple_obs_per_day <-
paste0(daily_weather_data$IDenv,
daily_weather_data$DOY)
if (any(duplicated(daily_weather_data$multiple_obs_per_day))) {
cat(
"Multiple observations for the same day in the same environment were",
"found and will be removed to keep 1 obs. per day.\n"
)
daily_weather_data <-
daily_weather_data[!duplicated(daily_weather_data$multiple_obs_per_day), ]
}
# Order data.frame
daily_weather_data <-
dplyr::arrange(daily_weather_data, IDenv, DOY)
# Check which IDenv are provided with raw weather data
envs_with_daily_wdata <- names(table(daily_weather_data$IDenv))
# Check that the dates provided by the user as raw weather data correspond to
# those from the growing season of the environment.
for (j in envs_with_daily_wdata) {
int <-
lubridate::interval(info_environments[info_environments$IDenv == j, "planting.date"], info_environments[info_environments$IDenv ==
j, "harvest.date"])
days_diff <-
difftime(lubridate::ymd(info_environments[info_environments$IDenv == j, "harvest.date"]),
lubridate::ymd(info_environments[info_environments$IDenv ==
j, "planting.date"]))
if (length(which(lubridate::`%within%`(daily_weather_data[daily_weather_data$IDenv == j, "YYYYMMDD"],
int))) >= days_diff - 1) {
dates_to_keep <-
daily_weather_data[daily_weather_data$IDenv == j, "YYYYMMDD"][which(lubridate::`%within%`(daily_weather_data[daily_weather_data$IDenv == j, "YYYYMMDD"],
int))]
dates_to_remove <-
daily_weather_data[daily_weather_data$IDenv == j, "YYYYMMDD"][daily_weather_data[daily_weather_data$IDenv == j, "YYYYMMDD"] %notin%
dates_to_keep]
if (length(dates_to_remove) > 0) {
daily_weather_data <- daily_weather_data %>%
filter(!(YYYYMMDD %in% dates_to_remove & IDenv %in% j))
}
} else{
stop(
paste0(
"The range of dates provided in the raw weather data for ",
j,
" does not correspond to the interval of dates between planting and harvest dates.\n"
)
)
}
}
# Create a data.frame to indicate flagged values and the reason why these
# were flagged.
flagged_values <- daily_weather_data
flagged_values$flagged <- NA
flagged_values$reason <- NA
### QC on daylight hrs ###
if ("SG_DAY_HOUR_AVG" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$SG_DAY_HOUR_AVG)
# 1) Range test
if (any(na.omit(daily_weather_data$SG_DAY_HOUR_AVG > 24))) {
warning("Daylight hours should be inferior to 24 hrs.")
}
if (any(na.omit(daily_weather_data$SG_DAY_HOUR_AVG < 0))) {
warning("Daylight hours should be positive.")
}
}
#### QC on precipitation ####
if ("PRECTOTCORR" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$PRECTOTCORR)
# 1) Range test
if (any(na.omit(daily_weather_data$PRECTOTCORR > 500))) {
warning("Some daily precipitation data sup. to 500 mm, which is abnormal.")
}
if (any(na.omit(daily_weather_data$PRECTOTCORR < 0))) {
warning("Some daily precipitation data inf. to 0 mm, which is abnormal.")
}
# Flagged values
flagged_values$flagged[which(flagged_values$PRECTOTCORR > 500)] <-
"flagged"
flagged_values$flagged[which(flagged_values$PRECTOTCORR < 0)] <-
"flagged"
flagged_values$reason[which(flagged_values$PRECTOTCORR > 500)] <-
"range_test_precipitation"
flagged_values$reason[which(flagged_values$PRECTOTCORR < 0)] <-
"range_test_precipitation"
}
#### QC on temperature data ####
if ("T2M_MIN" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$T2M_MIN)
# 1) Range test
if (any(na.omit(daily_weather_data$T2M_MIN < (-50)))) {
warning("Some min. daily temp. inf. to -50, which is abnormal.")
}
if (any(na.omit(daily_weather_data$T2M_MIN > 30))) {
warning("Some min. daily temp. sup. to 30, which is abnormal.")
}
flagged_values$flagged[which(flagged_values$T2M_MIN > 30)] <-
"flagged"
flagged_values$flagged[which(flagged_values$T2M_MIN < (-50))] <-
"flagged"
flagged_values$reason[which(flagged_values$T2M_MIN > 30)] <-
"range_test_min_temp"
flagged_values$reason[which(flagged_values$T2M_MIN < (-50))] <-
"range_test_min_temp"
}
if ("T2M_MAX" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$T2M_MAX)
if (any(na.omit(daily_weather_data$T2M_MAX < (-40)))) {
warning("Some max. daily temp. inf. to -40, which is abnormal.")
}
if (any(na.omit(daily_weather_data$T2M_MAX > 50))) {
warning("Some max. daily temp. sup. to 50, which is abnormal.")
}
flagged_values$flagged[which(flagged_values$T2M_MAX < (-40))] <-
"flagged"
flagged_values$flagged[which(flagged_values$T2M_MAX > 50)] <-
"flagged"
flagged_values$reason[which(flagged_values$T2M_MAX < (-40))] <-
"range_test_max_temp"
flagged_values$reason[which(flagged_values$T2M_MAX > 50)] <-
"range_test_max_temp"
}
if ("T2M" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$T2M_MIN)
if (any(na.omit(daily_weather_data$T2M < (-50)))) {
warning("Some mean. daily temp. inf. to -50, which is abnormal.")
}
if (any(na.omit(daily_weather_data$T2M > 50))) {
warning("Some mean. daily temp. sup. to 50, which is abnormal.")
}
flagged_values$flagged[which(flagged_values$T2M < (-50))] <-
"flagged"
flagged_values$flagged[which(flagged_values$T2M > 50)] <-
"flagged"
flagged_values$reason[which(flagged_values$T2M < (-50))] <-
"range_test_mean_temp"
flagged_values$reason[which(flagged_values$T2M > 50)] <-
"range_test_mean_temp"
}
# 2) Internal consistency test
if (all(c("T2M_MIN", "T2M_MAX", "T2M") %in% names(daily_weather_data))) {
if (any(na.omit(daily_weather_data$T2M_MAX < daily_weather_data$T2M_MIN))) {
warning(paste(
"Max temperature should be superior to min temperature.",
"Check data."
))
}
if (any(na.omit(daily_weather_data$T2M_MAX < daily_weather_data$T2M))) {
warning(paste(
"Max temperature should be superior to mean temperature.",
"Check data."
))
}
if (any(na.omit(daily_weather_data$T2M_MIN > daily_weather_data$T2M))) {
warning(paste(
"Min temperature should be inferior to mean temperature.",
"Check data."
))
}
daily_weather_data_check <- daily_weather_data
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., min_previous_day_value = dplyr::lag(T2M_MIN, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., max_previous_day_value = dplyr::lag(T2M_MAX, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., IDenv_previous_day = dplyr::lag(IDenv))
if (any(
na.omit(
daily_weather_data_check$T2M_MAX < daily_weather_data_check$min_previous_day_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(
paste(
"Max temperature of day j should be superior to min",
"temperature of day j-1. Check your data"
)
)
flagged_values$flagged[which(
daily_weather_data_check$T2M_MAX < daily_weather_data_check$min_previous_day_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$T2M_MAX < daily_weather_data_check$min_previous_day_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "consistency_test_max_temp"
}
if (any(
na.omit(
daily_weather_data_check$T2M_MIN > daily_weather_data_check$max_previous_day_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(
paste(
"Min temperature of day j should be inferior to max",
"temperature of day j-1. Check your data"
)
)
flagged_values$flagged[which(
daily_weather_data_check$T2M_MIN > daily_weather_data_check$max_previous_day_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$T2M_MIN > daily_weather_data_check$max_previous_day_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "consistency_test_min_temp"
}
# 3) Persistence test
daily_weather_data_check <- daily_weather_data
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., min_previous_day_value = dplyr::lag(T2M_MIN, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(.,
min_2_days_before_value = dplyr::lag(T2M_MIN, n = 2, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., max_previous_day_value = dplyr::lag(T2M_MAX, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(.,
max_2_days_before_value = dplyr::lag(T2M_MAX, n = 2, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., mean_previous_day_value = dplyr::lag(T2M, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., mean_2_days_before_value = dplyr::lag(T2M, n = 2, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., IDenv_previous_day = dplyr::lag(IDenv))
if (any(
na.omit(
daily_weather_data_check$T2M == daily_weather_data_check$mean_previous_day_value &
daily_weather_data_check$T2M == daily_weather_data_check$mean_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(paste(
"Mean temperature remains exactly constant three days in a row."
))
flagged_values$flagged[which(
daily_weather_data_check$T2M == daily_weather_data_check$mean_previous_day_value &
daily_weather_data_check$T2M == daily_weather_data_check$mean_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$T2M == daily_weather_data_check$mean_previous_day_value &
daily_weather_data_check$T2M == daily_weather_data_check$mean_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "persistence_test_mean_temp"
}
if (any(
na.omit(
daily_weather_data_check$T2M_MIN == daily_weather_data_check$min_previous_day_value &
daily_weather_data_check$T2M_MIN == daily_weather_data_check$min_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(paste(
"Min temperature remains exactly constant three days in a row."
))
flagged_values$flagged[which(
daily_weather_data_check$T2M_MIN == daily_weather_data_check$min_previous_day_value &
daily_weather_data_check$T2M_MIN == daily_weather_data_check$min_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$T2M_MIN == daily_weather_data_check$min_previous_day_value &
daily_weather_data_check$T2M_MIN == daily_weather_data_check$min_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "persistence_test_min_temp"
}
if (any(
na.omit(
daily_weather_data_check$T2M_MAX == daily_weather_data_check$max_previous_day_value &
daily_weather_data_check$T2M_MAX == daily_weather_data_check$max_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(paste(
"Max temperature remains exactly constant three days in a row."
))
flagged_values$flagged[which(
daily_weather_data_check$T2M_MAX == daily_weather_data_check$max_previous_day_value &
daily_weather_data_check$T2M_MAX == daily_weather_data_check$max_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$T2M_MAX == daily_weather_data_check$max_previous_day_value &
daily_weather_data_check$T2M_MAX == daily_weather_data_check$max_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "persistence_test_max_temp"
}
}
#### QC on daily solar radiation data ####
if ("daily_solar_radiation" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$daily_solar_radiation, any.missing = F)
# 1) Range test
if (any(na.omit(daily_weather_data$daily_solar_radiation > 35))) {
warning("Daily solar radiation exhibits values sup. to 35 MJ/m2/day, which is abnormal.")
}
if (any(na.omit(daily_weather_data$daily_solar_radiation < 1))) {
warning("Daily solar radiation exhibits values inf. to 1 MJ/m2/day, which is abnormal.")
}
flagged_values$flagged[which(daily_weather_data$daily_solar_radiation > 35)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$daily_solar_radiation > 35)] <-
"range_test_solar"
flagged_values$flagged[which(daily_weather_data$daily_solar_radiation < 1)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$daily_solar_radiation < 1)] <-
"range_test_solar"
# 2) Persistence test
daily_weather_data_check <- daily_weather_data
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(.,
dsr_previous_day_value = dplyr::lag(daily_solar_radiation, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(.,
dsr_2_days_before_value = dplyr::lag(
daily_solar_radiation,
n = 2,
order_by = c(IDenv)
))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., IDenv_previous_day = dplyr::lag(IDenv))
if (any(
na.omit(
daily_weather_data_check$daily_solar_radiation == daily_weather_data_check$dsr_previous_day_value &
daily_weather_data_check$daily_solar_radiation == daily_weather_data_check$dsr_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(paste(
"Daily solar radiation remains exactly constant three days in a row."
))
flagged_values$flagged[which(
daily_weather_data_check$daily_solar_radiation == daily_weather_data_check$dsr_previous_day_value &
daily_weather_data_check$daily_solar_radiation == daily_weather_data_check$dsr_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$daily_solar_radiation == daily_weather_data_check$dsr_previous_day_value &
daily_weather_data_check$daily_solar_radiation == daily_weather_data_check$dsr_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "persistence_test_solar"
}
} else {
if (get_solar_radiation) {
cat("Get solar radiation data (weather variable not provided by user)\n")
solar_data <- lapply(
envs_with_daily_wdata,
FUN = function(x,
...) {
get_solar_radiation(environment = x,
info_environments = info_environments,
...)
}
)
has_unsuccessful_requests <- TRUE
counter <- 1
list_envs_loop <- envs_with_daily_wdata
# This is an empty list to which all requested data will be assigned.
solar_data <-
vector(mode = "list",
length = length(list_envs_loop))
names(solar_data) <- list_envs_loop
# Issues with the NASAPOWER query: it sometimes fail --> use of tryCath and while procedure to ensure weather data for each envrionment
# are retrieved.
while (has_unsuccessful_requests) {
res_w_daily_solar <-
lapply(list_envs_loop,
function(environment, ...) {
solar_data <- tryCatch({
get_solar_radiation(environment = environment,
info_environments = info_environments,
...)
},
error = function(e)
return(NULL),
warning = function(w)
return(NULL))
solar_data
})
names(res_w_daily_solar) <- list_envs_loop
unsuccessful_request_bool <- vapply(res_w_daily_solar,
FUN = is.null,
FUN.VALUE = logical(1))
failed_requests <-
list_envs_loop[unsuccessful_request_bool]
good_requests <-
list_envs_loop[!unsuccessful_request_bool]
list_envs_loop <- list_envs_loop[failed_requests]
solar_data[good_requests] <-
res_w_daily_solar[good_requests]
counter <- counter + 1
if (counter == 15) {
stop("At least one request failed fifteen times.", call. = FALSE)
}
has_unsuccessful_requests <- any(unsuccessful_request_bool)
}
solar_data <- as.data.frame(data.table::rbindlist(solar_data))
daily_weather_data <-
merge(daily_weather_data,
solar_data[, c("IDenv", "YYYYMMDD", "daily_solar_radiation")],
by = c("IDenv", "YYYYMMDD"))
}
}
#### QC on relative humidity ####
# 1) Range test
if ("RH2M" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$RH2M)
if (any(na.omit(daily_weather_data$RH2M > 100))) {
warning("Some relative humidity data sup. to 100, which is abnormal.")
}
if (any(na.omit(daily_weather_data$RH2M < 0))) {
warning("Some relative humidity data inf. to 100, which is abnormal.")
}
flagged_values$flagged[which(daily_weather_data$RH2M > 100)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$RH2M > 100)] <-
"range_test_humidity"
flagged_values$flagged[which(daily_weather_data$RH2M < 0)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$RH2M < 0)] <-
"range_test_humidity"
}
if ("RH2M_MIN" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$RH2M_MIN)
if (any(na.omit(daily_weather_data$RH2M_MIN > 100))) {
warning("Some min. relative humidity data sup. to 100, which is abnormal.")
}
if (any(na.omit(daily_weather_data$RH2M_MIN < 0))) {
warning("Some min. relative humidity data inf. to 100, which is abnormal.")
}
flagged_values$flagged[which(daily_weather_data$RH2M_MIN > 100)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$RH2M_MIN > 100)] <-
"range_test_min_humidity"
flagged_values$flagged[which(daily_weather_data$RH2M_MIN < 0)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$RH2M_MIN < 0)] <-
"range_test_min_humidity"
}
if ("RH2M_MAX" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$RH2M_MAX)
if (any(na.omit(daily_weather_data$RH2M_MAX > 100))) {
warning("Some max. relative humidity data sup. to 100, which is abnormal.")
}
if (any(na.omit(daily_weather_data$RH2M_MAX < 0))) {
warning("Some max. relative humidity data inf. to 100, which is abnormal.")
}
flagged_values$flagged[which(daily_weather_data$RH2M_MAX > 100)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$RH2M_MAX > 100)] <-
"range_test_max_humidity"
flagged_values$flagged[which(daily_weather_data$RH2M_MAX < 0)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$RH2M_MAX < 0)] <-
"range_test_max_humidity"
}
# 2) Persistence test
if ("RH2M" %in% names(daily_weather_data)) {
daily_weather_data_check <- daily_weather_data
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., rh_previous_day_value = dplyr::lag(RH2M, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., rh_2_days_before_value = dplyr::lag(RH2M, n = 2, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., IDenv_previous_day = dplyr::lag(IDenv))
if (any(
na.omit(
daily_weather_data_check$RH2M == daily_weather_data_check$rh_previous_day_value &
daily_weather_data_check$RH2M == daily_weather_data_check$rh_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(paste(
"Daily relative humidity remains exactly constant three days in a row."
))
flagged_values$flagged[which(
daily_weather_data_check$RH2M == daily_weather_data_check$rh_previous_day_value &
daily_weather_data_check$RH2M == daily_weather_data_check$rh_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$RH2M == daily_weather_data_check$rh_previous_day_value &
daily_weather_data_check$RH2M == daily_weather_data_check$rh_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "persistence_test_mean_humidity"
}
}
if ("RH2M_MIN" %in% names(daily_weather_data)) {
daily_weather_data_check <- daily_weather_data
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., rhmin_previous_day_value = dplyr::lag(RH2M_MIN, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(.,
rhmin_2_days_before_value = dplyr::lag(RH2M_MIN, n = 2, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., IDenv_previous_day = dplyr::lag(IDenv))
if (any(
na.omit(
daily_weather_data_check$RH2M_MIN == daily_weather_data_check$rhmin_previous_day_value &
daily_weather_data_check$RH2M_MIN == daily_weather_data_check$rhmin_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(paste(
"Daily relative humidity remains exactly constant three days in a row."
))
flagged_values$flagged[which(
daily_weather_data_check$RH2M_MIN == daily_weather_data_check$rhmin_previous_day_value &
daily_weather_data_check$RH2M_MIN == daily_weather_data_check$rhmin_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$RH2M_MIN == daily_weather_data_check$rhmin_previous_day_value &
daily_weather_data_check$RH2M_MIN == daily_weather_data_check$rhmin_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "persistence_test_min_humidity"
}
}
if ("RH2M_MAX" %in% names(daily_weather_data)) {
daily_weather_data_check <- daily_weather_data
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., rhmax_previous_day_value = dplyr::lag(RH2M_MAX, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(.,
rhmax_2_days_before_value = dplyr::lag(RH2M_MAX, n = 2, order_by = c(IDenv)))
daily_weather_data_check <-
daily_weather_data_check %>%
dplyr::mutate(., IDenv_previous_day = dplyr::lag(IDenv))
if (any(
na.omit(
daily_weather_data_check$RH2M_MAX == daily_weather_data_check$rhmax_previous_day_value &
daily_weather_data_check$RH2M_MAX == daily_weather_data_check$rhmax_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)
)) {
warning(paste(
"Daily relative humidity remains exactly constant three days in a row."
))
flagged_values$flagged[which(
daily_weather_data_check$RH2M_MAX == daily_weather_data_check$rhmax_previous_day_value &
daily_weather_data_check$RH2M_MAX == daily_weather_data_check$rhmax_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "flagged"
flagged_values$reason[which(
daily_weather_data_check$RH2M_MAX == daily_weather_data_check$rhmax_previous_day_value &
daily_weather_data_check$RH2M_MAX == daily_weather_data_check$rhmax_2_days_before_value &
daily_weather_data_check$IDenv == daily_weather_data_check$IDenv_previous_day
)] <- "persistence_test_max_humidity"
}
}
#### QC on wind data ####
if ("WS2M" %in% names(daily_weather_data)) {
checkmate::assert_numeric(daily_weather_data$WS2M, any.missing = F)
# 1) Range test
if (any(na.omit(daily_weather_data$WS2M > 100))) {
warning("Some daily wind speed data (m/s) sup. to 100, which is abnormal.")
}
if (any(na.omit(daily_weather_data$WS2M < 0))) {
warning("Some daily wind speed data (m/s) inf. to 0, which is abnormal.")
}
flagged_values$flagged[which(daily_weather_data$WS2M > 100)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$WS2M > 100)] <-
"range_test_wind"
flagged_values$flagged[which(daily_weather_data$WS2M < 0)] <-
"flagged"
flagged_values$reason[which(daily_weather_data$WS2M < 0)] <-
"range_test_wind"
}
## Calculation of the vapor-pressure deficit: difference between the actual
## water vapor pressure and the saturation water pressure at a particular
## temperature
if ("vapr_deficit" %notin% names(daily_weather_data)) {
cat("Vapour pressure deficit calculated from humidity and temp. data.\n")
if (all(c("T2M_MIN", "T2M_MAX", "RH2M_MIN", "RH2M_MAX") %in% names(daily_weather_data))) {
cat(
paste(
"Actual vapor pressure (ea) calculated from relative humidity",
"using RH2M_MIN and RH2M_MAX.\n"
)
)
actual_vapor_pressure <-
get.ea(
tmin = daily_weather_data$T2M_MIN,
tmax = daily_weather_data$T2M_MAX,
rhmin = daily_weather_data$RH2M_MIN,
rhmax = daily_weather_data$RH2M_MAX
)
} else if (all(c("T2M_MIN", "RH2M_MAX") %in% names(daily_weather_data))) {
cat(
paste(
"Actual vapor pressure (ea) calculated from relative humidity",
"using only RH2M_MAX.\n"
)
)
actual_vapor_pressure <-
get.ea.with.rhmax(tmin = daily_weather_data$T2M_MIN,
rhmax = daily_weather_data$RH2M_MAX)
} else if (all(c("T2M_MIN", "T2M_MAX", "RH2M") %in% names(daily_weather_data))) {
cat(
paste(
"Actual vapor pressure (ea) calculated from relative humidity",
"using RH2M (mean RH).\n"
)
)
actual_vapor_pressure <-
get.ea.with.rhmean(
tmin = daily_weather_data$T2M_MIN,
tmax = daily_weather_data$T2M_MAX,
rhmean = daily_weather_data$RH2M
)
} else{
cat(
paste(
"Actual vapor pressure (ea) calculated from relative humidity",
"using T2M_MIN.\n"
)
)
actual_vapor_pressure <-
get.ea.no.RH(tmin = daily_weather_data$T2M_MIN)
}
mean_saturation_vapor_pressure <-
get.es(tmin = daily_weather_data$T2M_MIN, tmax = daily_weather_data$T2M_MAX)
daily_weather_data$vapr_deficit <-
mean_saturation_vapor_pressure - actual_vapor_pressure
}
## Calculation of evapotranspiration
if (et0) {
cat("et0 is calculated.\n")
if ("elevation" %in% colnames(info_environments)) {
daily_weather_data <-
plyr::join(daily_weather_data, info_environments[, c("IDenv", "elevation")], by =
"IDenv")
}
if ("elevation" %notin% colnames(info_environments)) {
elevation <-
get_elevation(info_environments = info_environments,
path = path_flagged_values)
daily_weather_data <-
plyr::join(daily_weather_data, elevation[, c("IDenv", "elevation")], by =
"IDenv")
}
if ("RH2M_MAX" %notin% names(daily_weather_data)) {
daily_weather_data$RH2M_MAX <- NULL
}
if ("RH2M_MIN" %notin% names(daily_weather_data)) {
daily_weather_data$RH2M_MIN <- NULL
}
daily_weather_data <-
plyr::join(daily_weather_data, info_environments[, c("IDenv", "latitude", "longitude")], by =
"IDenv")
daily_weather_data$et0 <-
penman_monteith_reference_et0(
doy = daily_weather_data$DOY,
latitude = daily_weather_data$latitude,
elevation = daily_weather_data$elevation,
tmin = daily_weather_data$T2M_MIN,
tmax = daily_weather_data$T2M_MAX,
tmean = daily_weather_data$T2M,
solar_radiation = daily_weather_data$daily_solar_radiation ,
wind_speed = daily_weather_data$WS2M,
rhmean = daily_weather_data$RH2M,
rhmax = daily_weather_data$RH2M_MAX,
rhmin = daily_weather_data$RH2M_MIN,
tdew = NULL,
use_rh = TRUE
)
}
cat("QC on daily weather data is done!\n")
if (any((flagged_values %>% dplyr::select(-YYYYMMDD)) == "flagged", na.rm = TRUE)) {
cat("A file with flagged values has been saved in the subfolder weather_data.\n")
saveRDS(
flagged_values,
file = paste0(
path_flagged_values,
"/flagged_values_raw_weather_data.RDS"
)
)
}
return(as.data.frame(daily_weather_data))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.