Nothing
#' Obtain time interval of input timestamp vector
#'
#' @description `get_interval()` retrieves the time interval of the input
#' timestamp vector and checks whether the format of the time vector is
#' acceptable for the successive process.
#'
#' @param vctr_time A timestamp vector of class POSIXct or POSIXlt. The
#' timestamps must be equally spaced and arranged chronologically.
#'
#' @returns
#' A numeric (minutes) indicating the time interval of the input timestamp
#' vector.
#'
#' @author Yoshiaki Hata
#'
#' @keywords internal
get_interval <- function(vctr_time) {
if(lubridate::is.POSIXt(vctr_time) != TRUE) {
stop("input time vector must be a POSIXct or POSIXlt class")
}
if(length(vctr_time[is.na(vctr_time)]) >= 1) {
stop("one or more NA values exist in the input time vector")
}
max_interval <- vctr_time %>% diff() %>% as.numeric() %>% max()
min_interval <- vctr_time %>% diff() %>% as.numeric() %>% min()
if(all.equal(max_interval, min_interval) != TRUE) {
stop("input time vector does not have equal intervals")
}
return(max_interval)
}
#' Calculate global solar radiation time series at TOA
#'
#' @description `calc_sw_in_toa()` obtains incident global solar radiation
#' time series at TOA (top of atmosphere) at a specific location by
#' calculating solar elevation angle estimated from the equations of Campbell
#' and Norman (1998).
#'
#' @inheritParams get_interval
#' @param lat A numeric value (degrees) between -90 and 90, indicating the
#' latitude of the specific location.
#' @param lon A numeric value (degrees) between -180 and 180, indicating the
#' longitude of the specific location.
#' @param std_meridian A numeric value (degrees) between -180 and 180,
#' indicating the standard meridian of the specific location.
#' @param solar_const A positive value (W m-2) indicating the solar constant.
#' Default is 1365 (W m-2).
#' @param sbeta_min A threshold value of the solar elevation angle (degrees).
#' If the calculated solar elevation angle is less than this threshold, the
#' corresponding global solar radiation becomes zero.
#'
#' @returns
#' A vector of the global solar radiation at TOA (W m-2). The length of the
#' vector matches that of the input timestamp vector.
#'
#' @examples
#' ## Make a timestamp vector
#' timezone <- "Etc/GMT-8"
#' time <- seq(as.POSIXct("2026/01/01", tz = timezone),
#' as.POSIXct("2026/01/02", tz = timezone), by = "30 min")
#'
#' ## Obtain global solar radiation at Lambir Hills National Park in Malaysia
#' result <-
#' calc_sw_in_toa(vctr_time = time, lat = 4.201007, lon = 114.039079,
#' std_meridian = 120)
#'
#' @author Yoshiaki Hata
#'
#' @export
calc_sw_in_toa <-
function(vctr_time, lat, lon, std_meridian, solar_const = 1365,
sbeta_min = 0.001) {
DOY <- lubridate::yday(vctr_time)
HourMin <- lubridate::hour(vctr_time) + lubridate::minute(vctr_time) / 60
lct <- -24 * (std_meridian - lon) / 360
fff <- pi * (279.575 + DOY * 0.9856) / 180
etim <-
(-104.7 * sin(fff) + 596.2 * sin(2*fff) + 4.3 * sin(3 * fff) -
12.7 * sin(4 * fff) - 429.3 * cos(fff) - 2 * cos(2*fff) +
19.3 * cos(3 * fff)) / 3600
snoon <- 12 - lct - etim
sdelt <-
asin(0.398 * sin(4.871 + 2 * pi * DOY / 365 + 0.033 *
sin(2 * pi * DOY / 365)))
sbeta <-
asin(sin(lat / 180 * pi) * sin(sdelt) +
cos(lat / 180 * pi) * cos(sdelt) *
cos(-2 * pi * ((HourMin - 0.25) - snoon) / 24))
sbeta <- ifelse(sin(sbeta) <= sbeta_min, 0, sbeta)
global_radiation_toa <- solar_const * sin(sbeta)
return(global_radiation_toa)
}
#' Obtain the number of data points without missing values
#'
#' @description `n_valid()` retrieves the number of data points without missing
#' values.
#'
#' @param vctr A vector to be evaluated.
#' @param label_err A numeric value representing a missing value in the input
#' vector(s). Default is -9999.
#'
#' @returns
#' An integer indicating the number of the input vector elements without
#' missing values.
#'
#' @author Yoshiaki Hata
#'
#' @keywords internal
n_valid <-
function(vctr, label_err = -9999) {
vctr <- ifelse(vctr == label_err, NA, vctr)
return(length(stats::na.omit(vctr)))
}
#' Detect periods when short-term signal attenuation occurs
#'
#' @description `check_short_attenuation()` detects periods of temporary signal
#' attenuation by the upward (or downward) peak position of the smoothed
#' average (or standard deviation) time series.
#'
#' @details
#' In some cases, the dT time series may yield a signal that is attenuated for
#' only a short period, for example, when rainfall continues for days, causing
#' the moving window mean (or standard deviation) to increase (or decrease).
#' In such cases, normalization will cause the Z-score time series immediately
#' before and after the rainfall to be unnaturally distorted, hindering the
#' construction of the random-forest model. This function can detect periods
#' when the moving window average has an upward peak, and the moving window
#' standard deviation has a downward peak simultaneously.
#'
#' First, the average and standard deviation time series are interpolated if
#' they contain missing values. Second, they are smoothed by convolution with a
#' Gaussian window specified by the user through the parameters
#' `wndw_size_conv` and `inv_sigma_conv`. Third, the first-order and
#' second-order differences of both smoothed time series are calculated, which
#' determine the upward peak positions of the average and the downward peak
#' positions of the standard deviation. Finally, possible signal attenuation
#' periods are output. The start and end of the periods are defined by the
#' timings when the first-order differenced standard deviation time series
#' changes its sign before and after each peak.
#'
#' @inheritParams calc_dtmax_sp
#' @inheritParams n_valid
#' @param vctr_z A vector of Z-score time series. The length of the vector must
#' match that of the timestamp vector.
#' @param vctr_avg A vector of moving window average time series that used in
#' transforming the original time series into the Z-score time series. The
#' length of the vector must match that of the timestamp vector. The unit of
#' the time series must match that of `vctr_sd`.
#' @param vctr_sd A vector of moving window standard deviation time series that
#' used in transforming the original time series into the Z-score time series.
#' The length of the vector must match that of the timestamp vector. The unit
#' of the time series must match that of `vctr_avg`.
#' @param wndw_size_conv A positive integer indicating the number of data
#' points included in a moving window. THeefault is 48 * 15, meaning that the
#' window size is 15 days if the time interval of the input timestamp is 30
#' minutes.
#' @param inv_sigma_conv A positive value defining a Gaussian window width. The
#' width of the Gaussian window is inversely proportional to this parameter.
#' Default is 0.01.
#'
#' @returns
#' A data frame with columns below:
#'
#' * The first column, `time_start`, gives the timestamp at which the detected
#' attenuation period begins.
#'
#' * The second column, `time_peak`, gives the timestamp of the detected
#' attenuation period peak.
#'
#' * The third column, `time_end`, gives the timestamp at which the detected
#' attenuation period ends.
#'
#' * The fourth column, `ratio`, gives the average of the ratio of the standard
#' deviation at the detected attenuation peak to that at the beginning and end
#' of the attenuation period. This value can be used as a threshold for
#' determining the final attenuation periods that require Z-score modification.
#'
#' @author Yoshiaki Hata
#'
#' @keywords internal
check_short_attenuation <-
function(vctr_time, vctr_z, vctr_avg, vctr_sd, wndw_size_conv = 48 * 15,
inv_sigma_conv = 0.01, label_err = -9999) {
## avoid "No visible binding for global variable" notes
time <- NULL
dT_z <- NULL
dT_sd <- NULL
dT_avg <- NULL
dT_avg_smth <- NULL
dT_avg_smth_diff1 <- NULL
flag_dT_avg_peak <- NULL
dT_sd_smth <- NULL
dT_sd_smth_diff1 <- NULL
flag_dT_sd_peak <- NULL
dT_avg_smth_diff2 <- NULL
dT_sd_smth_diff2 <- NULL
dT_sd_head <- NULL
. <- NULL
dT_sd_tail <- NULL
dT_sd_peak <- NULL
vctr_time_start <- NULL
vctr_time_peak <- NULL
vctr_time_end <- NULL
vctr_ratio <- NULL
data <-
data.frame(time = vctr_time,
dT_z = vctr_z,
dT_avg = vctr_avg,
dT_sd = vctr_sd) %>%
dplyr::mutate(dplyr::across(tidyselect::where(is.numeric),
~dplyr::na_if(., label_err)))
time_mea_start <-
data %>%
dplyr::filter(!is.na(dT_z)) %>%
dplyr::slice_head() %>%
dplyr::pull(time)
time_mea_end <-
data %>%
dplyr::filter(!is.na(dT_z)) %>%
dplyr::slice_tail() %>%
dplyr::pull(time)
interval_time <- vctr_time %>% diff() %>% as.numeric() %>% max()
## Gaussian filter
g_filter <- gsignal::gaussian(wndw_size_conv, inv_sigma_conv)
g_filter <- g_filter / sum(g_filter)
data <-
data %>%
dplyr::mutate(dT_avg_smth = ifelse(!is.na(dT_z), dT_avg, NA),
dT_avg_smth = zoo::na.approx(dT_avg_smth, na.rm = FALSE),
dT_sd_smth = ifelse(!is.na(dT_z), dT_sd, NA),
dT_sd_smth = zoo::na.approx(dT_sd_smth, na.rm = FALSE)) %>%
tidyr::fill(c(dT_avg_smth, dT_sd_smth), .direction = "updown") %>%
dplyr::mutate(dT_avg_smth = gsignal::conv(dT_avg_smth, g_filter,
shape = "same"),
dT_avg_smth_diff1 = dT_avg_smth - dplyr::lag(dT_avg_smth),
dT_avg_smth_diff2 = dplyr::lead(dT_avg_smth_diff1) -
dT_avg_smth_diff1,
flag_dT_avg_peak =
ifelse(dT_avg_smth_diff1 *
dplyr::lead(dT_avg_smth_diff1) < 0, 1, 0),
dT_sd_smth = gsignal::conv(dT_sd_smth, g_filter,
shape = "same"),
dT_sd_smth_diff1 = dT_sd_smth - dplyr::lag(dT_sd_smth),
dT_sd_smth_diff2 = dplyr::lead(dT_sd_smth_diff1) -
dT_sd_smth_diff1,
flag_dT_sd_peak =
ifelse(dT_sd_smth_diff1 *
dplyr::lead(dT_sd_smth_diff1) < 0, 1, 0))
## detect peak position
vctr_time_avg_upeak <-
data %>%
dplyr::filter(flag_dT_avg_peak == 1 & dT_avg_smth_diff2 < 0) %>%
dplyr::pull(time)
vctr_time_sd_peak <-
data %>%
dplyr::filter(flag_dT_sd_peak == 1) %>%
dplyr:: pull(time)
vctr_time_sd_dpeak <-
data %>%
dplyr::filter(flag_dT_sd_peak == 1 & dT_sd_smth_diff2 > 0 &
time > (time_mea_start + interval_time) &
time < (time_mea_end - interval_time)) %>%
dplyr::pull(time)
if(length(vctr_time_sd_dpeak) < 1) {
message("--- There are no peaks. Skip the detection.")
data.frame(time_start = vctr_time_start,
time_peak = vctr_time_peak,
time_end = vctr_time_end,
ratio = vctr_ratio) %>%
return()
} else {
## detect possible attenuation periods
for (i in 1:length(vctr_time_sd_dpeak)) {
target_time_sd_dpeak <- vctr_time_sd_dpeak[i]
num_time_sd_peak <- which(vctr_time_sd_peak == target_time_sd_dpeak)
if(length(vctr_time_sd_dpeak) == 1) {
attn_head <- time_mea_start + interval_time
attn_tail <- time_mea_end - interval_time
} else if(num_time_sd_peak == 1) {
attn_head <- time_mea_start + interval_time
attn_tail <- vctr_time_sd_peak[num_time_sd_peak + 1] - interval_time
} else if(num_time_sd_peak == length(vctr_time_sd_peak)) {
attn_head <- vctr_time_sd_peak[num_time_sd_peak - 1] + interval_time
attn_tail <- time_mea_end - interval_time
} else {
attn_head <- vctr_time_sd_peak[num_time_sd_peak - 1] + interval_time
attn_tail <- vctr_time_sd_peak[num_time_sd_peak + 1] - interval_time
}
num_time_avg_peak <-
which(vctr_time_avg_upeak >= attn_head &
vctr_time_avg_upeak <= attn_tail)
df_attn <-
data %>%
dplyr::filter(time >= attn_head & time <= attn_tail)
dT_sd_head <- dplyr::slice_head(df_attn) %>% dplyr::pull(dT_sd_smth)
dT_sd_tail <- dplyr::slice_tail(df_attn) %>% dplyr::pull(dT_sd_smth)
dT_sd_peak <-
df_attn %>%
dplyr::filter(., time == target_time_sd_dpeak) %>%
dplyr::pull(dT_sd_smth)
dT_sd_peak_ratio <-
round((dT_sd_peak / dT_sd_head + dT_sd_peak / dT_sd_tail) / 2,
digits = 5)
if(length(num_time_avg_peak) >= 1) {
vctr_time_start <- c(vctr_time_start, attn_head)
vctr_time_peak <- c(vctr_time_peak, target_time_sd_dpeak)
vctr_time_end <- c(vctr_time_end, attn_tail)
vctr_ratio <- c(vctr_ratio, dT_sd_peak_ratio)
}
}
## convert integer to POSIX
attributes(vctr_time_start) <- attributes(target_time_sd_dpeak)
attributes(vctr_time_peak) <- attributes(target_time_sd_dpeak)
attributes(vctr_time_end) <- attributes(target_time_sd_dpeak)
data.frame(time_start = vctr_time_start,
time_peak = vctr_time_peak,
time_end = vctr_time_end,
ratio = vctr_ratio) %>%
return()
}
}
#' Convert the decimal QC flag into a human-interpretable 10-digit binary
#'
#' @description `interpret_qc()` converts the input decimal QC (quality
#' control) flag time series into a data frame whose columns indicate the
#' applied process to each data point. The values "0" and "1" represent that
#' the process specified in their column name was applied and not applied to
#' the data point, respectively.
#'
#' @param vctr_qc A vector of the decimal QC flag output from
#' `run_fluxfixer()`.
#'
#' @returns
#' A data frame with columns below:
#'
#' * The first column, `initial_na`, indicates whether the data point was
#' originally missing.
#'
#' * The second column, `manual_removal`, indicates whether the data point was
#' removed manually as specified in the `vctr_time_err` argument in the
#' `run_fluxfixer()`.
#'
#' * The third column, `absolute_limits`, indicates whether the data point was
#' removed as an outlier by absolute limits.
#'
#' * The fourth column, `drift_correction`, indicates whether the data point
#' was modified by the short-term drift correction.
#'
#' * The fifth column, `noise_filtering`, indicates whether the data point was
#' modified by the high-frequency noise filtering.
#'
#' * The sixth column, `z_score_outlier`, indicates whether the data point was
#' removed by the Z-score outlier detection.
#'
#' * The seventh column, `rf_outlier`, indicates whether the data point was
#' removed by the random forest outlier detection.
#'
#' * The eighth column, `gap_filling`, indicates whether the data point was
#' gap-filled by the random forest model prediction.
#'
#' * The ninth column, `detrending`, indicates whether the data point was
#' modified in the detrending.
#'
#' * The tenth column, `damping_correction`, indicates whether the data point
#' was modified by the signal damping correction.
#'
#' @examples
#' ## Make a QC flag vector
#' qc <- c(0, 1, 2, 1023)
#'
#' ## Obtain a human-interpretable QC data frame
#' result <- interpret_qc(vctr_qc = qc)
#'
#' @author Yoshiaki Hata
#'
#' @export
interpret_qc <-
function(vctr_qc) {
## avoid "No visible binding for global variable" notes
qc_decimal <- NULL
damping_correction <- NULL
detrending <- NULL
gap_filling <- NULL
rf_outlier <- NULL
z_score_outlier <- NULL
noise_filtering <- NULL
drift_correction <- NULL
absolute_limits <- NULL
manual_removal <- NULL
initial_na <- NULL
data.frame(qc_decimal = vctr_qc) %>%
dplyr::mutate(damping_correction = ifelse(qc_decimal >= 2^9, 1, 0),
qc_decimal = ifelse(damping_correction == 1,
qc_decimal - 2^9, qc_decimal),
detrending = ifelse(qc_decimal >= 2^8, 1, 0),
qc_decimal = ifelse(detrending == 1, qc_decimal - 2^8,
qc_decimal),
gap_filling = ifelse(qc_decimal >= 2^7, 1, 0),
qc_decimal = ifelse(gap_filling == 1, qc_decimal - 2^7,
qc_decimal),
rf_outlier = ifelse(qc_decimal >= 2^6, 1, 0),
qc_decimal = ifelse(rf_outlier == 1, qc_decimal - 2^6,
qc_decimal),
z_score_outlier = ifelse(qc_decimal >= 2^5, 1, 0),
qc_decimal = ifelse(z_score_outlier == 1,
qc_decimal - 2^5, qc_decimal),
noise_filtering = ifelse(qc_decimal >= 2^4, 1, 0),
qc_decimal = ifelse(noise_filtering == 1, qc_decimal - 2^4,
qc_decimal),
drift_correction = ifelse(qc_decimal >= 2^3, 1, 0),
qc_decimal = ifelse(drift_correction == 1,
qc_decimal - 2^3, qc_decimal),
absolute_limits = ifelse(qc_decimal >= 2^2, 1, 0),
qc_decimal = ifelse(absolute_limits == 1, qc_decimal - 2^2,
qc_decimal),
manual_removal = ifelse(qc_decimal >= 2^1, 1, 0),
qc_decimal = ifelse(manual_removal == 1, qc_decimal - 2^1,
qc_decimal),
initial_na = ifelse(qc_decimal >= 2^0, 1, 0),
qc_decimal = ifelse(initial_na == 1, qc_decimal - 2^0,
qc_decimal)) %>%
dplyr::select(initial_na, manual_removal, absolute_limits,
drift_correction, noise_filtering, z_score_outlier,
rf_outlier, gap_filling, detrending,
damping_correction) %>%
return()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.