#' Auto Correlation with a moving window
#'
#' @description Uses autocorrelation to find a circadian period for a given timeseries
#'
#' @usage function(df = NULL, from = 18, to = 30,
#' sampling_rate = "1 hour", window_vector = NULL, values = NULL)
#'
#' @param df A data.frame with 2 columns. Column 1 must contain the windows to iterate over.
#' Column 2 must supply the values. This parameter is optional if window_vector and values are supplied.
#' df must not have gaps in the dates, acf asumes data is evenly spaced.
#'
#' @param from The period (in hours) from which to start looking for peaks in the autocorrelation. Default = 18.
#'
#' @param to The period (in hours) up to which to look for peaks in the autocorrelation. Default = 30.
#'
#' @param sampling_rate A charater string which indicates the sampling rate of the data.
#' For example: "1 second", "2 minutes", "1 hour" (default),"3 days", "11 months".
#'
#'
#' @return A data.frame with the autocorrelation results for each window which include: period, peaks,
#' power, lags for the peaks.
#'
#' @seealso [stats::acf()] which this functions uses to run the autocorrelation.
#'
#' @export analyze_timeseries.acf
#'
#' @examples
#' autocorrelations_multipeak <- acf_window(df = df_with_windows,
#' multipeak_period = FALSE, peak_of_interest = 2,
#' sampling_unit = "hours")
#'
#' @importFrom dplyr pull filter mutate left_join select
#' @importFrom stringr str_extract str_remove
#' @importFrom lubridate duration as.duration as.interval
#' @importFrom tibble tibble
#' @importFrom pracma findpeaks movavg
#'
analyze_timeseries.acf <- function(df = NULL, from = 18, to = 30,
sampling_rate = "1 hour") {
#Create results list
results = list()
#First check if the function can be skipped (var = 0)
#the data for the acf will be the last col outputter by the processing functions
values = pull(df, ncol(df))
#Check the variance
if (var(values) == 0 | length(values) <= 3) {
results$datetime = NA
results$autocorrelation = NA
results$power = NA
results$period = NA
results$rythm_strength = NA
results$max_peak_of_int = NA
results$start = NA
results$end = NA
results$from = from
results$to = to
return(results)
}
##### Flow Control Parameters #####
#1. Sampling_rate must exist
if (is.null(sampling_rate)){
stop("must provide a sampling_rate")
} else {
sampling_bin_size = as.numeric(str_extract(sampling_rate, "\\d*"))
sampling_rate = str_remove(sampling_rate, "\\d* *")
}
#2. the period must be calculated from the data on the second day.
start <- duration(from+24, 'hours')
end <- duration(to+24, 'hours')
##### ACF ######
#4. Autocorrelation
#Carry out the autocorrelation with the data
autocorrelation = as.numeric(acf(x = values, #Select the values in the window
y = values,
na.action = na.pass,
type = "correlation",
plot = FALSE,
lag.max = length(values))$acf) #$acf is to keep only the acf values. We can infer lag position from those
# Change change all NA autocorrelations to 0
# The function that looks for peaks doesn't allow NA, 0 would be ignored if we put a threshold, therefore it won't affect
# the results
autocorrelation = ifelse(is.na(autocorrelation), 0, autocorrelation)
len_autocor = length(autocorrelation)
#Find the peaks
peaks = findpeaks(autocorrelation, sortstr = TRUE)
#If there are no Peaks, return NA
if (rlang::is_empty(peaks)) {
results$datetime = NA
results$autocorrelation = NA
results$power = NA
results$period = NA
results$rythm_strength = NA
results$max_peak_of_int = NA
results$start = NA
results$end = NA
results$from = from
results$to = to
return(results)
}
peaks = tibble(auto_power = peaks[,1],
datetime = duration(peaks[,2], sampling_rate))
#Keep only the positive peaks
peaks = dplyr::filter(peaks, auto_power >= 0.2)
#Find the maximum peak within the scope
peaks_of_int = filter(peaks, datetime >= start, datetime <= end)
#Make sure the peaks_of_int is not empty, otherwise return NA
if (!all(is_empty(peaks_of_int$auto_power))) {
#Get the maximum peak
max_peak_of_int = max(peaks_of_int$auto_power)
#Get the period of the maximum peak
period = filter(peaks_of_int, auto_power == max_peak_of_int)$datetime
#Translate the period into the correct time
period = as.numeric(period, 'hours') - 24
#Get datetime of the maximum peak
max_date = filter(peaks_of_int, auto_power == max_peak_of_int)$datetime
max_date = as.numeric(duration(max_date, sampling_rate), 'hours')
#Get the Rhythm Strength
rhythm_strength = max_peak_of_int / (1.965/sqrt(nrow(df)))
} else {
results$datetime = NA
results$autocorrelation = NA
results$power = NA
results$period = NA
results$rythm_strength = NA
results$max_peak_of_int = NA
results$start = NA
results$end = NA
results$from = from
results$to = to
results$start = start
results$end = end
return(results)
}
results$datetime = max_date
results$autocorrelation = autocorrelation
results$power = peaks$auto_power
results$period = period
results$rythm_strength = rhythm_strength
results$max_peak_of_int = max_peak_of_int
results$start = start
results$end = end
results$from = from
results$to = to
#Return results
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.