#' @title mg_function_extract_spreaded_ts
#'
#' @description extract a spreaded time series from the dataframe used in function mg_function_check_number_complete_time_series
#' @param df dataframe which contains the value and date column
#' @param input_settings dataframe with the input settings of function mg_function_check_number_complete_time_series, generated by this function
#' @param start_zeitpunkt starting time for which the time series should be extracted. Has to be same as from function mg_function_check_number_complete_time_series
#'
#'
#' @return dataframe of the time series in wide format with equal distances:
#'
#'
#'
#'
#' @examples
#' spreaded_ts<- mg_function_extract_spreaded_ts(start_zeitpunkt = df_complete_time_series$number_of_timeseries$start_zeitpunkt[1],
#' input_settings = df_complete_time_series$input_settings,
#' df = wasserstand_projekt)
#'
#' @export
#' @importFrom dplyr %>% mutate group_by
#' @importFrom tidyr spread
#'
#'
mg_function_extract_spreaded_ts <- function(start_zeitpunkt,input_settings,df){
#unify
xx_df <- as.data.frame(df)
xx_df$date <- xx_df[,input_settings$date_column_name]
xx_df$value <- xx_df[,input_settings$value_column_name]
#set to datetime
xx_df$date <- as.POSIXct (xx_df$date)
#get first and last date in df
first_time <- min(xx_df$date, na.rm = T)
last_time <- max(xx_df$date, na.rm = T)
n_jahre_width_window <- input_settings$time_span_years*365*24*60*60 # breite moving window = Anzahl der jahre in denen vergleichbare ganglinien gesucht werden
n_tage_step_window <- input_settings$moving_window_width_days*24*60*60 # anzahl der Tage die das movong window vorr?ckt
xx_zeitraumliste <- seq(from = first_time, to = last_time, by = n_tage_step_window)
xx_iterator <- which(xx_zeitraumliste == start_zeitpunkt)
start_zeitpunkt <- xx_zeitraumliste[xx_iterator]
end_zeitpunkt <- xx_zeitraumliste[xx_iterator+(n_jahre_width_window/(365*24*60*60)*12)]
#Auswahl der Zeitr?ume
if(xx_iterator == 1){
xx_sub_df <- xx_df[which(xx_df$date < end_zeitpunkt),]
}
if(xx_iterator > 1 && xx_iterator < (length(xx_zeitraumliste)-1)){
xx_sub_df <- xx_df[which(xx_df$date > start_zeitpunkt),]
xx_sub_df <- xx_sub_df[which(xx_sub_df$date < end_zeitpunkt),]
}
if(xx_iterator == (length(xx_zeitraumliste)-(n_jahre_width_window/(365*24*60*60)*12))){
xx_sub_df <- xx_df[which(xx_df$date >start_zeitpunkt),]
}
xx_sub_df <- as.data.frame(xx_sub_df)
xx_sub_df <- xx_sub_df[,c("date",input_settings$id_column_name,"value")]
xx_sub_df <- unique(xx_sub_df)
#spread data to get NA values and homogenous time series
xx_sub_df_spread <- spread(xx_sub_df,input_settings$id_column_name, value,fill = NA)
#funktion f?r bestimmung anteil NA
MG_FUN_ratio_na <- function(x){
sum(is.na(x))/length(x)
}
#Bestimmung anteil na
xx_anteil_na <- sapply(xx_sub_df_spread[,2:length(names(xx_sub_df_spread))], MG_FUN_ratio_na)
# Auswahl der Messstellen die einen kleineren anteil als na als festgelegt in fraction_of_NAs_allowed
xx_sub_df_spread1 <- xx_sub_df_spread[,(which(xx_anteil_na < input_settings$fraction_of_NAs_allowed)+1)]
#hier wieder datum hinzuf?gen
xx_sub_df_spread2 <- cbind(xx_sub_df_spread[,1], xx_sub_df_spread1)
names(xx_sub_df_spread2)[names(xx_sub_df_spread2) == 'xx_sub_df_spread[, 1]'] <- input_settings$date_column_name #
print(paste0("start_time: ",start_zeitpunkt,"; end_time: ",end_zeitpunkt, "; number of valid time series: ",length(names(xx_sub_df_spread2))))
return(xx_sub_df_spread2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.