R/mg_function_extract_spreaded_ts.R

Defines functions mg_function_extract_spreaded_ts

Documented in mg_function_extract_spreaded_ts

#' @title mg_function_extract_spreaded_ts
#'
#' @description extract a spreaded time series from the dataframe used in function mg_function_check_number_complete_time_series
#' @param df dataframe which contains the value and date column
#' @param input_settings dataframe with the input settings of function mg_function_check_number_complete_time_series, generated by this function
#' @param start_zeitpunkt starting time for which the time series should be extracted. Has to be same as from function mg_function_check_number_complete_time_series

#'
#'
#' @return dataframe of the time series in wide format with equal distances:
#'
#'
#'
#'
#' @examples
#' spreaded_ts<- mg_function_extract_spreaded_ts(start_zeitpunkt = df_complete_time_series$number_of_timeseries$start_zeitpunkt[1],
#'                                               input_settings = df_complete_time_series$input_settings,
#'                                               df = wasserstand_projekt)
#'
#' @export
#' @importFrom dplyr %>% mutate group_by
#' @importFrom tidyr spread
#'
#'

mg_function_extract_spreaded_ts <- function(start_zeitpunkt,input_settings,df){


  #unify
  xx_df <- as.data.frame(df)
  xx_df$date <- xx_df[,input_settings$date_column_name]
  xx_df$value <- xx_df[,input_settings$value_column_name]

  #set to datetime
  xx_df$date <- as.POSIXct (xx_df$date)

  #get first and last date in df
  first_time <- min(xx_df$date, na.rm = T)
  last_time <- max(xx_df$date, na.rm = T)


  n_jahre_width_window <- input_settings$time_span_years*365*24*60*60 # breite moving window =  Anzahl der jahre in denen vergleichbare ganglinien gesucht werden
  n_tage_step_window <-  input_settings$moving_window_width_days*24*60*60 # anzahl der Tage die das movong window vorr?ckt


  xx_zeitraumliste <- seq(from = first_time, to = last_time, by = n_tage_step_window)

  xx_iterator <- which(xx_zeitraumliste == start_zeitpunkt)
  start_zeitpunkt <- xx_zeitraumliste[xx_iterator]
  end_zeitpunkt <- xx_zeitraumliste[xx_iterator+(n_jahre_width_window/(365*24*60*60)*12)]

  #Auswahl der Zeitr?ume
  if(xx_iterator == 1){
    xx_sub_df <- xx_df[which(xx_df$date < end_zeitpunkt),]
  }
  if(xx_iterator > 1 && xx_iterator < (length(xx_zeitraumliste)-1)){
    xx_sub_df <- xx_df[which(xx_df$date > start_zeitpunkt),]
    xx_sub_df <- xx_sub_df[which(xx_sub_df$date < end_zeitpunkt),]
  }
  if(xx_iterator == (length(xx_zeitraumliste)-(n_jahre_width_window/(365*24*60*60)*12))){
    xx_sub_df <- xx_df[which(xx_df$date >start_zeitpunkt),]
  }

  xx_sub_df <- as.data.frame(xx_sub_df)







  xx_sub_df <- xx_sub_df[,c("date",input_settings$id_column_name,"value")]

  xx_sub_df <- unique(xx_sub_df)

  #spread data to get NA values and homogenous time series
  xx_sub_df_spread <- spread(xx_sub_df,input_settings$id_column_name, value,fill = NA)



  #funktion f?r bestimmung anteil NA
  MG_FUN_ratio_na <- function(x){
    sum(is.na(x))/length(x)
  }


  #Bestimmung anteil na
  xx_anteil_na <- sapply(xx_sub_df_spread[,2:length(names(xx_sub_df_spread))],  MG_FUN_ratio_na)



  # Auswahl der Messstellen die einen kleineren anteil als na als festgelegt in fraction_of_NAs_allowed
  xx_sub_df_spread1 <-  xx_sub_df_spread[,(which(xx_anteil_na < input_settings$fraction_of_NAs_allowed)+1)]


  #hier wieder datum hinzuf?gen
  xx_sub_df_spread2 <-  cbind(xx_sub_df_spread[,1], xx_sub_df_spread1)
  names(xx_sub_df_spread2)[names(xx_sub_df_spread2) == 'xx_sub_df_spread[, 1]'] <- input_settings$date_column_name  #


  print(paste0("start_time: ",start_zeitpunkt,";   end_time: ",end_zeitpunkt, ";  number of valid time series: ",length(names(xx_sub_df_spread2))))

  return(xx_sub_df_spread2)
}
mghydro/LSHydroMG documentation built on Jan. 28, 2022, 3:31 p.m.