#' Get granularity of values for each Key in df
#'
#' Every Key is classified as "daily", "weekly" or "monthly":
#' \itemize{
#'   \item weekly: all gaps between consecutive dates of the Key (dates taken
#'     in ascending order) are multiples of 7 days, from 7 up to 700 days.
#'     The first observation counts as a 7-day gap, so a Key with a single
#'     observation is weekly.
#'   \item monthly: either the Key has zero variance of Value within every
#'     calendar month for which a variance can be computed (a monthly figure
#'     spread over several days), or it has exactly one observation in each
#'     calendar month it covers.
#'   \item daily: every Key that is neither weekly nor monthly.
#' }
#' A Key that qualifies as both weekly and monthly is labelled "weekly".
#'
#' Keys with a spread monthly figure are collapsed before labelling: only the
#' rows dated on the first day of a month are kept, and their Value is
#' multiplied by the largest number of rows any such Key has in that month.
#' Only Key, Date and Value are retained for these rows; any other column of
#' df is NA for them.
#'
#' @param df data frame containing at least Columns Key, Date and Value
#' @return df with an additional character column granularity ("daily",
#'   "weekly" or "monthly"). Rows of collapsed monthly Keys are appended after
#'   the rows of all other Keys, so the result can have fewer rows than df.
#' @export
get_granularity <- function(df) {
  assertthat::assert_that(is.data.frame(df), msg = "df must be a data.frame")
  assertthat::assert_that(
    all(c("Key", "Date", "Value") %in% colnames(df)),
    msg = "df must include columns 'Key', 'Date' and 'Value'"
  )

  # Bind the pipe locally and namespace-qualify every verb instead of calling
  # library(): attaching packages would alter the caller's search path.
  `%>%` <- magrittr::`%>%`

  # Gaps (in days) accepted for a weekly series: 7, 14, ..., 700.
  weekly_gaps <- (1:100) * 7

  # Weekly keys. Rows are ordered by Key and Date for this computation only,
  # so that diff() measures gaps between consecutive dates even when the
  # input is unsorted. The first row of a Key has no predecessor and is
  # seeded with a gap of 7.
  keys_weekly <- df %>%
    dplyr::arrange(Key, Date) %>%
    dplyr::group_by(Key) %>%
    dplyr::mutate(Date_diff = c(7, as.numeric(diff(as.Date(Date))))) %>%
    dplyr::summarise(Date_Diff_all_7k = all(Date_diff %in% weekly_gaps)) %>%
    dplyr::filter(Date_Diff_all_7k) %>%
    dplyr::pull("Key")

  # Monthly keys, variant 1: Value is constant within each calendar month.
  # Months with fewer than two non-missing values give an NA variance and are
  # dropped by na.omit(), so a Key needs at least one month with a computable
  # variance to qualify here.
  keys_monthly_spread <- df %>%
    dplyr::mutate(Month = format(as.Date(Date), "%Y-%m")) %>%
    dplyr::group_by(Key, Month) %>%
    dplyr::summarise(Variance = stats::var(Value, na.rm = TRUE)) %>%
    stats::na.omit() %>%
    dplyr::group_by(Key) %>%
    dplyr::summarise(
      no_monthly_variance = all(Variance == 0, na.rm = TRUE)
    ) %>%
    dplyr::filter(no_monthly_variance) %>%
    dplyr::pull("Key")

  # Collapse variant-1 keys to one row per month. The multiplier is the
  # largest per-Key row count observed in that month among these keys, and
  # only rows dated on the first of a month survive.
  monthly_collapsed <- df %>%
    dplyr::filter(Key %in% keys_monthly_spread) %>%
    dplyr::mutate(
      Month = lubridate::month(Date),
      Year = lubridate::year(Date)
    ) %>%
    dplyr::group_by(Key, Month, Year) %>%
    dplyr::mutate(NumberOfDaysThatMonth = dplyr::n()) %>%
    dplyr::group_by(Month, Year) %>%
    dplyr::mutate(NumberOfDaysThatMonth = max(NumberOfDaysThatMonth)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(Value = Value * NumberOfDaysThatMonth) %>%
    dplyr::filter(lubridate::day(Date) == 1) %>%
    dplyr::select(Key, Date, Value)

  df <- df %>%
    dplyr::filter(!Key %in% keys_monthly_spread) %>%
    dplyr::bind_rows(monthly_collapsed)

  # Monthly keys, variant 2: exactly one row in every calendar month of the
  # Key. Evaluated on the collapsed data, so variant-1 keys that kept their
  # first-of-month rows qualify here as well.
  keys_monthly_single <- df %>%
    dplyr::mutate(Month = format(as.Date(Date), "%Y-%m")) %>%
    dplyr::group_by(Key, Month) %>%
    dplyr::mutate(Values_in_Month = dplyr::n()) %>%
    dplyr::group_by(Key) %>%
    dplyr::summarise(only_one_Value_in_months = all(Values_in_Month == 1)) %>%
    dplyr::filter(only_one_Value_in_months) %>%
    dplyr::pull("Key")

  keys_monthly <- unique(c(keys_monthly_spread, keys_monthly_single))
  keys_daily <- setdiff(unique(df$Key), c(keys_weekly, keys_monthly))

  # keys_daily excludes weekly and monthly keys, so the order below only
  # matters for keys that are both weekly and monthly: weekly wins.
  dplyr::mutate(
    df,
    granularity = dplyr::case_when(
      Key %in% keys_daily ~ "daily",
      Key %in% keys_weekly ~ "weekly",
      Key %in% keys_monthly ~ "monthly",
      TRUE ~ NA_character_
    )
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.