#' Make panel data balanced
#'
#' @param df an object of class `data.frame`, `tibble`
#' @param type character, one of `"fill_NA"`,
#' `"drop_individuals"`, or `"drop_times"`, see
#' **Details**,
#' @param index only relevant for `data.frame` interface; if
#' `NULL`, the first two columns of the data.frame are
#' assumed to be the index variables; if not `NULL`, both
#' dimensions ('individual', 'time') need to be specified by
#' `index` as character of length 2 for data frame.
#' @param individual_var default is NULL; time invariant variables in the dataset, when balancing dataset they would keep the same by `"updown"` search.
#' @param time_seq default is NULL; window for balancing panel data set.
#'
#' @return
#' @export
#'
#' @examples
#' data("EmplUK", package = "plm")
#' EmplUK %>%
#' select(year, firm) %>%
#' filter(firm %in% c(1:10)) %>%
#' table()
#'
#' balance_panel(EmplUK, type = "fill_NA", individual_var = c("sector"))
balance_panel <- function(df,
type = c("fill_NA", "drop_individuals", "drop_times"),
index = NULL,
individual_var = NULL,
time_seq = NULL){
type <- match.arg(type)
# Step1: identify the index
if (!is.null(index) && length(index) != 2L){ # index less than time and id
stop("if argument 'index' is not NULL, 'index' needs to specify
'individual' and 'time' dimension")
}
# default is the first two columns as index
index <- if(is.null(index)) names(df)[1:2] else index
# check variables does not be contained by names(df) and coincide with index
if(!is.null(individual_var)){
if(!identical(setdiff(individual_var, names(df)), character(0))) stop("'individual_var' should be contained in names(data.frame)")
if(any(individual_var %in% index)) {
individual_var <- setdiff(individual_var, index)
warning("there are 'individual_var' coincided with 'index'")
}
}
# identify time sequence
time_seq <- if(!is.null(time_seq)) time_seq else df %>%
dplyr::select(index[2]) %>%
unique %>%
pull()
switch(type,
"fill_NA" = {
result <- df %>%
dplyr::group_by_at(index[1]) %>%
tidyr::fill_(if(is.null(individual_var)) NULL else individual_var, .direction = "updown") %>%
dplyr::group_by_at(c(index[1], individual_var)) %>%
tidyr::expand(time_seq) %>%
dplyr::rename_with(~index[2], time_seq) %>%
{dplyr::left_join(., df, by = names(.))} %>%
dplyr::ungroup()
},
"drop_individuals" = {
result <- df %>%
# filter_(paste0(index[2], " %in% time_seq")) %>%
dplyr::filter(.[[index[2]]] %in% time_seq) %>%
dplyr::group_by_at(index[1]) %>%
tidyr::fill_(if(is.null(individual_var)) NULL else individual_var, .direction = "updown") %>%
dplyr::filter_at(union(index, individual_var),all_vars(!is.na(.))) %>%
dplyr::ungroup()
},
"drop_times" = {
time_new <- Reduce(intersect,
split(df[, index[2]], df[, index[1]])) %>%
intersect(time_seq)
result <- df %>%
dplyr::filter(.[[index[2]]] %in% time_new) %>%
tibble::tibble()
})
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.