# Nothing
########### is.pconsecutive ##############
# little helper function to determine if the time periods of an object are consecutive per id.
# By consecutive we mean "consecutive in the numbers", i.e., is.pconsecutive takes the numerical
# value of the time variable into account: t, t+1, t+2, ... where t is an integer
#
# For this, we need as.numeric(as.character(time_var)) where as.character is a crucial part!
# Equivalent but more efficient is as.numeric(levels(id_timevar))[as.integer(id_timevar)]
# (see R FAQ 7.10 for coercing factors to numeric)
# and the coercion of time_var in this manner needs to result in meaningful numbers.
#
# see also in separate file make.pconsecutive.R:
# * make.pconsecutive
# * make.pbalanced
#' Check if time periods are consecutive
#'
#' This function checks for each individual if its associated time periods are
#' consecutive (no "gaps" in time dimension per individual)
#'
#' (p)data.frame, pseries and estimated panelmodel objects can be tested if
#' their time periods are consecutive per individual. For evaluation of
#' consecutiveness, the time dimension is interpreted to be numeric, and the
#' data are tested for being a regularly spaced sequence with distance 1
#' between the time periods for each individual (for each individual the time
#' dimension can be interpreted as sequence t, t+1, t+2, \ldots{} where t is an
#' integer). As such, the "numerical content" of the time index variable is
#' considered for consecutiveness, not the "physical position" of the various
#' observations for an individual in the (p)data.frame/pseries (it is not
#' about "neighbouring" rows). If the object to be evaluated is a pseries or a
#' pdata.frame, the time index is coerced from factor via as.character to
#' numeric, i.e., the series
#' `as.numeric(as.character(index(<pseries/pdata.frame>)[[2]]))` is
#' evaluated for gaps.
#'
#' The default method also works for argument `x` being an arbitrary
#' vector (see **Examples**), provided one can supply arguments `id`
#' and `time`, which need to be ordered as stacked time series. As only
#' `id` and `time` are really necessary for the default method to
#' evaluate the consecutiveness, `x = NULL` is also possible. However, if
#' the vector `x` is also supplied, additional input checking for equality
#' of the lengths of `x`, `id` and `time` is performed, which is
#' safer.
#'
#' For the data.frame interface, the data is ordered in the appropriate way
#' (stacked time series) before the consecutiveness is evaluated. For the
#' pdata.frame and pseries interface, ordering is not performed because both
#' data types are already ordered in the appropriate way when created.
#'
#' Note: Only the presence of the time period itself in the object is tested,
#' not if there are any other variables. `NA` values in individual index
#' are not examined but silently dropped - In this case, it is not clear which
#' individual is meant by id value `NA`, thus no statement about
#' consecutiveness of time periods for those "`NA`-individuals" is
#' possible.
#'
#' @name is.pconsecutive
#' @aliases is.pconsecutive
#' @param x usually, an object of class `pdata.frame`,
#' `data.frame`, `pseries`, or an estimated
#' `panelmodel`; for the default method `x` can also be
#' an arbitrary vector or `NULL`, see **Details**,
#' @param na.rm.tindex logical indicating whether any `NA` values
#' in the time index are removed before consecutiveness is
#' evaluated (defaults to `FALSE`),
#' @param index only relevant for `data.frame` interface; if
#' `NULL`, the first two columns of the data.frame are
#' assumed to be the index variables; if not `NULL`, both
#' dimensions ('individual', 'time') need to be specified by
#' `index` for `is.pconsecutive` on data frames, for
#' further details see [pdata.frame()],
#' @param id,time only relevant for default method: vectors specifying
#' the id and time dimensions, i. e. a sequence of individual and
#' time identifiers, each as stacked time series,
#' @param \dots further arguments.
#' @return A named `logical` vector (names are those of the
#' individuals). The i-th element of the returned vector
#' corresponds to the i-th individual. The values of the i-th
#' element can be: \item{TRUE}{if the i-th individual has
#' consecutive time periods,} \item{FALSE}{if the i-th
#' individual has non-consecutive time periods,}
#' \item{"NA"}{if there are any NA values in time index of
#' the i-th individual; see also argument `na.rm.tindex`
#' to remove those.}
#' @export
#' @author Kevin Tappe
#' @seealso [make.pconsecutive()] to make data consecutive
#' (and, as an option, balanced at the same time) and
#' [make.pbalanced()] to make data balanced.\cr
#' [pdim()] to check the dimensions of a 'pdata.frame'
#' (and other objects), [pvar()] to check for individual
#' and time variation of a 'pdata.frame' (and other objects),
#' [lag()] for lagged (and leading) values of a
#' 'pseries' object.\cr
#'
#' [pseries()], [data.frame()], [pdata.frame()],
#' for class 'panelmodel' see [plm()] and [pgmm()].
#' @keywords attribute
#' @examples
#'
#' data("Grunfeld", package = "plm")
#' is.pconsecutive(Grunfeld)
#' is.pconsecutive(Grunfeld, index=c("firm", "year"))
#'
#' # delete 2nd row (2nd time period for first individual)
#' # -> non consecutive
#' Grunfeld_missing_period <- Grunfeld[-2, ]
#' is.pconsecutive(Grunfeld_missing_period)
#' all(is.pconsecutive(Grunfeld_missing_period)) # FALSE
#'
#' # delete rows 1 and 2 (1st and 2nd time period for first individual)
#' # -> consecutive
#' Grunfeld_missing_period_other <- Grunfeld[-c(1,2), ]
#' is.pconsecutive(Grunfeld_missing_period_other) # all TRUE
#'
#' # delete year 1937 (3rd period) for _all_ individuals
#' Grunfeld_wo_1937 <- Grunfeld[Grunfeld$year != 1937, ]
#' is.pconsecutive(Grunfeld_wo_1937) # all FALSE
#'
#' # pdata.frame interface
#' pGrunfeld <- pdata.frame(Grunfeld)
#' pGrunfeld_missing_period <- pdata.frame(Grunfeld_missing_period)
#' is.pconsecutive(pGrunfeld) # all TRUE
#' is.pconsecutive(pGrunfeld_missing_period) # first FALSE, others TRUE
#'
#'
#' # panelmodel interface (first, estimate some models)
#' mod_pGrunfeld <- plm(inv ~ value + capital, data = Grunfeld)
#' mod_pGrunfeld_missing_period <- plm(inv ~ value + capital, data = Grunfeld_missing_period)
#'
#' is.pconsecutive(mod_pGrunfeld)
#' is.pconsecutive(mod_pGrunfeld_missing_period)
#'
#' nobs(mod_pGrunfeld) # 200
#' nobs(mod_pGrunfeld_missing_period) # 199
#'
#'
#' # pseries interface
#' pinv <- pGrunfeld$inv
#' pinv_missing_period <- pGrunfeld_missing_period$inv
#'
#' is.pconsecutive(pinv)
#' is.pconsecutive(pinv_missing_period)
#'
#' # default method for arbitrary vectors or NULL
#' inv <- Grunfeld$inv
#' inv_missing_period <- Grunfeld_missing_period$inv
#' is.pconsecutive(inv, id = Grunfeld$firm, time = Grunfeld$year)
#' is.pconsecutive(inv_missing_period, id = Grunfeld_missing_period$firm,
#' time = Grunfeld_missing_period$year)
#'
#' # (not run) demonstrate mismatch lengths of x, id, time
#' # is.pconsecutive(x = inv_missing_period, id = Grunfeld$firm, time = Grunfeld$year)
#'
#' # only id and time are needed for evaluation
#' is.pconsecutive(NULL, id = Grunfeld$firm, time = Grunfeld$year)
#'
is.pconsecutive <- function(x, ...) {
  # S3 generic: hands the call over to the is.pconsecutive.* method that
  # matches the class of 'x'; further arguments travel through '...'.
  UseMethod("is.pconsecutive")
}
#' @rdname is.pconsecutive
#' @export
is.pconsecutive.default <- function(x, id, time, na.rm.tindex = FALSE, ...) {
  # Default method: decide, per individual, whether its time periods form the
  # sequence t, t+1, t+2, ... (distance 1 between successive periods).
  #
  # Arguments:
  #   x             only used for an input check: if it is a non-NULL plain
  #                 vector, its length must equal that of 'id' and 'time'
  #                 (is.vector() is FALSE for, e.g., factors and data frames,
  #                 so those skip the check)
  #   id, time      individual and time identifiers of equal length; 'time' is
  #                 expected to be organised as stacked time series, i.e.,
  #                 successive blocks of individuals, each block sorted by time
  #   na.rm.tindex  if TRUE, observations with NA in 'time' are dropped before
  #                 the evaluation
  #
  # Value: logical vector with one element per individual (names are the
  #   individuals): TRUE (consecutive), FALSE (not consecutive) or NA (the
  #   individual's time index contains NA).

  # input checks
  if (length(id) != length(time))
    stop(paste0("arguments 'id' and 'time' must have same length: length(id): ", length(id), ", length(time) ", length(time)))

  # 'id' and 'time' are known to be of equal length here, so comparing 'x'
  # against 'id' is sufficient
  if (!is.null(x) && is.vector(x) && length(x) != length(id))
    stop(paste0("arguments 'x', 'id', 'time' must have same length: length(x): ",
                length(x), ", length(id): ", length(id), ", length(time): ", length(time)))

  if (na.rm.tindex) {
    keep <- !is.na(time)
    time <- time[keep]
    id <- id[keep]
  }

  # A factor time index (as in pdata.frames and pseries) must be converted via
  # its levels; as.numeric() on the factor itself would return the internal
  # integer codes (see R FAQ 7.10). This form is the more efficient equivalent
  # of as.numeric(as.character(time)).
  if (is.factor(time)) time <- as.numeric(levels(time))[as.integer(time)]

  # split() leaves out observations with NA in 'id'; drop = TRUE removes
  # unused factor levels so that no empty groups are evaluated
  times_by_id <- split(time, id, drop = TRUE)

  vapply(times_by_id, function(id_timevar) {
    # NA in the time periods of an individual: no statement possible
    if (anyNA(id_timevar)) return(NA)
    # Every step must be exactly +1. Comparing only the first and last period
    # against the number of observations would let a duplicated period mask a
    # gap (e.g., 1, 2, 2, 4). A single period has no steps and yields TRUE.
    all(diff(id_timevar) == 1)
  }, FUN.VALUE = TRUE)
}
#' @rdname is.pconsecutive
#' @export
is.pconsecutive.data.frame <- function(x, index = NULL, na.rm.tindex = FALSE, ...) {
  # data.frame method: extract the individual and time index columns, order
  # them as stacked time series and delegate to the default method.
  #
  # Arguments:
  #   x             a data.frame
  #   index         NULL (first two columns are taken as individual and time
  #                 index) or a length-2 specification of both index columns
  #   na.rm.tindex  passed on to the default method
  # Value: whatever is.pconsecutive.default returns for the ordered indexes.
  if (!is.null(index) && length(index) != 2L)
    stop("if argument 'index' is not NULL, 'index' needs to specify
'individual' and 'time' dimension for is.pconsecutive to work on a data.frame")

  # if index not provided, assume first two columns to be the index vars
  index_orig_names <- if (is.null(index)) names(x)[1:2] else index

  # drop = TRUE requests plain vectors explicitly, so that data.frame
  # subclasses whose `[` keeps one-column results as data frames also work
  id <- x[ , index_orig_names[1L], drop = TRUE]
  time <- x[ , index_orig_names[2L], drop = TRUE]

  # order as stacked time series (by id and time) first, otherwise the default
  # method does not work correctly
  ord <- order(id, time)

  # Only the two index vectors are needed: the default method uses 'x' solely
  # for a length check on plain vectors, which never applies to a data.frame,
  # so NULL is passed instead of a reordered copy of the whole data.frame.
  is.pconsecutive.default(NULL, id[ord], time[ord], na.rm.tindex = na.rm.tindex, ...)
}
#' @rdname is.pconsecutive
#' @export
is.pconsecutive.pseries <- function(x, na.rm.tindex = FALSE, ...) {
  # pseries method: read the individual (1st) and time (2nd) index from the
  # "index" attribute of 'x' and delegate to the default method.
  idx <- unclass(attr(x, "index")) # unclassed for speed
  individual <- idx[[1L]]
  period <- idx[[2L]]
  is.pconsecutive.default(x, individual, period, na.rm.tindex = na.rm.tindex, ...)
}
#' @rdname is.pconsecutive
#' @export
is.pconsecutive.pdata.frame <- function(x, na.rm.tindex = FALSE, ...) {
  # pdata.frame method: read the individual (1st) and time (2nd) index from
  # the "index" attribute of 'x' and delegate to the default method.
  idx <- unclass(attr(x, "index")) # unclassed for speed
  individual <- idx[[1L]]
  period <- idx[[2L]]
  is.pconsecutive.default(x, individual, period, na.rm.tindex = na.rm.tindex, ...)
}
#' @rdname is.pconsecutive
#' @export
is.pconsecutive.panelmodel <- function(x, na.rm.tindex = FALSE, ...) {
  # panelmodel method: the index is taken from the "index" attribute of the
  # model's stored model frame (x$model); evaluation is delegated to the
  # default method.
  idx <- unclass(attr(x$model, "index")) # unclassed for speed
  individual <- idx[[1L]]
  period <- idx[[2L]]
  is.pconsecutive.default(x, individual, period, na.rm.tindex = na.rm.tindex, ...)
}
########### is.pbalanced ##############
### for convenience and to be faster than pdim() for the purpose
### of the determination of balancedness only, because it avoids
### pdim()'s calculations which are unnecessary for balancedness.
###
### copied (and adapted) methods and code from pdim.*
### (only relevant parts to determine balancedness)
#' Check if data are balanced
#'
#' This function checks if the data are balanced, i.e., if each individual has
#' the same time periods
#'
#' Balanced data are data for which each individual has the same time periods.
#' The returned values of the `is.pbalanced(object)` methods are identical
#' to `pdim(object)$balanced`. `is.pbalanced` is provided as a short
#' cut and is faster than `pdim(object)$balanced` because it avoids those
#' computations performed by `pdim` which are unnecessary to determine the
#' balancedness of the data.
#'
#' @aliases is.pbalanced
#' @param x an object of class `pdata.frame`, `data.frame`,
#' `pseries`, `panelmodel`, or `pgmm`,
#' @param y (only in default method) the time index variable (2nd index
#' variable),
#' @param index only relevant for `data.frame` interface; if
#' `NULL`, the first two columns of the data.frame are
#' assumed to be the index variables; if not `NULL`, both
#' dimensions ('individual', 'time') need to be specified by
#' `index` as character of length 2 for data frames, for
#' further details see [pdata.frame()],
#' @param \dots further arguments.
#' @return A logical indicating whether the data associated with
#' object `x` are balanced (`TRUE`) or not
#' (`FALSE`).
#' @seealso [punbalancedness()] for two measures of
#' unbalancedness, [make.pbalanced()] to make data
#' balanced; [is.pconsecutive()] to check if data are
#' consecutive; [make.pconsecutive()] to make data
#' consecutive (and, optionally, also balanced).\cr
#' [pdim()] to check the dimensions of a 'pdata.frame'
#' (and other objects), [pvar()] to check for individual
#' and time variation of a 'pdata.frame' (and other objects),
#' [pseries()], [data.frame()],
#' [pdata.frame()].
#' @export
#' @keywords attribute
#' @examples
#'
#' # take balanced data and make it unbalanced
#' # by deletion of 2nd row (2nd time period for first individual)
#' data("Grunfeld", package = "plm")
#' Grunfeld_missing_period <- Grunfeld[-2, ]
#' is.pbalanced(Grunfeld_missing_period) # check if balanced: FALSE
#' pdim(Grunfeld_missing_period)$balanced # same
#'
#' # pdata.frame interface
#' pGrunfeld_missing_period <- pdata.frame(Grunfeld_missing_period)
#' is.pbalanced(pGrunfeld_missing_period)
#'
#' # pseries interface
#' is.pbalanced(pGrunfeld_missing_period$inv)
#'
is.pbalanced <- function(x, ...) {
  # S3 generic: hands the call over to the is.pbalanced.* method that matches
  # the class of 'x'; further arguments travel through '...'.
  UseMethod("is.pbalanced")
}
#' @rdname is.pbalanced
#' @export
is.pbalanced.default <- function(x, y, ...) {
  # Default method: balancedness from two index vectors.
  #
  # Arguments:
  #   x  individual index variable (1st index variable)
  #   y  time index variable (2nd index variable), same length as 'x'
  # Value: TRUE if every individual-time combination occurs at least once,
  #   FALSE otherwise. A warning is issued if any combination occurs more
  #   than once.
  if (length(x) != length(y)) stop("The length of the two inputs differs\n")

  x <- x[drop = TRUE] # drop unused factor levels so that table
  y <- y[drop = TRUE] # gives only needed combinations

  # number of observations per (id, time) couple
  counts <- as.vector(table(x, y))

  # The counts themselves must be kept: testing the logical result of
  # 'counts == 0L' against '> 1L' could never be TRUE, so duplicates would go
  # unreported.
  balanced <- !any(counts == 0L)
  if (any(counts > 1L)) warning("duplicate couples (id-time)\n")

  return(balanced)
}
#' @rdname is.pbalanced
#' @export
is.pbalanced.data.frame <- function(x, index = NULL, ...) {
  # data.frame method: convert to a pdata.frame (using 'index' to identify the
  # index variables), then evaluate balancedness on the two index variables
  # stored in its "index" attribute.
  pdat <- pdata.frame(x, index)
  idx <- unclass(attr(pdat, "index")) # unclassed for speed
  is.pbalanced(idx[[1L]], idx[[2L]])
}
#' @rdname is.pbalanced
#' @export
is.pbalanced.pdata.frame <- function(x, ...) {
  # pdata.frame method: evaluate balancedness on the individual (1st) and time
  # (2nd) index stored in the "index" attribute of 'x'.
  idx <- unclass(attr(x, "index")) # unclassed for speed
  individual <- idx[[1L]]
  period <- idx[[2L]]
  is.pbalanced(individual, period)
}
#' @rdname is.pbalanced
#' @export
is.pbalanced.pseries <- function(x, ...) {
  # pseries method: evaluate balancedness on the individual (1st) and time
  # (2nd) index stored in the "index" attribute of 'x'.
  idx <- unclass(attr(x, "index")) # unclassed for speed
  individual <- idx[[1L]]
  period <- idx[[2L]]
  is.pbalanced(individual, period)
}
#' @rdname is.pbalanced
#' @export
is.pbalanced.panelmodel <- function(x, ...) {
  # panelmodel method: extract the model frame of the estimated model and
  # re-dispatch is.pbalanced on it.
  is.pbalanced(model.frame(x))
}
#' @rdname is.pbalanced
#' @export
is.pbalanced.pgmm <- function(x, ...) {
  # pgmm method: pgmm objects are also of class panelmodel, but the "pdim"
  # attribute they carry is used directly: its 'balanced' element is returned.
  dims <- attr(x, "pdim")
  dims$balanced
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.