dplyr_variants.R
In pmdplyr: 'dplyr' Extension for Common Panel Data Maneuvers

## ---- include = FALSE---------------------------------------------------------
# Chunk defaults for the rendered vignette: collapse source and output into
# one block, suppress warnings and messages, and prefix printed output
# with "#>".
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  message = FALSE,
  warning = FALSE
)

## ----setup--------------------------------------------------------------------
library(pmdplyr)

## -----------------------------------------------------------------------------
# Two small example tables that share the columns i and t.
# left_df has t = 1, 2, 3 for each i; right_df has t = 0, 2, 4 for each i.
left_df <- data.frame(
  i = rep(c(1, 2), each = 3),
  t = rep(c(1, 2, 3), times = 2),
  v1 = 1:6
)
right_df <- data.frame(
  i = rep(c(1, 2), each = 3),
  t = rep(c(0, 2, 4), times = 2),
  v2 = 7:12
)

# With no `by` supplied, left_join() joins on the columns the two data
# frames share: i and t. Only two (i, t) pairs in left_df also occur in
# right_df -- (i = 1, t = 2) and (i = 2, t = 2) -- so only those rows
# bring a v2 value into the joined data.
# Every other row of left_df finds no match.
left_join(x = left_df, y = right_df)

## ---- eval = FALSE------------------------------------------------------------
#  inexact_left_join(x, y,
#    by = NULL,
#    copy = FALSE,
#    suffix = c(".x", ".y"),
#    ...,
#    var = NULL,
#    jvar = NULL,
#    method,
#    exact = TRUE
#  )

## -----------------------------------------------------------------------------
# Rename the right-hand time variable; the inexact joins that follow
# refer to it as jvar = t_right.
right_df <- rename(right_df, t_right = t)

## -----------------------------------------------------------------------------
# Inexact match of left_df's t against right_df's t_right with
# method = "last" (per the pmdplyr docs: the most recent t_right that is
# not after t -- confirm in ?inexact_left_join).
inexact_left_join(
  x = left_df, y = right_df,
  var = t, jvar = t_right,
  method = "last"
)

## -----------------------------------------------------------------------------
# Same join, but with method = "next" (per the pmdplyr docs: the nearest
# t_right that is not before t -- confirm in ?inexact_left_join).
inexact_left_join(
  x = left_df, y = right_df,
  var = t, jvar = t_right,
  method = "next"
)

## -----------------------------------------------------------------------------
# Same join, but with method = "closest" (per the pmdplyr docs: the
# t_right nearest to t in either direction -- confirm in
# ?inexact_left_join).
inexact_left_join(
  x = left_df, y = right_df,
  var = t, jvar = t_right,
  method = "closest"
)

## -----------------------------------------------------------------------------
# Turn the single right-hand time variable into a pair of columns:
# t_bottom is the old t_right and t_top is t_bottom + 2.
right_df <- rename(right_df, t_bottom = t_right)
right_df <- mutate(right_df, t_top = t_bottom + 2)

# method = "between" is given two jvar columns; t is matched against the
# range they describe (presumably t_bottom is the lower end and t_top the
# upper end -- confirm in ?inexact_left_join).
inexact_left_join(
  x = left_df, y = right_df,
  var = t, jvar = c(t_bottom, t_top),
  method = "between"
)

## ---- eval = FALSE------------------------------------------------------------
#  safe_join(x, y,
#    expect = NULL,
#    join = NULL,
#    ...
#  )

## -----------------------------------------------------------------------------
# `left` is a panel: every i appears once per t, so i alone does not
# uniquely identify its rows
left <- data.frame(
  i = rep(c(1, 2), each = 2),
  t = rep(c(1, 2), times = 2),
  a = seq_len(4)
)
# `right` holds one row per individual, so i alone identifies its rows
right <- data.frame(
  i = c(1, 2),
  b = seq_len(2)
)

# Suppose we wrongly expect a one-to-one merge on i, forgetting that
# `left` is identified by i and t together. safe_join() produces an
# error here; try() lets the script carry on past it.
try(safe_join(x = left, y = right, expect = "1:1", join = left_join))

# The merge is really many-to-one, and that expectation is correct:
# with no `join` given, safe_join() returns TRUE; with a `join` given,
# it performs the join for us.
safe_join(x = left, y = right, expect = "m:1")
safe_join(x = left, y = right, expect = "m:1", join = left_join)

## ---- eval = FALSE------------------------------------------------------------
#  mutate_subset(.df,
#    ...,
#    .filter,
#    .group_i = TRUE,
#    .i = NULL,
#    .t = NULL,
#    .d = NA,
#    .uniqcheck = FALSE,
#    .setpanel = TRUE
#  )

## -----------------------------------------------------------------------------
# Six observations in two states; `state` is passed to pibble() as the
# individual identifier through .i.
df <- pibble(
  state = rep(c("CA", "NV"), each = 3),
  college = c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE),
  earn = c(1, 2, 3, 2, 3, 2),
  .i = state
)

df %>%
  # Average earnings computed over the college-graduate rows only
  # (.filter selects the rows that enter the mean)
  mutate_subset(college_earnings = mean(earn), .filter = college == TRUE) %>%
  # Gap between each observation's own earnings and that average
  mutate(earnings_vs_college = earn - college_earnings)

## ---- eval = FALSE------------------------------------------------------------
#  mutate_cascade(.df,
#    ...,
#    .skip = TRUE,
#    .backwards = FALSE,
#    .group_i = TRUE,
#    .i = NULL,
#    .t = NULL,
#    .d = NA,
#    .uniqcheck = FALSE,
#    .setpanel = TRUE
#  )

## -----------------------------------------------------------------------------
# Build PDV (presumably "present discounted value") for a five-period
# payout stream: each period's PDV is that period's payout plus .95 times
# the following period's PDV.
df <- pibble(
  t = c(1, 2, 3, 4, 5),
  payout = c(3, 4, 2, 2, 4),
  .t = t
) %>%
  # Start PDV off equal to payout so the cascade has a value to build on
  # (presumably the default .skip = TRUE leaves the starting period at this
  # value -- confirm in ?mutate_cascade)
  mutate(PDV = payout) %>%
  # tlag() with .n = -1 is a lead, so each PDV draws on the next period's
  # PDV; .backwards = TRUE asks for the cascade to run in reverse
  # (presumably from the last period to the first)
  mutate_cascade(PDV = payout + .95 * tlag(PDV, .n = -1), .backwards = TRUE)

# Print the result
df

## ---- eval = FALSE------------------------------------------------------------
#  tlag(.var,
#    .df = get(".", envir = parent.frame()),
#    .n = 1,
#    .default = NA,
#    .quick = FALSE,
#    .resolve = "error",
#    .group_i = TRUE,
#    .i = NULL,
#    .t = NULL,
#    .d = NA,
#    .uniqcheck = FALSE
#  )

## -----------------------------------------------------------------------------
# Two individuals (i), each observed at t = 1, 2, 3; i and t are declared
# as the panel identifiers through .i and .t.
df <- pibble(
  i = c(1, 1, 1, 2, 2, 2),
  t = c(1, 2, 3, 1, 2, 3),
  x = 1:6,
  .i = i,
  .t = t
) %>%
  # A lag and a lead, filling in the lead with 0 instead of NA.
  # NOTE(review): tlag() is called without .df; its signature defaults .df
  # to get(".", envir = parent.frame()), presumably the data piped in by
  # %>%, so keep these calls inside a pipe.
  mutate(
    x_lag = tlag(x),
    x_lead = tlag(x, .n = -1, .default = 0),
    # Our data satisfies the .quick conditions so we can
    # do that for a little extra speed
    x_quicklag = tlag(x, .quick = TRUE)
  )

# Print the result
df

## -----------------------------------------------------------------------------
# Here i and t together do not uniquely identify rows: t = 1 appears
# twice for each i.
df <- pibble(
  i = c(1, 1, 1, 2, 2, 2),
  t = c(1, 1, 2, 1, 1, 2),
  x = 1:6,
  .i = i,
  .t = t
) %>%
  # .resolve = mean supplies a function for reducing several x values in
  # the lagged period to one (the signature default is .resolve = "error";
  # presumably that would stop on this data -- confirm in ?tlag)
  mutate(x_lag = tlag(x, .resolve = mean))

# Print the result
df

Any scripts or data that you put into this service are public.

pmdplyr documentation built on July 2, 2020, 4:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

pmdplyr
'dplyr' Extension for Common Panel Data Maneuvers

inst/doc/dplyr_variants.R
In pmdplyr: 'dplyr' Extension for Common Panel Data Maneuvers

Try the pmdplyr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pmdplyr 'dplyr' Extension for Common Panel Data Maneuvers

inst/doc/dplyr_variants.R In pmdplyr: 'dplyr' Extension for Common Panel Data Maneuvers

Try the pmdplyr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pmdplyr
'dplyr' Extension for Common Panel Data Maneuvers

inst/doc/dplyr_variants.R
In pmdplyr: 'dplyr' Extension for Common Panel Data Maneuvers