-- output: html_document --
# Benchmark: balanced panel, doubly robust estimation.
# Times att_gt() + aggte() under the currently attached `did` and again under
# the copy installed in ~/R/old_packages, then compares estimates and timings.
library(did)
# library(profvis) # not used below, but useful for seeing more details about
#                  # where speedups are happening
packageVersion("did") # version 2.1.0

n <- 5000
time.periods <- 8
sp <- reset.sim(n = n, time.periods = time.periods)
data <- build_sim_dataset(sp)
N <- nrow(data)

# add some more spurious columns to data:
data$X1 <- sample(1:25, size = N, replace = TRUE)
data$X1 <- as.factor(data$X1)
data$X2 <- rnorm(N)
data$X3 <- rnorm(N)
data$X4 <- rnorm(N)
data$X5 <- rnorm(N)

pt <- proc.time()
new_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr"
)
new_dyn <- aggte(new_dr, type = "dynamic")
new_time <- proc.time() - pt

# currently on CRAN
# unload = TRUE also unloads the namespace; a plain detach() leaves it loaded,
# in which case the library() call below may re-attach the same version
# instead of the one in lib.loc.
detach("package:did", unload = TRUE)
library(did, lib.loc = "~/R/old_packages")
packageVersion("did") # should be 2.0.0

pt <- proc.time()
old_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr"
)
old_dyn <- aggte(old_dr, type = "dynamic")
old_time <- proc.time() - pt

# check that estimates are the same
# (exact equality; use all.equal() instead if agreement up to numerical
# tolerance is all that is required)
all(new_dr$att == old_dr$att)
# use the full element name rather than relying on `$` partial matching
all(new_dyn$att.egt == old_dyn$att.egt)

# compare actual times
new_time
old_time
There may be a little bit of variation due to compiling the source file for this post, but I get about a 70% reduction in computational time with the new code (from 4.6 to 1.3 seconds).
# Benchmark: same setup as a balanced panel run, but with one row removed and
# allow_unbalanced_panel = TRUE passed to att_gt().
# Times att_gt() + aggte() under the default-library `did` and again under the
# copy installed in ~/R/old_packages, then compares estimates and timings.

# unload = TRUE also unloads the namespace; a plain detach() leaves it loaded,
# in which case the following library() call may re-attach the same version.
detach("package:did", unload = TRUE)
library(did)
packageVersion("did") # version 2.1.0

n <- 5000
time.periods <- 8
sp <- reset.sim(n = n, time.periods = time.periods)
data <- build_sim_dataset(sp)
N <- nrow(data)

# add some more spurious columns to data:
data$X1 <- sample(1:25, size = N, replace = TRUE)
data$X1 <- as.factor(data$X1)
data$X2 <- rnorm(N)
data$X3 <- rnorm(N)
data$X4 <- rnorm(N)
data$X5 <- rnorm(N)

# drop first row
# (presumably this is what makes the panel unbalanced -- confirm against
# build_sim_dataset()'s output)
data <- data[-1, ]

pt <- proc.time()
new_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr",
  allow_unbalanced_panel = TRUE
)
new_dyn <- aggte(new_dr, type = "dynamic")
new_time <- proc.time() - pt

# currently on CRAN
detach("package:did", unload = TRUE)
library(did, lib.loc = "~/R/old_packages")
packageVersion("did") # should be 2.0.0

pt <- proc.time()
old_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr",
  allow_unbalanced_panel = TRUE
)
old_dyn <- aggte(old_dr, type = "dynamic")
old_time <- proc.time() - pt

# check that estimates are the same
# (exact equality; use all.equal() instead if agreement up to numerical
# tolerance is all that is required)
all(new_dr$att == old_dr$att)
# use the full element name rather than relying on `$` partial matching
all(new_dyn$att.egt == old_dyn$att.egt)

# compare actual times
new_time
old_time
Again, there may be variation due to compiling the document, but here I get about a 97% reduction in computation time (from 52 seconds to about 1.5 seconds).
# Benchmark: data built with panel = FALSE and estimated with panel = FALSE.
# Times att_gt() + aggte() under the default-library `did` and again under the
# copy installed in ~/R/old_packages, then compares estimates and timings.

# unload = TRUE also unloads the namespace; a plain detach() leaves it loaded,
# in which case the following library() call may re-attach the same version.
detach("package:did", unload = TRUE)
library(did)
packageVersion("did") # version 2.1.0

n <- 5000
time.periods <- 8
sp <- reset.sim(n = n, time.periods = time.periods)
data <- build_sim_dataset(sp, panel = FALSE)
N <- nrow(data)

# add some more spurious columns to data:
data$X1 <- sample(1:25, size = N, replace = TRUE)
data$X1 <- as.factor(data$X1)
data$X2 <- rnorm(N)
data$X3 <- rnorm(N)
data$X4 <- rnorm(N)
data$X5 <- rnorm(N)

pt <- proc.time()
new_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr",
  panel = FALSE
)
new_dyn <- aggte(new_dr, type = "dynamic")
new_time <- proc.time() - pt

# currently on CRAN
detach("package:did", unload = TRUE)
library(did, lib.loc = "~/R/old_packages")
packageVersion("did") # should be 2.0.0

pt <- proc.time()
old_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr",
  panel = FALSE
)
old_dyn <- aggte(old_dr, type = "dynamic")
old_time <- proc.time() - pt

# check that estimates are the same
# (exact equality; use all.equal() instead if agreement up to numerical
# tolerance is all that is required)
all(new_dr$att == old_dr$att)
all(new_dyn$att.egt == old_dyn$att.egt)

# compare actual times
new_time
old_time
In this case, I get about a 75% reduction in computation time (from 2.8 to 0.6 seconds).
# Benchmark: larger simulated panel (n = 25000, 20 time periods).
# Times att_gt() + aggte() under the default-library `did` and again under the
# copy installed in ~/R/old_packages, then compares estimates and timings.

# unload = TRUE also unloads the namespace; a plain detach() leaves it loaded,
# in which case the following library() call may re-attach the same version.
detach("package:did", unload = TRUE)
library(did)
packageVersion("did") # version 2.1.0

n <- 25000
time.periods <- 20
sp <- reset.sim(n = n, time.periods = time.periods)
data <- build_sim_dataset(sp)
N <- nrow(data)

# add some more spurious columns to data:
data$X1 <- sample(1:25, size = N, replace = TRUE)
data$X1 <- as.factor(data$X1)
data$X2 <- rnorm(N)
data$X3 <- rnorm(N)
data$X4 <- rnorm(N)
data$X5 <- rnorm(N)

pt <- proc.time()
new_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr"
)
new_dyn <- aggte(new_dr, type = "dynamic")
new_time <- proc.time() - pt

# currently on CRAN
detach("package:did", unload = TRUE)
library(did, lib.loc = "~/R/old_packages")
packageVersion("did") # should be 2.0.0

pt <- proc.time()
old_dr <- att_gt(
  yname = "Y",
  xformla = ~X,
  data = data,
  tname = "period",
  idname = "id",
  gname = "G",
  est_method = "dr"
)
old_dyn <- aggte(old_dr, type = "dynamic")
old_time <- proc.time() - pt

# check that estimates are the same
# (exact equality; use all.equal() instead if agreement up to numerical
# tolerance is all that is required)
all(new_dr$att == old_dr$att)
all(new_dyn$att.egt == old_dyn$att.egt)

# compare actual times
new_time
old_time
This is about a 73% reduction in computation time (from 155 to 42 seconds for me).
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.