Nothing
# test_that("Replicate FFL 2011, table 5, p. 69", {
#
# # Full data set
# load("data-raw/men8305_full.rda")
# men8305_full$weights <- men8305_full$weights/sum(men8305_full$weights) * length(men8305_full$weights)
#
# flf_model <- log(wage) ~ union*(education + experience) + education*experience
#
# # Reweighting sample from 1983-85
# flf_male_inequality <- dfl_decompose(flf_model,
# data = men8305_full,
# weights = weights,
# group = year)
#
# FFL_results <- data.frame(effect=c("Observed difference", "Composition effect", "Structure effect"),
# p90p10=c(0.1091, 0.0756, 0.0336),
# p90p50=c(0.1827, 0.0191, 0.1637),
# p50p10=c(-0.0736, 0.0565, -0.1301),
# variance = c(0.0617, 0.0208, 0.0408))[,-1]
# FFL_results <- t(as.matrix(FFL_results))
# colnames(FFL_results) <- c("Observed difference", "Composition effect", "Structure effect")
# estimated_statistics <- flf_male_inequality$decomposition_other_statistics[c(4,5,6,2), c(2, 4, 3)]
# estimated_statistics <- as.matrix(estimated_statistics)
# rownames(estimated_statistics) <- rownames(FFL_results)
#
# expect_equal(FFL_results, estimated_statistics, tolerance = 0.01)
# })
test_that("dfl_decompose() does not throw an error", {
set.seed(89342395)
model_sequential <- log(wage) ~ union + experience + education | experience + education | education
decompose_results <- dfl_decompose(model_sequential,
data = men8305[1:1000, ],
weights = weights,
group = year
)
testthat::expect_error(decompose_results, NA)
})
test_that("dfl_decompose() does not return fitted models if required", {
set.seed(89342395)
model_sequential <- log(wage) ~ union + experience + education | experience + education | education
decompose_results <- dfl_decompose(model_sequential,
data = men8305[1:1000, ],
weights = weights,
group = year,
return_model = FALSE
)
testthat::expect_error(decompose_results, NA)
})
test_that("bootstrapping dfl_decompose() does not throw an error", {
flf_model <- log(wage) ~ union + experience + education
set.seed(123)
decompose_results_boot_single_core <- dfl_decompose(flf_model,
data = men8305[1:1000, ],
weights = weights,
group = year,
bootstrap = TRUE,
bootstrap_iterations = 10,
cores = 1
)
# # Does not work in check()
# set.seed(123)
# decompose_results_boot_parallel <- dfl_decompose(flf_model,
# data = men8305[1:1000, ],
# weights = weights,
# group = year,
# bootstrap = TRUE,
# bootstrap_iterations = 10,
# cores = 2)
testthat::expect_error(decompose_results_boot_single_core, NA)
})
test_that("dfl_decompose_estimate() does not throw an error", {
set.seed(89342395)
data_sample <- men8305[sample(1:nrow(men8305), size = 10000), ]
formula <- Formula::as.Formula(log(wage) ~ union * (education + experience) + education * experience)
data_used <- model.frame(formula, data_sample, weights = weights, group = year)
dep_var <- model.response(data_used, "numeric")
weights <- model.weights(data_used)
group_variable_name <- "year"
group_variable <- data_used[, ncol(data_used)]
names(data_used)[ncol(data_used)] <- "group_variable"
reference_0 <- TRUE
reference_group <- ifelse(reference_0, 0, 1)
statistics <- c("quantiles", "mean", "variance", "gini", "iq_range_p90_p10", "iq_range_p90_p50", "iq_range_p50_p10")
probs <- c(1:9) / 10
trimming <- TRUE
trimming_threshold <- 0.00005
right_to_left <- TRUE
method <- "logit"
decompose_results <- dfl_decompose_estimate(
formula = formula,
dep_var = dep_var,
data_used = data_used,
weights = weights,
group_variable = group_variable,
reference_group = reference_group,
method = method,
estimate_statistics = TRUE,
statistics = statistics,
custom_statistic_function = NULL,
estimate_normalized_difference = TRUE,
probs = probs,
right_to_left = right_to_left,
trimming = trimming,
trimming_threshold = trimming_threshold,
return_model = TRUE
)
testthat::expect_error(decompose_results, NA)
})
test_that("fit_and_predict_probabilities works properly with fastglm estimation", {
set.seed(89342395)
data_sample <- men8305[sample(1:nrow(men8305), size = 10000), ]
formula <- Formula::as.Formula(log(wage) ~ union + education + experience)
data_used <- model.frame(formula, data_sample, weights = weights, group = year)
dep_var <- model.response(data_used, "numeric")
weights <- model.weights(data_used)
group_variable_name <- "year"
group_variable <- data_used[, ncol(data_used)]
names(data_used)[ncol(data_used)] <- "group_variable"
formula_new <- group_variable ~ union + education + experience
method <- "fastglm"
fits_fastglm <- fit_and_predict_probabilities(formula_new,
data_used,
weights,
method = method,
return_model = TRUE,
newdata = NULL
)
method <- "logit"
fits_logit <- fit_and_predict_probabilities(formula_new,
data_used,
weights,
method = method,
return_model = TRUE,
newdata = NULL
)
testthat::expect_equal(fits_fastglm[[1]][1:30], fits_logit[[1]][1:30], tolerance = 0.0001)
})
test_that("fit_and_predict_probabilities works properly with ranger random forests estimation", {
set.seed(89342395)
data_sample <- men8305[sample(1:nrow(men8305), size = 10000), ]
formula <- Formula::as.Formula(log(wage) ~ union + education + experience)
data_used <- model.frame(formula, data_sample, weights = weights, group = year)
dep_var <- model.response(data_used, "numeric")
weights <- model.weights(data_used)
group_variable_name <- "year"
group_variable <- data_used[, ncol(data_used)]
names(data_used)[ncol(data_used)] <- "group_variable"
method <- "random_forest"
formula_new <- group_variable ~ union + education + experience
fits <- fit_and_predict_probabilities(formula_new,
data_used,
weights,
method = method,
return_model = TRUE,
newdata = NULL
)
testthat::expect_error(fits, NA)
})
test_that("dfl_decompose_estimate() works properly with ranger random forests estimation", {
set.seed(89342395)
data_sample <- men8305[sample(1:nrow(men8305), size = 10000), ]
formula <- Formula::as.Formula(log(wage) ~ union + education + experience)
data_used <- model.frame(formula, data_sample, weights = weights, group = year)
dep_var <- model.response(data_used, "numeric")
weights <- model.weights(data_used)
group_variable_name <- "year"
group_variable <- data_used[, ncol(data_used)]
names(data_used)[ncol(data_used)] <- "group_variable"
reference_0 <- TRUE
reference_group <- ifelse(reference_0, 0, 1)
statistics <- c("quantiles", "mean", "variance", "gini", "iq_range_p90_p10", "iq_range_p90_p50", "iq_range_p50_p10")
probs <- c(1:9) / 10
trimming <- FALSE
trimming_threshold <- NULL
right_to_left <- TRUE
method <- "random_forest"
decompose_results <- dfl_decompose_estimate(
formula = formula,
dep_var = dep_var,
data_used = data_used,
weights = weights,
group_variable = group_variable,
reference_group = reference_group,
method = method,
estimate_statistics = TRUE,
statistics = statistics,
custom_statistic_function = NULL,
estimate_normalized_difference = TRUE,
probs = probs,
right_to_left = right_to_left,
trimming = trimming,
trimming_threshold = trimming_threshold,
return_model = TRUE
)
testthat::expect_error(decompose_results, NA)
})
test_that("dfl_decompose() does not throw an error without estimating statistics", {
set.seed(89342395)
formula <- log(wage) ~ age + central_city + msa + region + black +
hispanic + education + afqt + family_responsibility + years_worked_civilian +
years_worked_military + part_time + industry
decompose_results <- dfl_decompose(
formula = formula,
data = nlys00[1:300, ],
weights = runif(nrow(nlys00[1:300, ]), 0.5, 1.5),
group = female,
estimate_statistics = FALSE
)
testthat::expect_error(decompose_results, NA)
})
test_that("select_observations_to_be_trimmed() trimms observations as intended", {
set.seed(123)
expected_trimmed_observations <- c(1:5, 101:102)
reweighting_factor <- rep(1 / 100, 200)
group_variable <- as.factor(rep(c("no", "yes"), each = 100))
group <- 0
reweighting_factor[expected_trimmed_observations] <- 20
trimming_threshold <- NULL
trimmed_observations <- select_observations_to_be_trimmed(
reweighting_factor,
group_variable,
group,
trimming_threshold
)
testthat::expect_equal(
trimmed_observations,
expected_trimmed_observations
)
})
test_that("dfl_decompose() trimms estimated factors as expected", {
set.seed(123)
data_used <- data.frame(
x = c(
c(1, 1, rep(2, 98)),
c(rep(1, 90), rep(2, 10))
),
group = rep(c(0, 1), each = 100)
)
data_used$y <- data_used$x + data_used$group + rnorm(200, 0, 0.5)
data_used$x <- as.factor(data_used$x)
levels(data_used$x) <- c("A", "B")
data_used$group <- as.factor(data_used$group)
expected_trimmed_observations <- which(data_used$x == "A")
# p_g_given_X <- data_used %>%
# dplyr::group_by(x) %>%
# dplyr::summarise(p = mean(group == "1")) %>%
# dplyr::pull(p)
#
# data_used$psi <- p_g_given_X[1]/(1-p_g_given_X[1])
# data_used[which(data_used$x == "B"), "psi"] <- p_g_given_X[2]/(1-p_g_given_X[2])
#
# reweighting_factor <- data_used$psi
# group_variable <- data_used$group
# group <- 0
# trimming_threshold = NULL
#
# trimmed_observations <- select_observations_to_be_trimmed(reweighting_factor,
# group_variable,
# group,
# trimming_threshold)
# decompose_results_untrimmed <- dfl_decompose(y ~ x,
# data_used,
# group = group)
decompose_results_trimmed <- dfl_decompose(y ~ x,
data_used,
group = group,
trimming = TRUE
)
testthat::expect_equal(
decompose_results_trimmed$trimmed_observations,
expected_trimmed_observations
)
})
test_that("get_distributional_statistics() does not throw an error with custom_statistic_function", {
data_sample <- men8305
formula <- Formula::as.Formula(log(wage) ~ union * (education + experience) + education * experience)
data_used <- model.frame(formula, data_sample, weights = weights, group = year)
dep_var <- model.response(data_used, "numeric")
weights <- model.weights(data_used)
group_variable_name <- "year"
group_variable <- data_used[, ncol(data_used)]
names(data_used)[ncol(data_used)] <- "group_variable"
reference_0 <- TRUE
reference_group <- ifelse(reference_0, 0, 1)
statistics <- c("quantiles", "mean", "variance", "gini", "iq_range_p90_p10", "iq_range_p90_p50", "iq_range_p50_p10")
probs <- c(1:9) / 10
log_transformed <- TRUE
top_share <- function(dep_var,
weights,
top_percent = 0.1) {
threshold <- Hmisc::wtd.quantile(dep_var, weights = weights, probs = 1 - top_percent)
share <- sum(weights[which(dep_var > threshold)] * dep_var[which(dep_var > threshold)]) / sum(weights * dep_var)
return(share)
}
direct_estimation <- top_share(dep_var[which(group_variable == "2003-2005")], weights[which(group_variable == "2003-2005")], 0.1)
nu1 <- get_distributional_statistics(
dep_var = dep_var,
weights = weights,
group_variable = group_variable,
group = 1,
statistics = statistics,
custom_statistic_function = top_share,
probs = probs,
log_transformed = log_transformed
)
testthat::expect_equal(direct_estimation, as.numeric(nu1[length(nu1)]), tolerance = 0.000001)
})
# test_that("glm and surveyglm return the same coefficients", {
# set.seed(123)
# data("nlys00")
# formula <- female ~ education + afqt + industry + family_responsibility + part_time
# data_used <- get_all_vars(formula, nlys00)
# data_used$weights <- runif(nrow(data_used), 0.5, 1.5)
#
# design <- survey::svydesign(~0,
# data=data_used,
# weights=~weights)
# model_fit_surveyglm <- survey::svyglm(formula,
# data=data_used,
# design=design,family=quasibinomial(link="logit"))
#
# model_fit_glm <- glm(formula, data = data_used, weights = weights, family = binomial(link = "logit"))
# model_fit_quasiglm <- glm(formula, data = data_used, weights = weights, family = quasibinomial(link = "logit"))
#
# sglm <- summary(model_fit_glm)
# squasiglm <- summary(model_fit_quasiglm)
# ssurveryglm <- summary(model_fit_surveyglm)
#
# cbind(sglm$coefficients[,2],
# squasiglm$coefficients[,2],
# ssurveryglm$coefficients[,2])
#
# cbind(coef(model_fit_glm), coef(model_fit_surveyglm))
# testthat::expect_equal(coef(model_fit_glm),
# coef(model_fit_surveyglm),
# tolerance = 0.000001)
#
# cbind(coef(model_fit_quasiglm), coef(model_fit_surveyglm))
# testthat::expect_equal(coef(model_fit_quasiglm),
# coef(model_fit_surveyglm),
# tolerance = 0.000001)
#
# })
# test_that("dfl_decompose() replicates Table 5, p. 67, in FLF 2011 Handbook Chapter", {
#
# load("data-raw/men8305_full.rda")
# men8305_full$weights <- men8305_full$weights/sum(men8305_full$weights) * length(men8305_full$weights)
# flf_model <- log(wage) ~ union*(education + experience) + education*experience
#
# # Replicate statistics in table 5, p.67, in in FLF (2011)
# flf_male_inequality_table_5 <- dfl_decompose(flf_model,
# data = men8305_full,
# weights = weights,
# group = year,
# reference_0 = TRUE,
# statistics = c("iq_range_p90_p10",
# "iq_range_p90_p50",
# "iq_range_p50_p10",
# "variance"))
# results_dfl_decompose <- flf_male_inequality_table_5$decomposition_other_statistics
# published_results_FLF_table_5 <- data.frame(statistic = results_dfl_decompose$statistic,
# `Observed difference` = c(0.0617, 0.1091, 0.1827, -0.0736),
# `Structure effect` = c(0.0408, 0.0336, 0.1637, -0.1301),
# `Composition effect` = c(0.0208, 0.0756, 0.0191, 0.0565))
# rownames(results_dfl_decompose) <- rownames(published_results_FLF_table_5) <- 1:4
# names(published_results_FLF_table_5) <- names(results_dfl_decompose)
# testthat::expect_equal(results_dfl_decompose,
# published_results_FLF_table_5,
# tolerance = 0.0075)
# })
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.