knitr::opts_chunk$set(echo = TRUE)
library(dplyr) library(quantreg) library(tidyr) library(ggplot2) library(CQR)
repetitions <- 1000 N <- 1000 N_est <- 400 cut_value <- -0.75 taus <- 0.5 q1 <- 0.1 q2 <- 0.03 param_true <- c(1, 1, 0.5, -1, -0.5, 0.25)
N_est is the number of observation used in estimation. N is number of simulation sample in one repetition. N_est is chosed from N.
MS_df <- matrix(nrow = repetitions, ncol = 6) robust_stat <- matrix(nrow = repetitions, ncol = 3) for(rep in 1:repetitions){ #set up X_i i in 1:5 data <- data.frame( X1 = rnorm(n = N, 0, 1), X2 = rnorm(n = N, 0, 1), X3 = rnorm(n = N, 0, 1), X4 = rnorm(n = N, 0, 1), X5 = rnorm(n = N, 0, 1)) #make |X_i| < 2 data <- data[ifelse(apply(data,1,max) > 2, F, T), ] data <- data[sample(NROW(data), N_est, replace = F), ] #make u ~ N(0, 25) u <- rnorm(n = N_est, 0, 5) #make eps hetero_factor <- apply(data + data^2, 1, sum) eps <- u * (1 + 0.5 * hetero_factor) #make y y <- cbind(1,as.matrix(data)) %*% param_true + eps #make y as censored data with cut_value which is -0.75 in the original papaer y <- ifelse(y < cut_value, cut_value, y) #finally obtained dataset of (1, X_i, y) data <- data %>% cbind(y) #getting into three step estimation #1st step YV <- "y" #dependent variable XV <- paste("X", 1:5, sep = "") #independent variables #rename dataset this part might be troublesome in large dataset cqr_data <- data #prepare X_i^2 as the original paper(this part should be modified for your dataset) #original paper changed this part in the extramarital affairs example sq_extra <- cqr_data %>% select_(paste("-",YV, sep = "")) %>% mutate_at(vars(-matches(YV)), pt, n = 2) colnames(sq_extra) <- paste("sq_", colnames(sq_extra), sep = "") #bind into original dataset then make binary variable which indicates the y is censored or not. lr_extra <- cbind(cqr_data, sq_extra) %>% mutate(binom_var = ifelse(y > cut_value, 1, 0)) #run the logistic regression of first step first_logit <- glm(lr_extra, formula = paste("binom_var ~ . -", YV), family = binomial()) #run the second step and third step #you will get summary of the third step model and robustnes stats to check whether J0 is subset of J1. three_step_result <- two_three_step(first_step_model = first_logit, taus = taus, q1 = q1, q2 = q2, YV = YV, XV = XV, cqr_data = cqr_data) #get the coefficients of third step MS_df[rep, ] <- three_step_result[[1]][[1]]$coefficients[,1] }
As you may noticed, first step and second and third step are separated. This is because first step is something you need to modify with your dataset.
three_step <- stats_result(df = MS_df[,1:2], param_true = param_true[1:2]) %>% cbind(name = "CQR-three-step") intercept <- c(1.05, 0.64, 0.82, 0.66) slope <- c(0.88, -0.28, 0.69, -0.43) orig_res <- rbind(intercept,slope) %>% cbind(names = "Original-three-step") intercept <- c(1.31, 0.74, 0.81, 0.78) slope <- c(0.89, -0.38, 0.69, -0.54) ILPA_res <- rbind(intercept,slope) %>% cbind(names = "Original-ILPA") comp_data <- rbind(three_step, orig_res, ILPA_res) comp_data %>% cbind(varname = row.names(comp_data)) %>% as.data.frame() %>% gather(vid, value, -c(name, varname)) %>% mutate(value = as.numeric(value)) %>% ggplot(aes(y = value, x = name, fill = name)) + geom_bar(stat = "identity", width = 0.5) + facet_grid(varname ~ vid, scales = "free") + geom_hline(yintercept = 0, linetype = 2) + xlab("method")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.