knitr::opts_chunk$set(echo = TRUE)

Load libraries

library(dplyr)
library(quantreg)
library(tidyr)
library(ggplot2)
library(CQR)

Monte Carlo Simulation

Set up parameters

repetitions <- 1000
N <- 1000
N_est <- 400
cut_value <- -0.75

taus <- 0.5
q1 <- 0.1
q2 <- 0.03

param_true <- c(1, 1, 0.5, -1, -0.5, 0.25)

N_est is the number of observations used in estimation. N is the number of observations simulated in one repetition. The N_est observations are sampled from the N simulated ones.
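In formula form, the loop below simulates, for each observation $i$,

$$y_i^{*} = 1 + X_{i1} + 0.5\,X_{i2} - X_{i3} - 0.5\,X_{i4} + 0.25\,X_{i5} + \varepsilon_i,\qquad
\varepsilon_i = u_i\Big(1 + 0.5\sum_{k=1}^{5}\big(X_{ik} + X_{ik}^{2}\big)\Big),\quad u_i \sim N(0,\,25),$$

with the regressors $X_{ik}$ drawn from $N(0, 1)$ and restricted to $|X_{ik}| < 2$, and the observed outcome censored from below at the cut value:

$$y_i = \max(y_i^{*},\ -0.75).$$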

Simulation and Estimation part

#storage for the third-step coefficients and the robustness statistics of each repetition
MS_df <- matrix(nrow = repetitions, ncol = 6)
robust_stat <- matrix(nrow = repetitions, ncol = 3)

for(rep in 1:repetitions){

#set up X_i, i = 1, ..., 5
data <- data.frame(
  X1 = rnorm(n = N, 0, 1),
  X2 = rnorm(n = N, 0, 1),
  X3 = rnorm(n = N, 0, 1),
  X4 = rnorm(n = N, 0, 1),
  X5 = rnorm(n = N, 0, 1))

#keep only observations with |X_i| < 2
data <- data[apply(abs(data), 1, max) < 2, ]
#draw N_est observations for estimation
data <- data[sample(NROW(data), N_est, replace = FALSE), ]

#make u ~ N(0, 25)
u <- rnorm(n = N_est, 0, 5)

#make eps
hetero_factor <- apply(data + data^2, 1, sum)
eps <- u * (1 + 0.5 * hetero_factor)

#make y
y <- cbind(1,as.matrix(data)) %*% param_true + eps

#censor y from below at cut_value, which is -0.75 in the original paper
y <- ifelse(y < cut_value, cut_value, y)

#the final dataset contains (X_1, ..., X_5, y)
data <- data %>% cbind(y) 

#getting into the three-step estimation
#1st step
YV <- "y" #dependent variable
XV <- paste("X", 1:5, sep = "") #independent variables

#copy the dataset (this copy might be troublesome with a large dataset)
cqr_data <- data

#prepare X_i^2 as in the original paper (this part should be modified for your dataset)
#the original paper changes this part in the extramarital affairs example
sq_extra <- cqr_data %>% 
  select(-all_of(YV)) %>% 
  mutate_all(function(x) x^2)

colnames(sq_extra) <- paste("sq_", colnames(sq_extra), sep = "")

#bind to the original dataset, then create a binary variable indicating whether y is uncensored (1) or censored (0)
lr_extra <- cbind(cqr_data, sq_extra) %>%
  mutate(binom_var = ifelse(y > cut_value, 1, 0))

#run the logistic regression of the first step
first_logit <- glm(formula = as.formula(paste("binom_var ~ . -", YV)),
                   family = binomial(),
                   data = lr_extra)

#run the second and third steps
#you will get a summary of the third-step model and robustness statistics to check whether J0 is a subset of J1
three_step_result <- two_three_step(first_step_model = first_logit, 
                                    taus = taus, 
                                    q1 = q1, 
                                    q2 = q2, 
                                    YV = YV, 
                                    XV = XV, 
                                    cqr_data = cqr_data)

#get the coefficients of third step
MS_df[rep, ] <- three_step_result[[1]][[1]]$coefficients[,1]

}
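This is not part of the original script, but a quick diagnostic one might run once the loop has finished: compare the Monte Carlo average of the stored third-step coefficients with the true parameter vector.

#Monte Carlo bias of the third-step estimates
#(rows of MS_df are repetitions, columns are the six coefficients)
colMeans(MS_df) - param_true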

As you may have noticed, the first step is run separately from the second and third steps. This is because the first step is the part you need to adapt to your own dataset; a sketch of such an adaptation follows.
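Below is a minimal sketch of what that modification could look like for a hypothetical dataset my_data with a dependent variable wage censored from below at my_cut. These names and the logit specification are placeholders, not part of the package; only two_three_step() and its arguments are reused from the chunk above.

#hypothetical dataset: my_data, outcome "wage", censoring point my_cut
my_cut <- 0
YV <- "wage"
XV <- setdiff(colnames(my_data), YV)

#choose your own flexible first-step specification
#(e.g. squared regressors as above, or interactions)
first_df <- my_data %>%
  mutate(binom_var = ifelse(wage > my_cut, 1, 0))

first_logit <- glm(formula = binom_var ~ . - wage,
                   family = binomial(),
                   data = first_df)

#second and third steps proceed exactly as before
my_result <- two_three_step(first_step_model = first_logit,
                            taus = taus, q1 = q1, q2 = q2,
                            YV = YV, XV = XV, cqr_data = my_data)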

Comparison of the results

three_step <- stats_result(df = MS_df[,1:2], param_true = param_true[1:2]) %>% cbind(name = "CQR-three-step")

intercept <- c(1.05, 0.64, 0.82, 0.66)
slope <- c(0.88, -0.28, 0.69, -0.43)
orig_res <- rbind(intercept, slope) %>% cbind(name = "Original-three-step")

intercept <- c(1.31, 0.74, 0.81, 0.78)
slope <- c(0.89, -0.38, 0.69, -0.54)
ILPA_res <- rbind(intercept, slope) %>% cbind(name = "Original-ILPA")

comp_data <- rbind(three_step, orig_res, ILPA_res)
comp_data %>% cbind(varname = row.names(comp_data)) %>%
  as.data.frame() %>%
  gather(vid, value, -c(name, varname)) %>%
  mutate(value = as.numeric(value)) %>%
  ggplot(aes(y = value, x = name, fill = name)) +
  geom_bar(stat = "identity", width = 0.5) + 
  facet_grid(varname ~ vid, scales = "free") + 
  geom_hline(yintercept = 0, linetype = 2) +
  xlab("method")

