inst/tree_sampling/outside-v2.R

## SKG
## WILL THIS WORK
## APRIL 13, 2020
## Will Fauci make it??
## Uncertainty and other questions unanswered in this R file
## We will instead hope this likelihood works
## HA
<<<<<<< HEAD
## 4/20/20 trees lol
=======
>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643


## Takes a long long time


devtools::load_all()

inf_params <- c("beta_0" = -2.5, "beta_1" = .5)
smear_pos_prob <- .5
max_size <- 50
K <- 5000


<<<<<<< HEAD
set.seed(2020)
=======

>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643
obs_clusters <- simulate_outbreak(K = K,
                                inf_params = inf_params,
                                smear_pos_prob = smear_pos_prob,
                                max_size = max_size,
                                start_at_outsider = TRUE)
cluster_summ <- summarize_clusters(obs_clusters)


<<<<<<< HEAD

## make sampler vars

helper_samplers <- vector(mode = "list", length = nrow(cluster_summ))
nms <- paste0(cluster_summ$n, "-", cluster_summ$n_pos)
names(helper_samplers) <- nms
B <- 1000
for(ii in 1:nrow(cluster_summ)){
  print(ii)
  n <- cluster_summ$n[ii]
  n_pos <- cluster_summ$n_pos[ii]
  helper_samplers[[ii]] <- sample_outside_gen_vars(n, n_pos, B)
}
=======
# ## make sampler vars
# 
# helper_samplers <- vector(mode = "list", length = nrow(cluster_summ))
# nms <- paste0(cluster_summ$n, "-", cluster_summ$n_pos)
# names(helper_samplers) <- nms
# B <- 1000
# for(ii in 1:nrow(cluster_summ)){
#   print(ii)
#   n <- cluster_summ$n[ii]
#   n_pos <- cluster_summ$n_pos[ii]
#   helper_samplers[[ii]] <- sample_outside_gen_vars(n, n_pos, B)
# }
>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643



## Generate the sampled trees
N <- max(cluster_summ$n)
df_list <- vector(mode = "list", length = N)
t <- proc.time()[3]
for(ii in 1:N){
    print(ii)
    data <- data.frame(n = ii,
                       n_pos = 0:ii)
    df_list[[ii]] <- tree_sampler(data,
                                  B = 1000)
}
proc.time()[3] - t
sampled_trees <- dplyr::bind_rows(df_list)

<<<<<<< HEAD
par <- c("beta_0" = -2.4, "beta_1" = .4)
=======
par <- c("beta_0" = -2.5, "beta_1" = .5)
>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643

loglike <- loglike_outside_gen(par = par,
                               data = cluster_summ,
                               sampled_trees = sampled_trees,
                               B = 1000,
                               return_neg = FALSE,
<<<<<<< HEAD
                               sampled_vars_list = helper_samplers)
=======
                               sampled_vars_list = NULL)
>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643

## And optimizinggg



t <- proc.time()[3]
out <- optim(par = par, fn = loglike_outside_gen,
             data = cluster_summ,
             sampled_trees = sampled_trees,
<<<<<<< HEAD
             sampled_vars_list = helper_samplers,
=======
             sampled_vars_list = NULL,
>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643
             B = 1000,
             return_neg = TRUE,
             control = list(trace = TRUE),
             method = "L-BFGS-B",
            lower = c(-4, -2),
             upper = c(0, 4))
print(proc.time()[3] - t)
<<<<<<< HEAD
print(out$par)

## Profile likelihood


################################################3
## Draw the profile
best_pars <- out
beta_0 <- best_pars$par[1]
beta_1 <- seq(0, 3, length.out = 21)
loglike <- numeric(length(beta_1))
loglike2 <- loglike
for(ii in 1:length(loglike)){
  print(paste0(ii / length(beta_1) * 100, "%"))  
  loglike[ii] <- loglike_outside_gen(par = c(beta_0, beta_1[ii]),
                                              data = cluster_summ,
                                              sampled_trees = sampled_trees,
                                              B = 1000,
                                              return_neg = FALSE,
                                              sampled_vars_list = NULL)
}




df <- data.frame(beta_1 = beta_1, loglike = loglike)
alpha <- .05
lp <- -best_pars$value - 1.92

like_wrapper <- function(x){ return(loglike_outside_gen(par = c(beta_0, x),
                                                 data = cluster_summ, 
                                                 sampled_trees = sampled_trees,
                                                 B = 1000,
                                                 sampled_vars_list = NULL,
                                                 return_neg = FALSE) -lp)}
lower <- uniroot(f = like_wrapper, c(-1, best_pars$par[2]))
upper <- uniroot(f = like_wrapper, c(best_pars$par[2], 3))



library(ggplot2)
g <- ggplot(data = df, aes(x = beta_1, y = loglike)) + 
  geom_vline(xintercept = best_pars$par[2], col = "blue", linetype = "dashed") +
  geom_vline(xintercept = .5, col = "black") +
  geom_hline(yintercept = lp, col = "darkgreen", linetype = "dashed") + 
  geom_line(col = "red", size = 2) +
  theme_bw() 


simulation_list <- list(data = cluster_summ,
                        true_params = c(-2.5, .5),
                        best_params = best_pars,
                        like_prof_plot = g,
                        beta1 = c(mean = as.numeric(best_pars$par[2]),
                                  lower95 = lower$root,
                                  upper = upper$root))

#saveRDS(simulation_list, "outside_sim.RDS")

#############################33
=======
>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643


## out <- optim(par = par, fn = loglike_outside_gen,
##              data = cluster_summ,
##              sampled_trees = sampled_trees,
##              B = 1000,
##              return_neg = TRUE,
##              control=list(trace=TRUE))
skgallagher/TBornotTB documentation built on April 21, 2020, 1:19 p.m.