## SKG
## February 11, 2020
## Weighted sampling
#' Iterative Step to find best parameters with weighted sampling
#'
#' @param init_par parameters to optimize over. par[1] is beta_0 and par[2] is beta_1
#' @param freq vector of times the cluster of size n with x positives occurred
#' @param n_vec cluster sizes
#' @param x_vec number of positives in each cluster
#' @param iters max number of iterations
#' @param B samples to make for each
#' @return optim results for beta_0 and beta_1
iter_est_ws <- function(init_par,
freq,
n_vec,
x_vec,
B = 10,
iters = 10
){
par <- init_par
for(ii in 1:iters){
optim_results <- optim(par = par,
fn = loglike_weighted_total,
prev_par = par,
freq = freq,
n_vec = n_vec,
x_vec = x_vec,
B = B,
return_neg = TRUE,
hessian = TRUE)
print(optim_results$par)
if(sum((par - optim_results$par)^2) < .000001){
break
}
par <- optim_results$par
}
return(optim_results)
}
#' Calculate the log likelihood using weighted sampling
#'
#' @param par parameters to optimize over. par[1] is beta_0 and par[2] is beta_1
#' @param prev_par parameters to form the weights. 1 is beta_0 and 2 is beta_1
#' @param freq vector of times the cluster of size n with x positives occurred
#' @param n_vec cluster sizes
#' @param x_vec number of positives in each cluster
#' @param B number of samples for each
#' @param return_neg logical. Default is TRUE, returns the negative log likelihood
#' @return log likelihood sum as a function of par
loglike_weighted_total <- function(par, prev_par,
freq,
n_vec, x_vec, B,
return_neg = TRUE){
like <- rep(0, length(freq))
for(ii in 1:length(n_vec)){
like[ii] <- likelihood_weighted(par = par,
prev_par = prev_par,
n = n_vec[ii],
x = x_vec[ii],
B = B)
}
# browser()
loglike <- sum(freq * log(like))
if(return_neg) loglike <- -loglike
return(loglike)
}
#' Calculate average likelihood over isomorphic trees
#'
#' @param par parameters to optimize over. par[1] is beta_0 and par[2] is beta_1
#' @param prev_par parameters to form the weights. 1 is beta_0 and 2 is beta_1
#' @param n cluster size
#' @param x number of positives in each cluster
#' @param B number of samples for each
#' @param return_neg logical. Default is TRUE, returns the negative log likelihood
#' @return likelihood
likelihood_weighted <- function(par,
prev_par,
n, x, B){
# if(n > 1) browser()
prev_p_pos <- 1 / (1 + exp(- sum(prev_par)))
prev_p_neg <- 1 / (1 + exp(- prev_par[1]))
p_pos <- 1 / (1 + exp(- sum(par)))
p_neg <- 1 / (1 + exp(- par[1]))
prev_p <- prev_p_pos / (prev_p_pos + prev_p_neg)
p <- p_pos / (p_pos + p_neg)
#i_pos <- rbinom(n = B, size = n-1, prob = prev_p_pos)
i_pos <- sample(0:(n-1), size = B, replace = TRUE)
i_neg <- n - 1 - i_pos
# weights <- dbinom(x = i_pos,
# size = n-1, prob = prev_p_pos)
weights <- get_weights_from_table(n, x, i_pos)
# browser()
T_like <- (p_pos^i_pos * p_neg^i_neg)
like <- ((1 - p_pos)^x * (1 - p_neg)^(n-x)) *
sum(T_like * weights)
return(like)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.