happi: a hierarchical approach to pangenomics inference

Documented in happi

#' Main function for happi, p=q=1; this script contains the modularized version of happi with correct implementation of log likelihood
#'
#' @param outcome length-n vector; this is the vector of a target gene's presence/absence; should be coded as 0 or 1
#' @param covariate n x p matrix; this is the matrix for the primary predictor/covariate of interest
#' @param h0_param the column index in covariate that has beta=zero under the null
#' @param quality_var length-n vector; this is the quality variable vector, currently p = 1  TODO(turn into n x q matrix)
#' @param covariate_formula alternative to \code{covariate} argument, a formula for covariates of the form \eqn{~ covariate1 + covariate2 + ...}, requires \code{data} argument
#' @param covariate_formula_h0 alternative to \code{h0_param} argument, a formula for covariates in the null model, takes the form \eqn{ ~ 1} for an intercept-only model, requires \code{data} argument
#' @param quality_var_formula alternative to \code{quality_var} argument, a formula for quality variable of the form \eqn{~ quality_var}, requires \code{data} argument
#' @param data required with \code{formula} arguments, a data frame including covariates and the quality variable
#' @param max_iterations the maximum number of EM steps that the algorithm will run for
#' @param min_iterations the minimum number of EM steps that the algorithm will run for
#' @param nstarts number of starts; Integer. Defaults to \code{1}. Number of starts for optimization.
#' @param change_threshold algorithm will terminate early if the likelihood changes by this percentage or less for 5 iterations in a row for both the alternative and the null
#' @param epsilon probability of observing a gene when it should be absent; probability between 0 and 1; default is 0. Either a single value or a vector of length n.  
#' @param method method for estimating f. Defaults to "splines" which fits a monotone spline with df determined by 
#' argument spline_df; "isotone" for isotonic regression fit
#' @param random_starts whether to pick the starting values of beta's randomly. Defaults to FALSE.
#' @param firth use firth penalty? Default is TRUE.
#' @param spline_df degrees of freedom (in addition to intercept) to use in
#' monotone spline fit; default 3 
#' @param seed numeric number to set seed for random multiple starts
#' @param norm_sd positive number to set as the standard deviation for the Normal distribution used to draw initial parameter values from. 
#' @param run_npLRT logical, if TRUE, non-parametric permutation LRT test will also be run.
#' @param P if \code{run_npLRT} is TRUE, number of permutations to run 
#' @param verbose TRUE to return all information generated by happi, FALSE to only return effect size and p-value
#'
#' @importFrom msos logdet
#' @importFrom utils tail
#' @import tibble
#' @import stats
#' @import splines2
#' @importFrom isotone activeSet fSolver
#' @importFrom logistf logistf
#'
#' @return An object of class \code{happi}.
#'
#' @examples 
#' data(TM7_data)
#' x_matrix <- model.matrix(~tongue, data = TM7_data)
#' happi_results <- happi (outcome = TM7_data$`Cellulase/cellobiase CelA1`,
#' covariate=x_matrix, 
#' quality_var=TM7_data$mean_coverage,
#' max_iterations=1000, 
#' change_threshold=0.1,
#' epsilon=0, 
#' nstarts = 1, 
#' spline_df = 3)
#' @export
happi <- function(outcome,
                  covariate = NULL,
                  h0_param = 2,
                  quality_var = NULL,
                  covariate_formula = NULL, 
                  covariate_formula_h0 = NULL,
                  quality_var_formula = NULL, 
                  data = NULL, 
                  max_iterations = 1000,
                  min_iterations = 15,
                  change_threshold = 0.05,
                  epsilon = 0,
                  method = "splines",
                  random_starts = FALSE,
                  firth = TRUE,
                  spline_df = 3, 
                  nstarts = 1, 
                  seed = 13, 
                  norm_sd = 1,
                  run_npLRT = FALSE,
                  P = NULL, 
                  verbose = TRUE
) {
  
  # number of observations 
  nn <- length(outcome)
  
  # check that all inputs are provided in either form 
  if (is.null(covariate)) {
    if (is.null(covariate_formula) | is.null(covariate_formula_h0) | 
        is.null(quality_var_formula) | is.null(data)) {
      stop("If using formulas, please include `covariate_formula`, `covariate_formula_h0`, `quality_val_formula`, and `data`.")
    }
  } else {
    if (is.null(h0_param) | is.null(quality_var)) {
      stop("If using covariate matrix, please also include `h0_param` and `quality_var`.")
    }
  }
  
  # make covariate matrices from formulas
  if (!(is.null(covariate_formula))) {
    if (nrow(data) != nn) {
      stop("Make sure that `outcome` and `data` have the same number of rows.")
    }
    covariate <- stats::model.matrix(covariate_formula, data)
    pp <- ncol(covariate)
    covariate_null <- stats::model.matrix(covariate_formula_h0, data)
    if ((pp - ncol(covariate_null)) > 1) {
      stop("Currently happi cannot test multiple parameters at once.")
    }
    quality_var_mat <- stats::model.matrix(quality_var_formula, data)
    # if quality variable matrix has intercept, just take column with quality variable
    if (ncol(quality_var_mat) == 1) {
      quality_var <- quality_var_mat
    } else {
      quality_var <- quality_var_mat[, -1]
    }
    # otherwise use covariate matrices provided   
  } else {
    if (nrow(covariate) != nn | length(quality_var) != nn) {
      stop("Make sure that `outcome`, `covariate_data`, and `quality_variable` have the same number of observations.")
    }
    pp <- ncol(covariate)
    if (pp == 1) {
      covariate <- matrix(covariate, ncol = 1, nrow = nn)
      covariate_null <- matrix(1, ncol = 1, nrow = nn)
    } else {
      covariate_null <- covariate[, -h0_param]
    } 
    if (!is.matrix(covariate_null)) covariate_null <- matrix(covariate_null, nrow=nn)
  }
  
  # stop if there is any missing data 
  stopifnot(all(!is.na(c(outcome, covariate, quality_var)))) 
  
  # check that epsilon is either a single value or a vector of length n
  if (length(epsilon) == 1) {
    epsilon_vec <- rep(epsilon, nn)
  } else if (length(epsilon == nn)) {
    epsilon_vec <- epsilon
  } else {
    error("epsilon should be a single number or a length n vector.")
  }
  
  #  if(h0_param != 2) warning("Amy hasn't properly checked that testing a different parameter results in sensible output")
  
  ## reorder all elements of all data by ordering in quality_var
  ## TODO(change back at end)
  my_order <- order(quality_var)
  quality_var <- quality_var[my_order]
  outcome <- outcome[my_order]
  covariate <- covariate[my_order, ]
  epsilon_vec <- epsilon_vec[my_order]
  
  if (pp == 1) {
    covariate <- matrix(covariate, ncol = 1, nrow = nn)
    covariate_null <- matrix(1, ncol = 1, nrow = nn)
  } else {
    covariate_null <- covariate[, -h0_param]
  } #TODO(PT) take in formula 
  if (!is.matrix(covariate_null)) covariate_null <- matrix(covariate_null, nrow=nn)
  
  # generate beta initial starts for alternative model
  inits <- happi::genInits(num_covariate = pp, nstarts = nstarts, seed = seed, norm_sd = norm_sd)
  # generate beta initial starts for null model   
  inits_null <- happi::genInits(num_covariate = pp - 1, nstarts = nstarts, seed = seed, norm_sd = norm_sd)
  # generate f initial starts for both alternative and null models   
  inits_f <- happi::genInits_f(nn = nn, nstarts = nstarts, seed = seed, outcome = outcome)
  
  bestOut <- NULL
  bestOut_null <- NULL
  
  ### Start for loop through multiple starts 
  
  # save information from each start
  starts_df <- data.frame(starts = 1:nstarts,
                          alt_ll = NA,
                          null_ll = NA)
  
  for (i in 1:nstarts) {
    
    ####################################
    ## Create matrices to hold results #
    ####################################
    my_estimates <- tibble::tibble("iteration" = 0:max_iterations,
                                   "epsilon" = ifelse(length(epsilon) == 1, epsilon, "multiple"),
                                   "loglik" = NA,  
                                   "loglik_nopenalty" = NA)
    
    my_estimates_null <- tibble::tibble("iteration" = 0:max_iterations,
                                        "epsilon" = ifelse(length(epsilon) == 1, epsilon, "multiple"),
                                        "loglik" = NA, 
                                        "loglik_nopenalty" = NA)
    
    my_estimated_beta <- matrix(NA, nrow = max_iterations + 1, ncol = pp)
    my_estimated_beta_null <- matrix(NA, nrow = max_iterations + 1, ncol = max(1, pp - 1))
    my_fitted_xbeta <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_fitted_xbeta_null <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_f <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_f_null <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_ftilde <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_ftilde_null <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_p <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_p_null <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_basis_weights <- matrix(NA, nrow = max_iterations + 1, ncol = spline_df + 1)
    my_estimated_basis_weights_null <- matrix(NA, nrow = max_iterations + 1, ncol = spline_df + 1)
    
    ##################
    ## Input starts ##
    ##################
    my_estimated_beta[1, ] <- inits[i,]
    my_estimated_beta_null[1, ] <- inits_null[i,]
    
    my_fitted_xbeta[1, ] <- c(covariate %*% my_estimated_beta[1, ])
    my_fitted_xbeta_null[1, ] <- c(covariate_null %*% my_estimated_beta_null[1, ])
    
    my_estimated_f[1, ] <- inits_f[i,]
    my_estimated_f_null[1, ] <- inits_f[i,]
    # f-tilde = logit(f)
    my_estimated_ftilde[1, ] <- happi::logit(my_estimated_f[1, ])
    my_estimated_ftilde_null[1, ] <- happi::logit(my_estimated_f_null[1, ])
    
    
    my_estimated_p[1, ] <- happi::calculate_p(xbeta = my_fitted_xbeta[1, ],
                                              ff = my_estimated_f[1, ], 
                                              epsilon = epsilon_vec, 
                                              outcome = outcome)
    my_estimated_p_null[1, ] <- happi::calculate_p(xbeta = my_fitted_xbeta_null[1, ],
                                                   ff = my_estimated_f_null[1, ], 
                                                   epsilon = epsilon_vec, 
                                                   outcome = outcome)
    
    my_estimates[1, "loglik"] <- happi::incomplete_loglik(xbeta = my_fitted_xbeta[1, ],
                                                          ff = my_estimated_f[1, ],
                                                          firth = firth, 
                                                          outcome = outcome, 
                                                          epsilon = epsilon_vec, 
                                                          covariate = covariate)
    my_estimates[1, "loglik_nopenalty"] <- happi::incomplete_loglik(xbeta = my_fitted_xbeta[1, ],
                                                                    ff = my_estimated_f[1, ],
                                                                    firth = F, 
                                                                    outcome = outcome, 
                                                                    epsilon = epsilon_vec, 
                                                                    covariate = covariate)
    my_estimates_null[1, "loglik"] <- happi::incomplete_loglik(xbeta = my_fitted_xbeta_null[1, ],
                                                               ff = my_estimated_f_null[1, ],
                                                               firth = firth, 
                                                               outcome = outcome, 
                                                               epsilon = epsilon_vec, 
                                                               covariate = covariate_null)
    my_estimates_null[1, "loglik_nopenalty"] <- happi::incomplete_loglik(xbeta = my_fitted_xbeta_null[1, ],
                                                                         ff = my_estimated_f_null[1, ],
                                                                         firth = F, 
                                                                         outcome = outcome, 
                                                                         epsilon = epsilon_vec, 
                                                                         covariate = covariate_null)
    ###############################################################################################################
    ## E-M algorithm for penalized maximum likelihood estimation of our parameter estimates for alternative model #
    ###############################################################################################################
    mlout <- tryCatch(happi::run_em(outcome = outcome,
                                    quality_var = quality_var,
                                    max_iterations = max_iterations,
                                    min_iterations = min_iterations,
                                    change_threshold = change_threshold,
                                    epsilon = epsilon_vec,
                                    method = method,
                                    firth = firth,
                                    spline_df = spline_df, 
                                    nn = nn, 
                                    em_covariate = covariate,
                                    em_estimated_p = my_estimated_p,
                                    em_fitted_xbeta = my_fitted_xbeta, 
                                    em_estimated_f = my_estimated_f, 
                                    em_estimates = my_estimates,
                                    em_estimated_beta = my_estimated_beta,
                                    em_estimated_basis_weights = my_estimated_basis_weights,
                                    em_estimated_ftilde = my_estimated_ftilde), 
                      error = function(e) {cat("WARNING alternative model E-M at initial start row index", paste(i),":", conditionMessage(e),"\n")}) 
    
    ########################################################################################################
    ## E-M algorithm for penalized maximum likelihood estimation of our parameter estimates for null model #
    ########################################################################################################
    mlout_null <- tryCatch(happi::run_em(outcome = outcome,
                                         quality_var = quality_var,
                                         max_iterations = max_iterations,
                                         min_iterations = min_iterations,
                                         change_threshold = change_threshold,
                                         epsilon = epsilon_vec,
                                         method = method,
                                         firth = firth,
                                         spline_df = spline_df, 
                                         nn = nn, 
                                         em_covariate = covariate_null,
                                         em_estimated_p = my_estimated_p_null,
                                         em_fitted_xbeta = my_fitted_xbeta_null, 
                                         em_estimated_f = my_estimated_f_null, 
                                         em_estimates = my_estimates_null,
                                         em_estimated_beta  = my_estimated_beta_null,
                                         em_estimated_basis_weights =  my_estimated_basis_weights_null,
                                         em_estimated_ftilde = my_estimated_ftilde_null), 
                           error = function(e) {cat("WARNING null model E-M at initial start row index", paste(i),":", conditionMessage(e),"\n")}) # END WHILE loop that stops when convergence is met
    ############################################################
    # Evaluate for best set of results for alternative model ###
    ############################################################
    ## if we get a converged result then... 
    ## check if bestOut is NULL. 
    ## If NULL then replace with the converged result 
    ## If NOT NUlL then replace only if LL is better than what is currently in bestOut
    
    tryCatch(if(!is.na(tail(mlout$loglik$loglik[!is.na(mlout$loglik$loglik)],1))){ # check that not NA for alternative model
      starts_df[i, "alt_ll"] <- tail(mlout$loglik$loglik[!is.na(mlout$loglik$loglik)],1)
      if (is.null(bestOut)) {
        bestOut <-  mlout
      } else if (!is.null(bestOut)){
        if(tail(mlout$loglik$loglik[!is.na(mlout$loglik$loglik)],1) > tail(bestOut$loglik$loglik[!is.na(bestOut$loglik$loglik)],1)){
          bestOut <- mlout 
        }
      }
    }, error = function(e) {cat("WARNING alternative model did not converge at initial start row index", paste(i),":", conditionMessage(e),"\n")})
    
    #####################################################
    # Evaluate for best set of results for null model ###
    #####################################################
    ## if we get a converged result then... 
    ## check if bestOut_null model is NULL. 
    ## If bestOut_null is NULL then replace with the converged result 
    ## If bestOut_null is NOT NUlL then replace only if LL is better than what is currently in bestOut_null
    
    tryCatch(if(!is.na(utils::tail(mlout_null$loglik$loglik[!is.na(mlout_null$loglik$loglik)],1))){ # check that not NA for null model 
      starts_df[i, "null_ll"] <- tail(mlout_null$loglik$loglik[!is.na(mlout_null$loglik$loglik)],1)
      if (is.null(bestOut_null)) {
        bestOut_null <-  mlout_null
      } else if (!is.null(bestOut_null)){
        if(utils::tail(mlout_null$loglik$loglik[!is.na(mlout_null$loglik$loglik)],1) > tail(bestOut_null$loglik$loglik[!is.na(bestOut_null$loglik$loglik)],1)){
          bestOut_null <- mlout_null 
        }
      }
    }, error = function(e) {cat("WARNING null model did not converge at initial start row index", paste(i),":", conditionMessage(e),"\n")})
    
  } # END -- nstarts
  
  if (is.null(bestOut)) stop("Full model could not be optimized! Try increasing the number of nstarts or simplifying your model.")
  if (is.null(bestOut_null)) stop("Null model could not be optimized! Try increasing the number of nstarts or simplifying your model.")
  
  #############################################
  ### If alternative LL is smaller than null:
  ## restart estimation of alternative model using the null model 
  #############################################
  if (utils::tail(bestOut$loglik$loglik_nopenalty[!is.na(bestOut$loglik$loglik_nopenalty)],1) < 
      tail(bestOut_null$loglik$loglik_nopenalty[!is.na(bestOut_null$loglik$loglik_nopenalty)],1)) {
    
    message("Likelihood greater under null; restarting...")
    my_estimates <- tibble::tibble("iteration" = 0:max_iterations,
                                   "epsilon" = ifelse(length(epsilon) == 1, epsilon, "multiple"),
                                   "loglik" = NA, 
                                   "loglik_nopenalty" = NA)
    my_estimated_beta <- matrix(NA, nrow = max_iterations + 1, ncol = pp)
    my_fitted_xbeta <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_f <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_ftilde <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    my_estimated_p <- matrix(NA, nrow = max_iterations + 1, ncol = nn)
    
    if (pp == 1){
      my_estimated_beta[1, 1] <- 0
      
    } else if (pp == 2) {
      my_estimated_beta[1, 1] <- utils::tail(bestOut_null$beta[!is.na(bestOut_null$beta[,1]),],1)[1] ## start at converged null
      my_estimated_beta[1, h0_param] <- 0
      
    } else if (pp == 3){
      my_estimated_beta[1, 1] <- utils::tail(bestOut_null$beta[!is.na(bestOut_null$beta[,1]),],1)[1] ## start at converged null
      my_estimated_beta[1, h0_param] <- 0
      my_estimated_beta[1, 3] <- tail(bestOut_null$beta[!is.na(bestOut_null$beta[,1]),],1)[2] ## start at converged null
      
    } ## TO DO (PT): need to take in pp > 3 if needed 
    
    stopifnot(h0_param == 2)
    my_fitted_xbeta[1, ] <- c(covariate %*% my_estimated_beta[1, ])
    my_estimated_f[1, ] <- utils::tail(bestOut_null$f[!is.na(bestOut_null$f[,1]),],1)
    my_estimated_ftilde[1, ] <- happi::logit(my_estimated_f[1, ])
    my_estimated_p[1, ] <- happi::calculate_p(xbeta = my_fitted_xbeta[1, ],
                                              ff = my_estimated_f[1, ], 
                                              epsilon = epsilon_vec, 
                                              outcome = outcome)
    my_estimates[1, "loglik"] <- happi::incomplete_loglik(xbeta = my_fitted_xbeta[1, ],
                                                          ff = my_estimated_f[1, ], 
                                                          outcome = outcome, 
                                                          epsilon = epsilon_vec, 
                                                          covariate = covariate)
    my_estimates[1, "loglik_nopenalty"] <- happi::incomplete_loglik(xbeta = my_fitted_xbeta[1, ],
                                                                    ff = my_estimated_f[1, ], 
                                                                    outcome = outcome, 
                                                                    epsilon = epsilon_vec, 
                                                                    covariate = covariate,
                                                                    firth = F)
    ## restart at null model if likelihood greater under null than alternative
    tryOut <- tryCatch(happi::run_em(outcome = outcome,
                             quality_var = quality_var,
                             max_iterations = max_iterations,
                             min_iterations = min_iterations,
                             change_threshold = change_threshold,
                             epsilon = epsilon_vec,
                             method = method,
                             firth = firth,
                             nn = nn, 
                             spline_df = spline_df, 
                             em_covariate = covariate,
                             em_estimated_p = my_estimated_p,
                             em_fitted_xbeta = my_fitted_xbeta, 
                             em_estimated_f = my_estimated_f, 
                             em_estimates = my_estimates,
                             em_estimated_beta  = my_estimated_beta,
                             em_estimated_basis_weights =  my_estimated_basis_weights,
                             em_estimated_ftilde = my_estimated_ftilde), 
                        error = function(e) {cat("WARNING alternative model E-M at null model estimates:", conditionMessage(e),"\n")}) 
    
    if (!(is.null(tryOut))) {
      # use tryOut as new bestOut if it has a higher likelihood value
      if (utils::tail(tryOut$loglik$loglik[!is.na(tryOut$loglik$loglik)],1) > utils::tail(bestOut_null$loglik$loglik[!is.na(bestOut_null$loglik$loglik)],1)) {
        bestOut <- tryOut 
      }
    }
    
    if (utils::tail(bestOut$loglik$loglik[!is.na(bestOut$loglik$loglik)],1) < utils::tail(bestOut_null$loglik$loglik[!is.na(bestOut_null$loglik$loglik)],1)) {
      message("Restarting to estimate beta_alt didn't work. Penalized likelihood is still greater under the null than alt. pvalue = 1")
      # message(paste("Had not converged after", tt_restart - 1, "iterations; LL % change:", round(pct_change_llks, 3)))
    }
    # if (tail(bestOut$loglik$loglik_nopenalty[!is.na(bestOut$loglik$loglik_nopenalty)],1) < tail(bestOut_null$loglik$loglik_nopenalty[!is.na(bestOut_null$loglik$loglik_nopenalty)],1)) {
    #    message("Weird! Nonpenalized Likelihood is also greater under the null than alt. pvalue = 1")
    #  } else {
    #    message("Phew! Nonpenalized Likelihood is not greater under the null than alt.")
    #  }
  } ## End restart if likelihood is greater under the null
  
  ###############################
  ### Output the best estimates## 
  ###############################
  my_estimates$loglik <- bestOut$loglik$loglik
  my_estimates$loglik_nopenalty <- bestOut$loglik$loglik_nopenalty
  my_estimates$iteration <- bestOut$loglik$iteration
  
  my_estimates$loglik_null <- bestOut_null$loglik$loglik
  my_estimates$loglik_null_nopenalty <- bestOut_null$loglik$loglik_nopenalty
  my_estimates$iteration_null <- bestOut_null$loglik$iteration
  
  my_estimated_beta <- bestOut$beta
  my_estimated_beta_null <- bestOut_null$beta
  my_estimated_f <- bestOut$f
  my_estimated_f_null <- bestOut_null$f
  my_estimated_basis_weights <- bestOut$basis_weights
  my_estimated_basis_weights_null <- bestOut_null$basis_weights
  my_estimated_p <- bestOut$p
  my_estimated_p_null <- bestOut_null$p
  
  ########################
  # LRT based on best LLs 
  ########################
  
  my_estimates$LRT <- 2*(utils::tail(my_estimates$loglik[!is.na(my_estimates$loglik)],1) - utils::tail(my_estimates$loglik_null[!is.na(my_estimates$loglik_null)],1))
  my_estimates$pvalue <- 1 - pchisq(my_estimates$LRT, df=1)
  
  my_estimates$LRT_nopenalty <- 2*(utils::tail(my_estimates$loglik_nopenalty[!is.na(my_estimates$loglik_nopenalty)],1) - utils::tail(my_estimates$loglik_null_nopenalty[!is.na(my_estimates$loglik_null_nopenalty)],1))
  my_estimates$pvalue_nopenalty <- 1 - pchisq(my_estimates$LRT_nopenalty, df=1)
  
  full_output <- list("loglik" = my_estimates,
                      "beta" = my_estimated_beta,
                      "beta_null" = my_estimated_beta_null,
                      "f" = my_estimated_f,
                      "f_null" = my_estimated_f_null,
                      "basis_weights" =  my_estimated_basis_weights,
                      "basis_weights_null" =  my_estimated_basis_weights_null,
                      "p" = my_estimated_p,
                      "p_null" = my_estimated_p_null,
                      "quality_var" = quality_var,
                      "outcome" = outcome,
                      "covariate" = covariate,
                      "starts_df" = starts_df)
  
  if (run_npLRT) {
    npLRT_p <- npLRT(happi_out = full_output,
                     max_iterations = max_iterations,
                     min_iterations = min_iterations,
                     h0_param = h0_param,
                     change_threshold = change_threshold,
                     epsilon = epsilon,
                     method = method,
                     firth = firth,
                     spline_df = spline_df, 
                     nstarts = nstarts,
                     P = P)
    full_output$npLRT_p <- npLRT_p
  }
  
  pvalue <- tail(my_estimates$pvalue_nopenalty
                           [!is.na(my_estimates$pvalue_nopenalty)], 1)
  beta <- tail(my_estimated_beta[!is.na(my_estimated_beta[, 1]), ], 1)
  LRT <- tail(my_estimates$LRT_nopenalty[
    !is.na(my_estimates$LRT_nopenalty)], 1)
  small_results <- list(pvalue_LRT = pvalue, beta = beta, LRT = LRT)
  
  if (run_npLRT) {
    small_results$pvalue_npLRT <- npLRT_p
  }
  
  full_output$summary <- small_results
  
  if (verbose) {
    return(full_output)
  } else {
    return(full_output$summary)
  }
}