R/bpc_exports.R

Defines functions inv_logit logit launch_shinystan get_waic get_loo get_rank_of_players get_hpdi_parameters get_sample_posterior get_stanfit_summary get_stanfit

Documented in get_hpdi_parameters get_loo get_rank_of_players get_sample_posterior get_stanfit get_stanfit_summary get_waic inv_logit launch_shinystan logit

#' Retrieve the stanfit object generated by rstan.
#'
#' This object can be used with any other function or package that uses stanfit objects from rstan
#' @param bpc_object a bpc object
#' @return a stanfit object
#' @export
#'
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#' stanfit<- get_stanfit(m)
#' print(class(stanfit))
#' }
get_stanfit <- function(bpc_object) {
  if (class(bpc_object) != 'bpc')
    stop('Error! The object is not of bpc class')
  return(bpc_object$stanfit)
}


#' Get stanfit summary table of all parameters excluding log_lik.
#'
#' Important to investigate the neff and the Rhat from the MCMC
#' This excludes the log_lik paramter
#' @param bpc_object a bpc object
#' @return a data frame with the summary including quantiles, Rhat and neff
#' @export
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#' s <- get_stanfit_summary(m)
#' print(s)
#' }
get_stanfit_summary <- function(bpc_object) {
  if (class(bpc_object) != 'bpc')
    stop('Error! The object is not of bpc class')
  s<-rstan::summary(bpc_object$stanfit) #returns a list
  out<-as.data.frame(s$summary) # returns a matrix which we convert to dataframe
  out<- out %>%
    tibble::rownames_to_column(var="Parameter") %>%
    dplyr::filter(!startsWith(.data$Parameter, "log_lik")) %>%
    dplyr::filter(!startsWith(.data$Parameter, "lp__"))

  out<-as.data.frame(out)

  return(out)
}


#' Get the posterior samples for a parameter of the model.
#'
#' Return a data frame with the posterior samples for the parameters of the model
#' @param bpc_object a bpc object
#' @param n how many times are we sampling? Default 1000
#' @param par name of the parameters to predict
#' @return Return a data frame with the posterior samples for the parameters. One column for each parameter one row for each sample
#' @export
#'
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#' s <- get_sample_posterior(m, par='lambda', n=100)
#' print(head(s))
#' }
get_sample_posterior <-
  function(bpc_object, par = 'lambda', n = 1000) {
    #TODO: verify the predictors condition
    if (class(bpc_object) != 'bpc')
      stop('Error! The object is not of bpc class')

    n <- floor(n)
    cluster_lookup_table <- bpc_object$cluster_lookup_table
    lookup_table <- bpc_object$lookup_table

    stanfit <- get_stanfit(bpc_object)
    posterior <-
      as.data.frame(sample_stanfit(stanfit, par = par, n = n))

    #Creating parameter name for the columns
    # The challenge here is to present a data frame with the appropriate variable names in the order given by stan.
    # We do this with the function below from helpers indexes
    colnames(posterior) <-
      create_array_of_par_names(par,
                                lookup_table = lookup_table,
                                cluster_lookup_table = cluster_lookup_table)
    return(as.data.frame(posterior))
  }

#' Return the mean and the HPDI of the parameters of the model
#'
#' Return a data frame with the mean and with high and low 95% hpd interval for all parameters of the model
#' @param bpc_object a bpc object
#' @return a data frame containing a column with the parameters, a column with mean and two columns with higher and lower hpdi
#' @export
#' @importFrom rlang .data
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#' hpdi<-get_hpdi_parameters(m)
#' print(hpdi)}
get_hpdi_parameters <- function(bpc_object) {
  #TODO:  REMOVE the parameters that are either not relevant to the model or the ones that are transformed

  if (class(bpc_object) != 'bpc')
    stop('Error! The object is not of bpc class')
  hpdi <- bpc_object$hpdi
  #the parameters are in order as in the hpdi
  neff_rhat <- get_stanfit_summary(bpc_object) %>%
    dplyr::select(.data$n_eff, .data$Rhat)
  #excluding some parameters that are not used
  hpdi <- hpdi %>%
    dplyr::filter(!stringr::str_detect(.data$Parameter, "log_lik")) %>%
    dplyr::filter(!stringr::str_detect(.data$Parameter, "lp__"))

  hpdi <- cbind(hpdi,neff_rhat)

  pars <- get_model_parameters(bpc_object)
  for (i in 1:length(pars)) {
    parameter <- pars[i]
    if (parameter == 'U' & stringr::str_detect(bpc_object$model_type,'-U')) {
      hpdi <-
        replace_parameter_index_with_names(
          hpdi,
          column = 'Parameter',
          par = parameter,
          lookup_table = bpc_object$lookup_table,
          cluster_lookup_table = bpc_object$cluster_lookup_table
        )
    }
    else if (parameter == 'lambda') {
      hpdi <-
        replace_parameter_index_with_names(
          hpdi,
          column = 'Parameter',
          par = parameter,
          lookup_table = bpc_object$lookup_table
        )
    }
    else if (parameter == 'B' & stringr::str_detect(bpc_object$model_type,'-generalized')) {
      hpdi <-
        replace_parameter_index_with_names(
          hpdi,
          column = 'Parameter',
          par = parameter,
          lookup_table = bpc_object$lookup_table,
          predictors_lookup_table = bpc_object$predictors_lookup_table
        )
    }
  }

  # Now that we have replaced the parameters name let's select only the ones that the model wants
  hpdi <- hpdi %>%
    dplyr::filter(!stringr::str_detect(.data$Parameter, "_param"))

  if(!stringr::str_detect(bpc_object$model_type, "-U"))
    hpdi <- dplyr::filter(hpdi, !stringr::str_detect(.data$Parameter, "U"))
  if(!stringr::str_detect(bpc_object$model_type, "-ordereffect"))
    hpdi <- dplyr::filter(hpdi, !stringr::str_detect(.data$Parameter, "gm"))
  if(!startsWith(bpc_object$model_type, "davidson"))
    hpdi <- dplyr::filter(hpdi, !stringr::str_detect(.data$Parameter, "nu"))
  if(!stringr::str_detect(bpc_object$model_type, "-generalized"))
    hpdi <- dplyr::filter(hpdi, !startsWith(.data$Parameter, "B"))


  return(hpdi)
}



#' Generate a ranking of the ability based on sampling the posterior distribution of the ranks.
#'
#' To print this object you should remove the last column PosteriorRank since it contain the whole posterior distribution for each case
#' @param bpc_object a bpc object
#' @param n Number of times we will sample the posterior
#' @return a data frame. This data frame contains the median of the rank, the mean, the standard deviation and column with a list containing all the posterior values for the rank
#' @export
#' @importFrom rlang .data
#' @importFrom stats var median
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#' rank_m<-get_rank_of_players(m,n=100)
#' rank_table <- dplyr::select(rank_m,-MeanRank, -StdRank,-PosteriorRank)
#' print(rank_table)
#' }
get_rank_of_players <- function(bpc_object, n = 1000) {
  if (class(bpc_object) != 'bpc')
    stop('Error! The object is not of bpc class')
  s <- get_sample_posterior(bpc_object, par = 'lambda', n = n)
  s <- dplyr::mutate(s, rown = dplyr::row_number())
  wide_s <-
    tidyr::pivot_longer(
      s,
      cols = tidyselect::starts_with('lambda'),
      names_to = "Parameter",
      values_to = "value"
    )
  rank_df <- wide_s %>%
    dplyr::group_by(.data$rown) %>%
    dplyr::mutate(Rank = rank(-.data$value, ties.method = 'random')) %>%
    dplyr::ungroup() %>%
    dplyr::select(-.data$value) %>%
    dplyr::group_by(.data$Parameter) %>%
    dplyr::summarise(
      MedianRank = median(.data$Rank),
      MeanRank = mean(.data$Rank),
      StdRank = sqrt(var(.data$Rank)),
      PosteriorRank = list(rank = .data$Rank)
    ) %>%
    dplyr::arrange(.data$MedianRank)
  return(rank_df)
}


#' Tiny wrapper for the PSIS-LOO-CV method from the loo package.
#'
#' This is used to evaluate the fit of the model using entropy criteria
#' @references
#' Vehtari A, Gelman A, Gabry J (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing_, 27, 1413-1432
#' @param bpc_object a bpc object
#' @return a loo object
#' @export
#'
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#'  l<-get_loo(m)
#' print(l)
#' }
get_loo <- function(bpc_object) {
  if (class(bpc_object) != 'bpc')
    stop('Error! The object is not of bpc class')
  l <- loo::loo(get_stanfit(bpc_object), pars = "log_lik")
  return(l)
}

#' Tiny wrapper for the WAIC method from the loo package.
#'
#' This is used to evaluate the fit of the model using the Watanabe-Akaike Information criteria
#' @references
#' Gelman, Andrew, Jessica Hwang, and Aki Vehtari. Understanding predictive information criteria for Bayesian models. Statistics and computing 24.6 (2014): 997-1016.
#' @param bpc_object a bpc object
#' @return a loo object
#' @export
#'
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#' waic<-get_waic(m)
#' print(waic)
#' }
get_waic <- function(bpc_object) {
  if (class(bpc_object) != 'bpc')
    stop('Error! The object is not of bpc class')
  loglik <- loo::extract_log_lik(get_stanfit(bpc_object))
  waic <- loo::waic(loglik)
  return(waic)
}

#' Tiny wrapper to launch a shinystan app to investigate the MCMC.
#'
#' It launches a shinystan app automatically in the web browser
#' @param bpc_object a bpc object
#' @export
#' @examples
#' \donttest{
#' m<-bpc(data = tennis_agresti,
#' player0 = 'player0',
#' player1 = 'player1',
#' result_column = 'y',
#' model_type = 'bt',
#' solve_ties = 'none')
#' launch_shinystan(m)
#' }
launch_shinystan <- function(bpc_object) {
  if (class(bpc_object) != 'bpc')
    stop('Error! The object is not of bpc class')
  shinystan::launch_shinystan(get_stanfit(bpc_object))
}


#' Logit function
#' @references
#' https://en.wikipedia.org/wiki/Logit
#' @param x p is a probability 0 to 1
#' @return a value between -inf and inf
#' @export
#'
#' @examples
#' logit(0.5)
#' logit(0.2)
logit <- function(x) {
  if (any(x < 0 | x > 1))
    stop('Error! x not between 0 and 1')
  y <- log(x / (1 - x))
  return(y)
}

#' Inverse logit function
#' @references
#' https://en.wikipedia.org/wiki/Logit
#' @param x is a real -inf to inf
#' @return a value between 0 and 1
#' @export
#'
#' @examples
#' inv_logit(5)
#' inv_logit(-5)
#' inv_logit(0)
inv_logit <- function(x) {
  y <- exp(x) / (1 + exp(x))
  return(y)
}

Try the bpcs package in your browser

Any scripts or data that you put into this service are public.

bpcs documentation built on Dec. 15, 2020, 5:23 p.m.