knitr::opts_chunk$set(echo = FALSE,
                      comment = NA)
devtools::load_all()
library(tidyverse)

options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

dat <- Sample_Data_Binomial
dat$x2 <- sample(nrow(Sample_Data_Binomial), replace = TRUE)

p0 <- function(...) {
  paste(..., collapse = "", sep = "")
}

concat <- function(...) {
  paste(..., collapse = "", sep = "")
}

indent <- "    "

Input

Data

glimpse(dat)

Formula

(f <- y|k ~ x1 + age + gender)

Link Function

# Logit Link
logit <- function(x) {
  log( x / (1 - x) )
}

# Logit inverse (logistic)
logistic <- function(x) {
  1 / ( 1 + exp(-x) )
}

# Probit Link (Inverse of the Normal CDF)
probit <- function(x) {
  qnorm(x)
}

# Probit inverse (Normal CDF)
probit_inv <- function(x) {
  pnorm(x)
}

Output

Formula

f_str  <- as.character(f)
f_terms <- terms(f)
f_vars <- attr(f_terms, "term.labels")
f_int  <- as.logical(attr(f_terms, "intercept"))

LHS <- f_str[2]
RHS <- f_str[3]

if (grepl(pattern = "\\|", LHS)) {
  data_mode <- "binomial"
  LHS <- trimws(strsplit(LHS, split = "\\|")[[1]])
} else {
  data_mode <- "bernoulli"
}

f_ls <- list(vars = f_vars,
             LHS  = LHS,
             RHS  = RHS,
             has_intercept = f_int,
             data_mode = data_mode)

str(f_ls)

Metadata

classes <- purrr::map(dat, ~class(.x)[1])

response_vars <- LHS
numeric_vars <- subset(f_vars, classes[f_vars] %in% c("numeric", "integer"))
factor_vars <- subset(f_vars, classes[f_vars] %in% c("factor", "ordered"))
character_vars <- subset(f_vars, classes[f_vars] == "character")

response_class <- classes[response_vars]
numeric_class <- classes[numeric_vars]
factor_class <- classes[factor_vars]


nvs <- numeric_vars
fvs <- factor_vars
has_intercept <- f_int
ret_list <- list()
if (has_intercept) {
  # Every model with an intercept gets a shared intercept
  tmp <- "a"
  if (length(fvs) > 0) {
    # Every factor gets an intercept
    tmp <- c(tmp, paste0("a_", fvs))
  }
  ret_list[[1]] <- tmp
}

if(length(nvs) > 0) {
  for (nv in nvs) {
    # Every numeric predictor gets a slope coefficient
    tmp <- paste0("b_", nv)
    if (length(fvs) > 0) {
      # Every factor contributes to a change in slope
      tmp <- c(tmp, paste0("b_", nv, "_", fvs))
    }
    ret_list[[length(ret_list) + 1]] <- tmp
  } # for nv
} # if nv

metadata <- list(vars  = list(response  = response_vars,
                              numeric   = numeric_vars,
                              factor    = factor_vars,
                              character = character_vars),
                 class = list(response = response_class,
                              numeric  = numeric_class,
                              factor   = factor_class),
                 coefs = ret_list)

str(metadata)

Data

N <- nrow(dat)
dat <- as.list(dat)
dat[["N"]] <- N

if (length(fvs) > 0) {
  dat[paste0("levels_", fvs)] <- lapply(dat[fvs], levels)
  dat[fvs] <- lapply(dat[fvs], as.integer)
  # Get the number of levels of each factor
  for (fv in fvs) {
    dat[[paste0("N_", fv)]] <- length(unique(dat[[fv]]))
  }
}

str(dat)

Stan Code

Distribution Formula

link <- "logit"
opt <- paste(data_mode, link)

m_dist <- switch(
  opt,
  "bernoulli logit"  = paste0(LHS, " ~ bernoulli_logit(theta)"),
  "bernoulli probit" = paste0(LHS, " ~ bernoulli(theta)"),
  "binomial logit"   = paste0(LHS[1], " ~ binomial_logit(", LHS[2],", theta)"),
  "binomial probit"  = paste0(LHS[1], " ~ binomial(", LHS[2],", theta)"),
  stop("You must specify a valid model: (bernoulli/binomial), (logit/probit)")
)

m_dist

Linear Model

has_numeric <- length(nvs) > 0
has_factor <- length(fvs) > 0

if (!has_numeric && !has_factor && !has_intercept) {
  # y ~ 0
  m <- NULL

} else if (has_numeric && !has_factor && !has_intercept ) {
  # y ~ b1*x1 + b2*x2 + ...
  # creates: bx1, bx2, ...
  m <- paste0("b_", nvs)
  # creates: bx1*x1 + bx2*x2 + ...
  m <- paste0(m, " * ", nvs, "_std[i]", collapse = " + ")

} else if (!has_numeric && has_factor && !has_intercept) {
  # y ~ k1 + k2 + ...
  m <- paste0("a + ", paste0("a_", fvs, "[", fvs, "[i]]", collapse = " + "))

} else if (!has_numeric && !has_factor && has_intercept) {
  # y ~ 1
  m <- "a"

} else if (has_numeric && has_factor && !has_intercept) {
  # y ~ (bx1 + bx1_k1)*x1 + (bx2 + bx2_k1)*x2 + ...
  # Builds inner slope (bx1 + bx1_k1 + ...) then the outer product
  m <- paste0(
    sapply(seq_along(nvs), function(i) {
      # creates: bxi_k1 + bxi_k2 + ...
      inner <- paste0("b_", nvs[i], "_", fvs, "[", fvs, "[i]]", collapse = " + ")
      # creates: bxi + bxi_k1 + bxi_k2 + ...
      inner <- paste0("b_", nvs[i], " + ", inner)
      # creates: (bxi + bxi_k1 + bxi_k2 + ...)*xi
      paste0("(", inner, ") * ", nvs[i], "_std[i]")
    }),
    collapse = " + ")

} else if (has_numeric && !has_factor && has_intercept) {
  # y ~ a + b1*x1 + b2*x2 + ...
  # creates: bx1, bx2, ...
  m <- paste0("b_", nvs)
  # creates: bx1*x1 + bx2*x2 + ...
  m <- paste0(m, " * ", nvs, "_std[i]", collapse = " + ")
  m <- paste0("a", " + ", m)

} else if (!has_numeric && has_factor && has_intercept) {
  # y ~ a + k1 + k2 + ...
  m <- paste0("a + ", paste0("a_", fvs, "[", fvs, "[i]]", collapse = " + "))

} else {
  # y ~ (a + k) + (bx1 + bx1_k1)*x1 + (bx2 + bx2_k1)*x2 + ...
  m <- paste0(
    sapply(seq_along(nvs), function(i) {
      inner <- paste0("b_", nvs[i], "_", fvs, "[", fvs, "[i]]", collapse = " + ")
      inner <- paste0("b_", nvs[i], " + ", inner)
      paste0("(", inner, ") * ", nvs[i], "_std[i]")
    }),
    collapse = " + ")
  a <- paste0("a + ", paste0("a_", fvs, "[", fvs, "[i]]", collapse = " + "))
  m <- paste0(a, " + ", m)

}

m

Link Formula

f_link <- switch(
  link,
  probit = p0("theta[i] = Phi(", m, ")"),
  logit  = p0("theta[i] = ", m),
  stop("'Link' function must be either 'logit' or 'probit'.")
)

f_link

Prior Formula

adaptive_pooling <- TRUE
m_coefs <- metadata$coefs

if(adaptive_pooling) {
  ret_list <- list()
  for (i in m_coefs) {
    # Prior for the shared coefficient
    tmp <- paste0(i[1], " ~ normal(0, 5)")
    # Prior for the factor coefficients
    tmp <- c(tmp, map(i[-1], ~ paste0(.x, " ~ normal(0, sd_", .x, ")")))
    # Prior for the shared standard deviation for adaptive pooling
    tmp <- c(tmp, map(i[-1], ~ paste0("sd_", .x, " ~ cauchy(0, 2.5)")))

    ret_list[[length(ret_list) + 1]] <- tmp
  }
  # Each prior needs to be an element in a list
  m_prior <- as.list(unlist(ret_list))
} else {
  m_prior <- lapply(unlist(m_coefs), function(var) {
    paste0(var, " ~ normal(0, 5)")
  })
}
m_prior

Stan Blocks

data

body_data <- concat(
  indent, "// Number of observations, factor levels, etc.\n",
  indent, "int<lower=1> N;\n"
)

# Declare number of factor levels if any factors are in the model
if (length(fvs) > 0) {
  for (fv in fvs) {
    body_data <- concat(
      body_data,
      indent, "int<lower=1> N_", fv, ";\n"
    )
  }
}

# Response data (bernoulli or binomial)
body_data <- concat(body_data, "\n", indent, "// Response Data\n")
if (data_mode == "bernoulli") {
  body_data <- concat(body_data, indent, "int ", LHS, "[N];\n")
} else if (data_mode == "binomial") {
  body_data <- concat(body_data, indent, "int ", LHS[1], "[N];\n")
  body_data <- concat(body_data, indent, "int ", LHS[2], "[N];\n")
} else {
  stop("Data responses must be bernoulli or binomial")
}

# Numeric variables
if (length(nvs) > 0) {
  body_data <- concat(
    body_data, "\n", 
    indent, "// Numeric Data\n"
  )
  for (nv in nvs) {
    body_data <- concat(
      body_data,
      indent, "vector[N] ", nv, ";\n"
    )
  }
}

# Factor variables
if (length(fvs) > 0) {
  body_data <- concat(body_data, "\n", indent, "// Factor Data\n")
  for(fv in fvs) {
    body_data <- concat(body_data, indent, "int ", fv, "[N];\n")
  }
}


cat(concat(
  "data{\n",
    body_data,
  "}\n"
))

transformed data

if (length(nvs) > 0) {
  body_td_dec  <- NULL
  body_td_calc <- NULL
  for (nv in nvs) {
    body_td_dec <- concat(
      body_td_dec,
      indent, "real mu_", nv, ";\n",
      indent, "real sd_", nv, ";\n",
      indent, "vector[N] ", nv, "_std;\n\n"
    )
    body_td_calc <- concat(
      body_td_calc, "\n",
      indent, "mu_", nv, " = mean(", nv, ");\n",
      indent, "sd_", nv, " = sd(", nv, ");\n",
      indent, nv, "_std = (", nv, " - mu_", nv, ") / sd_", nv, ";\n"
    )
  }
}

cat(concat(
  "transformed data{\n",
    body_td_dec,
    body_td_calc,
  "}\n"
))

parameters

body_parameters <- ""
m_coef <- metadata$coefs

# Intercept terms
if (has_intercept) {
  body_parameters <- concat(body_parameters, indent, "// Intercept terms\n")
  body_parameters <- concat(body_parameters, indent, "real a;\n")
  # Add vectors for factors if any are in the model
  if (length(fvs) > 0) {
    for (fv in fvs) {
      body_parameters <- concat(
        body_parameters,
        indent, "vector[N_", fv, "] a_", fv, ";\n"
      )
    } # for fv
  } # if has factors
} # if has intercept

# Slope terms
if (length(nvs) > 0) {
  body_parameters <- concat(
    body_parameters, "\n",
    indent, "// Slope terms\n"
  )
  for (nv in nvs) {
    body_parameters <- concat(
      body_parameters,
      indent, "real b_", nv, ";\n"
    )
    # Factor slope terms
    if (length(fvs) > 0) {
      for (fv in fvs) {
        body_parameters <- concat(
          body_parameters,
          indent, "vector[N_", fv, "] b_", nv, "_", fv, ";\n"
        )
      } # for fv
    } # if fv
  } # for nv
} # if nv

# Adaptive pooling terms
if (adaptive_pooling && length(fvs) > 0) {
  body_parameters <- concat(
    body_parameters, "\n",
    indent, "// Adaptive Pooling terms\n"
  )
  for (coefs in m_coef) {
    for (coef in coefs[-1]) {
      body_parameters <- concat(
        body_parameters,
        indent, paste0("real<lower=machine_precision()> sd_", coef, ";\n")
      )
    } # for coef
  } # for coef group
} # if adaptive

cat(concat(
  "parameters{\n",
    body_parameters,
  "}\n"
))

model

body_model <- concat(
  indent, "vector[N] theta;\n\n",
  indent, "// Priors\n",
  concat(indent, m_prior, ";\n"), "\n",
  indent, "// General linear model\n",
  indent, "for (i in 1:N) {\n",
  indent, indent, f_link, ";\n",
  indent, "}\n",
  indent, m_dist, ";\n"
)

cat(concat(
  "model{\n",
    body_model,
  "}\n"
))

generated quantities

body_gq <- NULL

# Intercept terms
if (has_intercept) {
  body_gq <- concat(body_gq, indent, "// Intercept terms\n")
  body_gq <- concat(body_gq, indent, "real alpha;\n")
  # Add vectors for factors if any are in the model
  if (length(fvs) > 0) {
    for (fv in fvs) {
      body_gq <- concat(
        body_gq,
        indent, "vector[N_", fv, "] alpha_", fv, ";\n"
      )
    } # for fv
  } # if has factors
} # if has intercept

# Slope terms
if (length(nvs) > 0) {
  body_gq <- concat(
    body_gq, "\n",
    indent, "// Slope terms\n"
  )
  for (nv in nvs) {
    body_gq <- concat(
      body_gq,
      indent, "real beta_", nv, ";\n"
    )
    # Factor slope terms
    if (length(fvs) > 0) {
      for (fv in fvs) {
        body_gq <- concat(
          body_gq,
          indent, "vector[N_", fv, "] beta_", nv, "_", fv, ";\n"
        )
      } # for fv
    } # if fv
  } # for nv
} # if nv
body_gq2 <- concat(
  indent, "// Calculations\n"
)

if (!has_numeric && !has_factor && !has_intercept) {
  # y ~ 0

} else if (has_numeric && !has_factor && !has_intercept ) {
  # y ~ bx1*x1 + bx2*x2 + ...
  for (nv in nvs) {
    body_gq2 <- concat(
      body_gq2,
      indent, "beta_", nv, " = b_", nv, " / sd_", nv, ";\n"
    )
  }

} else if (!has_numeric && has_factor && !has_intercept) {
  # y ~ k1 + k2 + ...
  # nothing to do

} else if (!has_numeric && !has_factor && has_intercept) {
  # y ~ 1
  # nothing to do

} else if (has_numeric && has_factor && !has_intercept) {
  # y ~ (bx1 + bx1_k1)*x1 + (bx2 + bx2_k1)*x2 + ...
  for (nv in nvs) {
    body_gq2 <- concat(
      body_gq2,
      indent, "beta_", nv, " = b_", nv, " / sd_", nv, ";\n"
    )
    for (fv in fvs) {
      body_gq2 <- concat(
        body_gq2,
        indent, "beta_", nv, "_", fv, " = b_", nv, "_", fv, " / sd_", nv, ";\n"
      )
    }
  }

} else if (has_numeric && !has_factor && has_intercept) {
  # y ~ a + b1*x1 + b2*x2 + ...

  # Process alpha = a - b1*mu_x1/sd_x1 - b2*mu_x2/sd_x2 - ...
  tmp <- paste0("b_", nvs, " * mu_", nvs, " / sd_", nvs, collapse = " - ")
  body_gq2 <- concat(
    body_gq2,
    indent, "alpha = a - ", tmp, ";\n"
  )

  # Process beta_xi = b_xi / sd_xi
  for (nv in nvs) {
    body_gq2 <- concat(
      body_gq2,
      indent, "beta_", nv, " = b_", nv, " / sd_", nv, ";\n"
    )
  }

} else if (!has_numeric && has_factor && has_intercept) {
  # y ~ a + k1 + k2 + ...
  # nothing to do

} else {
  # y ~ (a + k) + (bx1 + bx1_k1)*x1 + (bx2 + bx2_k1)*x2 + ...

  # Process alpha = a - b1*mu_x1/sd_x1 - b2*mu_x2/sd_x2 - ...
  tmp <- paste0("b_", nvs, " * mu_", nvs, " / sd_", nvs, collapse = " - ")
  body_gq2 <- concat(
    body_gq2,
    indent, "alpha = a - ", tmp, ";\n"
  )

  # Process alpha_k = a_k - b1_k*mu_x1/sd_x1 - b2_k*mu_x2/sd_x2 - ...
  for (fv in fvs) {
    tmp <- paste0("b_", nvs, "_", fv, " * mu_", nvs, " / sd_", nvs, collapse = " - ")
    body_gq2 <- concat(
      body_gq2,
      indent, "alpha_", fv, " = a_", fv, " - ", tmp, ";\n"
    )
  }

  # Process beta_xi = b_xi / sd_xi
  for (nv in nvs) {
    body_gq2 <- concat(
      body_gq2,
      indent, "beta_", nv, " = b_", nv, " / sd_", nv, ";\n"
    )
  }

  for (nv in nvs) {
    for (fv in fvs) {
      body_gq2 <- concat(
        body_gq2,
        indent, "beta_", nv, "_", fv, " = b_", nv, "_", fv, " / sd_", nv, ";\n"
      )
    }
  }
}


cat(concat(
  "generated quantities{\n",
    body_gq, "\n",
    body_gq2,
  "}\n"
))

Final Code

stan_model <- concat(
  "data{\n",
    body_data,
  "}\n",
  "transformed data{\n",
    body_td_dec,
    body_td_calc,
  "}\n",
  "parameters{\n",
    body_parameters,
  "}\n",
  "model{\n",
    body_model,
  "}\n",
  "generated quantities{\n",
    body_gq, "\n",
    body_gq2,
  "}\n"
)

cat(stan_model)

Model Fit

ret <- c("alpha", "alpha_age", "alpha_gender", "beta_x1", "beta_x1_age", "beta_x1_gender")

fit <- stan(model_code = stan_model, data = dat, pars = ret)
shinystan::launch_shinystan(fit)


adknudson/BayesPsychometric documentation built on Nov. 22, 2019, 1:59 p.m.