knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
TODO: Add simulation study code
Review and run the code below to better understand the data distribution.
library(SPR) library(MASS) N = 5000 #this should be divisible by however many groups you use! number.groups <- 2 number.timepoints <- 1 set.seed(2092021) dat <- data.frame( 'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints), 'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints), stringsAsFactors=F) # Create Beta parameters for these design matrix: X <- model.matrix( ~ Group , data = dat) # Create Beta Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param')) Beta[] <- c(-0.5, 2) # Matrix multiply: XB <- X %*% Beta p <- exp(XB)/(1 + exp(XB)) # Dichotomize dat$Y_binom <- 1*(p > runif(n = N)) # Plot aggregate(Y_binom ~ Group, FUN = function(x) round(mean(x, na.rm = T),2), dat = dat, na.action = na.pass) barplot(100*table(dat$Y_binom )/sum(table(dat$Y_binom )), ylim = c(0, 100), ylab = 'Percentage', col = 'grey', main = 'Binomial') library(ggplot2) ggplot(data = dat, aes(x= as.factor(Y_binom), fill = Group)) + geom_bar(aes(y = (..count..)/sum(..count..)), color="#e9ecef", alpha=0.6, position="dodge", stat="count") + scale_y_continuous(labels=scales::percent_format(accuracy = 1)) + scale_fill_manual(name = 'Groups', values=c("blue2", "red2")) + theme_minimal() + theme(legend.position = 'bottom') + labs(x = 'Score', y = 'Percentage', title = 'Scores with a Binomial Distribution', caption = 'Note: Simulated data') mod <- glm(Y_binom ~ Group, data = dat, family = 'binomial') summary(mod) exp(1.99821) xtabs(~ Y_binom + Group, data = dat) G2 <- 2062/438 G1 <- 974/1526 G2/G1 # Probability that Group 1 is sick: exp(-0.448)/(1 + exp(-0.448)) # Probability that Group 2 is sick: exp(-0.448 + 1.99821)/(1 + exp(-0.448 + 1.99821)) # Compare to observed proportions: prop.table(xtabs(~ Y_binom + Group, data = dat), 2)
mod <- glm(Y_binom ~ Group, data = dat, family = 'binomial') summary(mod) exp(1.99821) xtabs(~ Y_binom + Group, data = dat) G2 <- 2062/438 G1 <- 974/1526 G2/G1 # Probability that Group 1 is sick: exp(-0.448)/(1 + exp(-0.448)) # Probability that Group 2 is sick: exp(-0.448 + 1.99821)/(1 + exp(-0.448 + 1.99821)) # Compare to observed proportions: prop.table(xtabs(~ Y_binom + Group, data = dat), 2)
This is helpful because it shows you the likelihood
loglike <- function(vP, X, Y){ Y <- cbind(1-Y, Y) XB <- X %*% vP p <- 1/(1 + exp(-1*(XB))) P <- cbind(1-p, p) # Loglikelihood: loglike <- Y*log(P) loglike <- -1*loglike # optimization will minimize function loglike <- sum(loglike) return(loglike) }
vP <- rep(0, length(Beta)) out <- optim(par = vP, fn = loglike, hessian = T, method = 'BFGS', X = X, Y = dat$Y_binom) # Compare Generating Parameter to my estimate and the glm() estimate cbind(Beta, round(out$par,4), round(coef(mod),4)) # Compare the standard errors: sqrt(diag(solve(out$hessian)))
N = 5000 #this should be divisible by however many groups you use! number.groups <- 2 number.timepoints <- 1 set.seed(2092021) dat <- data.frame( 'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints), 'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints), 'Covariate' = rnorm(n = N, mean = 0, sd = 1), stringsAsFactors=F) # Create Beta parameters for these design matrix: X <- model.matrix( ~ Group + Covariate , data = dat) # Create Beta Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param')) Beta[] <- c(-0.5, 2, 1) # Matrix multiply: XB <- X %*% Beta p <- exp(XB)/(1 + exp(XB)) # Dichotomize dat$Y_binom <- 1*(p > runif(n = N)) # Plot library(ggplot2) ggplot(data = dat, aes(x= as.factor(Y_binom), fill = Group)) + geom_bar(aes(y = (..count..)/sum(..count..)), color="#e9ecef", alpha=0.6, position="dodge", stat="count") + scale_y_continuous(labels=scales::percent_format(accuracy = 1)) + scale_fill_manual(name = 'Latent Classes', values=c("blue2", "red2")) + theme_minimal() + theme(legend.position = 'bottom') + labs(x = 'Score', y = 'Percentage', title = 'Scores with a Binomial Distribution', caption = 'Note: Simulated data') # Estimate mod <- glm(Y_binom ~ Group, data = dat, family = 'binomial') summary(mod) exp(1.70170) xtabs(~ Y_binom + Group, data = dat) G2 <- 1963/537 G1 <- 1000/1500 G2/G1 # Estimate the Group parameter adjusting for covariate mod2 <- glm(Y_binom ~ Group + Covariate, data = dat, family = 'binomial') summary(mod2) exp(2.01047) # Cannot recreate this conditional/adjusted odds ratio for Groups using # hand computations.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.