knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
N = 5000 #this should be divisible by however many groups you use! number.groups <- 2 number.timepoints <- 1 set.seed(6152021) dat <- data.frame( 'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints), 'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints), stringsAsFactors=F) # Create Beta parameters for these design matrix: X <- model.matrix( ~ Group , data = dat) # Create Beta Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param')) Beta[] <- c(0.0, 2) # Matrix multiply: XB <- X %*% Beta # Thresholds: thr <- c(0, 1, 2, 3) eta <- matrix(thr, nrow = nrow(XB), ncol = length(thr), byrow = T) - matrix(XB, nrow = nrow(XB), ncol = length(thr), byrow = F) p <- exp(eta)/(1 + exp(eta)) dat$p <- p dat$Y <- apply(runif(n = N) > p, 1, sum) # Quick barplot, base R plot functions barplot(100*table(dat$Y)/sum(table(dat$Y)), ylim = c(0, 100), ylab = 'Percentage', col = 'grey', main = 'Ordinal') #---- # Key here is that this data is ORDERED # Fit Models: library(MASS) mod <- MASS:::polr(as.factor(Y) ~ Group , data= dat, method = 'logistic', Hess = T) summary(mod) mod$coefficients # can't estimate an intercept here Beta mod$zeta thr library(ordinal) mod.clm <- clm(as.factor(Y) ~ Group, data = dat) summary(mod.clm) mod.clm$beta # Compute odds ratios by hand # Examine the proportional odds assumption # Notice that it's not exact tab <- addmargins(xtabs(~ Y + Group, data = dat), 1) # # >0 G1 <- (tab['Sum', 'Group_1'] - tab['0','Group_1'])/tab['0', 'Group_1'] G2 <- (tab['Sum', 'Group_2'] - tab['0','Group_2'])/tab['0', 'Group_2'] G2/G1 exp(mod.clm$beta) # # > 1 (includes 0 and 1 categories) G1 <- (tab['Sum', 'Group_1'] - sum(tab[c('0', '1'), 'Group_1']))/ sum(tab[c('0', '1'), 'Group_1']) G2 <- (tab['Sum', 'Group_2'] - sum(tab[c('0', '1'), 'Group_2']))/ sum(tab[c('0', '1'), 'Group_2']) G2/G1 exp(mod.clm$beta) # # > 2 (includes 0, 1, 2) G1 <- (tab['Sum', 'Group_1'] - sum(tab[c('0', '1', '2'), 'Group_1']))/ sum(tab[c('0', '1', '2'), 'Group_1']) G2 <- (tab['Sum', 'Group_2'] - sum(tab[c('0', '1', '2'), 'Group_2']))/ sum(tab[c('0', '1', '2'), 'Group_2']) G2/G1 exp(mod.clm$beta) # # > 3 (includes 0, 1, 2, 3) G1 <- (tab['Sum', 'Group_1'] - sum(tab[c('0', '1', '2', '3'), 'Group_1']))/ sum(tab[c('0', '1', '2', '3'), 'Group_1']) G2 <- (tab['Sum', 'Group_2'] - sum(tab[c('0', '1', '2', '3'), 'Group_2']))/ sum(tab[c('0', '1', '2', '3'), 'Group_2']) G2/G1 exp(mod.clm$beta) #
# Custom written estimation routine: ord_loglike <- function(vP, X, Y, k){ N <- nrow(X) Y <- Y + 1 # Must start at 1! XX <- X[ , -1, drop = F] # drop intercept XB <- XX %*% vP[1] # Not sure why polr parameterizes it this way XB <- XB %*% matrix(1, nrow = ncol(XB), ncol = k) eta <- matrix(vP[-1], nrow = nrow(XB), ncol = k, byrow = T) - XB p <- exp(eta)/(1 + exp(eta)) p <- cbind(0, p, 1) like <- p[cbind(1:N, Y + 1)] - p[cbind(1:N, Y)] loglike <- log(like) loglike <- -1*sum(loglike, na.rm = F) return(loglike) } # optimize # Set starting values: Y <- dat$Y + 1 intercepts.hat = matrix(NA, nrow = 1, ncol = length(unique(Y)) - 1) for (score in 2:length(unique(Y))){ intercepts.hat[ , score -1] = log(mean(Y >= score, na.rm =T)) } # vP is the vector of parameters passed to the optim() function vP <- c(1, -1*intercepts.hat) # optim() function optimizes the objective function out <- optim(par = vP, hessian = T, fn = ord_loglike, method = 'BFGS', X = X, Y = dat$Y, k = length(intercepts.hat)) # Compare Generating Parameter to R package estimate and the custom code estimate cbind('polr() estimate' = c(mod$coefficients, mod$zeta), 'clm() estimate' = c(mod.clm$beta, mod.clm$alpha), 'custom code' = out$par) # Standard errors: SE <- sqrt(diag(solve(out$hessian))) zval <- out$par/SE 2*(1-pnorm(zval[1])) summary(mod.clm)
The ordinal R package uses Z-tests, compare the Type I error control of that to the t-distribution.
rm(list = ls()) gc() library(ordinal) N = 100 #this should be divisible by however many groups you use! number.groups <- 2 number.timepoints <- 1 number.repl <- 1000 out <- vector() for (repl in 1:number.repl){ set.seed(6242021 + repl) dat <- data.frame( 'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints), 'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints), stringsAsFactors=F) X <- model.matrix( ~ Group , data = dat) Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param')) Beta[] <- c(0.0, 0) XB <- X %*% Beta thr <- c(0, 1, 2, 3) eta <- matrix(thr, nrow = nrow(XB), ncol = length(thr), byrow = T) - matrix(XB, nrow = nrow(XB), ncol = length(thr), byrow = F) p <- exp(eta)/(1 + exp(eta)) dat$p <- p dat$Y <- apply(runif(n = N) > p, 1, sum) library(ordinal) mod.clm <- clm(as.factor(Y) ~ Group, data = dat) mod.out <- summary(mod.clm) pval.z <- mod.out$coef['GroupGroup_2', 'Pr(>|z|)'] tval <- mod.out$coef['GroupGroup_2', 'Estimate']/ mod.out$coef['GroupGroup_2', 'Std. Error'] #tval #yes, same as the zvalue reported out #2*(1-pnorm(tval)) # This is model reported out, use t-distribution: pval.t <- 2*(1-pt(q = tval, df = N - length(coef(mod.clm)))) out <- rbind(out, c(pval.z, pval.t)) cat('Replication: ', repl, '\n') } colMeans(out < 0.05)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.