# Single reference group interaction models ------------------------------------------------------
interaction_cox <- function(dat,start,stop,outcome,expoVar,strataVar,age,covariates=NULL){
# load packages if neceessary
packages <- (.packages())
if (!"survival" %in% packages) require(survival,quietly=T)
if (!"dplyr" %in% packages) require(dplyr,quietly=T)
# data file
df <- dat
# convert start and stop times to numeric
df[,c(start,stop)] <- lapply(df[,c(start,stop)],as.numeric)
# start time mus != stop time - adjusting if that is true (similar to adding 1 to fail time)
df[[stop]] <- df[[stop]] + 1/365.25
# Data checks - stop the program with error if some things are not correct
class.strataVar <- class(df[[strataVar]]) # must be factor
class.expoVar <- class(df[[expoVar]]) # must NOT be character, can be factor/numeric/int
if (class.strataVar != "factor") stop("strataVar is not a factor variable")
if (class.expoVar == "character") stop("expoVar must be numeric or factor")
rm(class.strataVar,class.expoVar)
# Coding this interaction is different for categorical vs continuous expoVar
if (class(df[[expoVar]]) !="numeric"){
df$INTERACTION <- interaction(df[[expoVar]],df[[strataVar]])
# Formula for the categorical model
y <- formula(paste(
paste0("Surv(",start,",",stop,",",outcome,")~",
paste(c("INTERACTION",covariates,paste0("strata(",age,")")),collapse="+"))))
} else {
# Formula for the continuous model
y <- formula(paste(
paste0("Surv(",start,",",stop,",",outcome,")~",
paste0(expoVar,"*",strataVar," + "),
paste(c(covariates,paste0("strata(",age,")")),collapse="+"))))
}
# Formula for the base model for P-interaction - same for categorical or continuous expoVar - just additive
z <- formula(paste(
paste0("Surv(",start,",",stop,",",outcome,")~",
strataVar, "+",expoVar,"+",
paste(c(covariates,paste0("strata(",age,")")),collapse="+"))))
# Fit the models
fit <- survival::coxph(y,data=df)
base <- survival::coxph(z,data=df)
p.int <- anova(fit,base)$"P(>|Chi|)"[2] # p-interaction
rm(y,z)
# Data management section - pull together and format my results
# First: format the estimates and p-values into something nice
# Then dump the remaining trash to make it more manageable
results <- data.frame(summary(fit)$conf.int,P=data.frame(summary(fit)$coef)[["Pr...z.."]])
results$Expo <- row.names(results)
results <- results[,c("Expo","exp.coef.","lower..95","upper..95","P")]
names(results) <- c("Expo","RR","LL","UL","P")
results[,c("RR","LL","UL")] <- lapply(results[,c("RR","LL","UL")],function(x){
format(round(x,2),nsmall=2)
})
results$Estimate <- paste0(results$RR," (",
results$LL,", ",
results$UL,")")
results <- results[,c("Expo","Estimate","P")]
# Subset the results, get rid of the covariates
if (class(df[[expoVar]])=="factor") {
results <- results[grep("INTERACTION",results$Expo),]
} else {
results <- results[grep(expoVar,results$Expo),]
}
strat.levels <- levels(df[[strataVar]])
expo.levels <- levels(df[[expoVar]]) # if continuous, will be NULL, that's ok
# Case numbers and formatting ------------------------------------------------------------
# Continuous vs categorical expoVar have to go through a different set of formatting
# These functions will do the work and produce an organized table, complete with case numbers
continuous <- function(){
# I need to cbind() by the strataVar levels
# Adding the levels into the dataset so I can sort them out
results$strataVar <- strat.levels
# Summing cases within each strata
# If there are missing expoVar observations, they will not be included in case numbers
cases <- data.frame(strataVar=strat.levels,
cases=tapply(df[[outcome]][!is.na(df[[expoVar]])],
df[[strataVar]][!is.na(df[[expoVar]])],
sum),
stringsAsFactors=F)
# Join with the model estimates
results <- dplyr::left_join(cases,results,"strataVar")
# Loop to cbind() all the results
final <- data.frame(results[results$strataVar==strat.levels[1],
c("cases","Estimate","P")],
stringsAsFactors=F)
for (i in 2:length(strat.levels)){
final <- cbind(final,results[results$strataVar==strat.levels[i],c("cases","Estimate","P")])
}
# Structuring the dataset so that it can be eventually rbind() with categorical results
final <- data.frame(Category="Continuous",
final,
stringsAsFactors=F)
return(final)
}
categorical <- function(){
results$Expo <- sub("INTERACTION","",results$Expo)
cases <- data.frame(table(df$INTERACTION[df[[outcome]]==1]),stringsAsFactors=F) # case numbers
names(cases) <- c("Expo","Cases")
cases$Expo <- as.character(cases$Expo)
results <- dplyr::full_join(cases,results,"Expo") # merge with model estimates
rm(cases)
results$Estimate[is.na(results$Estimate)] <- "1.00" # add referent group
# separate out the interaction levels into the two variables
# This will allow me to format the table more easily
grid <- expand.grid(expo.levels,strat.levels)
grid$Expo <- paste(grid$Var1,grid$Var2,sep=".")
results <- dplyr::full_join(grid,results,"Expo")
# Now I want to cbind() the estimates for each level of strataVar
final <- results[results$Var2 == strat.levels[1],c("Var1","Cases","Estimate","P")]
for (i in 2:length(strat.levels)){
final <- cbind(final,
results[results$Var2== strat.levels[i],c("Cases","Estimate","P")]
)
}
return(final)
}
if (class(df[[expoVar]]) == "factor") final <- categorical()
if (class(df[[expoVar]]) != "factor") final <- continuous()
# Add a column for the expoVar variable name, so it can be rbind() with out model output
final <- data.frame(Variable=c(expoVar,rep(NA,nrow(final))),
rbind(NA,final),stringsAsFactors=F)
# Name my output variables so that they are descriptive
# I assume the analyst will change them at some point, I just want results clearly identified
col.names <- expand.grid(c("Cases","Estimate","P"),strat.levels)
col.names <- paste(col.names$Var1,col.names$Var2,sep="_")
names(final) <- c("Variable","Category",col.names)
# Add the p-value for interaction
final$P_Interaction <- c(rep(NA,nrow(final)-1),p.int)
# Final list of results
output <- list(final=final,
int.model=fit,
base.model=base)
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.