To demonstrate how the medicalrisk package can be useful, this vignette shows the kinds of descriptive statistics and inferences that can be made from a simple administrative dataset.
This vignette assumes you have read the introductory vignette for medicalrisk.
First, use the medicalrisk package to create a single data frame with information on each patient:
# Load the package being demonstrated, plyr for the split-apply-combine
# helpers used below, and the bundled sample dataset.
library(medicalrisk)
library(plyr)
data(vt_inp_sample)

# Comorbidity flags per patient (using the Quan Charlson ICD-9-CM map), and
# the Charlson index derived from those flags.
cm_df <- generate_comorbidity_df(
  vt_inp_sample,
  icd9mapfn = icd9cm_charlson_quan
)
cci_df <- generate_charlson_index_df(cm_df)

# RSI values per patient, computed from that patient's ICD-9-CM codes.
rsi_df <- ddply(vt_inp_sample, .(id), function(pt) {
  icd9cm_sessler_rsi(pt$icd9cm)
})

# Number of rows each patient has in the sample, stored as `num_icd9`
# (plyr::count() names the tally column `freq`).
num_icd9_df <- rename(count(vt_inp_sample, "id"), c("freq" = "num_icd9"))

# Join everything into one wide table. Reduce() folds left to right, so this
# is merge(merge(merge(merge(rsi, cci), cm), patient columns), num_icd9);
# each merge joins on the columns the two tables have in common.
wide_df <- Reduce(
  merge,
  list(
    rsi_df,
    cci_df,
    cm_df,
    unique(vt_inp_sample[, c("id", "scu_days", "drg", "mdc")]),
    num_icd9_df
  )
)
library(knitr)

# Preview the first five rows of wide_df as two tables: columns 1-13, then
# column 1 repeated next to every remaining column.
preview <- head(wide_df, n = 5)
kable(
  preview[1:13],
  digits = 3,
  table.attr = 'id="wide_df_table"'
)
kable(
  preview[c(1, 14:ncol(wide_df))],
  digits = 3,
  table.attr = 'id="wide_df_table"'
)
Let's explore the data with some graphs. First, a bar chart showing how often each comorbidity appears:
library(reshape2)
library(ggplot2)

# Reshape the comorbidity flags to long form: one row per (patient,
# comorbidity) pair, i.e. 100 patients x 17 comorbidities = 1700 rows.
cm_melted <- melt(cm_df, id.vars = "id", variable.name = "cm")

# Keep only the rows whose flag is TRUE, so that every remaining row is one
# comorbidity present in one patient.
is_present <- cm_melted$value
cm_melted <- cm_melted[is_present, ]

# One bar per comorbidity; bar height is the number of remaining rows.
ggplot(cm_melted, aes(x = cm, fill = cm)) +
  geom_bar() +
  scale_fill_discrete()
The chrnlung comorbidity seems well represented. Let's create a bar chart showing which ICD-9-CM codes map to chrnlung in this dataset, and how often each one occurs:
# Frequency table of every ICD-9-CM code in the sample
# (plyr::count() returns the code plus a `freq` column).
icd9cm_df <- count(vt_inp_sample, vars = "icd9cm")

# Charlson comorbidity flags for each distinct code.
icd9cm_charlson_df <- icd9cm_charlson_quan(icd9cm_df$icd9cm)

# The codes flagged as chrnlung are the row names of the flagged rows.
chrnlung_rows <- icd9cm_charlson_df[icd9cm_charlson_df$chrnlung, ]
icd9cm_chrnlung <- row.names(chrnlung_rows)

# Restrict the frequency table to those codes.
is_chrnlung_code <- icd9cm_df$icd9cm %in% icd9cm_chrnlung
icd9cm_chrnlung_hist <- icd9cm_df[is_chrnlung_code, ]

# Bar heights come straight from `freq`, hence stat = "identity".
ggplot(icd9cm_chrnlung_hist, aes(x = icd9cm, y = freq)) +
  geom_bar(stat = "identity")
Let's see how often ICD-9-CM codes used for chrnlung coincide within patients:
# Distinct (patient, code) rows, restricted to the codes that map to chrnlung.
pairs <- unique(
  vt_inp_sample[vt_inp_sample$icd9cm %in% icd9cm_chrnlung, c("id", "icd9cm")]
)

# For every patient with two or more chrnlung codes, list each unordered pair
# of codes as one (X1, X2) row. The codes are sorted first so that a given
# pair always lands in the same cell regardless of the order in which the
# codes appear in the data; without the sort, (A, B) and (B, A) could be
# tallied in two different cells and the coincidence count would be split.
code_pairs <- ddply(pairs, "id", function(x) {
  codes <- sort(as.character(x$icd9cm))
  if (length(codes) > 1) {
    data.frame(t(combn(codes, 2)))
  }
})

# Coincidence matrix: cell [a, b] is the number of patients who have both
# code a and code b. Note that the variable `t` does not break the t(...)
# call above: when evaluating a call, R looks up a function named `t` and
# skips non-function objects of the same name.
t <- table(code_pairs[c("X1", "X2")])
# Render the chrnlung code coincidence matrix; table.attr supplies the
# attributes placed on the generated HTML table.
kable(
  x = t,
  table.attr = 'id="chrnlung_coincidence_table"'
)
How often do comorbidities coincide?
# For every patient with two or more comorbidities present, list each
# unordered pair of comorbidity names as one (X1, X2) row.
cm_pairs <- ddply(cm_melted, "id", function(pt) {
  present <- as.character(pt$cm)
  if (length(present) > 1) {
    data.frame(t(combn(present, 2)))
  }
})

# Coincidence matrix: cell [a, b] counts the (a, b) rows across patients.
t <- table(cm_pairs[c("X1", "X2")])

# Put rows and columns in alphabetical order.
row_order <- order(rownames(t))
col_order <- order(colnames(t))
t <- t[row_order, col_order]
# Render the comorbidity coincidence matrix; table.attr supplies the
# attributes placed on the generated HTML table.
kable(
  x = t,
  table.attr = 'id="cm_coincidence_table"'
)
Plot the above table:
# Long form of the coincidence table: one row per (X1, X2) cell, with the
# cell's count in `value`.
m <- melt(t)

# Draw one point per non-empty cell, sized by the coincidence count.
# stat_sum() sizes points by how many rows share a location, and the melted
# table has exactly one row per cell, so it cannot show the counts stored in
# `value`; mapping `value` to size does.
ggplot(m[m$value > 0, ], aes(x = X1, y = X2)) +
  geom_point(aes(size = value))
This is a scatterplot of the Charlson Comorbidity Index versus each RSI mortality estimate. A linear regression line is superimposed:
library(grid)
library(gridExtra)

# Layers common to all three panels: the points, a linear-model smoother,
# and a fixed y range of -3 to 10. Built fresh on every call so that no
# layer or scale object is shared between plots.
rsi_panel_layers <- function() {
  list(
    geom_point(),
    geom_smooth(method = lm),
    scale_y_continuous(limits = c(-3, 10))
  )
}

# One panel per RSI column, each plotted against the Charlson index.
p.inhosp <- ggplot(wide_df, aes(x = rsi_inhosp, y = index)) +
  rsi_panel_layers()
p.30dpod <- ggplot(wide_df, aes(x = rsi_30dpod, y = index)) +
  rsi_panel_layers()
p.1yrpod <- ggplot(wide_df, aes(x = rsi_1yrpod, y = index)) +
  rsi_panel_layers()

# Lay the three panels out side by side.
grid.arrange(p.inhosp, p.30dpod, p.1yrpod, nrow = 1)
As expected, higher Risk Stratification Index values are associated with a higher Charlson Comorbidity Index.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.