In patrickmdnet/medicalrisk: Medical Risk and Comorbidity Tools for ICD-9-CM Data

Introduction

To demonstrate how the medicalrisk package can be useful, this vignette shows the kinds of descriptive statistics and inferences that can be made from a simple administrative dataset.

This vignette assumes you have read the introductory vignette for medicalrisk.

Calculating Mortality Risk

First, use the medicalrisk package to create a single dataframe with information on each patient:

library(medicalrisk)
library(plyr)
data(vt_inp_sample)
cm_df <- generate_comorbidity_df(vt_inp_sample, icd9mapfn=icd9cm_charlson_quan)
cci_df <- generate_charlson_index_df(cm_df)
rsi_df <- ddply(vt_inp_sample, .(id), function(x) { icd9cm_sessler_rsi(x$icd9cm) } )
num_icd9_df <- count(vt_inp_sample, c('id'))
num_icd9_df <- rename(num_icd9_df, c("freq" = "num_icd9"))
wide_df <- merge(merge(merge(merge(
  rsi_df, cci_df), 
    cm_df), 
      unique(vt_inp_sample[,c('id','scu_days','drg','mdc')])),
        num_icd9_df)

library(knitr)
kable(head(wide_df[1:13], n=5), digits=3, table.attr='id="wide_df_table"')
kable(head(wide_df[c(1,14:ncol(wide_df))], n=5), digits=3, table.attr='id="wide_df_table"')

Let's explore the data here with some graphs. First, a histogram:

library(reshape2)
library(ggplot2)
# generate a 100 pt x 17 comorbidity table (1700 rows)
cm_melted <- melt(cm_df, id.vars=c('id'), variable.name='cm')
# get rid of all the false entries
cm_melted <- cm_melted[cm_melted$value,]
## count only flags that are true
ggplot(cm_melted, aes(cm, fill=cm)) + 
  geom_bar() + 
  scale_fill_discrete()

The chrnlung comorbidity seems well represented. Let's create a histogram breaking down which ICD-9-CM codes are mapping to chrnlung in this dataset:

# make a histogram dataframe for all the icd-9 codes
icd9cm_df <- count(vt_inp_sample, vars='icd9cm')
# create a charlson comorbidity map for all icd-9 codes
icd9cm_charlson_df <- icd9cm_charlson_quan(icd9cm_df$icd9cm)
# isolate just the chrnlung icd_9_cm codes
icd9cm_chrnlung <- row.names(icd9cm_charlson_df[icd9cm_charlson_df$chrnlung,])
# create a hist df
icd9cm_chrnlung_hist <- icd9cm_df[icd9cm_df$icd9cm %in% icd9cm_chrnlung,]
# plot it
ggplot(icd9cm_chrnlung_hist, aes(icd9cm, freq)) + 
  geom_bar(stat="identity")

Let's see how often ICD-9-CM codes used for chrnlung coincide within patients:

# create base dataset
pairs <- unique(
  vt_inp_sample[vt_inp_sample$icd9cm %in% icd9cm_chrnlung, c('id','icd9cm')])

# create coincidence matrix
t <- table(
    ddply(pairs, c('id'), function(x) { if (length(x$icd9cm) > 1) {
      data.frame(t(combn(as.character(x$icd9cm),2))) } })[c('X1','X2')])

kable(t, table.attr='id="chrnlung_coincidence_table"')

How often do comorbidities coincide?

# create coincidence matrix
t <- table(
    ddply(cm_melted, c('id'), function(x) { if (length(x$cm) > 1) {
      data.frame(t(combn(as.character(x$cm),2))) } })[c('X1','X2')])
# sort it
t <- t[order(rownames(t)),order(colnames(t))]

kable(t, table.attr='id="cm_coincidence_table"')

Plot the above table:

m <- melt(t)
ggplot(m[m$value>0,], aes(X1,X2)) + stat_sum(aes(group=value))

This is a scatterplot of the Charlson Comorbidity Index versus each RSI mortality estimate. A linear regression line is superimposed:

library(grid)
library(gridExtra)
p.inhosp <- ggplot(wide_df, aes(rsi_inhosp, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
p.30dpod <- ggplot(wide_df, aes(rsi_30dpod, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
p.1yrpod <- ggplot(wide_df, aes(rsi_1yrpod, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
grid.arrange(p.inhosp, p.30dpod, p.1yrpod, nrow=1)