Nothing
## This example deals with the use of mixture model to obtain empirical Bayes estimates
## for Poisson rates in rare disease mapping.
## The data consist of sudden infant death syndrome counts in 100 counties in north Caroline
## and were available at http://spatial.uchicago.edu/sample-data ,
## and also in the examples of Bugs (http://www.mrc-bsu.cam.ac.uk/bugs/).
## The version we use here is very complete
## with North Carolina maps and centroids.
## In this example we only use the 1974 data.
require(moc) #first load the required moc library
ncSids.file <- system.file("Examples","ncSids.RData",package="moc",mustWork=TRUE)
load(ncSids.file) #and the data.
## Poisson distribution is appropriate for counts of rare events.
poiss <- function(x,la,...) {dpois(x,la)}
## The mean of the Poisson process is taken as the base rate (exp(p) > 0) times
## the expected number of cases which is proportional to the size of the county.
smr.ecases <- function(p,expected.cases) {exp(p)*cbind(expected.cases)}
nc.sids$ecases <- nc.sids$BIR74*sum(nc.sids$SID74)/sum(nc.sids$BIR74)
gmu.smr.sids <- list(G1=function(p) smr.ecases(p[1],nc.sids$ecases),
G2=function(p) smr.ecases(p[2],nc.sids$ecases),
G3=function(p) smr.ecases(p[3],nc.sids$ecases),
G4=function(p) smr.ecases(p[4],nc.sids$ecases))
## In fact it is coded such that the analysis corresponds to the
## standardized mortality rate (SMR = O/E : observed cases/expected cases)
## which we know is highly sensitive to the size of the county: extreme SMR
## having elevated chances to appear in smaller counties. As illustrated by the
## following plot:
plot(nc.sids$ecases,nc.sids$SID74/nc.sids$ecases)
## Fitting mixture model would allow to account for over(under)-dispersion
## and to capture the range of regressions on the size of the counties.
nc.smr <- moc(nc.sids$SID74, density = poiss, gmu = gmu.smr.sids[1],
pgmu = log(c(1)))
nc.smr2 <- moc(nc.sids$SID74, density = poiss, groups=2, gmu = gmu.smr.sids[1:2],
pgmu = log(c(1,3)), pgmix=0)
nc.smr3 <- moc(nc.sids$SID74, density = poiss, groups=3, gmu = gmu.smr.sids[1:3],
pgmu = log(c(0.9,2.5,5)), pgmix=c(0,0))
nc.smr4 <- moc(nc.sids$SID74, density = poiss, groups=4, gmu = gmu.smr.sids,
pgmu = log(c(0.2,1,6,10)), pgmix=c(0,0,0))
AIC(nc.smr,nc.smr2,nc.smr3,nc.smr4,k="BIC")
nc.smr2
## As we can see the 2 groups models is sufficient to capture all essential heterogeneity.
plot(nc.sids$ecases,nc.sids$SID74)
lines(c(0,max(nc.sids$ecases)),exp(nc.smr2$coef[1])*c(0,max(nc.sids$ecases)))
lines(c(0,max(nc.sids$ecases)),exp(nc.smr2$coef[2])*c(0,max(nc.sids$ecases)))
## As we can see the empirical posterior mean provides a good shrinkage of the
## SMR estimates which is less dependent on county size
plot(nc.sids$ecases,nc.sids$SID74/nc.sids$ecases)
points(nc.sids$ecases,apply(post(nc.smr2),1,function(x) sum(x*exp(nc.smr2$coef[1:2]))),col="red",pch=5)
lines(c(0,max(nc.sids$ecases)),exp(nc.smr2$coef[1])*c(1,1))
lines(c(0,max(nc.sids$ecases)),exp(nc.smr2$coef[2])*c(1,1))
## Using the posterior empirical mean estimates is equivalent to the use
## of the posterior probabilities that we can use to mix basic colors
## to map on the counties.
smr2.col <- mix.colors.moc(nc.smr2,group.colors=c("blue","red"))
#library(maptools)
#plot(sidspolys, main="SMR_EB of SIDS in North Carolina",border="grey",smr2.col,forcefill=FALSE)
## If you don't use the library maptools the following commands are equivalent.
plot(attr(sidspolys, "maplim"),type="n",asp=1)
for(i in 1:100) polygon(sidspolys[[i]], border="grey",col=smr2.col[i])
legend(-84,34,format(apply(cbind(c(0,0.25,0.5,0.75,1),c(1,0.75,0.5,0.25,0)),1,
function(x) sum(x*exp(nc.smr2$coef[1:2]))),digits=4),
fill=c(rgb(1,0,0),rgb(0.75,0,0.25),rgb(0.5,0,0.5),rgb(0.25,0,0.75),rgb(0,0,1)))
## As we can see there are some spatial heterogeneity in the spatial distribution of sids.
## Part of it can be due to some remaining effect of county size, however we can
## use the spatial structure to account for possible spatial dependency and achieve
## better smoothing.
## A simple way to do this is to make the mixture probabilities of each counties depend on the
## SMR of the neighborhood counties.
nc.sids.smr.nb <- sapply(ncCR85.nb,function(nb)
sum(nc.sids[nb,"SID74"])/sum(nc.sids[nb,"BIR74"]))/sum(nc.sids$SID74)*sum(nc.sids$BIR74)
smr.mix.nb <- function(pmix) t(apply(cbind(pmix[1]+pmix[2]*nc.sids.smr.nb),1,inv.glogit))
nc.smr2.nb <- moc(nc.sids$SID74, density = poiss, groups=2, gmu = gmu.smr.sids[1:2],
gmixture=smr.mix.nb,pgmu = nc.smr2$coef[1:2], pgmix=c(nc.smr2$coef[3],0.1))
## The positive coefficient of regression on neighborhood counties has a small p-value
## and the decrease in -likelihood is important showing the presence spatial dependence.
nc.smr2.nb
AIC(nc.smr2,nc.smr2.nb,k="BIC")
## Mapping the empirical posterior SMR estimates again
smr2.nb.col <- mix.colors.moc(nc.smr2.nb,group.colors=c("blue","red"))
#plot(sidspolys, main="SMR_EB_NB of SIDS in North Carolina",border="grey",smr2.nb.col,forcefill=FALSE)
#legend(-84,34,format(apply(cbind(c(0,0.25,0.5,0.75,1),c(1,0.75,0.5,0.25,0)),1,function(x) sum(x*exp(nc.smr2.nb$coef[1:2]))),
# digits=4),fill=c(rgb(1,0,0),rgb(0.75,0,0.25),rgb(0.5,0,0.5),rgb(0.25,0,0.75),rgb(0,0,1)))
## now shows more spatial homogeneity and the values shows that
## we have achieved slightly better shrinkage.
plot(nc.sids$ecases,nc.sids$SID74/nc.sids$ecases)
points(nc.sids$ecases,apply(post(nc.smr2),1,
function(x) sum(x*exp(nc.smr2$coef[1:2]))),col="red",pch=5)
points(nc.sids$ecases,apply(post(nc.smr2.nb),1,
function(x) sum(x*exp(nc.smr2.nb$coef[1:2]))),col="blue",pch=8)
## The preceding model require the identification of the neighborhood of each county
## and restrict spatial dependence to adjacent counties. An alternative approach
## would be to consider the dependence of each county on other counties
## to be inversely proportional to their distance. A simple way to
## achieve this would be to compute for each county, the weighted SMR of the other counties
## with the weights being the inverse of the squared distance between the counties.
#sidscents <- cbind(nc.sids$x,nc.sids$y)
sids.dist.nb <- t(apply(sidscents,1,function(x) apply((x-t(sidscents))^2,2,sum)))
for(i in 1:100) sids.dist.nb[i,-i] <- 1/sids.dist.nb[i,-i]/sum(1/sids.dist.nb[i,-i])
nc.smr.dist.nb <- apply(nc.sids$SID74*t(sids.dist.nb),2,sum)/
apply(nc.sids$BIR74*t(sids.dist.nb),2,sum)/sum(nc.sids$SID74)*sum(nc.sids$BIR74)
smr.mix.dist.nb <- function(pmix) t(apply(cbind(pmix[1]+pmix[2]*nc.smr.dist.nb),1,inv.glogit))
nc.smr2.dist.nb <- moc(nc.sids$SID74, density = poiss, groups=2, gmu = gmu.smr.sids[1:2],
gmixture=smr.mix.dist.nb,pgmu = nc.smr2$coef[1:2], pgmix=c(nc.smr2$coef[3],0.1))
nc.smr2.dist.nb
AIC(nc.smr2,nc.smr2.dist.nb,k="BIC")
entropy(nc.smr2,nc.smr2.dist.nb)
## As we can see the two spatial models are quite comparable, the second model showing
## a slightly smaller prior entropy denoting a stronger spatial prediction resulting
## in a little more homogeneity in the SMR mapping.
smr2.dist.nb.col <- mix.colors.moc(nc.smr2.dist.nb,group.colors=c("blue","red"))
plot(attr(sidspolys, "maplim"),type="n",asp=1)
for(i in 1:100) polygon(sidspolys[[i]], border="grey",col=smr2.dist.nb.col[i])
legend(-84,34,format(apply(cbind(c(0,0.25,0.5,0.75,1),c(1,0.75,0.5,0.25,0)),1,
function(x) sum(x*exp(nc.smr2.dist.nb$coef[1:2]))),digits=4),
fill=c(rgb(1,0,0),rgb(0.75,0,0.25),rgb(0.5,0,0.5),rgb(0.25,0,0.75),rgb(0,0,1)))
## The second model however uses a specific function of the distance between counties which may not
## always be appropriate.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.