library(lme4) library(car) library(arm) library(pander) library(knitr) library(stargazer) library(xtable) library(lattice) library(plyr) library(blme) # library(reshape) library(reshape2) library(optimx) library(STIecoPredict)
# load("C:/Users/ngreen1/Dropbox/small-area & chlamydia/R_code/scripts/mrp/data/cleaned-regn-input-mrpNatsal.RData") load("./data/cleaned-regn-input-mrpNatsal.RData") Natsal0 <- Natsal
savedata <- FALSE showplot <- TRUE
## older ages sparsely sampled Natsal <- subset(Natsal, age>15 & age<25) #NCSP range ## same sex partner in last year Natsal <- subset(Natsal, sam1yr==0) Natsal$sex1yr <- (Natsal$het1yr!=0 | Natsal$sam1yr!=0) Natsal$GUMvenue <- Natsal$chtstwh1=="Sexual health clinic (GUM clinic)" | Natsal$chltstwh=="Sexual health clinic (GUM clinic)" # names.atleast1test <- rownames(table(Natsal$laname, Natsal$cttestly))[table(Natsal$laname, Natsal$cttestly)[,"1"]>0] # names.LAbigenough <- names(table(Natsal$laname)[(table(Natsal$laname))>10]) # # subnames <- names.LAbigenough[names.LAbigenough%in%names.atleast1test] # Natsal2 <- subset(Natsal, laname%in%subnames) Natsal2 <- Natsal
form.cttestly <- as.formula(cttestly ~ 1 + sex + (1|sex.age) + (1|ethnic2) + I(hhsize==1) + student + (1|Conception.decile) + (1|`Numerical classification`) + I(`Average Score`>28) + (1|laname) + (1|gor) + (1|metcounty_UA) + FreqGUM) # + I(gor==9) form.GUMvenue <- update(form.cttestly, GUMvenue ~ .)
fit.cttestly <- glmer(form.cttestly, family = binomial(link="logit"), data = Natsal2) fit.GUMvenue <- glmer(form.GUMvenue, family = binomial(link="logit"), data = Natsal2[as.logical(Natsal2$cttestly), ])
MAXIT <- 10000L fit.cttestly <- bglmer(form.cttestly, family = binomial(link="logit"), maxit = MAXIT, control = glmerControl(optimizer="Nelder_Mead"), data = Natsal2) fit.GUMvenue <- bglmer(form.GUMvenue, family = binomial(link="logit"), maxit = MAXIT, control = glmerControl(optimizer="Nelder_Mead"), data = Natsal2[as.logical(Natsal2$cttestly), ])
lattice::dotplot(ranef(fit.GUMvenue, condVar=TRUE)) lattice::dotplot(ranef(fit.cttestly, condVar=TRUE))
Because some of the LAs don't have any sample point we create a vector of LA random effects that includes these. We set the RE to 0. Could alternatively take e.g. average of neighbours.
## add some missing random effects missingLANatsal.names <- unique(Natsal$laname)[!unique(Natsal$laname)%in%rownames(ranef(fit.GUMvenue)$laname)] missingLAsim_prop.names <- unique(sim_prop_la$LAname)[!unique(sim_prop_la$LAname)%in%rownames(ranef(fit.GUMvenue)$laname)] missingLA.names <- sort(union(missingLAsim_prop.names, missingLANatsal.names)) la.ranefs.GUMvenue <- array(0, dim=c(length(missingLA.names),1)) dimnames(la.ranefs.GUMvenue) <- list(missingLA.names, "(Intercept)") la.ranefs.GUMvenue <- rbind(ranef(fit.GUMvenue)$laname, la.ranefs.GUMvenue)
## add some missing random effects missingmetUANatsal.names <- unique(Natsal$metcounty_UA)[!unique(Natsal$metcounty_UA)%in%rownames(ranef(fit.GUMvenue)$metcounty_UA)] missingmetUAsim_prop.names <- unique(sim_prop_la$metcounty_UA)[!unique(sim_prop_la$metcounty_UA)%in%rownames(ranef(fit.GUMvenue)$metcounty_UA)] missingmetUA.names <- na.omit(union(missingmetUAsim_prop.names, missingmetUANatsal.names)) metUA.ranefs.GUMvenue <- array(0, dim=c(length(missingmetUA.names),1)) dimnames(metUA.ranefs.GUMvenue) <- list(missingmetUA.names, "(Intercept)") metUA.ranefs.GUMvenue <- rbind(ranef(fit.GUMvenue)$metcounty_UA, metUA.ranefs.GUMvenue)
##TODO## ## add some missing random effects missinggorNatsal.names <- unique(Natsal$gor)[!unique(Natsal$gor)%in%rownames(ranef(fit.GUMvenue)$gor)] missinggorsim_prop.names <- unique(sim_prop_la$gor)[!unique(sim_prop_la$gor)%in%rownames(ranef(fit.GUMvenue)$gor)] missinggor.names <- union(missinggorsim_prop.names, missinggorNatsal.names) gor.ranefs.GUMvenue <- array(0, dim=c(length(missinggor.names),1)) dimnames(gor.ranefs.GUMvenue) <- list(missinggor.names, "(Intercept)") gor.ranefs.GUMvenue <- rbind(ranef(fit.GUMvenue)$gor, gor.ranefs.GUMvenue)
## add some missing random effects missingLANatsal.names <- unique(Natsal$laname)[!unique(Natsal$laname)%in%rownames(ranef(fit.cttestly)$laname)] missingLAsim_prop.names <- unique(sim_prop_la$LAname)[!unique(sim_prop_la$LAname)%in%rownames(ranef(fit.cttestly)$laname)] missingLA.names <- union(missingLAsim_prop.names, missingLANatsal.names) la.ranefs.cttestly <- array(0, dim=c(length(missingLA.names),1)) dimnames(la.ranefs.cttestly) <- list(missingLA.names, "(Intercept)") la.ranefs.cttestly <- rbind(ranef(fit.cttestly)$laname, la.ranefs.cttestly)
## add some missing random effects missingmetUANatsal.names <- unique(Natsal$metcounty_UA)[!unique(Natsal$metcounty_UA)%in%rownames(ranef(fit.cttestly)$metcounty_UA)] missingmetUAsim_prop.names <- unique(sim_prop_la$metcounty_UA)[!unique(sim_prop_la$metcounty_UA)%in%rownames(ranef(fit.cttestly)$metcounty_UA)] missingmetUA.names <- na.omit(union(missingmetUAsim_prop.names, missingmetUANatsal.names)) metUA.ranefs.cttestly <- array(0, dim=c(length(missingmetUA.names),1)) dimnames(metUA.ranefs.cttestly) <- list(missingmetUA.names, "(Intercept)") metUA.ranefs.cttestly <- rbind(ranef(fit.cttestly)$metcounty_UA, metUA.ranefs.cttestly)
##TODO## ## add some missing random effects missinggorNatsal.names <- unique(Natsal$gor)[!unique(Natsal$gor)%in%rownames(ranef(fit.cttestly)$gor)] missinggorsim_prop.names <- unique(sim_prop_la$gor)[!unique(sim_prop_la$gor)%in%rownames(ranef(fit.cttestly)$gor)] missinggor.names <- union(missinggorsim_prop.names, missinggorNatsal.names) gor.ranefs.cttestly <- array(0, dim=c(length(missinggor.names),1)) dimnames(gor.ranefs.cttestly) <- list(missinggor.names, "(Intercept)") gor.ranefs.cttestly <- rbind(ranef(fit.cttestly)$gor, gor.ranefs.cttestly)
## takes mean average of each LA and its neighbours names(madjacency) <- LAnameClean(names(madjacency)) madjacency[,1] <- LAnameClean(madjacency[, 1]) ## fitted LAs LAnames <- rownames(ranef(fit.cttestly)$laname) madjacencyRE <- madjacency # madjacencyRE <- madjacencyRE[, c(TRUE,(names(madjacency)%in%LAnames)[-1])] #not missing LAs # madjacencyRE <- madjacencyRE[(names(madjacency)%in%LAnames)[-1],] for (i in 1:nrow(madjacencyRE)) madjacencyRE[i, i+1] <- 1 #diagonal (not including first column) # numNeighbours <- rowSums(madjacencyRE[, -1]) #true number of neigbours ## fillin fitted random effects for (i in LAnames){ madjacencyRE[, i] <- madjacencyRE[, i] * ranef(fit.cttestly)$laname[i, ] } madjacencyRE[madjacencyRE == 1] <- 0 #if don't have fitted RE then remove from average ## number of neighbours with values numNeighbours <- apply(madjacencyRE[,-1], 1, function(x) sum(x!=0)) #numNeighbours <- rowSums(madjacencyRE[, -1]) rowAverages <- array(rowSums(madjacencyRE[,-1])/numNeighbours, dim = c(length(numNeighbours),1)) rowAverages[is.nan(rowAverages)] <- 0 dimnames(rowAverages) <- list(madjacencyRE[,1], "(Intercept)") ## who is still without an estimate? missingLANatsal.names <- unique(Natsal0$laname)[!unique(Natsal0$laname)%in%rownames(rowAverages)] missingLAsim_prop.names <- unique(sim_prop_la$LAname)[!unique(sim_prop_la$LAname)%in%rownames(rowAverages)] missingLA.names <- union(missingLAsim_prop.names, missingLANatsal.names) la.ranefs.cttestly <- array(0, dim=c(length(missingLA.names),1)) dimnames(la.ranefs.cttestly) <- list(missingLA.names, "(Intercept)") la.ranefs.cttestly <- rbind(rowAverages, la.ranefs.cttestly)
## takes mean average of each LA and its neighbours names(madjacency) <- LAnameClean(names(madjacency)) madjacency[,1] <- LAnameClean(madjacency[, 1]) ## fitted LAs LAnames <- rownames(ranef(fit.GUMvenue)$laname) madjacencyRE <- madjacency # madjacencyRE <- madjacencyRE[, c(TRUE,(names(madjacency)%in%LAnames)[-1])] #not missing LAs # madjacencyRE <- madjacencyRE[(names(madjacency)%in%LAnames)[-1],] for (i in 1:nrow(madjacencyRE)) madjacencyRE[i, i+1] <- 1 #diagonal (not including first column) # numNeighbours <- rowSums(madjacencyRE[, -1]) #true number of neigbours ## fillin fitted random effects for (i in LAnames){ madjacencyRE[, i] <- madjacencyRE[, i] * ranef(fit.GUMvenue)$laname[i, ] } madjacencyRE[madjacencyRE == 1] <- 0 #if don't have fitted RE then remove from average ## number of neighbours with values numNeighbours <- apply(madjacencyRE[,-1], 1, function(x) sum(x!=0)) #numNeighbours <- rowSums(madjacencyRE[, -1]) rowAverages <- array(rowSums(madjacencyRE[,-1])/numNeighbours, dim = c(length(numNeighbours),1)) rowAverages[is.nan(rowAverages)] <- 0 dimnames(rowAverages) <- list(madjacencyRE[,1], "(Intercept)") ## who is still without an estimate? missingLANatsal.names <- unique(Natsal0$laname)[!unique(Natsal0$laname)%in%rownames(rowAverages)] missingLAsim_prop.names <- unique(sim_prop_la$LAname)[!unique(sim_prop_la$LAname)%in%rownames(rowAverages)] missingLA.names <- union(missingLAsim_prop.names, missingLANatsal.names) la.ranefs.GUMvenue <- array(0, dim=c(length(missingLA.names),1)) dimnames(la.ranefs.GUMvenue) <- list(missingLA.names, "(Intercept)") la.ranefs.GUMvenue <- rbind(rowAverages, la.ranefs.GUMvenue)
fixeff.formula <- as.formula("cttestly ~ student + sex + age + ethnic2 + livealone") ## census data # test.calcTotalProbs(sim_prop_la) sim_prop_la.fit <- calcTotalProbs(formula=fixeff.formula, data=sim_prop_la, extracols = c("LAname","gor","Average Score","metcounty_UA","Numerical classification","Conception.deciles","FreqGUM")) pred.GUMvenue <- arm::invlogit( fixef(fit.GUMvenue)["(Intercept)"] + fixef(fit.GUMvenue)["sexWomen"]*(sim_prop_la.fit$sex=="Women") + fixef(fit.GUMvenue)["studentTRUE"]*(sim_prop_la.fit$student) + fixef(fit.GUMvenue)["I(hhsize == 1)TRUE"]*(sim_prop_la.fit$livealone) + fixef(fit.GUMvenue)["FreqGUM"]*(sim_prop_la.fit$FreqGUM) + ranef(fit.GUMvenue)$ethnic2[as.character(sim_prop_la.fit$ethnic2),1] + # ranef(fit.GUMvenue)$age[as.character(sim_prop_la.fit$age),1] + ranef(fit.GUMvenue)$'sex.age'[paste(sim_prop_la.fit$sex, sim_prop_la.fit$age, sep="."), 1] + # ranef(fit.GUMvenue)$laname[as.character(sim_prop_la.fit$LAname),1] + la.ranefs.GUMvenue[as.character(sim_prop_la.fit$LAname),1] + # ranef(fit.GUMvenue)$metcounty_UA[as.character(sim_prop_la.fit$metcounty_UA),1] #+ metUA.ranefs.GUMvenue[as.character(sim_prop_la.fit$metcounty_UA),1] + ranef(fit.GUMvenue)$gor[as.character(sim_prop_la.fit$gor),1] + # fixef(fit.GUMvenue)["I(gor == 9)TRUE"]*(sim_prop_la.fit$gor==9) #london fixef(fit.GUMvenue)["I(`Average Score` > 28)TRUE"]*(sim_prop_la.fit$`Average Score`>28) + ranef(fit.GUMvenue)$`Numerical classification`[as.character(sim_prop_la.fit$`Numerical classification`),1] + ranef(fit.GUMvenue)$Conception.decile[as.character(sim_prop_la.fit$Conception.deciles),1]) pred.cttestly <- arm::invlogit( fixef(fit.cttestly)["(Intercept)"] + fixef(fit.cttestly)["sexWomen"]*(sim_prop_la.fit$sex=="Women") + fixef(fit.cttestly)["studentTRUE"]*(sim_prop_la.fit$student) + fixef(fit.cttestly)["I(hhsize == 1)TRUE"]*(sim_prop_la.fit$livealone) + fixef(fit.cttestly)["FreqGUM"]*(sim_prop_la.fit$FreqGUM) + ranef(fit.cttestly)$ethnic2[as.character(sim_prop_la.fit$ethnic2),1] + #ranef(fit.cttestly)$age[as.character(sim_prop_la.fit$age),1] + ranef(fit.cttestly)$'sex.age'[paste(sim_prop_la.fit$sex, sim_prop_la.fit$age, sep="."), 1] + # ranef(fit.cttestly)$laname[as.character(sim_prop_la.fit$LAname),1] + la.ranefs.cttestly[as.character(sim_prop_la.fit$LAname),1] + # ranef(fit.cttestly)$metcounty_UA[as.character(sim_prop_la.fit$metcounty_UA),1] #+ metUA.ranefs.cttestly[as.character(sim_prop_la.fit$metcounty_UA),1] + ranef(fit.cttestly)$gor[as.character(sim_prop_la.fit$gor),1] + # fixef(fit.cttestly)["I(gor == 9)TRUE"]*(sim_prop_la.fit$gor==9) #london fixef(fit.cttestly)["I(`Average Score` > 28)TRUE"]*(sim_prop_la.fit$`Average Score`>28) + ranef(fit.cttestly)$`Numerical classification`[as.character(sim_prop_la.fit$`Numerical classification`),1] + ranef(fit.cttestly)$Conception.decile[as.character(sim_prop_la.fit$Conception.deciles),1])
## separate GUMvenue and test for chlamydia separate-out the two groups ## p(test AND GUMvenue AND category) combined_prob <- pred.cttestly * pred.GUMvenue * sim_prop_la.fit$totalprob ## weighted average by LA LApred <- numerator <- tapply(combined_prob, sim_prop_la.fit$LAname, sum)
## given tested in last year, what are the proportions? ## cf Gelman, Parks ## p(GUMvenue AND category) denominator <- pred.GUMvenue * sim_prop_la.fit$totalprob ## weighted average by LA denominator <- tapply(denominator, sim_prop_la.fit$LAname, sum) LApred <- numerator/denominator
The LA specific post-stratified estimates are then
(LApred <- data.frame(LApred=LApred[order(LApred)]))
# popCensus <- read.csv("..\\..\\packages\\STIecoPredict\\raw-data\\popCensus.csv") LApred$LAname <- rownames(LApred) popCensus$LAname <- STIecoPredict:::LAnameClean(popCensus$LAname) rownames(LApred) <- NULL LApred <- merge(LApred, popCensus[popCensus$Sex=="All", c("LAname","prob.25.and.over")], all.x=TRUE) LApred$LApred.adj <- LApred$LApred*(1-LApred$prob.25.and.over) LApred[order(LApred$LApred.adj),]
save(pred, LApred, file="data/predictions.RData")
## raw NatsalLA <- melt(tapply(Natsal2$cttestly, Natsal2$laname, mean), varnames = c("LA.Name")) #direct estimate NatsalLAsize <- melt(tapply(Natsal2$cttestly, Natsal2$laname, length), varnames = c("LA.Name")) #as.NatsalLAsize <- data.frame(table(Natsal2$laname)) ## with sample weights NatsalLA.weights <- ddply(.data=Natsal2, .variables=.(laname), function(y) data.frame(wtmean=weighted.mean(x=y$cttestly, w=y$total_wt))) ## use prediction instead of data # Natsal2$cttestly.predict <- predict(fit, type="response") # NatsalLA.predict <- melt(tapply(Natsal2$cttestly.predict, Natsal2$laname, mean), varnames = c("LA.Name")) #direct estimate # NatsalLA.weights.predict <- ddply(.data=Natsal2, .variables=.(laname), function(y) data.frame(wtmean=weighted.mean(x=y$cttestly.predict, w=y$total_wt))) NatsalLA.GUM.weights <- ddply(.data=Natsal2, .variables=.(laname), function(y) data.frame(GUMwtmean=weighted.mean(x=(y$cttestly & y$GUMvenue), w=y$total_wt)))
Combine predictions with the surveillance data and produce summary statistics for each area.
CTADGUM_pred <- STIecoPredict:::joinAllOutcomeData(LApred) CTADGUM_pred <- STIecoPredict:::calcStats.CTADGUM_pred(CTADGUM_pred)
CTADGUM_pred <- merge(CTADGUM_pred, NatsalLA[,c("LA.Name", "value")], by.x = "LA Name", by.y="LA.Name", all.x=TRUE) CTADGUM_pred <- merge(CTADGUM_pred, NatsalLA.weights[,c("laname", "wtmean")], by.x = "LA Name", by.y="laname", all.x=TRUE) CTADGUM_pred <- merge(CTADGUM_pred, NatsalLA.GUM.weights[,c("laname", "GUMwtmean")], by.x = "LA Name", by.y="laname", all.x=TRUE) CTADGUM_pred <- merge(CTADGUM_pred, NatsalLAsize[,c("LA.Name", "value")], by.x="LA Name", by.y="LA.Name", all.x=TRUE) names(CTADGUM_pred)[names(CTADGUM_pred)%in%c("value.x", "value.y")] <- c("NatsalLA", "NatsalLAsize") CTADGUM_pred$NatsalLAsize[is.na(CTADGUM_pred$NatsalLAsize)] <- 1.5 #missing LAs CTADGUM_pred$`LA Name` <- as.factor(CTADGUM_pred$`LA Name`) CTADGUM_pred$laname.num <- as.numeric(CTADGUM_pred$`LA Name`) CTADGUM_pred$Region <- droplevels(CTADGUM_pred$Region) CTADGUM_pred <- CTADGUM_pred[order(CTADGUM_pred$Region), ] x <- by(CTADGUM_pred, CTADGUM_pred$Region, nrow) CTADGUM_pred$LAnum_inregion <- unlist(sapply(x, function(z) z:1))
save(CTADGUM_pred, file="data/CTADGUM_pred.RData")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.