library(lme4) library(car) library(arm) library(pander) library(knitr) library(stargazer) library(xtable) library(lattice) library(plyr) library(stringr) library(reshape2) # library(STIecoPredict) library("STIecoPredict", lib.loc="~/R/win-library/3.2")
savedata <- FALSE
load("./data/cleaned-regn-input-mrpNatsal.RData") Natsal0 <- Natsal
Natsal <- subset(Natsal, age>15 & age<25) #NCSP range ## fit to London LAs only subnames <- unique(Natsal$laname[Natsal$gor==9]) Natsal2 <- subset(Natsal, laname%in%subnames) Natsal2 <- Natsal ## fit to non-small sampled LAs # subnames2 <- names(table(Natsal2$laname)[(table(Natsal2$laname))>20]) # Natsal2 <- subset(Natsal2, laname%in%subnames2)
## >28 is 75% quantile fit <- glmer(formula = cttestly ~ 1 + student + sex + (1|age) + (1|ethnic2) + I(`Average Score`>28) + (1|metcounty_UA) + (1|gor),# + (1|laname),# , # + I(gor==9), family = binomial(link="logit"), weights = total_wt.int, # data = Natsal) data = Natsal2) lattice::dotplot(ranef(fit, condVar=TRUE))
## add some missing random effects missingLA.names <- unique(Natsal$laname[Natsal$gor==9])[!unique(Natsal$laname[Natsal$gor==9])%in%rownames(ranef(fit)$laname)] la.ranefs <- array(0, dim=c(length(missingLA.names),1)) dimnames(la.ranefs) <- list(missingLA.names, "(Intercept)") la.ranefs <- rbind(ranef(fit)$laname, la.ranefs)
## fitted LAs LAnames <- rownames(ranef(fit)$laname) numNeighbours <- rowSums(madjacency[,-1]) madjacencyRE <- madjacency for (i in LAnames){ madjacencyRE[, i] <- madjacencyRE[, i]*ranef(fit)$laname[i,] } madjacencyRE[madjacencyRE==1] <- 0 rowAverages <- array(rowSums(madjacencyRE[,-1])/numNeighbours, dim=c(length(numNeighbours),1)) rowAverages[is.nan(rowAverages)] <- 0 dimnames(rowAverages) <- list(madjacencyRE[,1], "(Intercept)") missingLANatsal.names <- unique(Natsal$laname)[!unique(Natsal$laname)%in%rownames(ranef(fit)$laname)] missingLAsim_prop.names <- unique(sim_prop_la$LAname)[!unique(sim_prop_la$LAname)%in%rownames(ranef(fit)$laname)] missingLA.names <- union(missingLAsim_prop.names, missingLANatsal.names) la.ranefs <- subset(rowAverages, rownames(rowAverages) %in% missingLA.names) la.ranefs <- rbind(ranef(fit)$laname, la.ranefs)
names(madjacency) <- LAnameClean(names(madjacency)) madjacency[,1] <- LAnameClean(madjacency[, 1]) ## fitted LAs LAnames <- rownames(ranef(fit)$laname) madjacencyRE <- madjacency madjacencyRE <- madjacencyRE[, c(TRUE,(names(madjacency)%in%LAnames)[-1])] madjacencyRE <- madjacencyRE[(names(madjacency)%in%LAnames)[-1],] for (i in 1:nrow(madjacencyRE)) madjacencyRE[i, i+1] <- 1 numNeighbours <- rowSums(madjacencyRE[, -1]) for (i in LAnames){ madjacencyRE[, i] <- madjacencyRE[, i] * ranef(fit)$laname[i, ] } madjacencyRE[madjacencyRE == 1] <- 0 rowAverages <- array(rowSums(madjacencyRE[,-1])/numNeighbours, dim=c(length(numNeighbours),1)) rowAverages[is.nan(rowAverages)] <- 0 dimnames(rowAverages) <- list(madjacencyRE[,1], "(Intercept)") missingLANatsal.names <- unique(Natsal$laname)[!unique(Natsal$laname)%in%rownames(rowAverages)] missingLAsim_prop.names <- unique(sim_prop_la$LAname)[!unique(sim_prop_la$LAname)%in%rownames(rowAverages)] missingLA.names <- union(missingLAsim_prop.names, missingLANatsal.names) la.ranefs <- array(0, dim=c(length(missingLA.names),1)) dimnames(la.ranefs) <- list(missingLA.names, "(Intercept)") la.ranefs <- rbind(rowAverages, la.ranefs) lattice::dotplot(sort(la.ranefs[la.ranefs!=0,]))
# load("../../packages/STIecoPredict/data/LA_ethnic2_london.RData") sim_prop_la <- sim_prop_la[,names(sim_prop_la)!="p.ethnic2"] sim_prop_la <- merge(sim_prop_la, LA_ethnic2_london, by=c("LAname", "ethnic2", "age", "sex"), all.x=FALSE, all.y=FALSE)
fixeff.formula <- as.formula("cttestly ~ student + sex + age + ethnic2") # test.calcTotalProbs(sim_prop_la) sim_prop_la.fit <- STIecoPredict:::calcTotalProbs(formula=fixeff.formula, data=sim_prop_la, extracols = c("LAname", "Average Score", "metcounty_UA")) pred <- arm::invlogit( fixef(fit)["(Intercept)"] + fixef(fit)["sexWomen"]*(as.character(sim_prop_la.fit$sex)=="Women") + fixef(fit)["studentTRUE"]*(sim_prop_la.fit$student) + ranef(fit)$ethnic2[as.character(sim_prop_la.fit$ethnic2),1] + ranef(fit)$age[as.character(sim_prop_la.fit$age),1] + # ranef(fit)$laname[as.character(sim_prop_la.fit$LAname),1] + la.ranefs[as.character(sim_prop_la.fit$LAname),1] + ranef(fit)$metcounty_UA[as.character(sim_prop_la.fit$metcounty_UA),1] + fixef(fit)["I(`Average Score` > 28)TRUE"]*(sim_prop_la.fit$`Average Score`>28) )
## weight the probabilities by the subpopulation sizes and then sum by LA # tapply(sim_prop_la.fit$totalprob, sim_prop_la.fit$LAname, sum) predweighted <- pred * sim_prop_la.fit$totalprob LApred <- tapply(predweighted, sim_prop_la.fit$LAname, sum)
The LA specific post-stratified estimates are then
LApred <- data.frame(LApred=LApred[order(LApred)])
# popCensus <- read.csv("..\\..\\packages\\STIecoPredict\\raw-data\\popCensus.csv") LApred$LAname <- rownames(LApred) popCensus$LAname <- STIecoPredict:::LAnameClean(popCensus$LAname) rownames(LApred) <- NULL LApred <- merge(LApred, popCensus[popCensus$Sex=="All", c("LAname","prob.25.and.over")], all.x=TRUE) LApred$LApred.adj <- LApred$LApred*(1-LApred$prob.25.and.over) # LApred[order(LApred$LApred.adj),]
save(pred, LApred, file="data/predictions.RData")
agerange <- 15:24 NatsalLA <- melt(tapply(Natsal$cttestly[Natsal$age%in%agerange], Natsal$laname[Natsal$age%in%agerange], mean), varnames = c("LA.Name")) NatsalLAsize <- melt(tapply(Natsal$cttestly[Natsal$age%in%agerange], Natsal$laname[Natsal$age%in%agerange], length), varnames = c("LA.Name"))
Combine predictions with the surveillance data and produce summary statistics for each area.
CTADGUM_pred <- STIecoPredict:::joinAllOutcomeData(LApred) CTADGUM_pred <- STIecoPredict:::calcStats.CTADGUM_pred(CTADGUM_pred)
CTADGUM_pred <- merge(CTADGUM_pred, NatsalLA[,c("LA.Name", "value")], by.x = "LA Name", by.y="LA.Name", all.x=TRUE) CTADGUM_pred <- merge(CTADGUM_pred, NatsalLAsize[,c("LA.Name", "value")], by.x="LA Name", by.y="LA.Name", all.x=TRUE) names(CTADGUM_pred)[names(CTADGUM_pred)%in%c("value.x", "value.y")] <- c("NatsalLA", "NatsalLAsize")
save(CTADGUM_pred, file="data/CTADGUM_pred.RData")
library(gtools) temp <- dcast(sim_prop_la, LAname+age+sex+smokenow+increasingdrinker+student~ethnic2, value.var = "p.ethnic2") temp2 <- t(round(apply(temp[,7:12]*10, 1, function(x) rdirichlet(n=1, alpha=x)),5)) apply(temp2, 2, median) colnames(temp2) <- names(temp[,7:12]) mtemp <- data.frame(temp[,-(7:12)], temp2) sample.ethnic2 <- melt(mtemp, measure.vars = make.names(names(temp[,7:12])), variable.name = "ethnic2")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.