#### Load packages
source("./s1.0_packages.R", echo=F)
#Fonts
library(extrafont)
#library(clpAPI)
library(Cairo)
if(.Platform$OS.type == "unix") face = "Times" else face = "Times New Roman"
#Save directory
dir <- "../pop-synth-paper/figures"
#### Checking each generation ####
if(!file.exists("./db/d6.0_genresults.RData")) {
load("./db/d2.3_microdata.RData")
load("./db/d2.4_lodes.RData")
load("./db/d3.1_IPFlodes.RData")
load("./db/d2.5_margins.RData")
load("./db/d5.1_IPFpop.RData")
load("./db/d5.1_IPFODpop.RData")
load("./db/d5.2_MCMCpop.RData")
load("./db/d5.2_MCMCODpop.RData")
load("./db/d5.3_BNpop.RData")
load("./db/d5.3_BNODpop.RData")
indcols = c("AGE","GEND","RELATE","OCC","INDUS")
hhcols = c("INCOME","RESTY","HHVEH","HHSIZ")
pums = microdata$pums[,c("SERIALNO",c(indcols,hhcols)),with=F]
check_results <- function(pop, pums, odi, indcols, hhcols) {
#All columns combined
cols = c(indcols,hhcols)
#### Marginals ####
print("Formatting marginals")
#Check marginal results
marg <- list("AGE" = apply(margins$indagesex, c(1,3), sum),
"GEND" = apply(margins$indagesex, c(1,2), sum),
"RELATE" = as.matrix(margins$indrelate),
"OCC" = margins$indocc,
"INDUS" = margins$indindus,
"INCOME" = as.matrix(margins$hhinc),
"RESTY" = margins$hhdwell,
"HHVEH" = margins$hhveh,
"HHSIZ" = rowSums(margins$hhvehsize,dims=2))
#Aggregating population
agg.hh <- rbindlist(lapply(hhcols, function(n) {
cbind('Variable'=n,setNames(pop[!duplicated(HHID),.N,by=c(n,'otract')],c("var","otract","Synthetic")))
}))
agg.ind <- rbindlist(lapply(indcols, function(n) {
cbind('Variable'=n,setNames(pop[,.N,by=c(n,'otract')],c("var","otract","Synthetic")))
}))
#Aggregating marginals
marg <- rbindlist(lapply(marg, function(n) setNames(melt(n),c("otract","var","Marginals"))))
marg[, otract := as.character(otract)]
#Merging
marg <- merge(rbind(agg.hh,agg.ind),marg, by=c("otract","var"))
#### Cells ####
print("Formatting microdata cells")
#IND alone
cells.ind<- merge(as.data.table(prop.table(table(pums[,indcols,with=F]))),
as.data.table(prop.table(table(pop[,indcols,with=F]))),
by = indcols)
setnames(cells.ind,c("N.x","N.y"),c("PUMS","Synthetic"))
#HH alone
cells.hh <- merge(as.data.table(prop.table(table(pums[!duplicated(SERIALNO),hhcols,with=F]))),
as.data.table(prop.table(table(pop[!duplicated(HHID),hhcols,with=F]))),
by = hhcols)
setnames(cells.hh,c("N.x","N.y"),c("PUMS","Synthetic"))
#joint
joint.cells <- merge(as.data.table(prop.table(table(pums[,!"SERIALNO"])))[N>0,],
as.data.table(prop.table(table(pop[,cols,with=F]))), by = cols, all.x=T)
setnames(joint.cells,c("N.x","N.y"),c("PUMS","Synthetic"))
#### OD ####
print("Formatting origin-destinations")
#Getting proportions
pop <- pop[ , lapply(.SD, as.character)]
res.od = as.data.table(table(pop[INDUS!="NOIND",.(otract,dtract)]))
res.oi = as.data.table(table(pop[INDUS!="NOIND",.(otract,INDUS)]))
res.di = as.data.table(table(pop[INDUS!="NOIND",.(dtract,INDUS)]))
lodes.od = setNames(melt(as.data.table(lodes$mtxOD,keep.rownames = T), id.vars='rn'),
c("otract","dtract","LODES"))[, dtract := as.character(dtract)]
lodes.oi = setNames(melt(as.data.table(lodes$mtxOI,keep.rownames = T), id.vars='rn'),
c("otract","INDUS","LODES"))
lodes.di = setNames(melt(as.data.table(lodes$mtxDI,keep.rownames = T), id.vars='rn'),
c("dtract","INDUS","LODES"))
#merging proportions to gibbs results
res.od = merge(res.od[otract!="00000000000" & dtract!="00000000000",],
lodes.od[otract!="00000000000" & dtract!="00000000000",], by=c("otract","dtract"), all=T)
res.oi = merge(res.oi[otract!="00000000000",], lodes.oi[otract!="00000000000",], by=c("otract","INDUS"), all=T)
res.di = merge(res.di[dtract!="00000000000",], lodes.di[dtract!="00000000000",], by=c("dtract","INDUS"), all=T)
#Checking for zeros
res.od = res.od[ , (c('N','LODES')) := lapply(.SD, function(x) ifelse(is.na(x),0,x)), .SDcols = c('N','LODES')]
res.oi = res.oi[ , (c('N','LODES')) := lapply(.SD, function(x) ifelse(is.na(x),0,x)), .SDcols = c('N','LODES')]
res.di = res.di[ , (c('N','LODES')) := lapply(.SD, function(x) ifelse(is.na(x),0,x)), .SDcols = c('N','LODES')]
"hh cells" = cells.hh[,.(PUMS,Synthetic)][ , sqrt(sum((PUMS-Synthetic)^2)/.N)/mean(PUMS)]
"ind cells" = cells.ind[,.(PUMS,Synthetic)][ , sqrt(sum((PUMS-Synthetic)^2)/.N)/mean(PUMS)]
#### SRMSE ####
print("Finalizing")
SRMSE <- c("Marginal" = marg[,sqrt(sum((Marginals - Synthetic)^2)/.N)/mean(Marginals)],
"Multilevel joint cells" = joint.cells[ , sqrt(sum((PUMS-Synthetic)^2)/.N)/mean(PUMS)],
"Independent joint cells" = rbind(cells.ind[,.(PUMS,Synthetic)],cells.hh[,.(PUMS,Synthetic)])[ , sqrt(sum((PUMS-Synthetic)^2)/.N)/mean(PUMS)],
"hh cells" = cells.hh[,.(PUMS,Synthetic)][ , sqrt(sum((PUMS-Synthetic)^2)/.N)/mean(PUMS)],
"ind cells" = cells.ind[,.(PUMS,Synthetic)][ , sqrt(sum((PUMS-Synthetic)^2)/.N)/mean(PUMS)],
"OI" = res.oi[,sqrt(sum((LODES-N)^2)/.N)/mean(LODES)],
"DI" = res.di[,sqrt(sum((LODES-N)^2)/.N)/mean(LODES)],
"OD" = res.od[,sqrt(sum((LODES-N)^2)/.N)/mean(LODES)],
"ODI" = rbindlist(list(res.od[,.(N,LODES)],res.oi[,.(N,LODES)],res.di[,.(N,LODES)]))[,sqrt(sum((LODES-N)^2)/.N)/mean(LODES)])
SRMSE <- as.data.table(SRMSE, keep.rownames = T)
#Output
output <- list('marginals' = marg,
'cells' = rbind(cells.ind[,.(PUMS,Synthetic)],cells.hh[,.(PUMS,Synthetic)]),
'jointcells' = joint.cells,
'odi' = list('od' = res.od, 'oi' = res.oi, 'di' = res.di),
'SRMSE' = SRMSE)
return(output)
}
results.ipfod = check_results(ipfod.pop, pums, ipf.lodes, indcols, hhcols)
results.ipf = check_results(ipf.pop, pums, lodes, indcols, hhcols)
results.mcmc = check_results(mcmc.pop, pums, lodes, indcols, hhcols)
results.mcmcod = check_results(mcmc.odpop, pums, lodes, indcols, hhcols)
results.bn = check_results(bn.pop, pums, lodes, indcols, hhcols)
results.bnod = check_results(bn.odpop, pums, lodes, indcols, hhcols)
results <- list('IPF' = results.ipf,
'IPFOD' = results.ipfod,
'MCMC' = results.mcmc,
'MCMCOD' = results.mcmcod,
'BN' = results.bn,
'BNOD' = results.bnod)
save(results, file = "./db/d6.0_genresults.RData")
} else {
load("./db/d6.0_genresults.RData")
#Errors
SRMSE <- rbindlist(lapply(results,
function(x) setNames(data.table(t(x$SRMSE$SRMSE)),x$SRMSE$rn)))
SRMSE <- cbind("Pop" = names(results), SRMSE)
SRMSE <- t(SRMSE[,-1])
colnames(SRMSE) <- names(results)
SRMSE
}
#### Compare methods ####
if(!file.exists("./db/d6.0_compare_all.RData")){
sourceCpp("./cpp_IPU.cpp")
format_fulltypes <- function(ind, hh, indvars, hhvars){
# Getting HH and Person types from IPF
if(is.list(hh)){
print("Aggregating person matrices")
#Summing into single matrix
indtypes <- Reduce("+", ind)
hhtypes <- Reduce("+", hh)
} else {
print("Using existing total weights")
indtypes <- ind
hhtypes <- hh
}
print("Formatting sparse matrix from PUMS")
pumstypes <- microdata[["pums"]][,c("SERIALNO", indvars, hhvars), with=F]
pumstypes[ , SERIALNO := as.character(SERIALNO)]
#expand IPF results into hh/person type frequencies
indtypes <- as.data.table(indtypes)
hhtypes <- as.data.table(hhtypes)
#labeling types
hhtypes[,HHTYPE := paste("HHTYPE",1:nrow(hhtypes),sep="")]
indtypes[,PERTYPE := paste("PERTYPE",1:nrow(indtypes),sep="")]
#Merging labels to person/household types in pums pop
pumstypes <- merge(pumstypes, hhtypes[,c(hhvars,"HHTYPE"), with=F], by=hhvars, all.x = T)
pumstypes <- merge(pumstypes, indtypes[,c(indvars,"PERTYPE"), with=F], by=indvars, all.x = T)
#Compact DT
pumstypes <- pumstypes[,.(SERIALNO, PERTYPE, HHTYPE)]
#Getting frequencies
pertypefreq <- pumstypes[, .N, by = .(SERIALNO,PERTYPE)]
hhtypefreq <- pumstypes[!duplicated(SERIALNO), .N, by = .(SERIALNO,HHTYPE)]
#Making uniform names and stacking results for sparse matrix
colnames(pertypefreq) <- c("SERIALNO","TYPE","N")
colnames(hhtypefreq) <- c("SERIALNO","TYPE","N")
#Stacking long
typefreq <- rbind(hhtypefreq, pertypefreq)
#Releveling the factor levels
typefreq[ , TYPE := as.factor(TYPE)]
typefreq[ , SERIALNO := as.factor(SERIALNO)]
#Sending to wide sparse matrix
sparse <- sparseMatrix(i = as.integer(typefreq$TYPE),
j = as.integer(typefreq$SERIALNO),
x = typefreq$N,
dimnames = list(type = levels(typefreq$TYPE), serial = levels(typefreq$SERIALNO)))
bigsparse <- list(hhtypes = hhtypes,
indtypes = indtypes,
pumstypes = pumstypes,
sparse = sparse)
return(bigsparse)
}
format_bvector <- function(indcons, hhcons, sparsetypes, typevars, indvars, hhvars){
#expand IPF results into hh/person type frequencies
indcons <- as.data.table(indcons)
hhcons <- as.data.table(hhcons)
#labeling types
indcons <- merge(indcons, sparsetypes$indtypes[,c(indvars,"PERTYPE"),with=F], by=indvars, all=T)
hhcons <- merge(hhcons, sparsetypes$hhtypes[,c(hhvars,"HHTYPE"),with=F], by=hhvars, all=T)
#creating constraint vector
if("value" %in% colnames(indcons)) {
cons <- c(hhcons$value, indcons$value)
} else {
cons <- c(hhcons$N, indcons$N)
}
names(cons) <- c(hhcons$HHTYPE, indcons$PERTYPE)
#Removing corresponding zero columns from constraints
cons <- cons[typevars]
return(cons)
}
nnld_fit <- function(A, b, int = F) {
print("Running least deviation fitting")
time <- Sys.time()
#Format for LAD
Ac <- rbind(cbind(A, Diagonal(nrow(A), -1)), cbind(-A, Diagonal(nrow(A), -1)))
if(int==T) b <- int_trs(b) ; names(b) <- rownames(A)
#Objective coefficients
f <- rep(0,nrow(A))
f[which(grepl("HHTYPE",rownames(A)))] <- 1
f[which(grepl("PERTYPE",rownames(A)))] <- 1
f <- c(rep(0,ncol(A)), f)
#Prepare data structures for the problem object. Number of columns and rows:
prob <- initProbCLP()
setObjDirCLP(prob, 1)
#Matrix size
nc <- ncol(Ac)
nr <- nrow(Ac)
#The constraint matrix is passed in column major order format. all indices start with 0!
ia <- Ac@i
#Column indices.
ja <- Ac@p
#Non-zero elements.
ar <- Ac@x
#Lower bounds for the variables (columns).
clb <- rep(0, ncol(Ac))
#Right hand side (row upper bounds for the rows).
rub <- c(b,-b)
#Objective coefficients.
obj <- f
#Load problem data into the problem object.
loadProblemCLP(prob, nc, nr, ia, ja, ar, clb, NULL, obj, NULL, rub)
#Solve the problem using the simplex algorithm.
solveInitialCLP(prob)
#Retrieve the value of the objective function after optimization.
X <- getColPrimCLP(prob)[1:ncol(A)]
names(X) <- colnames(A)
#Free memory, allocated to the problem object.
delProbCLP(prob)
#getting results
bhat <- as.vector(A %*% X)
res <- data.table(varname = names(b),
bhat,
b,
vartype = gsub('[0-9]+', '', names(b)),
esttype = "Least-Deviation")
res[ , sqerror := (b-bhat)^2]
RMSE <- sqrt(sum(res$sqerror)/nrow(res))
RMSN <- RMSE/mean(res$b)
qplot(data=res, x=b, y=bhat) + theme_classic()
time <- difftime(Sys.time(), time, units = 'secs')
print(paste("Completed in", time, "seconds"))
return(list(X = X, errors = res, RMSE = RMSE, RMSN = RMSN, time = time))
}
nnls_fit <- function(A, b) {
print("Running least-squares fitting")
time <- Sys.time()
#Run NNLS
LS <- nnls(A, b)
#Retrieve the value of the objective function after optimization.
X <- LS$x
names(X) <- colnames(A)
#getting results
bhat <- as.vector(A %*% X)
res <- data.table(varname = names(b), bhat, b, vartype = gsub('[0-9]+', '', names(b)))
res[ , sqerror := (b-bhat)^2]
res[ , esttype := "Least-Squares"]
RMSE <- sqrt(sum(res$sqerror)/nrow(res))
RMSN <- RMSE/mean(res$b)
time <- difftime(Sys.time(), time, units = 'secs')
print(paste("Completed in", time, "seconds"))
return(list(X = X, errors = res, RMSE = RMSE, RMSN = RMSN, time = time))
}
glmnet_fit <- function(A, b, pumstypes, l = 0, a = 1, tol=1e-10, maxit=1e5) {
#print("Running least-squares fitting")
time <- Sys.time()
#Run NNLS with glmnet
glmnetmod <- glmnet(A, b, alpha = a, lambda = l, lower.limits=0, intercept=F, thresh = tol, maxit=maxit)
#Retrieve the value of the objective function after optimization.
X <- coef(glmnetmod)[,1][-1]
#names(X) <- colnames(A)
#getting results
bhat <- as.vector(A %*% X)
res <- data.table(varname = names(b), bhat, b, vartype = gsub('[0-9]+', '', names(b)))
res[ , sqerror := (b-bhat)^2]
res[ , esttype := "CCD"]
RMSE <- sqrt(sum(res$sqerror)/nrow(res))
RMSN <- RMSE/mean(res$b)
#plot(bhat~b, res)
#plot(bhat~b, res[!(bhat>0 & b==0), ])
#Finding the few erroneously weighted samples and removing
zeros = res[bhat>0 & b==0,varname]
#zeros = pumstypes[PERTYPE %in% zeros | HHTYPE %in% zeros, SERIALNO]
#zeros = pumstypes[HHTYPE %in% zeros, SERIALNO]
zeros = pumstypes[PERTYPE %in% zeros, SERIALNO]
X <- X[!(names(X) %in% zeros)]
#Remove 0
X <- X[X>0]
time <- difftime(Sys.time(), time, units = 'secs')
#print(paste("Completed in", time, "seconds"))
return(list(X = X, errors = res, RMSE = RMSE, RMSN = RMSN, time = time))
}
ipu_fit <- function(A, b, verb = T, c = 1e-5, cc = 1, maxit=100) {
print("Running IPU fitting")
time <- Sys.time()
#base 0 hh type col index
hh_idx <- which(grepl("HHTYPE", names(b)))-1
#setting 0's to 0.01
b[b==0] <- 1e-5
#Running IPU
X <- IPU_cpp_sparse(t(A), b, hh_idx, corncrit=cc, crit=c, maxit=maxit, print=verb)
names(X) <- colnames(A)
#If results in NA because too small then set to 0
X[!is.finite(X)]<-0
#getting results
bhat <- as.vector(A %*% X)
res <- data.table(varname = names(b), bhat, b, vartype = gsub('[0-9]+', '', names(b)))
res[ , sqerror := (b-bhat)^2]
res[ , esttype := "IPU"]
#
RMSE <- sqrt(sum(res$sqerror)/nrow(res))
RMSN <- RMSE/mean(res$b)
#
time <- difftime(Sys.time(), time, units = 'secs')
print(paste("Completed in", time, "seconds"))
return(list(X = X, errors = res, RMSE = RMSE, RMSN = RMSN, time = time))
}
# Formatting full constraints and matrix
if(!file.exists("./db/d6.0_sparse.RData")) {
#Load general data
load("./db/d2.3_microdata.RData")
load('./db/d3.1_IPFindhh.RData')
indcols = c("AGE","GEND","RELATE","OCC","INDUS")
hhcols = c("INCOME","RESTY","HHSIZ","HHVEH")
pums = microdata$pums[,c("SERIALNO",c(indcols,hhcols)),with=F]
types <- format_fulltypes(ipf.indtots, ipf.hhtots, indcols, hhcols)
# sparse <- types$sparse
#
# chnk <- split(sample(1:ncol(sparse),ncol(sparse)), ceiling(seq_along(1:ncol(sparse))/50000))
#
# if(.Platform$OS.type == "unix") {
# cl <- makeCluster(detectCores()-1, "FORK")
# } else {
# cl <- makeCluster(detectCores(logical = F)-1, "PSOCK")
# clusterEvalQ(cl,c(library(Matrix)))
# clusterExport(cl,
# list("sparse"),
# envir=environment())
# }
#
# NoDup <- pblapply(chnk, function(x) t(mgcv::uniquecombs(t(sparse[,x]))), cl = cl)
# stopCluster(cl)
#
# sn <- unlist(lapply(NoDup, function(x) dimnames(x)$serial))
#
# sparse <- sparse[ , dimnames(sparse)$serial %in% sn]
#
# types$sparse <- sparse
save(types, file = "./db/d6.0_sparse.RData")
} else {
load("./db/d6.0_sparse.RData")
A <- types$sparse
b <- format_bvector(ipf.indtots, ipf.hhtots, types, rownames(A), indcols, hhcols)
}
#####Comparison for whole region
if(!file.exists("./db/d6.0_compare_ipu.RData")) {
res_ipu <- ipu_fit(A, b, verb=T, c=5e-4, cc=1, maxit=100)
save(res_ipu, file = "./db/d6.0_compare_ipu.RData")
} else {
load("./db/d6.0_compare_ipu.RData")
}
# if(!file.exists("./db/d6.0_compare_nnls.RData")) {
# res_nnls <- nnls_fit(A, b)
# save(res_nnls, file = "./db/d6.0_compare_nnls.RData")
# } else {
# load("./db/d6.0_compare_nnls.RData")
# }
if(!file.exists("./db/d6.0_compare_nnld.RData")) {
res_nnld <- nnld_fit(A, b)
save(res_nnld, file = "./db/d6.0_compare_nnld.RData")
} else {
load("./db/d6.0_compare_nnld.RData")
}
if(!file.exists("./db/d6.0_compare_ccd.RData")) {
res_ccd <- glmnet_fit(A, b, types$pumstypes, a=1, l=0, tol=1e-14)
save(res_ccd, file = "./db/d6.0_compare_ccd.RData")
} else {
load("./db/d6.0_compare_ccd.RData")
}
if(!file.exists("./db/d6.0_compare_lasso.RData")) {
res_lasso <- glmnet_fit(A, b, types$pumstypes, a=1, l=1)
save(res_lasso, file = "./db/d6.0_compare_lasso.RData")
} else {
load("./db/d6.0_compare_lasso.RData")
}
if(!file.exists("./db/d6.0_compare_ridge.RData")) {
res_ridge <- glmnet_fit(A, b, types$pumstypes, a=0, l=1, tol=1e-12, maxit=1e7)
res_ridge$errors$esttype <- "NNLS"
save(res_ridge, file = "./db/d6.0_compare_ridge.RData")
} else {
load("./db/d6.0_compare_ridge.RData")
}
compare <- c(list("ipu"=res_ipu,
"nnls"=res_ridge,
"nlad"=res_nnld,
"ccd"=res_ccd))
####Combining
# compare <- c(list("nnls"=res_nnls,"nnld"=res_nnld,"ipu"=res_ipu,"ccd"=res_ccd))
#Creating performance table
perform <- lapply(compare, function(res) data.table("Method"=res$errors$esttype[1],
"RMSN"=res$RMSN,
"RMSE"=res$RMSE,
"MAPE"=res$MAPE,
"Time"=res$time))
perform <- do.call(rbind, perform)
perform[ , Scale := Time/as.numeric(min(Time))]
perform
save(compare, perform, file= "./db/d6.0_compare_all.RData")
} else{
load("./db/d6.0_compare_all.RData")
}
#### PLOT COMPARE METHODS ####
plots.compare <- lapply(compare, function(res) {
dat = res$errors
dat$bhat = dat$bhat/1000
dat$b = dat$b/1000
RMSN = format(dat[ , sqrt(sum((bhat - b)^2)/.N)/mean(b)], digits=3, scientific=T)
slope = round(coef(lm(bhat~b, data=dat))[2],5)
R = round(summary(lm(bhat~b, data=dat))$r.squared,5)
ggplot(data=dat, aes(x=b, y=bhat)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(shape=1, size=1.5, color="black") +
scale_y_continuous('Estimated IPF types (thousands)', breaks = seq(0,1000,50), label = scales::comma) +
scale_x_continuous('Target IPF types (thousands)', breaks = seq(0,1000,50), label = scales::comma) +
#annotate("text", x=0.7*max(dat$bhat), y=50, label = paste("Slope=",slope), family=face) +
#annotate("text", x=0.7*max(dat$bhat), y=25, label = paste("R^{2}~'='~",R), parse=T, family=face) +
annotate("text", x=0.7*max(dat$bhat), y=0, label = paste("RMSN =",RMSN), family=face) +
theme_classic() + theme(plot.title = element_text(hjust = 0.5)) + coord_fixed() +
theme(legend.position = "none", text=element_text(family=face, size=14))})
grid.arrange(grobs=plots.compare, ncol = 2)
#save
for(n in names(compare)) {
ggsave(paste0(dir,"/method_",n,".png"), type="cairo", plot = plots.compare[[n]], width = 3, height = 3, units = "in", dpi=300)
}
#Zoom
plots.comparezoom <- lapply(compare, function(res) {
ggplot(data=res$errors, aes(x=b, y=bhat)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(size=1.5, shape=1) +
scale_y_continuous('Estimated IPF types', limits = c(0,5), breaks = seq(0,5,1)) +
scale_x_continuous('Target IPF types', limits = c(0,5), breaks = seq(0,5,1)) +
#annotate("text", x=2, y=0.50, label = paste("Slope=",round(coef(lm(bhat~b, data=res$errors))[2],5)), family=face) +
#annotate("text", x=2, y=0.25, label = paste("R^{2}~'='~",round(summary(lm(bhat~b, res$errors))$r.squared,5)), parse=T, family=face) +
annotate("text", x=3, y=0, label = paste("RMSN =",format(res$RMSN,digits=3,scientific=T)), family=face) +
theme_classic() + theme(plot.title = element_text(hjust = 0.5)) + coord_fixed() +
theme(legend.position = "none", text=element_text(family=face, size=14))})
grid.arrange(grobs=plots.comparezoom, ncol = 2)
#save
for(n in names(compare)) {
ggsave(paste0(dir,"/methodzoom_",n,".png"), type="cairo", plot = plots.comparezoom[[n]], width = 3, height = 3, units = "in", dpi=300)
}
#### Plot final gens ####
keepers <- c("IPF","IPFOD")
####Marginal totals
plots.margtot <- lapply(results, function(res) {
dat = res$marginals[ , lapply(.SD, sum), by = c("var","Variable"), .SDcols = c("Synthetic","Marginals")]
RMSN = round(dat[ , sqrt(sum((Marginals - Synthetic)^2)/.N)/mean(Marginals)], 4)
ggplot(data=dat, aes(x=Marginals/1000000, y=Synthetic/1000000, color=Variable)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(size=1.5, shape=1) + theme_classic() + coord_fixed() +
# annotate("text", x=0.8*max(dat$Marginals/1000), y=0.8,
# label = paste("Slope=",round(coef(lm(Synthetic~Marginals, data=dat))[2],4)), family=face) +
# annotate("text", x=0.8*max(dat$Marginals/1000), y=0.4,
# label = paste("R^{2}~'='~",round(summary(lm(Synthetic~Marginals, dat))$r.squared,4)), parse=T, family=face) +
annotate("text", x=0.8*max(dat$Marginals/1000000), y=0,
label = paste("RMSN =",RMSN), family=face) +
scale_x_continuous("Marginal totals (millions)", breaks = seq(0, 3, by = 0.5), limits = c(0,3)) +
scale_y_continuous("Synthetic totals (millions)", breaks = seq(0, 3, by = 0.5), limits = c(0,3)) +
scale_color_manual("Variable",
breaks = c("HHVEH","HHSIZ","RESTY","INCOME","INDUS","OCC","RELATE","GEND","AGE"),
labels = c("Household vehicles","Household size","Dwelling type","Income","Industry","Occupation","Relationship","Gender","Age"),
values = c('#1b9e77','#d95f02','#7570b3','#e7298a','#66a61e','#e6ab02','#a6761d','#666666','#e31a1c')) +
theme(text=element_text(family=face,size=12),
axis.title.y = element_text(margin = unit(c(0, 8, 0, 0), "pt")),
legend.title = element_blank(),
legend.background = element_rect(fill="transparent"),
legend.key.height=unit(0.65,"line"),
legend.position = c(.3,0.7))
})
plots.margtot$IPF
#save
for(n in keepers) {
ggsave(paste0(dir,"/margtot_",n,".png"), type="cairo", plot = plots.margtot[[n]], width = 3, height = 3, units = "in", dpi=300)
}
####Marginal tracts
plots.margtracts <- lapply(results, function(res) {
RMSN = round(res$marginals[ , sqrt(sum((Marginals - Synthetic)^2)/.N)/mean(Marginals)], 4)
ggplot(data=res$marginals, aes(x=Marginals/1000, y=Synthetic/1000)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(size=0.001) + theme_classic() + coord_fixed() +
# annotate("text", x=0.8*max(res$marginals$Marginals/1000), y=0.8,
# label = paste("Slope=",round(coef(lm(Synthetic~Marginals, data=res$marginals))[2],4)), family=face) +
# annotate("text", x=0.8*max(res$marginals$Marginals/1000), y=0.4,
# label = paste("R^{2}~'='~",round(summary(lm(Synthetic~Marginals, res$marginals))$r.squared,4)), parse=T, family=face) +
annotate("text", x=0.8*max(res$marginals$Marginals/1000), y=0,
label = paste("RMSN =",RMSN), family=face) +
scale_x_continuous("Marginal totals (thousands)", breaks = seq(0, 6, by = 1), label = scales::comma, limits = c(0,6)) +
scale_y_continuous("Synthetic totals (thousands)", breaks = seq(0, 6, by = 1), label = scales::comma, limits = c(0,6)) +
theme(text=element_text(family=face,size=12),
axis.title.y = element_text(margin = unit(c(0, 14, 0, 0), "pt"))
)
})
#plots.margtracts$IPF
#save
for(n in keepers) {
ggsave(paste0(dir,"/margtracts_",n,".png"), type="cairo", plot = plots.margtracts[[n]], width = 3, height = 3, units = "in", dpi=300)
}
####Cells
plots.cells <- lapply(results,
function(res) {
RMSN = round(res$cells[ , sqrt(sum((PUMS - Synthetic)^2)/.N)/mean(PUMS)], 4)
ggplot(data=res$cells, aes(x=PUMS, y=Synthetic)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(size=0.001) + theme_classic() + coord_fixed() +
# annotate("text", x=0.8*max(res$cells$PUMS), y=0.01,
# label = paste("Slope=",round(coef(lm(Synthetic~PUMS, data=res$cells))[2],4)), family=face) +
# annotate("text", x=0.8*max(res$cells$PUMS), y=0.005,
# label = paste("R^{2}~'='~",round(summary(lm(Synthetic~PUMS, res$cells))$r.squared,4)), parse=T, family=face) +
annotate("text", x=0.7*max(res$cells$PUMS), y=0,
label = paste("RMSN =",RMSN), family=face) +
scale_x_continuous("PUMS proportions", breaks = seq(0, 1, by = 1e-2), limits = c(0,0.07)) +
scale_y_continuous("Synthetic proportions", breaks = seq(0, 1, by = 1e-2), limits = c(0,0.07)) +
theme(text=element_text(family=face,size=12),
#axis.title.y = element_text(margin = unit(c(0, 10, 0, 0), "pt"))
)
})
plots.cells$IPF
#save
for(n in keepers) {
ggsave(paste0(dir,"/cells_",n,".png"), type="cairo", plot = plots.cells[[n]], width = 3, height = 3, units = "in", dpi=300)
}
####OD
plots.odi <- lapply(results,
function(res) {
#Origin-destination
RMSN = lapply(res$odi, function(x) round(x[ , sqrt(sum((LODES - N)^2)/.N)/mean(N)], 4))
slope = lapply(res$odi, function(x) round(coef(lm(LODES~N, data=x))[2],4))
R = lapply(res$odi, function(x) round(summary(lm(LODES~N, data=x))$r.squared,4))
plot.OD <- ggplot(data=res$odi$od,aes(x=LODES, y=N)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(size=0.1) +
# annotate("text", x=0.7*max(res$odi$od$LODES), y=1000, label = paste("Slope=",slope$od), family=face) +
# annotate("text", x=0.7*max(res$odi$od$LODES), y=500, label = paste("R^{2}~'='~",R$od), parse=T, family=face) +
annotate("text", x=0.7*max(res$odi$od$LODES), y=0, label = paste("RMSN =",RMSN$od), family = face) +
scale_x_continuous("LODES", labels = scales::comma, breaks = seq(0, max(res$odi$od$LODES), 100), limits=c(0,max(res$odi$od$LODES))) +
scale_y_continuous("Synthetic", labels = scales::comma, breaks = seq(0, max(res$odi$od$LODES), 100), limits=c(0,max(res$odi$od$LODES))) +
theme_classic() + coord_fixed() + theme(text=element_text(family=face, size=12),
axis.title.y = element_text(margin = unit(c(0, 8, 0, 0), "pt")))
#plot.OD
#Destination by industry
plot.DI <- ggplot(data=res$odi$di, aes(x=LODES, y=N)) +#, color=INDUS)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(size=0.5) +
# annotate("text", x=0.7*max(res$odi$di$LODES), y=5000, label = paste("Slope=",slope$di), family=face) +
# annotate("text", x=0.7*max(res$odi$di$LODES), y=2500, label = paste("R^{2}~'='~",R$di), parse=T, family=face) +
annotate("text", x=0.7*max(res$odi$di$LODES), y=0, label = paste("RMSN =",RMSN$di), family = face) +
scale_x_continuous("LODES", labels = scales::comma, breaks = seq(0, max(res$odi$di$LODES), 10000)) +
scale_y_continuous("Synthetic", labels = scales::comma, breaks = seq(0, max(res$odi$di$LODES),10000)) +
theme_classic() + coord_fixed() + theme(text=element_text(family=face, size=12))
#plot.DI
#Origin by industry
plot.OI <- ggplot(data=res$odi$oi, aes(x=LODES, y=N)) + #, color = INDUS)) +
geom_smooth(method="lm", color="gray", size=0.5, fullrange=T) +
geom_point(size=0.5) +
#annotate("text", x=0.7*max(res$odi$oi$LODES), y=1000, label = paste("Slope=",slope$oi), family=face) +
#annotate("text", x=0.7*max(res$odi$oi$LODES), y=500, label = paste("R^{2}~'='~",R$oi), parse=T, family=face) +
annotate("text", x=0.7*max(res$odi$oi$LODES), y=0, label = paste("RMSN =",RMSN$oi), family = face) +
scale_x_continuous("LODES", labels = scales::comma, breaks = seq(0, max(res$odi$oi$LODES), 500)) +
scale_y_continuous("Synthetic", labels = scales::comma, breaks = seq(0, max(res$odi$oi$LODES), 500)) +
theme_classic() + coord_fixed() + theme(text=element_text(family=face, size=12),
axis.title.y = element_text(margin = unit(c(0, 8, 0, 0), "pt")))
#plot.OI
return(list("OD"=plot.OD,"DI"=plot.DI,"OI"=plot.OI))
})
plots.odi$IPFOD$OI
#save
for(n in keepers) {
for(od in names(plots.odi[[n]])){
ggsave(paste0(dir,"/odi_",n,"_",od,".png"), type="cairo", plot = plots.odi[[n]][[od]], width = 3, height = 3, units = "in", dpi=300)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.