##' Produce a lattice cartogram by polishing the grids in rows and in columns
##'
##' @param grids The output of function \code{checkerboard}.
##' @param density A vector of the variable of interest.
##' \code{names(density)} should match \code{grids$label}.
##' @param pal palette. The input for the argument "col" in \code{image()}.
##' @param ... na.loose=T/F?
##' @return a data frame of coordinates and the corresponding labels, as well as the errors.
##' @example inst/ex_gridmap.R
##' @export
##'
polish_cart = function(grids,density,pal=NULL,...){
all_labels=if (is.factor(grids$label)) levels(grids$label) else unique(grids$label)
grids$label=as.character(grids$label)
stopifnot(length(intersect(names(density),unique(grids$label)))>1)
grids=grids[grids$label %in% names(density) | is.na(grids$label),]
xgrid=sort(unique(grids$x))
ygrid=sort(unique(grids$y))
ncell=length(xgrid)*length(ygrid)
if (ncell!=nrow(grids)) stop("The input is not a full grid.")
#plot(y~x,data=grids,pch=15,col=factor(grids$label,levels=all_labels),cex=1.7)
if (is.null(pal)) {
set.seed(1000)
pal=palette(sample(c(rainbow(24),colors()[c(1,4:11,13:26,28:29,76:87)*5+3]),length(all_labels),rep=FALSE))
}
x11(width=4,height=4)
grids=grids[order(grids$y,grids$x),]
image(xgrid,ygrid,matrix(as.integer(factor(grids$label,levels=all_labels)),nrow=length(xgrid),ncol=length(ygrid)),col=pal,xlab='',ylab='',xaxt='n',yaxt='n',frame=F)
dens=data.frame(density=density,orig_area=table(grids$label)[names(density)])
rownames(dens)=names(density)
dens=dens[!is.na(dens$orig_area),]
dens$goal=round(dens$density*mean(dens$orig_area)/mean(dens$density))
xcenter=round(mean(1:length(xgrid)))
ycenter=round(mean(1:length(ygrid)))
newgrids1=data.frame(x=NULL,y=NULL,label=NULL)
for (i in 1:length(xgrid)){
column=grids[grids$x==xgrid[i],]
newgrids1=rbind(newgrids1,panning(column,dens,'x',ycenter))
}
newgrids1=global.matching(newgrids1,'x',...)
image1=complete.image(newgrids1)
newgrids1=image1$df
dens$mid_area=table(newgrids1$label)[rownames(dens)]
#plot(y~x,data=newgrids1,pch=15,col=factor(newgrids1$label,levels=all_labels),cex=1.7)
x11(width=4,height=4)
image(image1$matrix,col=pal,xlab='',ylab='',xaxt='n',yaxt='n',frame=F)
Sys.sleep(0.5)
newgrids2=data.frame(x=NULL,y=NULL,label=NULL)
for (i in unique(newgrids1$y)){
rowline=newgrids1[newgrids1$y==i,]
newgrids2=rbind(newgrids2,panning(rowline,dens,'y',xcenter))
}
newgrids2=global.matching(newgrids2,'y',...)
image2=complete.image(newgrids2)
newgrids2=image2$df
newncell=sum(!is.na(newgrids2$label))
dens$end_area=table(newgrids2$label)[rownames(dens)]
err=c(SSE=sum((dens$goal-dens$end_area)^2)/newncell,
AE=sum(abs(dens$goal-dens$end_area))/newncell)
#plot(y~x,data=newgrids2,pch=15,col=factor(newgrids2$label,levels=all_labels),cex=1.7)
x11(width=4,height=4)
image(image2$matrix,col=pal,xlab='',ylab='',xaxt='n',yaxt='n',frame=F)
return(list(grids=newgrids2,count=dens[,c(1,3,2,4,5)],error=err))
}
##' Complete the lattice if there are missings.
##'
##' If nrow(df)!=length(unique(x))*length(unique(y)),
##' then NA's will be filled in to make a full lattice.
##'
##' @param df a data frame with column x(the x coordinate), y(y coordinate),
##' and label(which region it belongs to). The data point should be in a line
##' in either x or y direction.
##' @param omit.sea logical. If TRUE then the NA rows or columns around the map
##' will be removed.
##' @return a data frame of column x(the new x coordinate),
##' y(the new y coordinate), and label.
##'
complete.image=function(df,omit.sea=TRUE){
names(df)[which(!names(df)%in%c('x','y'))]='label'
x=sort(unique(df$x))
y=sort(unique(df$y))
all_labels=if (is.factor(df$label)) levels(df$label) else sort(unique(df$label))
df$label=as.integer(factor(df$label,levels=all_labels))
res=matrix(NA,nrow=length(y),ncol=length(x))
rownames(res)=y; colnames(res)=x
for (j in 1:ncol(res)){
tmp=df[df$x==x[j],]
res[as.character(tmp$y),j]=tmp$label
}
if (omit.sea) {
NArows=which(rowMeans(is.na(res))!=1)
NAcols=which(colMeans(is.na(res))!=1)
res=res[NArows[1]:NArows[length(NArows)],NAcols[1]:NAcols[length(NAcols)]]
x=as.numeric(colnames(res))
y=as.numeric(rownames(res))
}
list(x=x,y=y,matrix=t(res),df=data.frame(x=rep(x,each=length(y)),y=rep(y,length(x)),label=all_labels[as.vector(res)],stringsAsFactors=TRUE))
}
##' Stretch or squeeze a sequence of cells by a target density
##'
##' @param df a data frame with column x(the x coordinate), y(y coordinate),
##' and label(which region it belongs to). The data point should be in a line
##' in either x or y direction.
##' @param dnsty a data frame with columns goal(the target number of cells) and
##' orig_area(the original number of cells). The rownames must contain
##' all the labels in \code{df}.
##' @param by If by='x' then the code will stretch the cells in y/vertical direction.
##' If by='y' then the code will stretch the cells in x/horizontal direction.
##' @param center the global center of y if by='x' and global center of x if by='y'.
##' "Global" center means not the center of the input \code{df},
##' but the center of the map.
##' @return a data frame of column x(the new x coordinate),
##' y(the new y coordinate), and label. The number of rows of the output
##' might be different from the input \code{df}.
##'
panning=function(df,dnsty,by,center){
df$label=as.character(df$label)
df$label[is.na(df$label)]='NA'
jump=c(1,which(diff(as.integer(as.factor(df$label)))!=0)+1)
unilabel=df$label[jump]
labellen=c(diff(jump),nrow(df)+1-max(jump))
strength=sqrt(dnsty[unilabel,'goal'] / dnsty[unilabel,'orig_area'])
ratiolen=labellen / dnsty[unilabel,'orig_area']
ratiolen[is.na(ratiolen)]=.3
naidx=which(is.na(strength))
for (i in naidx){
strength[i]=min(c(1,strength[i-1],strength[i+1]),na.rm=TRUE)
}
newlength=labellen*strength
newlength[ratiolen>=.3]=floor(newlength[ratiolen>=.3])
newlength[ratiolen<.3]=ceiling(newlength[ratiolen<.3])
newcolumn=rep(unilabel,newlength)
newcolumn[newcolumn=='NA']=NA
centeridx=sum(jump<=center)
newcenter=round((center-jump[centeridx])*strength[centeridx])+ifelse(centeridx==1,0,sum(newlength[1:(centeridx-1)]))
if (by=='x') return(data.frame(x=df$x[1],y=(1:length(newcolumn))-newcenter,label=newcolumn,stringsAsFactors=FALSE))
if (by=='y') return(data.frame(x=(1:length(newcolumn))-newcenter,y=df$y[1],label=newcolumn,stringsAsFactors=FALSE))
}
##' Measure the similarity of two sequences of cells
##'
##' 1. Contigency table or mean(a!=b)
##' 2. diff lengths of the sequences -- ignore or place a little panelty?
##' 3. neighors -- 1 neighbor or 3 neighbors? Assign some weights to the diagonal neighbors?
##'
##' @param a a vector
##' @param b a numeric vector of the same length as "a"
##' @param na.panelty non-negative. Apply to answered Question 2.
##' @param diag.wt non-negative. Apply to answer Question 3.
##' @return the score of similarity
##' @examples
##' similarity(letters[1:5],letters[1:5])
##' similarity(1:5,5:1)
##' similarity(1:5,2:6,diag.wt=0.5)
##' similarity(1:5,c(NA,2:5),na.panelty=0.2)
##' similarity(c(1,1,NA,NA,3,3,3),c(1,1,NA,NA,2,3,3))
##' similarity(c(1,1,NA,NA,3,3,3),c(1,1,NA,2,3,3,NA))
##'
similarity=function(a,b,na.panelty=0.1,diag.wt=0.1){
n=length(a)
stopifnot(n==length(b),na.panelty>=0,diag.wt>=0)
r1=sum(a==b,na.rm=TRUE)
r1top=sum(a[-1]==b[1:(n-1)],na.rm=TRUE)
r1bot=sum(a[1:(n-1)]==b[-1],na.rm=TRUE)
r2=sum(is.na(a) & is.na(b))
r3=sum(is.na(a) | is.na(b)) - r2
r4=n-r1-r2-r3
(r1+(r1top+r1bot)*diag.wt-(r2+r3)*na.panelty-r4*min(1,2*na.panelty))/n
}
##' Find the optimal matching position for two sequences
##'
##' 1. shift one sequence by one cell at a time, from
##' matching the bottom of the left sequence with the top of the right one, to
##' matching the top of the left sequence with the bottom of the right one.
##' 2. completely unmatched case, like a division line? Then we match
##' the center of the left sequence with the center of the right one.
##' 3. ties of the measurement? Add weights to the diagonal neighbors and
##' add panelty to the NA's, then measure the similarity again. If ties still
##' exist, then use the closest tie to the center of the two sequences.
##' 4. To match two sequences - "1,1,NA,NA,2,3,3" and "1,1,2,2,3,3,3",
##' the standard result
##' 1 1 2 2 2 3 3
##' 1 1 NA NA 3 3 3
##' is worse than that with a loose rule -
##' 1 1 2 2 2 3 3
##' 1 1 NA 2 3 3 NA
##'
##' @param a a numeric vector
##' @param b a numeric vector
##' @param a1loc the global location of the first element of "a".
##' @param na.loose logical. Whether to allow the number of NA's within a sequence changes.
##' @return the matched vectors and their position
##' @examples
##' local.matching(1:5,1:5,3)
##' local.matching(1:5,3:7,0)
##' local.matching(rep(1,7),rep(1,3),-3)
##' local.matching(rep(1,7),2:5,1)
##' local.matching(c(1,1,2,2,3,3,3),c(1,1,NA,NA,2,3,3),1,na=TRUE)
##' local.matching(c(1,1,NA,NA,3,3,3),c(1,1,NA,NA,2,3,3),1,na=TRUE)
##'
local.matching=function(a,b,a1loc,na.loose=FALSE){
n1=length(a)
n2=length(b)
s=rep(NA,n1+n2-1)
for (i in 1:length(s)){
a1=c(rep(NA,max(0,i-n1)),a,rep(NA,max(n2-i,0)))
b1=c(rep(NA,max(0,n1-i)),b,rep(NA,max(i-n2,0)))
s[i]=similarity(a1,b1)
}
opti=which(s==max(s))
if (length(opti)>1){
optis=rep(NA,length(opti))
for (i in 1:length(optis)){
a1=c(rep(NA,max(0,opti[i]-n1)),a,rep(NA,max(n2-opti[i],0)))
b1=c(rep(NA,max(0,n1-opti[i])),b,rep(NA,max(opti[i]-n2,0)))
optis[i]=similarity(a1,b1,na.panelty=1,diag.wt=1)
}
opti=opti[optis==max(optis)]
opti=opti[which.min(abs(opti-(n1+n2)/2))]
}
res.df=data.frame(a=c(rep(NA,max(0,opti-n1)),a,rep(NA,max(n2-opti,0))),
b=c(rep(NA,max(0,n1-opti)),b,rep(NA,max(opti-n2,0))),
loc=(a1loc-max(0,opti-n1)):(a1loc-1+n1+max(n2-opti,0)),
stringsAsFactors=FALSE)
res.bloc=res.df$loc[(1:n2)+max(0,n1-opti)]
if (na.loose) {
imp=nrow(res.df)*10
atmp=res.df$a
atmp[is.na(atmp)]=imp
arle=rle(atmp)
arle$values[arle$values==imp]=NA
btmp=res.df$b
btmp[is.na(btmp)]=imp
brle=rle(btmp)
brle$values[brle$values==imp]=NA
uniblen=length(brle$values)
bidx=which(is.na(brle$values[-c(1,uniblen)]))+1
if (bidxlen <- length(bidx) > 0) {
for (i in 1:bidxlen) {
bidxleft=sum(brle$lengths[1:(bidx[i]-1)])
bidxright=sum(brle$lengths[1:bidx[i]])+1
bleft=brle$values[bidx[i]-1]
bright=brle$values[bidx[i]+1]
aleft=which(arle$values==bleft)
aright=which(arle$values==bright)
if (length(aleft)>0 & length(aright)>0) {
aleft=aleft[which.min(abs(sapply(aleft,function(x){sum(arle$lengths[1:x])-bidxleft})))]
aright=aright[which.min(abs(sapply(aright,function(x){sum(arle$lengths[1:(x-1)])+1-bidxright})))]
if (aleft+1>=aright) {aNA=1} else {
aNA=sum(arle$lengths[(aleft+1):(aright-1)])
}
brle$lengths[bidx[i]]=aNA
}
}
if (is.na(brle$values[1])){
brle$values=brle$values[-1]
brle$lengths=brle$lengths[-1]
}
if (is.na(brle$values[uniblen])){
brle$values=brle$values[-uniblen]
brle$lengths=brle$lengths[-uniblen]
}
b2=inverse.rle(brle)
in.res=local.matching(a,b2,a1loc,na.loose=FALSE)
res.df=in.res$df
res.bloc=in.res$bloc
}
}
return(list(df=res.df,bloc=res.bloc))
}
##' Find the optimal matching position for the map
##'
##' @param df a data frame with x,y,and label
##' @param by If by='x' then the function will match the y/column sequences
##' in x/horizontal direction. If by='y' then will match the x/row sequences
##' in y/vertical direction.
##' @return a data frame of the column x(the new x coordinate),
##' y(the new y coordinate), and label. Column label should be the same as "df".
##'
global.matching=function(df,by,...){
stopifnot(by %in% c('x','y'), c('x','y') %in% names(df))
names(df)[which(!names(df)%in%c('x','y'))]='label'
df=df[order(df$x,df$y),]
x=sort(unique(df$x))
y=sort(unique(df$y))
res=df
if (by=='x'){ # then column 'x' and 'label' will not change
for (i in 2:length(x)){
x1=res[res$x==x[i-1],'label']
x2=res[res$x==x[i],'label']
res=res[res$x!=x[i],]
x2pos=local.matching(x1,x2,res$y[res$x==x[i-1]][1],...)$df[,2:3]
x2pos=x2pos[which.trim(x2pos$b),]
#res$y[res$x==x[i]]=x2pos
x2df=data.frame(x=x[i],y=x2pos$loc,label=x2pos$b,stringsAsFactors=FALSE)
res=rbind(res,x2df)
}
} else { # then column 'y' and 'label' will not change
for (j in 2:length(y)){
y1=res[res$y==y[j-1],'label']
y2=res[res$y==y[j],'label']
res=res[res$y!=y[j],]
y2pos=local.matching(y1,y2,res$x[res$y==y[j-1]][1],...)$df[,2:3]
y2pos=y2pos[which.trim(y2pos$b),]
#res$x[res$y==y[j]]=y2pos
y2df=data.frame(x=y2pos$loc,y=y[j],label=y2pos$b,stringsAsFactors=FALSE)
res=rbind(res,y2df)
}
}
res
}
##' Remove the NA's in the beginning and the end of a vector
##'
##' @param xvec a vector
##' @return the location from the first to the last non-NA element
##' @export
##' @examples
##' which.trim(c(NA,0,1,2,NA,7,NA,NA))
##' which.trim(rep(NA,5))
##'
which.trim=function(xvec){
a=which(!is.na(xvec))
if (length(a)>0) return(a[1]:a[length(a)]) else {return(1:length(xvec))}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.