Nothing
## ---- include = FALSE---------------------------------------------------------
library(knitr)
library(prettydoc)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----contingency table--------------------------------------------------------
library(laeken)
library(sampling)
library(StratifiedSampling)
data("eusilc")
eusilc <- na.omit(eusilc)
N <- nrow(eusilc)
# Xm are the matching variables and id are identity of the units
Xm <- eusilc[,c("hsize","db040","age","rb090","pb220a")]
Xmcat <- do.call(cbind,apply(Xm[,c(2,4,5)],MARGIN = 2,FUN = disjunctive))
Xm <- cbind(Xmcat,Xm[,-c(2,4,5)])
id <- eusilc$rb030
# categorial income splitted by the percentile
c_income <- eusilc$eqIncome
q <- quantile(eusilc$eqIncome, probs = seq(0, 1, 0.15))
c_income[which(eusilc$eqIncome <= q[2])] <- "(0,15]"
c_income[which(q[2] < eusilc$eqIncome & eusilc$eqIncome <= q[3])] <- "(15,30]"
c_income[which(q[3] < eusilc$eqIncome & eusilc$eqIncome <= q[4])] <- "(30,45]"
c_income[which(q[4] < eusilc$eqIncome & eusilc$eqIncome <= q[5])] <- "(45,60]"
c_income[which(q[5] < eusilc$eqIncome & eusilc$eqIncome <= q[6])] <- "(60,75]"
c_income[which(q[6] < eusilc$eqIncome & eusilc$eqIncome <= q[7])] <- "(75,90]"
c_income[which( eusilc$eqIncome > q[7] )] <- "(90,100]"
# variable of interests
Y <- data.frame(ecostat = eusilc$pl030)
Z <- data.frame(c_income = c_income)
# put same rownames
rownames(Xm) <- rownames(Y) <- rownames(Z)<- id
YZ <- table(cbind(Y,Z))
addmargins(YZ)
## ----sample_scheme------------------------------------------------------------
# size of sample
n1 <- 1000
n2 <- 500
# samples
s1 <- srswor(n1,N)
s2 <- srswor(n2,N)
# extract matching units
X1 <- Xm[s1 == 1,]
X2 <- Xm[s2 == 1,]
# extract variable of interest
Y1 <- data.frame(Y[s1 == 1,])
colnames(Y1) <- colnames(Y)
Z2 <- as.data.frame(Z[s2 == 1,])
colnames(Z2) <- colnames(Z)
# extract correct identities
id1 <- id[s1 == 1]
id2 <- id[s2 == 1]
# put correct rownames
rownames(Y1) <- id1
rownames(Z2) <- id2
# here weights are inverse of inclusion probabilities
d1 <- rep(N/n1,n1)
d2 <- rep(N/n2,n2)
# disjunctive form
Y_dis <- sampling::disjunctive(as.matrix(Y))
Z_dis <- sampling::disjunctive(as.matrix(Z))
Y1_dis <- Y_dis[s1 ==1,]
Z2_dis <- Z_dis[s2 ==1,]
## ----harm---------------------------------------------------------------------
re <- harmonize(X1,d1,id1,X2,d2,id2)
# if we want to use the population totals to harmonize we can use
re <- harmonize(X1,d1,id1,X2,d2,id2,totals = c(N,colSums(Xm)))
w1 <- re$w1
w2 <- re$w2
colSums(Xm)
colSums(w1*X1)
colSums(w2*X2)
## ----OT-----------------------------------------------------------------------
# Optimal transport matching
object <- otmatch(X1,id1,X2,id2,w1,w2)
head(object[,1:3])
Y1_ot <- cbind(X1[as.character(object$id1),],y = Y1[as.character(object$id1),])
Z2_ot <- cbind(X2[as.character(object$id2),],z = Z2[as.character(object$id2),])
YZ_ot <- tapply(object$weight,list(Y1_ot$y,Z2_ot$z),sum)
# transform NA into 0
YZ_ot[is.na(YZ_ot)] <- 0
# result
round(addmargins(YZ_ot),3)
## ----BS-----------------------------------------------------------------------
# Balanced Sampling
BS <- bsmatch(object)
head(BS$object[,1:3])
Y1_bs <- cbind(X1[as.character(BS$object$id1),],y = Y1[as.character(BS$object$id1),])
Z2_bs <- cbind(X2[as.character(BS$object$id2),],z = Z2[as.character(BS$object$id2),])
YZ_bs <- tapply(BS$object$weight/BS$q,list(Y1_bs$y,Z2_bs$z),sum)
YZ_bs[is.na(YZ_bs)] <- 0
round(addmargins(YZ_bs),3)
# With Z2 as auxiliary information for stratified balanced sampling.
BS <- bsmatch(object,Z2)
Y1_bs <- cbind(X1[as.character(BS$object$id1),],y = Y1[as.character(BS$object$id1),])
Z2_bs <- cbind(X2[as.character(BS$object$id2),],z = Z2[as.character(BS$object$id2),])
YZ_bs <- tapply(BS$object$weight/BS$q,list(Y1_bs$y,Z2_bs$z),sum)
YZ_bs[is.na(YZ_bs)] <- 0
round(addmargins(YZ_bs),3)
## -----------------------------------------------------------------------------
# split the weight by id1
q_l <- split(object$weight,f = object$id1)
# normalize in each id1
q_l <- lapply(q_l, function(x){x/sum(x)})
q <- as.numeric(do.call(c,q_l))
Z_pred <- t(q*disjunctive(object$id1))%*%disjunctive(Z2[as.character(object$id2),])
colnames(Z_pred) <- levels(factor(Z2$c_income))
head(Z_pred)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.