inst/doc/SamplingStrata.R

## ----setup, include = FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
options(width = 999)
knitr::opts_chunk$set(fig.width=6, fig.height=4,
  collapse = TRUE,
  comment = "#>"
)

## ----load, eval=TRUE, echo=FALSE, include=FALSE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
load("SamplingStrata_vignette.RData")

## ----swiss, message = FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
library(SamplingStrata)
data(swissmunicipalities)
swissmun <- swissmunicipalities[swissmunicipalities$REG < 4,
                                c("REG","COM","Nom","HApoly",
                                  "Surfacesbois","Surfacescult",
                                  "Airbat","POPTOT")]
head(swissmun)

## ----correlation---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
cor(swissmun[,c(4:8)])

## ----categorize----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
swissmun$HApoly.cat <- var.bin(swissmun$HApoly,15)
table(swissmun$HApoly.cat)
swissmun$POPTOT.cat <- var.bin(swissmun$POPTOT,15)
table(swissmun$POPTOT.cat)

## ----buildframe1, eval=TRUE, echo=TRUE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
frame1 <- buildFrameDF(df = swissmun,
                           id = "COM",
                           X = c("POPTOT.cat","HApoly.cat"),
                           Y = c("Airbat","Surfacesbois"),
                           domainvalue = "REG")

head(frame1)

## ----buildstrata1, eval=TRUE, echo=TRUE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
strata1 <- buildStrataDF(frame1, progress=F)
head(strata1)

## ----cv, eval=TRUE, echo=TRUE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
ndom <- length(unique(swissmun$REG))
cv <- as.data.frame(list(DOM=rep("DOM1",ndom),
                         CV1=rep(0.10,ndom),
                         CV2=rep(0.10,ndom),
                         domainvalue=c(1:ndom) ))
cv

## ----check, eval=TRUE, echo=TRUE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
checkInput(errors = checkInput(errors = cv, 
                               strata = strata1, 
                               sampframe = frame1))

## ----bethel, eval=TRUE, echo=TRUE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
allocation <- bethel(strata1,cv[1,])
sum(allocation)

## ----optim1, eval=FALSE, echo=TRUE, warning=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  set.seed(1234)
#  solution1 <- optimStrata(method = "atomic",
#                          errors = cv,
#                          nStrata = rep(10,ndom),
#                          framesamp = frame1,
#                          iter = 50,
#                          pops = 10)
#  # Number of strata:  350
#  # ... of which with only one unit:  130
#  #  *** Starting parallel optimization for  3  domains using  3  cores
#  #   |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=07s
#  #
#  #  *** Sample size :  306
#  #  *** Number of strata :  22
#  # ---------------------------

## ----optim1b, eval=TRUE, echo=TRUE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
expected_CV(solution1$aggr_strata)

## ----frame2, eval=TRUE, echo=TRUE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
swissmun$progr <- c(1:nrow(swissmun))
frame2 <- buildFrameDF(df = swissmun,
                       id = "COM",
                       X = "progr",
                       Y = c("Airbat","Surfacesbois"),
                       domainvalue = "REG")
head(frame2)

## ----initsol, eval=FALSE, results= FALSE, echo=TRUE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  strata2 <- buildStrataDF(frame2, progress=F)
#  initial_solution2 <- KmeansSolution(strata = strata2,
#                                      errors = cv,
#                                      maxclusters = 10)

## ----nstrata, eval=TRUE, echo=TRUE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
nstrata2 <- tapply(initial_solution2$suggestions,
                 initial_solution2$domainvalue,
                 FUN=function(x) length(unique(x)))
nstrata2

## ----optim2, eval=FALSE, echo=TRUE, warning=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  set.seed(1234)
#  solution2 <- optimStrata(method = "atomic",
#                          errors = cv,
#                          framesamp = frame2,
#                          iter = 50,
#                          pops = 10,
#                          nStrata = nstrata2,
#                          suggestions = initial_solution2)
#  
#  # Number of strata:  1823
#  # ... of which with only one unit:  1823
#  #  *** Starting parallel optimization for  3  domains using  3  cores
#  #   |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=08s
#  #
#  #  *** Sample size :  164
#  #  *** Number of strata :  28
#  # ---------------------------

## ----optim2a, eval=TRUE, echo=TRUE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
outstrata2 <- solution2$aggr_strata
expected_CV(outstrata2)

## ----buildframe3, eval=TRUE, echo=TRUE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
frame3 <- buildFrameDF(df = swissmun,
                           id = "COM",
                           X = c("POPTOT","HApoly"),
                           Y = c("Airbat","Surfacesbois"),
                           domainvalue = "REG")

head(frame3)

## ----init_sol3, eval=FALSE, echo=TRUE, results= FALSE, warning=FALSE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  set.seed(1234)
#  init_sol3 <- KmeansSolution2(frame=frame3,
#                               errors=cv,
#                               maxclusters = 10)

## ----init_sol3a, eval=FALSE, echo=TRUE, results= FALSE, warning=FALSE----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  nstrata3 <- tapply(init_sol3$suggestions,
#                   init_sol3$domainvalue,
#                   FUN=function(x) length(unique(x)))
#  nstrata3
#  #  1  2  3
#  # 10 10  9

## ----init_sol4, eval=FALSE, echo=TRUE, warning=FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  initial_solution3 <- prepareSuggestion(init_sol3,frame3,nstrata3)

## ----optim3, eval=FALSE, echo=TRUE, warning=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  set.seed(1234)
#  solution3 <- optimStrata(method = "continuous",
#                          errors = cv,
#                          framesamp = frame3,
#                          iter = 50,
#                          pops = 10,
#                          nStrata = nstrata3,
#                          suggestions = initial_solution3)
#  
#  #  *** Starting parallel optimization for  3  domains using  3  cores
#  #   |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s
#  #
#  #  *** Sample size :  93
#  #  *** Number of strata :  26
#  # ---------------------------
#  

## ----ss, eval=TRUE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
strataStructure <- summaryStrata(solution3$framenew,
                                 solution3$aggr_strata,
                                 progress=FALSE)
head(strataStructure)

## ----plot2d, eval=TRUE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
plotStrata2d(solution3$framenew, 
             solution3$aggr_strata,
             domain = 3, 
             vars = c("X1","X2"),
             labels = c("Total Population","Total Area"))

## ----evaluate, eval=TRUE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
eval3 <- evalSolution(frame = solution3$framenew, 
                     outstrata = solution3$aggr_strata, 
                     nsampl = 200,
                     progress = FALSE) 

## ----evaluate2, eval=T---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
eval3$coeff_var
eval3$rel_bias

## ----distrib, eval=T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
dom = 1
hist(eval3$est$Y1[eval3$est$dom == dom], col = "grey", border = "white",
     xlab = expression(hat(Y)[1]),
     freq = FALSE, 
     main = paste("Variable Y1 Domain ",dom,sep=""))
abline(v = mean(eval3$est$Y1[eval3$est$dom == dom]), col = "blue", lwd = 2)
abline(v = mean(frame3$Y1[frame3$domainvalue==dom]), col = "red")
legend("topright", c("distribution mean", "true value"),
       lty = 1, col = c("blue", "red"), box.col = NA, cex = 0.8)

## ----adjust1, eval=T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
adjustedStrata <- adjustSize(size=75,strata=solution3$aggr_strata,cens=NULL)

## ----adjust2, eval=T-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
adjustedStrata <- adjustSize(size=150,strata=solution3$aggr_strata,cens=NULL)

## ----checkCV, eval=F-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  expected_CV(adjustedStrata)
#  #      cv(Y1) cv(Y2)
#  # DOM1  0.079  0.079
#  # DOM2  0.079  0.082
#  # DOM3  0.078  0.079

## ----selectSample, eval=T------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
framenew3 <- solution3$framenew
outstrata3 <- solution3$aggr_strata
sample <- selectSample(framenew3, 
                       outstrata3,
                       writeFiles = TRUE)

## ----selectSystematic, eval=TRUE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# adding POPTOT to framenew
data("swissmunicipalities")
framenew <- merge(solution3$framenew,
                  swissmunicipalities[,c("COM","Airind")],
                  by.x=c("ID"),by.y=c("COM"))
# selection of sample with systematic method
sample <- selectSampleSystematic(frame=framenew,
                                 outstrata=solution3$aggr_strata,
                                 sortvariable = c("Airind"))

## ----takeall1, eval=T----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#----Selection of units to be censused from the frame
ind_framecens <- which(frame3$X1 > 10000)
framecens <- frame3[ind_framecens,]
nrow(framecens)
#----Selection of units to be sampled from the frame
# (complement to the previous)
framesamp <- frame3[-ind_framecens,]
nrow(framesamp)

## ----optim4, eval=FALSE, echo=TRUE, warning=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#  set.seed(1234)
#  solution4 <- optimStrata(method = "continuous",
#                          errors = cv,
#                          framesamp = framesamp,
#                          framecens = framecens,
#                          iter = 50,
#                          pops = 10,
#                          nStrata = c(10,10,10))

## ----optim4a, eval=TRUE, echo=TRUE, warning=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
outstrata4 <- solution4$aggr_strata
sum(outstrata4$SOLUZ)
expected_CV(outstrata4)

## ----select2, eval=TRUE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
sample <- selectSample(frame=solution4$framenew,
                       outstrata=solution4$aggr_strata)

## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
sum(framecens$id %in% sample$ID)

Try the SamplingStrata package in your browser

Any scripts or data that you put into this service are public.

SamplingStrata documentation built on Nov. 16, 2022, 1:08 a.m.