resampleT_f: resample when synthetisation created incoherent high- and low-level aggregates

Description Usage Arguments Details Examples

View source: R/resampleT_f.R

Description

Resample rows for which synthesis produced incoherent high-level and low-level aggregates.

Usage

1
resampleT_f(.data, variables, verbose = FALSE)

Arguments

.data

data frame to resample

variables

list of variable name roots

verbose

(default FALSE) if TRUE, the formulae used to compute the new variables are printed.

Details

When a marginal presence indicator equals 1 but all the corresponding cell presence indicators were synthesised to 0, the presence indicators and the other variables are resampled from synthetic units with coherent values.

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
library(BigSyn)
library(reshape2)
library(data.table)
data(TtableA, package = "BigSyn")

# Variable name roots handed to augmentT_f() here and, further down, to
# resampleT_f() and reduceT_f().
variablepct <- "AA.cont1"
variablespct <- variablepct
variablemax <- "AA.present"
variablesmax <- variablemax
ATtableA <- augmentT_f(
  TtableA,
  variablesmax = variablesmax,
  variablespct = variablespct
)

# Seed is set right before the synthesis step (presumably SDPSYN2 draws
# random values -- confirm against the BigSyn documentation).
set.seed(1)
SATtableA <- BigSyn::SDPSYN2(ATtableA, asis = c("id1a", "id1b"))[[1]]

# Rows flagged as incoherent: the marginal AA.cont1_Lb_La is strictly positive
# and not missing, while each of the three cells Lrn1..Lrn3 is 0 or missing.
problems <- SATtableA$AA.cont1_Lb_La > 0 & !is.na(SATtableA$AA.cont1_Lb_La) &
  ((SATtableA$AA.cont1_Lb_La_Lrn1 == 0 | is.na(SATtableA$AA.cont1_Lb_La_Lrn1)) &
    (SATtableA$AA.cont1_Lb_La_Lrn2 == 0 | is.na(SATtableA$AA.cont1_Lb_La_Lrn2)) &
    (SATtableA$AA.cont1_Lb_La_Lrn3 == 0 | is.na(SATtableA$AA.cont1_Lb_La_Lrn3)))

# Cell columns, plus whatever get_missingind() / get_presentind() return for
# the cells and the marginal among the columns of SATtableA.
varcell <- c(
  "AA.cont1_Lb_La_Lrn1",
  "AA.cont1_Lb_La_Lrn2",
  "AA.cont1_Lb_La_Lrn3"
)
varcellandpresenceind <- unlist(c(
  varcell,
  get_missingind(c(varcell, "AA.cont1_Lb_La"), names(SATtableA)),
  get_presentind(c(varcell, "AA.cont1_Lb_La"), names(SATtableA))
))

# Same condition on the marginal, but with the cell condition negated: at
# least one cell is neither 0 nor missing.
replacements <- SATtableA$AA.cont1_Lb_La > 0 &
  !is.na(SATtableA$AA.cont1_Lb_La) &
  !((SATtableA$AA.cont1_Lb_La_Lrn1 == 0 | is.na(SATtableA$AA.cont1_Lb_La_Lrn1)) &
    (SATtableA$AA.cont1_Lb_La_Lrn2 == 0 | is.na(SATtableA$AA.cont1_Lb_La_Lrn2)) &
    (SATtableA$AA.cont1_Lb_La_Lrn3 == 0 | is.na(SATtableA$AA.cont1_Lb_La_Lrn3)))

# First three flagged rows and first three "replacement" rows before resampling.
SATtableA[problems, c("AA.cont1_Lb_La", varcellandpresenceind)][1:3, ]
SATtableA[replacements, c("AA.cont1_Lb_La", varcellandpresenceind)][1:3, ]

# Resample, then look at the same (previously flagged) rows again.
CSATtableA <- resampleT_f(SATtableA, variablespct)
CSATtableA[problems, c("AA.cont1_Lb_La", varcellandpresenceind)][1:3, ]

# Re-evaluate the incoherence condition on the resampled table.
problems2 <- CSATtableA$AA.cont1_Lb_La > 0 & !is.na(CSATtableA$AA.cont1_Lb_La) &
  ((CSATtableA$AA.cont1_Lb_La_Lrn1 == 0 | is.na(CSATtableA$AA.cont1_Lb_La_Lrn1)) &
    (CSATtableA$AA.cont1_Lb_La_Lrn2 == 0 | is.na(CSATtableA$AA.cont1_Lb_La_Lrn2)) &
    (CSATtableA$AA.cont1_Lb_La_Lrn3 == 0 | is.na(CSATtableA$AA.cont1_Lb_La_Lrn3)))
any(problems2)
sum(problems2)

# Reduce the resampled table; only the columns still present in the reduced
# table are displayed.
RCSATtableA <- reduceT_f(CSATtableA, variablespct)
RCSATtableA[
  problems,
  intersect(c("AA.cont1_Lb_La", varcellandpresenceind), names(RCSATtableA))
][1:3, ]

# Re-evaluate the incoherence condition on the reduced table.
problems3 <- RCSATtableA$AA.cont1_Lb_La > 0 &
  !is.na(RCSATtableA$AA.cont1_Lb_La) &
  ((RCSATtableA$AA.cont1_Lb_La_Lrn1 == 0 | is.na(RCSATtableA$AA.cont1_Lb_La_Lrn1)) &
    (RCSATtableA$AA.cont1_Lb_La_Lrn2 == 0 | is.na(RCSATtableA$AA.cont1_Lb_La_Lrn2)) &
    (RCSATtableA$AA.cont1_Lb_La_Lrn3 == 0 | is.na(RCSATtableA$AA.cont1_Lb_La_Lrn3)))
any(problems3)
sum(problems3)

# Stack the rows still flagged after reduction, taken from the reduced (RCSA),
# resampled (CSA) and synthetic (SA) tables, restricted to shared columns.
AA <- rbind(
  RCSATtableA[
    problems3,
    intersect(c("AA.cont1_Lb_La", varcellandpresenceind), names(RCSATtableA))
  ],
  CSATtableA[
    problems3,
    intersect(c("AA.cont1_Lb_La", varcellandpresenceind), names(RCSATtableA))
  ],
  SATtableA[
    problems3,
    intersect(c("AA.cont1_Lb_La", varcellandpresenceind), names(RCSATtableA))
  ]
)

AA$y <- rep(c("RCSA", "CSA", "SA"), each = sum(problems3))
# seq_len() rather than 1:sum(problems3): when no row is flagged, 1:0 is
# c(1, 0), which would give a length-6 vector for a zero-row table and make
# the assignment fail.
AA$x <- rep(seq_len(sum(problems3)), 3)
# Order by row index so the three versions of each row sit next to each other.
AA[order(AA$x), ]
library(ggplot2);library(dplyr)
# Plot the missingness pattern of the "present" columns of a table.
#
# x: a table whose column names are searched for the substring "present".
# The matching columns are taken in sorted name order, every value equal to 0
# is recoded to NA, and the result is passed to StudyDataTools::ggplot_missing().
xx <- function(x) {
  present_cols <- grep("present", names(x), value = TRUE)
  indicators <- x[sort(present_cols)]
  # Recode 0 as NA so that ggplot_missing() displays those entries as missing.
  indicators[indicators == 0] <- NA
  StudyDataTools::ggplot_missing(indicators)
}

# Same plot for the augmented, synthetic, resampled and reduced tables.
xx(ATtableA)
xx(SATtableA)
xx(CSATtableA)
xx(RCSATtableA)

DanielBonnery/BigSyn documentation built on June 28, 2020, 7:18 p.m.