SDPSYN2: General SDP function.

Description Usage Arguments Details Examples

View source: R/SDPSyn.R

Description

General SDP function.

Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Usage of SDPSYN2 with its default argument values. According to the Details
# section, the function does both the fitting and the sampling.
SDPSYN2(
  # Data frame to synthesize.
  TtableA,
  # Names of the TtableA variables to keep as is (not synthesized).
  asis = NULL,
  # Names of the variables that must not be used as predictors;
  # defaults to the value of `asis`.
  notpredictor = asis,
  # Number of synthetic replicates wanted.
  nrep = 1,
  # General synthetisation parameters.
  synparameters = NULL,
  # Variable-by-variable synthetisation parameters. By default they are built by
  # Sparameters.default.f with preferredmethod = "ctree"; the entries of
  # `synparameters` come first, followed by those entries of
  # Sparameters.default.f's own `defaultsynparameters` default whose names are
  # not already present in `synparameters` (the "" in setdiff() drops the
  # unnamed first element of the unevaluated default expression).
  Sparameters = Sparameters.default.f(ref.table = TtableA, asis = asis, notpredictor =
    notpredictor, preferredmethod = "ctree", defaultsynparameters =
    c(as.list(synparameters),
    eval(formals(Sparameters.default.f)$defaultsynparameters)[setdiff(names(formals(Sparameters.default.f)$defaultsynparameters),
    c("", names(synparameters)))])),
  # A data frame. When `asis` is NULL the default is a single column `.n`
  # holding `nrep` once per row of TtableA; otherwise it is built with
  # plyr::ddply over data.frame(.n = nrep), returning TtableA[asis] per group.
  # NOTE(review): presumably the starting table that the synthetic variables
  # are added to -- confirm against R/SDPSyn.R.
  STtableA = if (is.null(asis)) {     data.frame(.n = rep(nrep, each = nrow(TtableA))) }
    else {     plyr::ddply(data.frame(.n = nrep), ~.n, function(d) {         TtableA[asis]
        }) },
  # Path where the fitted models are saved (NULL by default).
  fitmodelsavepath = NULL,
  # Path where the tree plots are saved (NULL by default).
  treeplotsavefolder = NULL,
  # Path where the sampling report is saved (NULL by default).
  samplereportsavepath = NULL,
  # Path used to back up the synthetised data in case of a crash.
  stepbystepsavepath = NULL,
  # Whether sampling is done in parallel for each replicate.
  doparallel = TRUE,
  # Character vector of variables to be recoded, or NULL.
  recode = NULL,
  # Integer: a backup is done every `saveeach` variables.
  saveeach = 200,
  # Whether variables are fitted in random order or in order of appearance.
  randomfitorder = TRUE,
  # If TRUE, no sampling is done.
  fitonly = FALSE
)

Arguments

TtableA

a dataframe to synthesize

asis

list of variable names from TtableA to keep as is (i.e. not to synthesize)

notpredictor

list of variable names which should not be used as predictors

nrep

number of synthetic replicates wanted

synparameters

general synthetisation parameters

Sparameters

a list, Specific (variable by variable) synthetisation parameters, splits ...

STtableA

a dataframe

fitmodelsavepath

a path where to save the fitted models

treeplotsavefolder

a path where to save the tree plots

samplereportsavepath

a path where to save the sampling report

stepbystepsavepath

a path where the synthetised data is backed up in case of a crash

doparallel

a boolean indicating whether sampling should be done in parallel for each replicate

recode

a vector of character strings or NULL: the list of variables to be recoded

saveeach

an integer, indicating every how many variables a backup is done

randomfitorder

a boolean: fitting can be done variable by variable either in the order in which the variables appear or in random order

fitonly

a boolean, if TRUE, no sampling is done.

Details

This function does both the fitting and the sampling.

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Example 1: synthesize the augmented example table and compare a few
# synthetic columns with the corresponding input columns.
data(TtableA, package = "BigSyn")
ATtableA <- augmentT_f(TtableA,
                       variablesmax = "AA.present",
                       variablespct = "AA.cont1")

# Workspace copies of the other SDPSYN2 arguments. They are NOT passed to the
# SDPSYN2 call below, which relies on the function's own defaults for them.
asis <- NULL
notpredictor <- asis
nrep <- 1
synparameters <- NULL
Sparameters <-
  Sparameters.default.f(
    ref.table = TtableA,
    asis = asis,
    notpredictor = notpredictor,
    preferredmethod = "ctree",
    defaultsynparameters =
      c(as.list(synparameters),
        eval(formals(Sparameters.default.f)$defaultsynparameters)[
          setdiff(names(formals(Sparameters.default.f)$defaultsynparameters),
                  c("", names(synparameters)))]))
SATtableA <- plyr::rdply(nrep, ATtableA[asis])
samplereportsavepath <- NULL
stepbystepsavepath <- NULL
doparallel <- FALSE
recode <- NULL
randomfitorder <- TRUE
fitonly <- FALSE

# Fitted models and tree plots are written to the session temporary directory.
fitmodelsavepath <- tempdir()
treeplotsavefolder <- tempdir()
# Remove the files already present in the temporary directory.
# file.remove() is vectorised, so no sapply() is needed; invisible() avoids
# printing the vector of results.
invisible(file.remove(list.files(tempdir(), full.names = TRUE)))

SATtableA <- SDPSYN2(ATtableA,
                     asis = NULL,
                     fitmodelsavepath = fitmodelsavepath,
                     treeplotsavefolder = treeplotsavefolder)

# The result was stored in SATtableA (the original example read an undefined
# object `STtableA` here). Its first element is the synthetic data frame.
todisplay <- grep("La_La_Lrn1", names(SATtableA[[1]]), value = TRUE)
# First rows of the synthetic columns, then of the same columns in ATtableA,
# the table that was actually passed to SDPSYN2.
SATtableA[[1]][1:3, todisplay]
ATtableA[1:3, todisplay]
##############################################################
# Example 2: check that AA.present_Lb = 0 => AA.present_Lb_La = 0 holds in the
# synthetic data.
library(BigSyn)
library(reshape2)
library(data.table)
data(TtableA, package = "BigSyn")

# Variables handed to augmentT_f.
variablemax <- "AA.present"
variablesmax <- variablemax
variablepct <- "AA.cont1"
variablespct <- variablepct

set.seed(1)

# Workspace copies of SDPSYN2 arguments; only `asis` matches what is passed to
# the SDPSYN2 call below (as a literal), the others are left to its defaults.
asis <- c("id1a", "id1b")
fitmodelsavepath <- NULL
treeplotsavefolder <- NULL
samplereportsavepath <- NULL
stepbystepsavepath <- NULL
doparallel <- TRUE
recode <- NULL
saveeach <- 200
randomfitorder <- TRUE
fitonly <- FALSE

ATtableA <- augmentT_f(TtableA,
                       variablesmax = variablesmax,
                       variablespct = variablespct)
# From here on TtableA refers to the augmented table.
TtableA <- ATtableA
STtableA <- ATtableA[asis]

# Default variable-by-variable parameters, kept to inspect one entry below.
Sparameters <- Sparameters.default.f(
  ref.table = ATtableA,
  asis = c("id1a", "id1b"),
  notpredictor = NULL,
  preferredmethod = "ctree",
  defaultsynparameters =
    eval(formals(Sparameters.default.f)$defaultsynparameters)
)

# Keep only the first element of the result: the synthetic data frame.
SATtableA <- BigSyn::SDPSYN2(ATtableA, asis = c("id1a", "id1b"))[[1]]

# A row is a problem when AA.present_Lb_La is 1 although AA.present_Lb is 0.
problems <- SATtableA$AA.present_Lb_La == 1 & SATtableA$AA.present_Lb == 0
# Share of rows violating the constraint.
mean(problems)
Sparameters[["AA.present_Lb_La"]]

library(dplyr)
library(ggplot2)
# Plot the missingness pattern of the "present" columns of a data frame.
#
# x: a data frame. The columns whose names contain "present" are selected in
#    sorted name order, their zeros are replaced by NA, and the result is
#    handed to StudyDataTools::ggplot_missing().
# Returns whatever StudyDataTools::ggplot_missing() returns.
xx <- function(x) {
  present_cols <- sort(grep("present", names(x), value = TRUE))
  indicators <- x[present_cols]
  # Zeros are turned into NA so that they show up as "missing" in the plot.
  indicators[indicators == 0] <- NA
  StudyDataTools::ggplot_missing(indicators)
}
# Same plot for the augmented input table and for the synthetic table: xx()
# turns the zeros of the "present" columns into NA before plotting them.
xx(ATtableA)
xx(SATtableA)

DanielBonnery/BigSyn documentation built on June 28, 2020, 7:18 p.m.