View source: R/BIFIE.data.transform.R
BIFIE.data.transform | R Documentation |
Data Transformation for BIFIEdata Objects
Computes a data transformation for BIFIEdata objects.
BIFIE.data.transform( bifieobj, transform.formula, varnames.new=NULL )
bifieobj |
Object of class BIFIEdata |
transform.formula |
R formula object for data transformation. |
varnames.new |
Optional vector of names for new defined variables. |
An object of class BIFIEdata. Additional values are
varnames.added |
Added variables in data transformation |
varsindex.added |
Indices of added variables |
# Example script for BIFIEsurvey::BIFIE.data.transform().
#
# Each transformation is specified as a one-sided R formula that is passed as
# `transform.formula`. New variables are either named through `varnames.new`
# or renamed afterwards via `bifieobj$varnames[ bifieobj$varsindex.added ]`.
# Everything between "## Not run:" and "## End(Not run)" is marked as not run
# in the package documentation.

library(miceadds)

#############################################################################
# EXAMPLE 1: Data transformations for TIMSS data
#############################################################################

data(data.timss2)
data(data.timssrep)

# create BIFIEdata object
bifieobj1 <- BIFIEsurvey::BIFIE.data( data.timss2, wgt=data.timss2[[1]]$TOTWGT,
                    wgtrep=data.timssrep[,-1] )
# create BIFIEdata object in compact way (cdata=TRUE)
bifieobj2 <- BIFIEsurvey::BIFIE.data( data.timss2, wgt=data.timss2[[1]]$TOTWGT,
                    wgtrep=data.timssrep[,-1], cdata=TRUE)

#****************************
#*** Transformation 1: Squared and cubic book variable
transform.formula <- ~ I( books^2 ) + I( books^3 )
# as.character(transform.formula)
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj1,
                    transform.formula=transform.formula)
bifieobj$variables
# rename added variables
bifieobj$varnames[ bifieobj$varsindex.added ] <- c("books_sq", "books_cub")
# check descriptive statistics
res1 <- BIFIEsurvey::BIFIE.univar( bifieobj, vars=c("books_sq", "books_cub" ) )
summary(res1)

## Not run:
#****************************
#*** Transformation 2: Create dummy variables for variable book
transform.formula <- ~ as.factor(books)
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=transform.formula )
  ##   Included 5 variables: as.factor(books)1 as.factor(books)2 as.factor(books)3
  ##   as.factor(books)4 as.factor(books)5
bifieobj$varnames[ bifieobj$varsindex.added ] <- paste0("books_D", 1:5)

#****************************
#*** Transformation 3: Discretized mathematics score
hi3a <- BIFIEsurvey::BIFIE.hist( bifieobj, vars="ASMMAT" )
plot(hi3a)
transform.formula <- ~ I( as.numeric(cut( ASMMAT, breaks=seq(200,800,100) )) )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=transform.formula, varnames.new="ASMMAT_discret")
hi3b <- BIFIEsurvey::BIFIE.hist( bifieobj, vars="ASMMAT_discret", breaks=1:7 )
plot(hi3b)
# check frequencies
fr3b <- BIFIEsurvey::BIFIE.freq( bifieobj, vars="ASMMAT_discret", se=FALSE )
summary(fr3b)

#****************************
#*** Transformation 4: include standardization variables for book variable

# start with testing the transformation function on a single dataset
dat1 <- bifieobj$dat1
stats::weighted.mean( dat1[,"books"], dat1[,"TOTWGT"], na.rm=TRUE)
sqrt( Hmisc::wtd.var( dat1[,"books"], dat1[,"TOTWGT"], na.rm=TRUE) )
# z standardization
transform.formula <- ~ I( ( books - weighted.mean( books, TOTWGT, na.rm=TRUE) )/
                            sqrt( Hmisc::wtd.var( books, TOTWGT, na.rm=TRUE) ))
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=transform.formula, varnames.new="z_books" )
# standardize variable books with M=500 and SD=100
transform.formula <- ~ I(
        500 + 100*( books - stats::weighted.mean( books, w=TOTWGT, na.rm=TRUE) ) /
                sqrt( Hmisc::wtd.var( books, weights=TOTWGT, na.rm=TRUE) ) )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=transform.formula, varnames.new="z500_books" )

# standardize variable books with respect to M and SD of ALL imputed datasets
res <- BIFIEsurvey::BIFIE.univar( bifieobj, vars="books" )
summary(res)
  ##       var  Nweight Ncases     M M_SE M_fmi M_VarMI M_VarRep    SD SD_SE SD_fmi
  ##   1 books 76588.72   4554 2.945 0.04     0       0    0.002 1.146 0.015      0
M <- round(res$output$mean1,5)
SD <- round(res$output$sd1,5)
# the pooled M and SD are pasted into the formula string as numeric constants
transform.formula <- paste0( " ~ I( ( books - ", M, " ) / ", SD, ")" )
  ##  > transform.formula
  ##  [1] " ~ I( ( books - 2.94496 ) / 1.14609)"
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=stats::as.formula(transform.formula),
                    varnames.new="zall_books" )

# check statistics
res4 <- BIFIEsurvey::BIFIE.univar( bifieobj,
                    vars=c("z_books", "z500_books", "zall_books") )
summary(res4)

#****************************
#*** Transformation 5: include rank transformation for variable ASMMAT

# calculate percentage ranks using wtd.rank function from Hmisc package
dat1 <- bifieobj$dat1
# the weights argument is spelled out instead of relying on partial matching
100 * Hmisc::wtd.rank( dat1[,"ASMMAT"], weights=dat1[,"TOTWGT"] ) / sum( dat1[,"TOTWGT"] )

# define an auxiliary function for calculating percentage ranks:
# 100 * weighted rank of x divided by the sum of the weights w
wtd.percrank <- function( x, w ){
    100 * Hmisc::wtd.rank( x, w, na.rm=TRUE ) / sum( w, na.rm=TRUE )
}
wtd.percrank( dat1[,"ASMMAT"], dat1[,"TOTWGT"] )

# define transformation formula
transform.formula <- ~ I( wtd.percrank( ASMMAT, TOTWGT ) )
# add ranks to BIFIEdata object
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=transform.formula, varnames.new="ASMMAT_rk")
# check statistic
res5 <- BIFIEsurvey::BIFIE.univar( bifieobj, vars=c("ASMMAT_rk" ) )
summary(res5)

#****************************
#*** Transformation 6: recode variable books

library(car)
# recode variable books according to "1,2=0, 3,4=1, 5=2"
dat1 <- bifieobj$dat1
# use Recode function from car package
car::Recode( dat1[,"books"], "1:2='0'; c(3,4)='1';5='2'")

# define transformation formula
transform.formula <- ~ I( car::Recode( books, "1:2='0'; c(3,4)='1';5='2'") )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=transform.formula, varnames.new="book_rec" )
res6 <- BIFIEsurvey::BIFIE.freq( bifieobj, vars=c("book_rec" ) )
summary(res6)

#****************************
#*** Transformation 7: include some variables aggregated to the school level

dat1 <- as.data.frame(bifieobj$dat1)

# at first, create school ID in the dataset by transforming the student ID
dat1$idschool <- as.numeric(substring( dat1$IDSTUD, 1, 5 ))
transform.formula <- ~ I( as.numeric( substring( IDSTUD, 1, 5 ) ) )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=transform.formula, varnames.new="idschool" )

#*** test function for a single dataset bifieobj$dat1
dat1 <- as.data.frame(bifieobj$dat1)
gm <- miceadds::GroupMean( data=dat1$ASMMAT, group=dat1$idschool, extend=TRUE)[,2]

# add school mean ASMMAT
tformula <- ~ I( miceadds::GroupMean( ASMMAT, group=idschool, extend=TRUE)[,2] )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=tformula, varnames.new="M_ASMMAT" )

# add within group centered mathematics values of ASMMAT
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=~ 0 + I( ASMMAT - M_ASMMAT ),
                    varnames.new="WC_ASMMAT" )

# add school mean books
# (computed with miceadds::GroupMean exactly as for M_ASMMAT above; the
#  previously used add.groupmean() is not defined anywhere in this script)
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
                    transform.formula=~ 0 + I(
                        miceadds::GroupMean( books, group=idschool, extend=TRUE)[,2] ),
                    varnames.new="M_books" )

#****************************
#*** Transformation 8: include fitted values and residuals from a linear model

# create new BIFIEdata object
data(data.timss1)
bifieobj3 <- BIFIEsurvey::BIFIE.data( data.timss1, wgt=data.timss1[[1]]$TOTWGT,
                    wgtrep=data.timssrep[,-1] )

# specify transformation
transform.formula <- ~ I( fitted( stats::lm( ASMMAT ~ migrant + female ) ) ) +
                         I( residuals( stats::lm( ASMMAT ~ migrant + female ) ) )
# Note that lm omits cases in regression by listwise deletion.

# add fitted values and residual to BIFIEdata object
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj3,
                    transform.formula=transform.formula )
bifieobj$varnames[ bifieobj$varsindex.added ] <- c("math_fitted1", "math_resid1")

#****************************
#*** Transformation 9: Including principal component scores in BIFIEdata object

# define auxiliary function for extracting PCA scores:
# runs a correlation-based PCA on the variables in `formula` and returns the
# scores of the first `Ncomp` components
BIFIE.princomp <- function( formula, Ncomp ){
    X <- stats::princomp( formula, cor=TRUE)
    Xp <- X$scores[, seq_len(Ncomp) ]
    return(Xp)
}

# define transformation formula
transform.formula <- ~ I( BIFIE.princomp( ~ migrant + female + books + lang + ASMMAT, 3 ))
# apply transformation
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj3,
                    transform.formula=transform.formula )
bifieobj$varnames[ bifieobj$varsindex.added ] <- c("pca_sc1", "pca_sc2","pca_sc3")
# check descriptive statistics
res9 <- BIFIEsurvey::BIFIE.univar( bifieobj, vars="pca_sc1", se=FALSE)
summary(res9)
res9$output$mean1M

# The transformation formula can also be conveniently generated by string operations
vars <- c("migrant", "female", "books", "lang" )
transform.formula2 <- stats::as.formula( paste0( "~ 0 + I ( BIFIE.princomp( ~ ",
                    paste0( vars, collapse="+" ), ", 3 ) )") )
  ##   > transform.formula2
  ##   ~0 + I(BIFIE.princomp(~migrant + female + books + lang, 3))

#****************************
#*** Transformation 10: Overwriting variables books and migrant

bifieobj4 <- BIFIEsurvey::BIFIE.data.transform( bifieobj3,
                    transform.formula=~ I( 1*(books >=1 ) ) + I(2*migrant),
                    varnames.new=c("books","migrant") )
summary(bifieobj4)

## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.