#' Clean data for analysis
#'
#' First version of the cleaning routine for the Gurland data. The function
#' works in four steps:
#' \enumerate{
#'   \item Build the baseline list from the visits with \code{eval == 0} that
#'     satisfy the criterion of the chosen analysis (\code{mem01 > 0} for
#'     \code{"mem01"}; non-missing \code{s32} and \code{s33} for
#'     \code{"happiness"}).
#'   \item Keep only the rows of participants on the baseline list, and drop
#'     rows that fail the same criterion or have a missing \code{ccd01}.
#'   \item Keep only rows with \code{ccd01_eval0 == "CN"} (CN at baseline) and
#'     without missing values in the five \code{*_cleaned_eval0} variables.
#'   \item Convert \code{eval} to a factor and add one median-split factor per
#'     \code{*_cleaned_eval0} variable.
#' }
#'
#' @param dat Data frame. Must contain the columns \code{id_num}, \code{eval},
#'   \code{ccd01}, \code{ccd01_eval0}, \code{restrict_cleaned_eval0},
#'   \code{affsuff_cleaned_eval0}, \code{anergia_cleaned_eval0},
#'   \code{isolatio_cleaned_eval0} and \code{lonely_cleaned_eval0}, plus
#'   \code{mem01} for the \code{"mem01"} analysis or \code{s32} and \code{s33}
#'   for the \code{"happiness"} analysis.
#' @param analysis Single string naming the analysis to perform, either
#'   \code{"mem01"} (the default) or \code{"happiness"}. Any other value is
#'   an error.
#' @return An unnamed list of length two.
#'   The first element is the baseline list: a data frame with the columns
#'   \code{id_num} and \code{mem01} for \code{"mem01"}, or \code{id_num} only
#'   for \code{"happiness"}. It is built before the CN filter, so it may
#'   contain participants that are absent from the cleaned data.
#'   The second element is the cleaned data set, in which \code{eval} is a
#'   factor and the columns \code{isolatio_mediansplit},
#'   \code{lonely_mediansplit}, \code{suff_mediansplit},
#'   \code{aner_mediansplit} and \code{restric_mediansplit} are factors with
#'   the levels \code{"No"} (value at or below the median of the cleaned
#'   data) and \code{"Yes"} (value above the median).
#' @examples
#' visits <- data.frame(
#'   id_num = c(1, 1, 2, 2, 3, 3),
#'   eval = c(0, 1, 0, 1, 0, 1),
#'   mem01 = c(3, 4, 5, 2, -2, 1),
#'   ccd01 = "CN",
#'   ccd01_eval0 = "CN",
#'   restrict_cleaned_eval0 = c(1, 1, 3, 3, 2, 2),
#'   affsuff_cleaned_eval0 = c(2, 2, 1, 1, 4, 4),
#'   anergia_cleaned_eval0 = c(0, 0, 2, 2, 1, 1),
#'   isolatio_cleaned_eval0 = c(1, 1, 4, 4, 2, 2),
#'   lonely_cleaned_eval0 = c(5, 5, 3, 3, 1, 1)
#' )
#' cleaned <- clean_data(visits, analysis = "mem01")
#' cleaned[[1]] # baseline list: id_num 1 and 2
#' cleaned[[2]] # cleaned visits including the median-split columns
#' @author Jongwoo Choi, \email{jc4816@columbia.edu}
#' @references TBA
#' @keywords clean data analysis
#' @import dplyr
#' @import knitr
#' @export
clean_data <- function(dat, analysis = "mem01") {
  # Fail early with a clear message. Without this check an unknown `analysis`
  # skips both branches below and only fails at the very end because
  # `baselinelist` was never created.
  valid_analyses <- c("mem01", "happiness")
  if (!is.data.frame(dat)) {
    stop("`dat` must be a data frame.", call. = FALSE)
  }
  if (!is.character(analysis) || length(analysis) != 1L ||
      is.na(analysis) || !analysis %in% valid_analyses) {
    stop(
      "`analysis` must be one of: ",
      paste(sprintf("'%s'", valid_analyses), collapse = ", "),
      call. = FALSE
    )
  }

  # Median split shared by all life quality variables: values at or below
  # the median are labelled 'No', values above it 'Yes'.
  split_labels <- c("No", "Yes")
  median_split <- function(x) {
    # The median of an empty vector is NA, which makes cut() fail; an empty
    # cohort simply yields an empty factor with the usual levels.
    if (length(x) == 0L) {
      return(factor(character(0), levels = split_labels))
    }
    cut(x, breaks = c(-Inf, median(x), Inf), labels = split_labels)
  }

  if (analysis == "mem01") {
    # Baseline list: visits with eval == 0 and a positive mem01.
    baseline_visits <- dplyr::filter(
      dplyr::select(dat, id_num, mem01, eval),
      eval == 0 & mem01 > 0
    )
    baselinelist <- dplyr::select(baseline_visits, id_num, mem01)
    dat <- dat[dat$id_num %in% unique(baselinelist$id_num), ]
    # Original author's note: there were 286 of '-2', 26 of '-1' and 9 of '0'.
    dat <- dplyr::filter(dat, mem01 > 0 & !is.na(ccd01))
  } else {
    # analysis == "happiness"
    # Baseline list: visits with eval == 0 and both s32 and s33 observed.
    baseline_visits <- dplyr::filter(
      dplyr::select(dat, id_num, s32, s33, eval),
      eval == 0 & !is.na(s32) & !is.na(s33)
    )
    baselinelist <- dplyr::select(baseline_visits, id_num)
    dat <- dat[dat$id_num %in% unique(baselinelist$id_num), ]
    # Open question from the original author: can we remove NA values for
    # s32, s33?
    dat <- dplyr::filter(dat, !is.na(s32) & !is.na(s33) & !is.na(ccd01))
  }

  # Only CN at baseline; dement and NA are removed for the final cleaned
  # data (filter() also drops rows where the comparison yields NA).
  dat <- dplyr::filter(dat, ccd01_eval0 == "CN")
  dat <- dplyr::filter(
    dat,
    !is.na(restrict_cleaned_eval0) & !is.na(affsuff_cleaned_eval0) &
      !is.na(anergia_cleaned_eval0) & !is.na(isolatio_cleaned_eval0) &
      !is.na(lonely_cleaned_eval0)
  )

  # Use median split for life quality variables. Same for all analyses.
  dat.clean <- dplyr::mutate(
    dat,
    eval = factor(eval),
    isolatio_mediansplit = median_split(isolatio_cleaned_eval0),
    lonely_mediansplit = median_split(lonely_cleaned_eval0),
    suff_mediansplit = median_split(affsuff_cleaned_eval0),
    aner_mediansplit = median_split(anergia_cleaned_eval0),
    restric_mediansplit = median_split(restrict_cleaned_eval0)
  )

  list(baselinelist, dat.clean)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.