#' check rawdata for consistence
#'
#' @param Input Input table
#' @param required.colnames colnames, which are required
#' @return throw out an error if other colnames are needed
#' @export check.colnames.FUN
check.colnames.FUN <- function(Input, # = Config$info.table,
required.colnames){# = required.colnames.config.file
if(any(!(required.colnames %in% colnames(Input)))){
stop(paste0("Unknown column names in info file detected. The exspected column names are: ", paste(required.colnames, collapse =", "),"."))
}
}
############################################################################
#' grouping into subsets
#' @description This function creates a list of separate data frames for each group and timepoint. By default no further arguments are necessary, as all input variables have pre-defined values.
#' @importFrom magrittr %<>%
#' @importFrom magrittr %>%
#' @importFrom dplyr rename_at
#' @importFrom dplyr select
#' @importFrom rlang .data
#' @param Input data frame that contains the complete data
#' @param Timepoint.names names of timepoints
#' @param Controls name of control group
#' @param Patients names of treatment groups
#' @param names.col names of all parameter
#' @return list of separate data frames for each group and timepoint.
#' @export grouping.subsets.FUN
grouping.subsets.FUN = function(
Input, #= Config$CogDat$Table,
Timepoint.names, #= Config$parameter$names.Timepoints,
Controls, #= Config$parameter$names.Group$Control,
Patients, #= Config$parameter$names.Group$Patients,
names.col) #= Config$parameter$colnames)
#names.CG.cleaning = Config$parameter$names.CG.cleaning,
#names.NCD = Config$parameter$names.NCD)
{
CTPs = list()
for (i in Timepoint.names) {
CTPs[[paste0("Patients.", i)]] = Input %>% subset(Group %in% Patients) %>%
dplyr::select(.data$SubjectID, .data$Group, grep( i, names.col, value = T))
}
for (i in Timepoint.names) {
CTPs[[paste0("Controls.", i)]] = Input %>% subset(Group %in% Controls) %>%
dplyr::select(.data$SubjectID, .data$Group, grep( i, names.col, value = T))
}
return(CTPs)
}
#############################################################################
#' logarythmic transformation of CTP data
#'
#' @param Input list of data frames containing the CTP information
#' @param CTP.names names of CTP tests
#' @param transformation info which columns should be transform
#' @return list of dataframes with logarythmic transformed CTP values
#' @export grouping.subsets.FUN
transformation.FUN <- function(Input, #= Config$CogDat$CTPs.control.clean,
CTP.names, #= Config$parameter$colnames.CTPs,
transformation) #= Config$parameter$CTP.transformation
{
CTPs <- Input
rownames(CTPs) <- Input$SubjectID
df <- CTPs[colnames(Input) %in% CTP.names]
# CTPs <- CTPs[rowSums(is.na(df)) != ncol(df), ]
CTPs[ , sapply(rownames(transformation)[transformation[1] == TRUE], function(x) grep(x, colnames(CTPs)))] <-
apply(CTPs[sapply(rownames(transformation)[transformation[1] == TRUE], function(x) grep(x, colnames(CTPs)))], 2,log)
return(CTPs)
}
#######################################################################################
#' remove problematic controls group measures
#'
#' @importFrom magrittr %<>%
#' @importFrom magrittr %>%
#' @importFrom dplyr rename_at
#' @importFrom dplyr select
#' @param Input list of data frames with CTPs and cleaning variable
#' @param patterns string pattern, which match with problematic measures
#' @param CG.cleaning name of variable coding for reason not to use
#' @return list of data frames with problematic control group measures removed
#' @export cleaning.cg.FUN
cleaning.cg.FUN = function(Input,# = Config$CogDat$CTPs.raw,
patterns = c("do not use"),
CG.cleaning)# = Config$parameter$colnames.Comment)
{
CTPs = Input
for (i in names(CTPs)) {
df = CTPs[[i]]
if(length(grep(patterns, df[,colnames(df) %in% CG.cleaning])) > 0){
CTPs[[i]] = df[-grep(patterns, df[,colnames(df) %in% CG.cleaning]), ]
} else{
CTPs[[i]] = df
}
}
for (i in grep( "Patients",names(CTPs), value = T)[-1]) {
CTPs[[i]] = CTPs[[i]][CTPs[[i]]$SubjectID %in% CTPs$Patients.T0$"SubjectID", ]
}
for (i in grep( "Controls",names(CTPs), value = T)[-1]) {
CTPs[[i]] = CTPs[[i]][CTPs[[i]]$SubjectID %in% CTPs$Controls.T0$"SubjectID", ]
}
return(CTPs)
}
#######################################################################################
#' replace missings due to physical impairment with worst performance
#'
#' @importFrom magrittr %<>%
#' @importFrom magrittr %>%
#' @importFrom dplyr rename_at
#' @importFrom dplyr select
#' @param Input ist of data frames with CTPs and information about health condition
#' @param CTP.names names of CTP tests
#' @param pattern string pattern, which match with problematic measures
#' @param polarity information about the polarity of the CTP tests
#' @return list of data frames with worst performanced imputated values
#' @export replace.worst.FUN
replace.worst.FUN = function(Input,# = Config$CogDat$CTPs.control.clean,
CTP.names,# = Config$parameter$colnames.CTPs,
polarity,# = Config$parameter$PoledVars,
pattern = "worst performance")
{
replace.FUN = function(In){
CTPs <- In
df = In[colnames(In) %in% CTP.names]
# get performance range per CTP
performance.range = apply(df, 2, range, na.rm = TRUE)
# define worst performance per CTP
worst.performance = performance.range[1, ]
worst.performance[polarity] = performance.range[2, polarity]
# replace missings due to physical impairment with worst performance
for (i in colnames(df)) {
CTPs[,i] <- ifelse(In[grep("Comment", colnames(In))] == pattern & is.na(In[,i]),
worst.performance[i], In[,i])
colnames(CTPs[,i]) <- i
}
CTPs <- as.data.frame.matrix(CTPs)
return(CTPs)
}
CTPs.worst <- lapply(Input, replace.FUN)
return(CTPs.worst)
}
##########################################################################
#' random forest imputation - version 1
#'
#' @importFrom magrittr %<>%
#' @importFrom magrittr %>%
#' @importFrom dplyr rename_at
#' @importFrom dplyr select
#' @param Input list of data frames with CTPs and cleaning variable
#' @param CTP.names names of CTP tests
#' @param seed set seed for reproducible results
#' @return list of data frames with rendom forest imputed values
#' @export random_forest.FUN
random_forest.FUN = function(Input,# = Config$CogDat$CTPs.participate,
seed = 145,
CTP.names)# = Config$parameter$colnames.CTPs)
{
CTPs <- Input
#sort data according to SubjectID
CTPs <- lapply(CTPs, function(i) i[order(i$SubjectID,decreasing = F),])
CTPs <- lapply(CTPs, function(x) x[!is.na(x$SubjectID),])
CTPs.red <- lapply(CTPs, function(x) {data.frame(x[c(colnames(x) %in% CTP.names)], row.names = x$SubjectID)})
#check if CTPs are numeric or integer and convert integer to factor; necessary for randomforest imputation
# function to check if Colum is numeric or integer
is.integer.FUN = function(x) {all(x == round(x), na.rm = T)}
CTPs.red.missforest <- lapply(CTPs.red, function(df){
check.int <- apply(df, 2,FUN = is.integer.FUN)
df[,check.int] <- lapply(df[,check.int] , factor)
if(seed != 0 ){ set.seed(seed)}
rf <- missForest::missForest(df, variablewise = TRUE)
# take only imputed data frames without MSE
rf <- rf$ximp
#convert all columns to numeric
rf[,1:length(rf)] <- lapply(rf, as.character)
rf[,1:length(rf)] <- lapply(rf, as.numeric)
rf$SubjectID = rownames(rf)
return(rf)
})
for (i in grep("Patients",names(CTPs.red.missforest), value = T)[-1]) {
CTPs.red.missforest[[i]] = CTPs.red.missforest[[i]][CTPs.red.missforest[[i]]$SubjectID %in% CTPs.red.missforest$Patients.T0$"SubjectID", ]
}
for (i in grep("Controls",names(CTPs.red.missforest), value = T)[-1]) {
CTPs.red.missforest[[i]] = CTPs.red.missforest[[i]][CTPs.red.missforest[[i]]$SubjectID %in% CTPs.red.missforest$Controls.T0$"SubjectID", ]
}
CTPs.red.missforest <- lapply(CTPs.red.missforest, function(x) {
row.names(x) <- x$SubjectID
x[,-grep("SubjectID", colnames(x))]})
return(CTPs.red.missforest)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.