data_prep <- function(input_tsv, roi_start, roi_end, sep='\t')
{
### This function is to do the required data preprocessing from the targeted tsv file for dti statistics in prevdemals.
## Param:
# input_tsv: str, the path to the tsv file, the tsv file's first 8 columns should be 'participant_id', 'session_id', 'prevdemals_family_code', 'group', 'age_bl', 'sex', and later columns should be the ROIs
# roi_start
#NOTE, should be careful with the seperator of the tsv file and xlsx file, also the decimal in the tsv file.
input_tsv <- read.table(input_tsv, sep = sep, header = T, dec = ',')
#### Check out the two files contens are consitent
ses_info <- table(input_tsv$session_id)
cat("The session info comparison: ", ses_info, '\n')
print(ses_info)
cat('\n')
group_info <- table(input_tsv$group)
cat("The group info comparison: ")
print(group_info)
cat('\n')
age_info <- summary(as.numeric(as.character(input_tsv$age_bl)))
cat("The age_bl info: ")
print(age_info)
cat('\n')
summary(as.numeric(as.character(input_tsv$expected_years_aoo_bl)))
sex_info <- table(input_tsv$sex)
cat("The sex info: ")
print(sex_info)
cat('\n')
### checkout dimension
input_tsv_dim <- dim(input_tsv)
cat("The source data tsv dimension is: ")
print(input_tsv_dim)
cat('\n')
## simplifier les noms de participant_id
input_tsv$participant_id <- gsub('sub-PREVDEMALS', '', input_tsv$participant_id)
length(unique( input_tsv$participant_id))
length(unique(substr(input_tsv$participant_id, 6, 9)))
input_tsv$participant_id <- substr(input_tsv$participant_id, 6, 9)
### make sure the numerical input_tsv to be numerical
input_tsv[, setdiff(colnames(input_tsv), c("participant_id", 'prevdemals_family_code', "session_id", "group", "sex"))] <- apply(input_tsv[, setdiff(colnames(input_tsv), c("participant_id", 'prevdemals_family_code', "session_id", "group", "sex"))], 2, function(x)as.numeric(as.character(x))) # 2 means columns to run
## Suppression des colonnes non utiles
data_analyse <- input_tsv[, c(which(colnames(input_tsv) %in% c('participant_id', 'prevdemals_family_code', 'group', 'age_bl', 'sex')), match(roi_start, colnames(input_tsv)):match(roi_end, colnames(input_tsv)))]
## Make sure all the categorical factor to be character
data_analyse[, c("participant_id", 'prevdemals_family_code', "group", "sex")] <- apply(data_analyse[, c("participant_id", 'prevdemals_family_code', "group", "sex")], 2, as.character)
###############
#########Make sure there is no NA in the data
#################
NA_res <- apply(data_analyse, 2, function(x) sum(is.na(x)))# 0 OK
cat("Note, important, check out the number of every column, make sure it is 0: ")
print(NA_res)
cat('\n')
nom_ROIs_volume <- setdiff(colnames(data_analyse), c("participant_id", 'prevdemals_family_code', "group", "age_bl", "sex"))
###Redefine the data_analyses
data_analyse <- data_analyse[, c(c("participant_id", 'prevdemals_family_code', "group", "age_bl", "sex"), nom_ROIs_volume)]
###Compute the mean and std of the CN, PS, PT.
data_analyse_CN <- data_analyse[which(data_analyse$group %in% c('CN')), ]
data_analyse_PS <- data_analyse[which(data_analyse$group %in% c('PS')), ]
data_analyse_PT <- data_analyse[which(data_analyse$group %in% c('PT')), ]
data_analyse_CN_age_mean <- mean(data_analyse_CN[, c("age_bl")])
data_analyse_CN_age_std <- sd(data_analyse_CN[, c("age_bl")])
cat("The mean and stand deviation of CN: ")
print(data_analyse_CN_age_mean)
print(data_analyse_CN_age_std)
cat('\n')
sex_info_CN <- table(data_analyse_CN$sex)
cat("The sex info for CN: ")
print(sex_info_CN)
cat('\n')
data_analyse_PS_age_mean <- mean(data_analyse_PS[, c("age_bl")])
data_analyse_PS_age_std <- sd(data_analyse_PS[, c("age_bl")])
cat("The mean and stand deviation of PS: ")
print(data_analyse_PS_age_mean)
print(data_analyse_PS_age_std)
cat('\n')
sex_info_PS <- table(data_analyse_PS$sex)
cat("The sex info for PS: ")
print(sex_info_PS)
cat('\n')
data_analyse_PT_age_mean <- mean(data_analyse_PT[, c("age_bl")])
data_analyse_PT_age_std <- sd(data_analyse_PT[, c("age_bl")])
cat("The mean and stand deviation of PT: ")
print(data_analyse_PT_age_mean)
print(data_analyse_PT_age_std)
cat('\n')
sex_info_PT <- table(data_analyse_PT$sex)
cat("The sex info for PT: ")
print(sex_info_PT)
cat('\n')
return(list(data_analyse, nom_ROIs_volume))
}
general_linear_model_without_interaction_for_es <- function(data, effets, ROIs, model = 'fixe', arrondi = 10, correction = 'BH'){
if(model == 'fixe'){
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lm(as.formula(paste(var, paste(effets, collapse = ' + '), sep = ' ~ ')), data = data)
results <- cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
model_low <- lm(as.formula(paste(var, paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), sep = ' ~ ')), data = data)
coef <- format_arrondi(coef(model_complet)[which(regexpr(e, names(coef(model_complet))) != (-1))], arrondi)
res <- data.frame(data.frame(t(coef)),
f2cohen = format_arrondi(cohenD_fixed_model(model_complet, model_low), 2),
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chi)"]][2],
# pvalueWALDconsist = waldtest(model_low, model_complet, vcov = vcov_consist)$"Pr(>F)"[2],
pvalue_mult = NA)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}else{
##### mixed Model
results_tableau <- do.call('rbind', lapply(ROIs, function(var){
model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(effets, collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
cbind(data.frame(var = gsub('_', '.', gsub('.cortex', '', gsub('_volume', '', gsub('.TIV', '', gsub('Left', 'L', gsub('Right', 'R', var))))))),
do.call('cbind', lapply(effets, function(e){
### This here is to define the full model and reduced model, here, really just consider the interaction's effect when creating the reduced model
### This is actually for model construction with interaction.
if(any(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1)) & regexpr(':', e) == (-1))
model_complet <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets) != (-1) & regexpr(':', effets) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
## This is the reduced_model without the var
model_low <- lmer(as.formula(paste(var, ' ~ ', paste(setdiff(effets, effets[which(regexpr(e, effets, fixed = T) != (-1))]), collapse = ' + '), ' + (1|prevdemals_family_code)', sep = '')), data = data)
### This is for interaction
if(regexpr(':', e) != (-1)){ coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1))], arrondi)}
## This is for without interaction
else{coef <- format_arrondi(fixef(model_complet)[which(regexpr(e, names(fixef(model_complet))) != (-1) & regexpr(':', names(fixef(model_complet))) == (-1))], arrondi)}
res <- data.frame(data.frame(t(coef)),
f2cohenCondition = format_arrondi(cohenf2_mixed_model(model_complet, model_low, 2), 2),
f2cohenMarginal = format_arrondi(cohenf2_mixed_model(model_complet, model_low, 1), 2),
R2CompletCondition = r.squaredGLMM(model_complet)[2],
R2LowContdition = r.squaredGLMM(model_low)[2],
R2CompletMarginal = r.squaredGLMM(model_complet)[1],
R2LowMarginal = r.squaredGLMM(model_low)[1],
pvalue = anova(model_complet, model_low, test='LRT')[["Pr(>Chisq)"]][2],
pvalue_mult = NA
)
colnames(res) <- paste(e, colnames(res), sep='_')
return(res)
})))
}))
}
#### multiple correction for the pvalue
#for(col in colnames(results_tableau)[which(regexpr('_mult', colnames(results_tableau)) != (-1))]){
# results_tableau[, col] <- p.adjust(results_tableau[, gsub('_mult', '', col), ], 'BH', nrow(results_tableau))
#}
############ format the result tabel
#results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] <- apply(results_tableau[, which(regexpr('pvalue', colnames(results_tableau)) != (-1))] , 2, function(x) format_pvalue(x))
return(results_tableau)
}
# Create a function to round the numerical data
format_arrondi <- function(x, arrondi = 2)format(round(x, arrondi), nsmall=arrondi, scientific=TRUE)
# According to Cohen’s (1988) guidelines, f2f2≥ 0.02, f2f2≥ 0.15, and f2f2 ≥ 0.35 represent small, medium, and large effect sizes, respectively.
cohenf2_mixed_model <- function(model_complet, model_emboite, r_type){
#### r_type 1 represents the marginal r2, 2 is the conditional r2
cohenf = (r.squaredGLMM(model_complet)[r_type]-r.squaredGLMM(model_emboite)[r_type])/(1- r.squaredGLMM(model_complet)[r_type])
return(cohenf[1])
}
f.format_pvalue <- function(pvalue, n.arrondi = 3, alpha = 0.05){
gsub(' ', '', ifelse(pvalue < alpha,
paste(ifelse(round(pvalue, n.arrondi) == 0,
paste('<0.', paste(rep('0', n.arrondi-1), collapse=''), '1', sep= ''),
f.format_arrondi(pvalue, n.arrondi)), '*', sep=''),
f.format_arrondi(pvalue, n.arrondi)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.