regressions <-
function(data=NULL, modele=NULL, Y=NULL, X_a=NULL, X_i=NULL, outlier=.dico[["txt_complete_dataset"]], inf=F, CV=F, select.m="none", method="p", step=NULL, group=NULL, criteria=0.15 , scale=T, dial=T, info=T,
sauvegarde=F, n.boot=NULL, param="param", rscale=0.353, html=TRUE){
regressions.in<-function(data=NULL, modele=NULL, Y=NULL, X_a=NULL, X_i=NULL, outlier=NULL, inf=F, CV=F, select.m="none", method="p", step=NULL, group=NULL, criteria=NULL , scale=T, dial=T, info=T,
sauvegarde=F, n.boot=NULL, param=NULL, rscale=0.353){
options (warn=-1)
Resultats<-list()
if(is.null(data) | is.null(modele)) {dial<-TRUE}else dial<-F
data<-choix.data(data=data, info=info, nom=T)
if(length(data)==0) return(NULL)
nom<-data[[1]]
data<-data[[2]]
if(dial && is.null(modele)){
if(info) writeLines(.dico[["ask_chose_relation_between_vars_regressions_log"]])
dlgList(c(.dico[["txt_additive_effects"]], .dico[["txt_interaction_effects"]], .dico[["txt_specify_model"]]), preselect=.dico[["txt_regressions"]], multiple = TRUE, title=.dico[["ask_which_regression_type"]])$res->link
if(length(link)==0) return(NULL) } else link<-"none"
if(length(Y)>1){
msgBox(.dico[["desc_only_one_dependant_variable_alllowed"]])
Y<-NULL }
if(any(link %in% c(.dico[["txt_additive_effects"]], .dico[["txt_interaction_effects"]]))){
msg3<-.dico[["ask_chose_dependant_variable"]]
Y<-.var.type(X=Y, info=info, data=data, type="numeric", check.prod=F, message=msg3, multiple=FALSE, title=.dico[["txt_dependant_variable"]], out=NULL)
if(is.null(Y)) {
regressions.in()->Resultats
return(Resultats)}
data<-Y$data
Y<-Y$X
if(any(link==.dico[["txt_additive_effects"]]) || !is.null(X_a)| any(X_a %in% names(data)==F)) {
msg3<-.dico[["ask_chose_dependant_variable"]]
X_a<-.var.type(X=Y, info=info, data=data, type=NULL, check.prod=F, message=msg3, multiple=TRUE, title=.dico[["txt_additive_model_variables"]], out=Y)
if(is.null(X_a)) {
regressions.in()->Resultats
return(Resultats)}
data<-X_a$data
X_a<-X_a$X
}else X_a<-NULL
if(any(link==.dico[["txt_interaction_effects"]]) || !is.null(X_i) & (length(X_i)<2 | any(X_i %in% names(data)==F))) {
msg3<-.dico[["ask_chose_interaction_model_predictors"]]
X_i<-c()
while(length(X_i)<2){
X_i<-.var.type(X=Y, info=info, data=data, type=NULL, check.prod=F, message=msg3, multiple=TRUE, title=.dico[["txt_interactive_model_variables"]], out=c(X_a,Y))
if(is.null(X_i)) {
regressions.in()->Resultats
return(Resultats)}
data<-X_i$data
X_i<-X_i$X
}
}else X_i<-NULL
paste0(Y," ~ ")->modele
if(!is.null(X_a )) {
X_a.mod<-X_a[1]
if(length(X_a)>1) for(i in 2 : length(X_a)) paste0(X_a.mod, "+", X_a[i])-> X_a.mod
} else X_a.mod<-NULL
if(!is.null(X_i)){
X_i.mod<-X_i[1]
if(length(X_i)>1) for(i in 2 : length(X_i)) paste0(X_i.mod, "*", X_i[i])-> X_i.mod
} else X_i.mod<-NULL
if(!is.null(X_a.mod) & !is.null(X_i.mod)) {
paste0(modele, X_a.mod, "+", X_i.mod)->modele
} else paste0(modele, X_a.mod, X_i.mod)->modele
}
if(any(link==.dico[["txt_specify_model"]])) {
if(is.null(modele)) modele<-" "
modele<-fix(modele)}
modele<-as.formula(modele)
variables<-terms(modele)
variables<-as.character( attributes(variables)$variables)[-1]
model.test<-try(model.matrix(modele, data), silent=T)
if(any(class(model.test)=='try-error')) {
msgBox(.dico[["desc_incorrect_model"]])
return(regressions.in())
}
data[complete.cases(data[,variables]),]->data
msg.options1<-.dico[["desc_param_test_is_classical_reg_robusts_are_m_estimator"]]
options<-.ez.options(options=c('choix',"outlier"), n.boot=n.boot,param=T, non.param=F, robust=T, Bayes=T, msg.options1=msg.options1, msg.options2=msg.options2, info=info, dial=dial,
choix=param,sauvegarde=sauvegarde, outlier=outlier, rscale=rscale)
if(is.null(options)) return(regressions.in())
reg.options<- .regressions.options(data=data, modele=modele, CV=CV, inf=inf, select.m=select.m, method=method, criteria=criteria, step=step, group=group, scale=scale, dial=dial,info=info)
if(is.null(reg.options)) return(regressions.in())
Resultats$data<-data
Resultats$nom<-nom
Resultats$modele<-modele
Resultats$options<-options
Resultats$reg.options<-reg.options
return(Resultats)
}
regressions.out<-function(dtrgeasieR=NULL, modele=NULL, VC=F, select.m="none", method=NULL, step=NULL, group=NULL, criteria=NULL , scale=T,
sauvegarde=F, n.boot=NULL, param=NULL, rscale=0.353){
Resultats<-list()
dtrgeasieR<<-dtrgeasieR
variables<-terms(as.formula(modele))
variables<-as.character( attributes(variables)$variables)[-1]
pred<-attributes(terms(as.formula(modele)))$term.labels
Resultats[[.dico[["txt_descriptive_statistics"]]]]<-.stat.desc.out(X=variables, groupes=NULL, data=dtrgeasieR, tr=.1, type=3, plot=T)
if(scale==T || scale==.dico[["txt_center"]]) {
Resultats$info<-.dico[["desc_centered_data_schielzeth_recommandations"]]
if(length(pred)>1) { which(!sapply(dtrgeasieR[,pred[which(pred %in% variables)]],class)%in%c("factor", "character"))->centre
centre<-pred[centre]}else{centre<-NULL}
if(!is.null(centre)){
if(length(centre)==1) dtrgeasieR[,centre]-mean(dtrgeasieR[,centre],na.rm=T)->dtrgeasieR[,centre] else{
sapply(X=dtrgeasieR[,centre], fun<-function(X){X-mean(X, na.rm=T)})->dtrgeasieR[,centre]
}
}
}
mod<-list()
modele1<-as.formula(paste0(variables[1], "~", pred[1]))
lm( modele1,na.action=na.exclude, data=dtrgeasieR)->lm.r1
lm.r1->mod[[1]]
if(length(pred)>1) {
for(i in 2:length(pred)){update(lm.r1, as.formula(paste0(".~.+",pred[i])))->lm.r1
lm.r1->mod[[i]]}
}
assign("lm.r1",lm.r1, env= .GlobalEnv)
resid(lm.r1)->dtrgeasieR$residu
Resultats[[.dico[["txt_normality_tests"]]]]<-.normalite(data=dtrgeasieR, X='residu', Y=NULL)
if(length(pred)>1) {
cont<-variables[which(!sapply(dtrgeasieR[,variables],class)%in%c("factor","character"))]
Resultats[[.dico[["txt_multivariate_normality"]]]]<-.normalite(data=dtrgeasieR, X=cont, Y=NULL)
ols_plot_resid_fit(lm.r1)
FIV<-ols_coll_diag(lm.r1) # calcul du facteur d inflation de la variance
names(FIV$vif_t)<-c(.dico[["txt_variables"]], .dico[["txt_tolerance"]], .dico[["txt_VIF"]])
Resultats[[.dico[["txt_multicolinearity_tests"]]]]<-FIV$vif_t
if(any(FIV$`Test de multicolinearite`$Tolerance==0)) {
msgBox(.dico[["desc_instable_model_high_multicolinearity"]])
return(Resultats)
}
Resultats[[.dico[["txt_linearity_graph_between_predictors_and_dependant_variable"]]]]<-ols_plot_comp_plus_resid(lm.r1)
Resultats[[.dico[["txt_proper_values_index"]]]]<-FIV$eig_cindex
dwt(lm.r1, simulate=TRUE, method= "normal", reps=500)->DWT.results
DWT.results<-round(data.frame(txt_autocorrelation=DWT.results[[1]],
txt_dw_statistic=DWT.results[[2]],txt_p_dot_val=DWT.results[[3]]),4)
names(DWT.results)<-c(.dico[["txt_autocorrelation"]],.dico[["txt_dw_statistic"]],.dico[["txt_p_dot_val"]] )
Resultats[[.dico[["txt_durbin_watson_test_autocorr"]]]]<-DWT.results
var.err<-ols_test_breusch_pagan(lm.r1, rhs=T)
Resultats[[.dico[["txt_breusch_pagan_test"]]]]<-data.frame(chi=var.err$bp,
ddl=length(var.err$preds), valeur.p=var.err$p)
try(ceresPlots(lm.r1, main=.dico[["txt_ceres_graph_linearity"]]), silent=T)
}
if(select.m!="none"){
if(select.m == .dico[["txt_forward_step_ascending"]] | select.m =="forward") select.m<-"Forward"
if(select.m == .dico[["txt_backward_step_descending"]] | select.m =="backward") select.m<-.dico[["txt_backward"]]
if(select.m == .dico[["txt_bidirectionnal"]] | select.m == "bidirectional") select.m<-"Both"
if(method %in% c("F", .dico[["txt_f_value"]], "p", .dico[["txt_probability_value"]])){
if(select.m=="Forward") ols.out <- ols_step_forward_p(lm.r1,penter = criteria, details=F)
if(select.m=="Backward") ols.out <- ols_step_backward_p(lm.r1, prem=criteria, details=F)
if(select.m=="Both") ols.out <- ols_step_both_p(lm.r1,pent=criteria, details=F)
}
if(method %in% c(.dico[["txt_aic_criterion"]],"AIC")){
if(select.m=="Forward") ols.out <- ols_step_forward_aic(lm.r1, details=F)
if(select.m=="Backward") ols.out <- ols_step_backward_aic(lm.r1, details=F)
if(select.m=="Both") ols.out <- ols_step_both_aic(lm.r1, details=F)
}
ols.frame<-data.frame(ols.out$metrics)
reg.out<-ols_regress(ols.out$model)
c(summary(ols.out$model)$sigma, summary(ols.out$model)$r.squared, summary(ols.out$model)$fstatistic)->significativite_modele # fournit les residus, le R.deux et le F
pf(summary(ols.out$model)$fstatistic[1], summary(ols.out$model)$fstatistic[2],summary(ols.out$model)$fstatistic[3], lower.tail=F)->p.value #permet de savoir si le F est significatif
c(significativite_modele , p.value)->modele.F # on combine les precedents
modele.F<-round(modele.F,3) # on arrondit les nombres a la 3e decimale
names(modele.F)<-c(.dico[["txt_residual_error"]], .dico[["txt_r_dot_two"]], "F",
.dico[["txt_df_parenthesis_num"]], .dico[["txt_df_parenthesis_denom"]],.dico[["txt_p_dot_val"]])# attribue le nom aux colonne
coef.table<-data.frame(b = round(reg.out$betas, 3),
"Erreur Std."= format(round(reg.out$std_errors, 3)),
"Beta" = c(" ", round(reg.out$sbetas, 3)),
t=round(reg.out$tvalues, 3),
valeur.p =round(reg.out$pvalues, 3),
lower=round(reg.out$conf_lm[, 1], 3),
upper =round(reg.out$conf_lm[, 2], 3))
names(coef.table)<-c("b",.dico[["txt_error_dot_standard"]],"beta","t",
.dico[["txt_p_dot_val"]],.dico[["txt_ci_inferior_limit_dot"]], .dico[["txt_ci_superior_limit_dot"]])
select.name<-paste0(.dico[["txt_selection_method"]],"-", method)
Resultats$selection$Selection<-ols.frame
Resultats$selection$ANOVA<-modele.F
Resultats$selection[[.dico[["txt_coeff_table"]]]]<-coef.table
names(Resultats)[length(Resultats)]<-select.name
if(any(param=="Bayes")|any(param==.dico[["txt_bayesian_factors"]])){
BF.out<-try(regressionBF(modele, data=dtrgeasieR,progress=F, rscaleCont=rscale), silent=T)
if(class(BF.out)!='try-error') {
try(plot(BF.out) , silent=T)
BF.out<-extractBF(BF.out)
BF.out<-head(BF.out[order(BF.out[,1], decreasing=T), ])
BF.out<-BF.out[,1:2]
Resultats[[.dico[["txt_selection_method_bayesian_factor"]]]]<-BF.out
} else Resultats[[.dico[["txt_selection_method_bayesian_factor"]]]]<-.dico[["desc_selection_for_bayesian_factor_does_not_apply_to_complex_models"]]
}
# rm( "dtrgeasieR", envir = .GlobalEnv)
}
if(!is.null(step)){
as.formula(paste0(variables[1]," ~ ",step[[1]][1]))->modele.H
list()->modele.H1
list()->formule.H1
for(i in 1:length(step)){
for(j in 1:length(step[[i]])){update(modele.H, as.formula(paste0(".~. + ",step[[i]][j])))->modele.H}
formule.H1[[i]]<-modele.H
lm(modele.H, data=dtrgeasieR, na.action=na.exclude )->lm.H
lm.H->modele.H1[[i]]}
if(any(param=="param")|any(param==.dico[["txt_param_tests"]])) {
hier<-paste0("anova(modele.H1[[1]],modele.H1[[2]]")
if(length(modele.H1)>2){
for(i in 3: length(modele.H1)){
hier<-paste0(hier, ",modele.H1[[", i, "]]")
}
}
hier<-paste0(hier,")")
hier<-eval(parse(text=hier))
attributes(hier)$heading[1]<-.dico[["txt_hierarchical_models_variance_analysis_table"]]
names(hier)<-c(.dico[["txt_df_residual"]], "SC.resid",.dico[["txt_df_effect"]], "SC", "F", .dico[["txt_p_dot_val"]])
Resultats[[.dico[["txt_hierarchical_model_analysis"]]]]<-hier
c(summary(modele.H1[[1]])$sigma, summary(modele.H1[[1]])$r.squared, summary(modele.H1[[1]])$fstatistic)->significativite_modele # fournit les residus, le R.deux et le F
pf(summary(modele.H1[[1]])$fstatistic[1], summary(modele.H1[[1]])$fstatistic[2],summary(modele.H1[[1]])$fstatistic[3], lower.tail=F)->p.value #permet de savoir si le F est significatif
c(significativite_modele , p.value)->modele_avec_outliers
for(i in 2:(length(modele.H1))){
c(summary(modele.H1[[i]])$sigma, summary(modele.H1[[i]])$r.squared, summary(modele.H1[[i]])$fstatistic)->significativite_modele # fournit les residus, le R.deux et le F
pf(summary(modele.H1[[i]])$fstatistic[1], summary(modele.H1[[i]])$fstatistic[2],summary(modele.H1[[i]])$fstatistic[3], lower.tail=F)->valeur.p #permet de savoir si le F est significatif
rbind(modele_avec_outliers, c(significativite_modele ,valeur.p))->modele_avec_outliers
}
round(modele_avec_outliers,3)->modele_avec_outliers
c(.dico[["txt_residual_error"]], .dico[["txt_r_dot_two"]], "F", .dico[["txt_df_parenthesis_1"]], .dico[["txt_df_parenthesis_2"]],.dico[["txt_p_dot_val"]])->dimnames(modele_avec_outliers)[[2]]
paste(.dico[["txt_step"]], 1:length(modele_avec_outliers[,1]))->dimnames(modele_avec_outliers)[[1]]
Resultats[[.dico[["txt_hierarchical_models_complete_model_sig_at_each_step"]]]]<-modele_avec_outliers
}
if(any(param=="Bayes")|any(param==.dico[["txt_bayesian_factors"]])) {
BF<-lmBF(formula= as.formula(formule.H1[[1]]), data=dtrgeasieR, rscaleFixed=rscale)
BF.modele<-extractBF(BF, onlybf=T)
BF.hier<-c(NA)
for(i in 2:length(formule.H1)){
numBF<-lmBF(formula= as.formula(formule.H1[[i]]), data=dtrgeasieR, rscaleFixed=rscale)
BF.modele<-c(BF.modele, extractBF(numBF, onlybf=T))
denomBF<-lmBF(formula= as.formula(formule.H1[[i-1]]), data=dtrgeasieR, rscaleFixed=rscale)
OddBF<-numBF/denomBF
BF.hier<-c(BF.hier, extractBF(OddBF, onlybf=T))}
BF.hier<-data.frame(desc_fb_ratio_between_models=BF.hier, txt_bayesian_factor_of_model= BF.modele)
dimnames(BF.hier)[[1]]<- unlist(as.character(formule.H1))
Resultats[[.dico[["txt_bayesian_approach_hierarchical_models"]]]]<-BF.hier
}
}
# .dico[["txt_param_test"]], .dico[["txt_non_param_test"]],.dico[["txt_robusts_tests_with_bootstraps"]], .dico[["txt_bayesian_factors"]]
if(any(param=="param")|any(param==.dico[["txt_param_tests"]])) {
significativite_modele <-c(summary(lm.r1)$sigma,
summary(lm.r1)$r.squared,
summary(lm.r1)$fstatistic)# fournit les residus, le R.deux et le F
p.value<-pf(summary(lm.r1)$fstatistic[1], summary(lm.r1)$fstatistic[2],summary(lm.r1)$fstatistic[3], lower.tail=F)#permet de savoir si le F est significatif
c(significativite_modele , p.value)->modele.F # on combine les precedents
round(modele.F,3)->modele.F # on arrondit les nombres a la 3e decimale
c(.dico[["txt_residual_error"]], .dico[["txt_r_dot_two"]], "F", .dico[["txt_df_parenthesis_num"]], .dico[["txt_df_parenthesis_denom"]],.dico[["txt_p_dot_val"]])->names(modele.F)# attribue le nom aux colonnes
modele.F->Resultats$"Estimation du modele global"
reg.out<-ols_regress(lm.r1)
coef.table<-data.frame(b = round(reg.out$betas, 3),
"Erreur Std."= format(round(reg.out$std_errors, 3)),
"Beta" = c(" ", round(reg.out$sbetas, 3)),
t=round(reg.out$tvalues, 3),
valeur.p =round(reg.out$pvalues, 3),
lower=round(reg.out$conf_lm[, 1], 3),
upper =round(reg.out$conf_lm[, 2], 3))
names(coef.table)<-c("b",.dico[["txt_error_dot_standard"]],"beta","t",
.dico[["txt_p_dot_val"]],.dico[["txt_ci_inferior_limit_dot"]], .dico[["txt_ci_superior_limit_dot"]])
if(length(pred)>1){
ols.corr<-try(ols_correlations(lm.r1), silent=T)
if(any(class(ols.corr)!='try-error')){
Resultats[[.dico[["txt_variables_contribution_to_model"]]]]<-ols.corr
Resultats[[.dico[["txt_added_variables_graph"]]]] <-ols_plot_added_variable(lm.r1)
coef.table[[.dico[["txt_delta_r_squared"]]]]<-c(" ", round((ols.corr$Part)^2,3))
}
}
Resultats[[.dico[["txt_beta_table"]]]]<-coef.table
}
if(any(param=="Bayes")|any(param==.dico[["txt_bayesian_factors"]])){
lmBF(modele1, data=dtrgeasieR)->BF.out
BF.table<-extractBF(BF.out)[1:2]
if(length(pred)>1) { for(i in 2:length(pred)){
modele1<-update(modele1, as.formula(paste0(".~.+",pred[i])))
lmBF(modele1, data=dtrgeasieR)->BF.out
BF.table<-rbind(BF.table, extractBF(BF.out)[1:2])
}
}
Resultats[[.dico[["txt_bayesian_factors"]]]]<-BF.table
}
if(any(param==.dico[["txt_robusts"]]| any(param==.dico[["txt_robusts_tests_with_bootstraps"]]))){
rlm(formula=modele, data=dtrgeasieR)->modele_robuste
summary(modele_robuste)->res_modele_robuste
(1-pt(abs(res_modele_robuste$coefficients[,3]), (length(dtrgeasieR[,1])-1-length(pred)), lower.tail=TRUE))*2->proba
round(cbind(res_modele_robuste$coefficients, proba),3)->M_estimator
data.frame(M_estimator)->M_estimator
noms<-c(.dico[["txt_b_m_estimator"]], "SE", "t", .dico[["txt_p_dot_val"]])
if(n.boot>100){
bootReg<-function(formula, dtrgeasieR, i)
{ d <- dtrgeasieR[i,]
fit <- lm(formula, data = d)
return(coef(fit))}
bootResults<-boot(statistic=bootReg, formula= modele , data=dtrgeasieR, R=n.boot) # cree le bootstrap
intervalle<-c()
try(for(i in 1: length(lm.r1$coefficients)){boot.ci(bootResults, type = "bca", index = i)$bca[,4:5]->IC1
rbind(intervalle, IC1)->intervalle}, silent=T)
if(is.null(intervalle)){
for(i in 1: length(lm.r1$coefficients)){boot.ci(bootResults, type = "perc", index = i)$percent[,4:5]->resultats
rbind(intervalle, resultats)->intervalle}
noms<-c(noms, .dico[["txt_percentile_inferior_limit_dot"]], .dico[["txt_percentile_superior_limit_dot"]])
} else{
noms<-c(noms, .dico[["txt_bca_inferior_limit"]], .dico[["txt_bca_superior_limit"]])
}
data.frame(M_estimator, round(intervalle,4))->M_estimator
}
names(M_estimator)<-noms
Resultats[[.dico[["txt_robusts_statistics"]]]]<-M_estimator
}
if(CV) .dico[["desc_cross_validation_issues"]]
return(Resultats)
}
options (warn=-1)
.e <- environment()
c('MASS','BayesFactor','boot','car','ggplot2','gsl', 'MBESS','olsrr','nortest','psych','svDialogs')->packages
try(lapply(packages, library, character.only=T), silent=T)->test2
if(class(test2)== 'try-error') return(ez.install())
Resultats<-list()
try( windows(record=T), silent=T)->win
if(any(class(win)=='try-error')) quartz()
if(class(data)=="data.frame") deparse(substitute(data))->data
reg.in.output<-regressions.in(data=data, modele=modele, Y=Y, X_a=X_a, X_i=X_i, outlier=outlier, inf=inf,
CV=CV, select.m=select.m, method=method, step=step, group=group, criteria=criteria , scale=scale, info=info,
sauvegarde=sauvegarde, n.boot=n.boot, param=param, rscale=rscale)
if(is.null(reg.in.output)) return(choix.reg())
data<-reg.in.output$data
nom<-reg.in.output$nom
modele<-reg.in.output$modele
param<-reg.in.output$options$choix
n.boot<-reg.in.output$options$n.boot
if(reg.in.output$options$rscale) rscale<-reg.in.output$options$rscale/2 else rscale<-reg.in.output$options$rscale
outlier<-reg.in.output$options$desires
sauvegarde<-reg.in.output$options$sauvegarde
scale<-reg.in.output$reg.options$scale
inf<-reg.in.output$reg.options$inf
CV<-reg.in.output$reg.options$CV
step<-reg.in.output$reg.options$step
select.m<-reg.in.output$reg.options$select.m
method<-reg.in.output$reg.options$method
criteria<-reg.in.output$reg.options$criteria
group<-reg.in.output$reg.options$group
if(any(outlier== .dico[["txt_complete_dataset"]])){
Resultats[[.dico[["txt_complete_dataset"]]]]<-regressions.out(dtrgeasieR=data, modele=modele, VC=VC, select.m=select.m, method=method, step=step, group=group, criteria=criteria , scale=scale,
sauvegarde=sauvegarde, n.boot=n.boot, param=param, rscale=rscale)
if(!is.null(group)) {
R1<-list()
G<-data[,group]
if(length(group)>1) G<-as.list(G)
G<-split(data, G)
for(i in 1:length(G)){
resg<-regressions.out(dtrgeasieR=G[[i]], modele=modele, VC=VC, select.m=select.m, method=method, step=step, group=group, criteria=criteria , scale=scale,
sauvegarde=sauvegarde, n.boot=n.boot, param=param, rscale=rscale)
R1[[length(R1)+1]]<-resg
names(R1)[length(R1)]<-names(G)[i]
}
Resultats[[.dico[["txt_complete_dataset"]]]][[.dico[["txt_group_analysis"]]]]<-R1
}
}
if(any(outlier==.dico[["txt_identifying_outliers"]])|any(outlier==.dico[["txt_without_outliers"]])|inf==T){
lm.r1<-lm(modele, data)
as.character(attributes(terms(modele))$variables)->variables
variables[2:length(variables)]->variables
plot(lm.r1, which = 5)
if(inf) {
influence.measures(lm.r1)->mesure_influence
data<-data.frame(data, round(mesure_influence$infmat,3))
data$leverage<-ols_leverage(lm.r1)
rstandard(lm.r1)->data$res.stand
rstudent(lm.r1)->data$res.student # idem avec le residu studentise
data$res.student.p<-2*pt(abs(data$res.student), df=lm.r1$df.residual, lower.tail=F)
data$res.student.p.Bonf<-p.adjust(data$res.student.p,"bonferroni")
data$est.inf<-" "
data[which(apply(mesure_influence$is.inf, 1, any)),"est.inf"]<-"*"
ols_plot_dfbetas(lm.r1)
data[order(data$res.student.p.Bonf), ]->data
writeLines(.dico[["desc_obs_with_asterisk_are_outliers"]])
View(data)
suppression<-"yes"
outliers<-data.frame()
nettoyees<-data
while(suppression=="yes"){
cat (.dico[["ask_press_enter_to_continue"]])
line <- readline()
sup<-NA
while(is.na(sup)){
sup <- dlgInput(.dico[["ask_obs_to_remove"]], 0)$res
if(length(sup)==0) return(regressions())
strsplit(sup, ":")->sup
tail(sup[[1]],n=1)->sup
as.numeric(sup)->sup
if(is.na(sup)) msgBox(.dico[["ask_enter_number_of_to_be_removed_variable"]])
}
if(sup==0) suppression<-"no" else {
rbind(outliers, nettoyees[which(dimnames(nettoyees)[[1]]==sup),])->outliers
nettoyees[-which(dimnames(nettoyees)[[1]]==sup),]->nettoyees
}
}
if(length(outliers)!=0) outliers<-outliers[,variables]
assign(nom, data, envir=.GlobalEnv)
} else {
4/length(data[,1])->seuil_cook # fixe le seuil pour les valeurs aberrantes
cooks.distance(lm.r1)->data$cook.d
data[which(data$cook.d<= seuil_cook), ]->nettoyees
data[which(data$cook.d>= seuil_cook), ]->outliers
cbind(outliers[,variables],outliers$cook.d)->outliers
Resultats$"information"[[.dico[["desc_outliers_identified_on_4_div_n"]]]]
}
nettoyees->>nettoyees
length(data[,1])-length(nettoyees[,1])->N_retire # identifier le nombre d observations retirees sur la base de la distance de cook
if(any(outlier== .dico[["txt_identifying_outliers"]])){
paste(N_retire/length(data[,1])*100,"%")->Pourcentage_retire # fournit le pourcentage retire
data.frame("N.retire"=N_retire, txt_percent_removed_obs=Pourcentage_retire)->Resultats[[.dico[["txt_identified_outliers_synthesis"]]]]
if(length(outliers)!=0) Resultats[[.dico[["txt_identifying_outliers"]]]][[.dico[["desc_identified_outliers"]]]]<-outliers
}
if(any(outlier== .dico[["txt_without_outliers"]])) {
if(N_retire!=0 | all(outlier!=.dico[["txt_complete_dataset"]])){
Resultats[[.dico[["txt_without_outliers"]]]]<-regressions.out(dtrgeasieR=nettoyees, modele=modele, VC=VC, select.m=select.m, method=method, step=step, group=group, criteria=criteria , scale=scale,
sauvegarde=sauvegarde, n.boot=n.boot, param=param, rscale=rscale)
if(!is.null(group)) {
R1<-list()
G<-nettoyees[,group]
if(length(group)>1) G<-as.list(G)
G<-split(nettoyees, G)
for(i in 1:length(G)){
resg<-regressions.out(dtrgeasieR=G[[i]], modele=modele, VC=VC, select.m=select.m, method=method, step=step, group=group, criteria=criteria , scale=scale,
sauvegarde=sauvegarde, n.boot=n.boot, param=param, rscale=rscale)
R1[[length(R1)+1]]<-resg
names(R1)[length(R1)]<-names(G)[i]
}
Resultats[[.dico[["txt_without_outliers"]]]][[.dico[["txt_group_analysis"]]]]<-R1
}
}
}
}
paste(outlier, collapse="','", sep="")->outlier
paste(param, collapse="','", sep="")->param
as.character(modele)->m1
modele<-paste0(m1[2],"~", m1[3])
if(!is.null(group)) paste(group, collapse="','", sep="")->group
if(!is.null(step)) {
paste0("list(")->step.call
for(i in 1:length(step)){
if(i>1) n.step<-paste0(", step",i) else n.step<-paste0("step",i)
paste(step[[i]], collapse="','", sep="")->var.step
step.call<-paste0(step.call,n.step,"=c('", var.step, "')")
}
step.call<-paste0(step.call, ")")
}
Resultats$Call<-paste0("regressions(data=", nom, ",modele=", modele, ",outlier=c('", outlier, "'),inf=", inf, ",CV=", CV,",select.m='", select.m,"',step=", ifelse(!is.null(step), step.call,"NULL"),
",group=", ifelse(is.null(group), "NULL", paste0("c('",group,"')")),
",criteria=", criteria, ",scale=", scale, ",dial=T, info=T,sauvegarde=", sauvegarde, ",n.boot=", n.boot, ",param=c('", param, "'),rscale=", round(rscale,3), ")")
.add.history(data=data, command=Resultats$Call, nom=nom)
.add.result(Resultats=Resultats, name =paste(.dico[["txt_multiple_regressions_dot"]], Sys.time() ))
if(sauvegarde) if(sauvegarde) save(Resultats=Resultats, choix=.dico[["txt_multiple_regressions_dot"]], env=.e)
Resultats[[.dico[["txt_references"]]]]<-ref1(packages)
if(html) ez.html(Resultats)
return(Resultats)
}
.regressions.options<-function(data=NULL, modele=NULL, CV=F, inf=F, select.m="none", method="p", criteria=NULL, step=NULL, group=NULL, scale=T, dial=T,info=T){
# data : dataframe
# modele : formula as it is used in lm
# CV : logical. Should a cross validation to be performed ?
# inf : Logical. Should influential observations be checked ?
# select.m : character specifying method of selection. One among "none", "forward", "backward" and "bidirectional"
# method : if select is different of "none", one among "AIC", "F", or "p"
# criteria : if method is "F", specify F value to use. If method is "p", specify p value to use as cutoff criteria.
# step : list. Each element of the list is a vector with the effect to test at the specific step (see details)
# group : character. Name of the factor variable definying the groups
# scale : Logical. Should the predictor be scaled before the analysis (recommended) ?
Resultats<-list()
step1<-terms(as.formula(modele))
step2<-as.character( attributes(step1)$variables)[-1]
step1<-attributes(step1)$term.labels
if(dial || !is.logical(scale)){
if(info) writeLines(.dico[["ask_center_numeric_variables"]])
scale<-dlgList(c(.dico[["txt_center"]], .dico[["txt_non_centered"]]), multiple = FALSE, title=.dico[["ask_center"]])$res
if(length(scale)==0) return(NULL)
scale<-ifelse(scale==.dico[["txt_center"]],T,F)
}
Resultats$scale<-scale
if(dial || !is.logical(inf) || !is.logical(CV)) {
writeLines(.dico[["ask_specify_other_options_regressions"]])
autres.options<-c(.dico[["txt_cross_validation"]],.dico[["txt_influence_method"]], .dico[["txt_none"]])
if(dim(model.matrix(modele, data))[2]>2) autres.options<-c(.dico[["txt_selection_methods"]], .dico[["txt_hierarchical_models"]], autres.options)
if(length(step2)<length(data)) autres.options<-c(.dico[["txt_groups_analysis"]],autres.options)
autres.options<- dlgList( autres.options, preselect=c(.dico[["txt_none"]]), multiple = TRUE, title=.dico[["ask_other_options"]])$res
if(length(autres.options)==0) return(.regressions.options(data=data, modele=modele))
# if(any(autres.options==.dico[["txt_none"]])) return(Resultats)
if(any(autres.options==.dico[["txt_influence_method"]]) ) Resultats$inf<-T else Resultats$inf<-F
if(any(autres.options==.dico[["txt_cross_validation"]]) ) Resultats$CV<-T else Resultats$CV<-F
}else{Resultats$inf<-inf
Resultats$CV<-CV
autres.options<-.dico[["txt_none"]]
}
if(any(autres.options==.dico[["txt_groups_analysis"]]) || !is.null(group)) {
msg5<-.dico[["ask_chose_categorial_ranking_factor"]]
group<-.var.type(X=group, info=info, data=data, type="factor", check.prod=T, message=msg5, multiple=FALSE, title=.dico[["txt_groups_variables"]], out=step2)
if(length(group)==0) { return(.regressions.options(data=data, modele=modele))}
data<-group$data
group<-group$X
ftable(data[,group])->groupe.check
if(any(is.na(groupe.check)) || min(groupe.check)<(length(dimnames(model.matrix(as.formula(modele), data))[[2]])+10)) {
msgBox(.dico[["desc_at_least_10_obs_needed"]])
return(groupe.check)
}
}
if(any(autres.options==.dico[["txt_selection_methods"]]) || select.m!="none" & length(select.m)!=1 | !select.m%in%c("none","forward", "backward", "bidirectional",.dico[["txt_forward_step_ascending"]],
.dico[["txt_backward_step_descending"]], .dico[["txt_bidirectionnal"]])){
if(info) writeLines(.dico[["ask_chose_selection_method"]])
select.m<- dlgList(c(.dico[["txt_forward_step_ascending"]],.dico[["txt_backward_step_descending"]], .dico[["txt_bidirectionnal"]]),
preselect=NULL, multiple = FALSE, title=.dico[["txt_method_choice"]])$res
if(length(select.m)==0) return(.regressions.options(data=data, modele=modele))
}
if(!is.null(method)){
if(any(autres.options==.dico[["txt_selection_methods"]]) || (select.m!="none" && !method%in%c("AIC", "p", "F", .dico[["txt_f_value"]],.dico[["txt_probability_value"]], .dico[["txt_aic_criterion"]])) ){
if(info) writeLines(.dico[["ask_selection_method"]])
method<- dlgList(c(.dico[["txt_f_value"]],.dico[["txt_probability_value"]], .dico[["txt_aic_criterion"]]),
preselect=c(.dico[["txt_f_value"]]), multiple = FALSE, title=.dico[["txt_method_choice"]])$res
if(length(method)==0) return(.regressions.options(data=data, modele=modele))
}
if(select.m!="none" & (method==.dico[["txt_f_value"]] | method=="F")){
if(!is.null(criteria) && (!is.numeric(criteria) || criteria<1)) {msgBox(.dico[["desc_specify_f_value"]])
criteria<-NULL}
if(is.null(criteria)) {
while(is.null(criteria)){
criteria <- dlgInput(.dico[["ask_f_value"]], 4)$res
if(length(criteria)==0) return(.regressions.options(data=data, modele=modele))
strsplit(criteria, ":")->criteria
tail(criteria[[1]],n=1)->criteria
as.numeric(criteria)->criteria
if(is.na(criteria) || criteria<1) {criteria<-NULL
msgBox(.dico[["desc_specify_f_value"]])
}
criteria<-df(criteria, df1=1, df2=(length(data[,1])-1-length(step1)), log = FALSE)
}
}
}
if(select.m!="none" & (method==.dico[["txt_probability_value"]] | method=="p")){
if(dial | !is.null(criteria) && (!is.numeric(criteria) || criteria<0 || criteria>1)) {msgBox(.dico[["desc_specify_probability_value"]])
criteria<-NULL}
if(is.null(criteria)) {
while(is.null(criteria)){
criteria <- dlgInput(.dico[["ask_probability_value"]], 0.15)$res
if(length(criteria)==0) return(.regressions.options(data=data, modele=modele))
strsplit(criteria, ":")->criteria
tail(criteria[[1]],n=1)->criteria
as.numeric(criteria)->criteria
if(is.na(criteria) || criteria>1 || criteria<0 ) {criteria<-NULL
msgBox(.dico[["desc_specify_probability_value"]])}
}
}
}
}
if(any(autres.options==.dico[["txt_hierarchical_models"]])| !is.null(step)) {
if(!is.null(step) ){
st1<-unlist(step)
if(any(table(st1>1))) st1<-.dico[["txt_error"]]
if(any(!st1%in%step1 ))st1<-.dico[["txt_error"]]
if(st1==.dico[["txt_error"]]){
msgBox(.dico[["desc_issue_in_hierarchical_regression"]])
step<-NULL
}
}
if(is.null(step)){
if(info) writeLines(.dico[["ask_chose_variables"]])
step<-list()
step[[1]]<- dlgList(step1, preselect=NULL, multiple = TRUE, title=.dico[["txt_variables_from_step"]])$res
if(length(step[[1]])==0) return(.regressions.options(data=data, modele=modele))
setdiff(step1,step[[1]])->step1
while(length(step1!=0)){
step[[length(step)+1]]<-dlgList(step1, multiple = TRUE,title=.dico[["txt_variables_from_step"]])$res
if(length(step[[length(step)]])==0) return(.regressions.options(data=data, modele=modele))
setdiff(step1,step[[length(step) ]])->step1
}
}
}
Resultats$step<-step
Resultats$select.m<-select.m
Resultats$method<-method
Resultats$criteria<-criteria
Resultats$group<-group
rm( "dtrgeasieR", envir = .GlobalEnv)
return(Resultats)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.