#' Specify the model and set the variables
#'
#' Specify the type of analysis, the predictor and response variables, the
#' variance structure, and whether to test a null hypothesis or generate a
#' confidence interval. Currently, only two predictor variables are allowed in
#' the case of a regression model and at most one of the predictor variables
#' can be a categorical variable. The categorical variable can only have two
#' levels.
#'
#' The function validates the arguments for the requested `test`, reduces `df`
#' to the columns that take part in the analysis and stores the specification
#' as attributes on the returned data frame.
#'
#' @param df Data frame with the input data.
#' @param test Type of test, given as a single string. Options: `"slope"`
#'   (regression slope), `"diff slopes"` (difference between regression
#'   slopes), `"diff intercepts"` (difference in intercept with parallel
#'   slopes), `"diff means"` (difference between two sample means),
#'   `"diff props"` (difference between two sample proportions) and
#'   `"Chi-sqr"` (Chi-square test). Any other value is an error.
#' @param procedure Choose whether you want to test a null hypothesis (`"H0"`,
#'   of a mean, regression slope, or slope difference) or get a confidence
#'   interval around the observed sample statistic (`"CI"`). The value is
#'   stored as an attribute; it is not validated by this function.
#' @param cont1 Name of the (only) continuous predictor variable in `df`.
#'   Required for `"slope"`, `"diff slopes"` and `"diff intercepts"`; must be
#'   `NULL` for the other tests. The size of its error term is set with
#'   `error_cont1` and `het_cont1`.
#' @param cont2 A second continuous variable, intended for exploring
#'   interaction effects. Not yet implemented: every test currently raises an
#'   error when `cont2` is not `NULL`.
#' @param cat1 Name of the categorical variable in `df`. All options but the
#'   Chi-square test only allow for two levels within the categorical
#'   variable.
#' @param cat2 Name of a second categorical variable. Only inspected for
#'   `"Chi-sqr"`, where exactly one of `cat1` and `cat2` may currently be
#'   supplied.
#' @param resp Name of the response variable in `df`. It must be numeric for
#'   `"slope"`, `"diff slopes"`, `"diff intercepts"` and `"diff means"`.
#' @param error_cat A vector with two numbers that defines how much
#'   smaller/bigger the error term is than the standard deviation of the
#'   residuals of the model fitted on the original data (both levels
#'   combined). A value of 1 means that the sd of the residuals of the model
#'   fitted on the original data (both levels combined) is used to define the
#'   error term. If you want to use the sd of the original residuals
#'   calculated separately for each level, use "as data" (the error message
#'   of this function spells this value 'as_data'; check which spelling the
#'   downstream functions expect). Required for `"diff slopes"`.
#' @param error_cont1 Multiplies the sd of the residuals of the regression
#'   model fitted on the original data with the given number. A value of `1`
#'   (default) means that the error term of the model is the same as in the
#'   original data; values of 0.5 and 2 mean that the error term is half or
#'   twice that, respectively.
#' @param het_cont1 A single number that defines how much smaller/bigger the
#'   error term is at the highest value of the observed range of predictor
#'   values than at the lowest value of the observed range of predictor
#'   values. The error terms are then calculated such that the average of the
#'   error term over the whole range of predictor values equals
#'   `error_cont1` times the sd of the residuals of the regression model
#'   fitted on the original data.
#' @param error_cont2 See `error_cont1`. Currently not used by this function.
#' @param het_cont2 See `het_cont1`. Currently not used by this function.
#' @param success In case of a test for difference in proportions, what level
#'   of the categorical variable indicates success? Required for
#'   `"diff props"`; not evaluated for the other tests.
#'
#' @return `df` reduced to the columns named by the predictor and response
#'   arguments, carrying the attributes `from` (always `"specify_model"`),
#'   `test` and `procedure`, plus test-specific attributes:
#'   `response_variable`, `continuous_predictor` / `categorical_predictor` or
#'   `predictor_variable`, `categorical_variable_1` /
#'   `categorical_variable_2`, `error_cat`, `error_cont1`, `het_cont1` and
#'   `success`, as applicable to the chosen test.
#'
#' @import readr
#' @import purrr
#' @import tidyr
#' @import tidyselect
#' @import dplyr
#' @export
specify_model <- function(
    df,
    test,
    procedure,
    cont1 = NULL,
    cont2 = NULL,
    cat1 = NULL,
    cat2 = NULL,
    resp = NULL,
    error_cat = NULL,
    error_cont1 = 1,
    het_cont1 = 1,
    error_cont2 = 1,
    het_cont2 = 1,
    success) {
  # Raise an error without the (uninformative) helper call in the message.
  fail <- function(...) {
    stop(..., call. = FALSE)
  }

  if (!is.data.frame(df)) {
    fail("'df' must be a data frame")
  }

  valid_tests <- c(
    "slope", "diff slopes", "diff intercepts",
    "diff means", "diff props", "Chi-sqr"
  )
  if (!is.character(test) || length(test) != 1 || !test %in% valid_tests) {
    fail(
      "'test' must be one of: ",
      paste0("'", valid_tests, "'", collapse = ", ")
    )
  }

  #---local validation helpers--------------------------------------------------
  # Check that `var` refers to column(s) that exist in `df`. Only names are
  # checked here; numeric positions are left for the selection step to verify.
  check_column <- function(var, arg, single = TRUE) {
    if (single && length(var) != 1) {
      fail("'", arg, "' must refer to exactly one column of 'df'")
    }
    absent <- if (is.character(var)) setdiff(var, names(df)) else character(0)
    if (length(absent) > 0) {
      fail(
        "Column(s) ", paste0("'", absent, "'", collapse = ", "),
        " (argument '", arg, "') not found in 'df'"
      )
    }
    invisible(NULL)
  }

  # Exactly one continuous predictor is allowed, and it must be `cont1`.
  check_one_continuous <- function(too_many, none) {
    if (!is.null(cont1) && !is.null(cont2)) {
      fail(too_many)
    }
    if (!is.null(cont2)) {
      fail("Use the argument 'cont1' to define your (only) continuous variable")
    }
    if (is.null(cont1)) {
      fail(none)
    }
    invisible(NULL)
  }

  check_resp <- function() {
    if (is.null(resp)) {
      fail("You need a response variable")
    }
    check_column(resp, "resp")
  }

  # NA counts as a level here, as it does for dplyr::distinct().
  check_two_levels <- function(msg) {
    if (length(unique(df[[cat1]])) != 2) {
      fail(msg)
    }
    invisible(NULL)
  }

  # `[[` extracts the column as a vector for data.frames and tibbles alike and
  # leaves non-syntactic column names untouched.
  check_numeric_pair <- function() {
    cont_ok <- is.numeric(df[[cont1]])
    resp_ok <- is.numeric(df[[resp]])
    if (!cont_ok && !resp_ok) {
      fail("Both your response and predictor variables are not numeric")
    }
    if (!cont_ok) {
      fail("your predictor variable is not numeric")
    }
    if (!resp_ok) {
      fail("your response variable is not numeric")
    }
    invisible(NULL)
  }

  no_continuous_msg <- paste0(
    "This option requires only one categorical and no continuous\n",
    "predictor variable(s). Use the argument 'cat1' to define the former"
  )

  if (test %in% c("diff slopes", "diff intercepts")) {
    #---difference between regression slopes / intercepts----------------------
    need_both <- if (test == "diff slopes") {
      paste0(
        "Testing for the difference between two regression slopes requires\n",
        "one continuous and one categorical variable"
      )
    } else {
      paste0(
        "Testing for the difference between the intercepts of two\n",
        "regression lines requires one continuous and one categorical variable"
      )
    }
    if (is.null(cat1)) {
      fail(need_both)
    }
    if (test == "diff slopes" && is.null(error_cat)) {
      fail(
        "You need to provide 'error_cat', a vector with two values or 'as_data'"
      )
    }
    check_one_continuous(
      too_many = paste0(
        "This option requires one categorical and (only) one continuous\n",
        "variable. Use the argument 'cont1' to define the latter"
      ),
      none = need_both
    )
    check_resp()
    check_column(cont1, "cont1")
    check_column(cat1, "cat1")
    check_two_levels(paste0(
      "Currently the option '", test, "' only\n",
      "supports two levels for the categorical variable"
    ))
    check_numeric_pair()

    out <- dplyr::select(
      df,
      tidyselect::all_of(cont1),
      tidyselect::all_of(cat1),
      tidyselect::all_of(resp)
    )
    extra <- list(
      response_variable = resp,
      continuous_predictor = cont1,
      categorical_predictor = cat1,
      error_cat = error_cat,
      error_cont1 = error_cont1,
      het_cont1 = het_cont1
    )
  } else if (test == "slope") {
    #---regression slope--------------------------------------------------------
    if (!is.null(cat1)) {
      fail(paste0(
        "The option to test the regression slope requires only\n",
        "one continuous predictor and no categorical variable.\n",
        "Use the argument 'cont1' to define the former"
      ))
    }
    check_one_continuous(
      too_many = paste0(
        "This option requires only one continuous predictor\n",
        "variable. Use the argument 'cont1' to define this predictor variable"
      ),
      none = paste0(
        "The option to test the regression slope requires one continuous\n",
        "predictor. Use the argument 'cont1' to define the former"
      )
    )
    check_resp()
    check_column(cont1, "cont1")
    check_numeric_pair()

    out <- dplyr::select(
      df,
      tidyselect::all_of(cont1),
      tidyselect::all_of(resp)
    )
    extra <- list(
      response_variable = resp,
      predictor_variable = cont1,
      error_cont1 = error_cont1,
      het_cont1 = het_cont1
    )
  } else if (test == "diff means") {
    #---difference between means------------------------------------------------
    if (is.null(cat1)) {
      fail(paste0(
        "Testing for the difference between two sample means requires\n",
        "one categorical variable"
      ))
    }
    if (!is.null(cont1) || !is.null(cont2)) {
      fail(no_continuous_msg)
    }
    check_resp()
    check_column(cat1, "cat1")
    check_two_levels(paste0(
      "The option 'difference between sample means' only supports two levels\n",
      "for the categorical variable. Consider if the Chi-square test is an option"
    ))
    if (!is.numeric(df[[resp]])) {
      fail("your response variable is not numeric")
    }

    out <- dplyr::select(
      df,
      tidyselect::all_of(cat1),
      tidyselect::all_of(resp)
    )
    extra <- list(
      response_variable = resp,
      predictor_variable = cat1,
      error_cat = error_cat
    )
  } else if (test == "diff props") {
    #---difference between proportions------------------------------------------
    if (is.null(cat1)) {
      fail(paste0(
        "Testing for the difference between two sample proportions requires\n",
        "one categorical variable"
      ))
    }
    if (!is.null(cont1) || !is.null(cont2)) {
      fail(no_continuous_msg)
    }
    check_resp()
    # `success` has no default; report its absence explicitly instead of
    # letting R fail with a generic "argument is missing" error further down.
    if (missing(success)) {
      fail("You need to provide 'success' to test a difference in proportions")
    }
    check_column(cat1, "cat1")
    check_two_levels(paste0(
      "The option 'difference between sample proportions' only supports two levels\n",
      "for the categorical variable. Consider if the Chi-square test is an option"
    ))

    out <- dplyr::select(
      df,
      tidyselect::all_of(cat1),
      tidyselect::all_of(resp)
    )
    extra <- list(
      response_variable = resp,
      predictor_variable = cat1,
      success = success
    )
  } else {
    #---Chi-square test for homogeneity-----------------------------------------
    if (is.null(cat1) && is.null(cat2)) {
      fail("A Chi-square test requires one or two categorical variables")
    }
    if (!is.null(cat1) && !is.null(cat2)) {
      fail(paste0(
        "for now, this function only allows for a Chi-square test for homogeniety\n",
        "with one categorical variable and one predictor variable. Use cat1 for this."
      ))
    }
    if (is.null(resp)) {
      fail("You need a response variable")
    }
    if (!is.null(cont1) || !is.null(cont2)) {
      fail(paste0(
        "This option requires one or two categorical and no continuous\n",
        "predictor variable(s). Use the arguments 'cat1' and 'cat2' to define the former"
      ))
    }
    check_column(cat1, "cat1", single = FALSE)
    check_column(cat2, "cat2", single = FALSE)
    check_column(resp, "resp", single = FALSE)

    out <- dplyr::select(
      df,
      tidyselect::all_of(cat1),
      tidyselect::all_of(cat2),
      tidyselect::all_of(resp)
    )
    extra <- list(
      response_variable = resp,
      categorical_variable_1 = cat1,
      categorical_variable_2 = cat2
    )
  }

  # Attach the specification to the data frame. Assigning NULL to an attribute
  # leaves it unset, so optional arguments that were not supplied are skipped.
  spec <- c(
    list(from = "specify_model", test = test, procedure = procedure),
    extra
  )
  for (key in names(spec)) {
    attr(out, key) <- spec[[key]]
  }
  out
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.