# #################################
# ### The function itself ####
# #################################
#
# #input:
#
# all( 1:3 %in% 1:8)
#
# unlist(strsplit("a,b", split=","))
#
#
#
# mean.for.regression <- function(mResponse, censusdata, surveydata, model, location_survey){
# if(mResponse == ".") {
# vars_for_mean_calculation <- all.vars(model)[-1]
# } else if(is.character(mResponse) & length(mResponse == 1)){ # Fall: "a + b + c + d" oder "a, b, c, d"
# # replace " " by "" --> remove blanks
# vars_for_mean_calculation <- gsub(pattern = " ", replacement="" , mResponse)
# vars_for_mean_calculation <- unlist(strsplit(model.split, split="\\+"))
# vars_for_mean_calculation <- unlist(strsplit(model.split, split=","))
# } else if(is.character(mResponse)){
# vars_for_mean_calculation <- mResponse
# } else {
# stop("In order to include the means of variables included in the census in the model fit on the surveydata, you have to give a
# a) string with the variables you want to include separated by \"+\" or \",\" or
# b) a character vector with your variables
# c) a \"\'.\'\" as string, indicating that you want to include the mean of all the variables in your model")
# }
# if(!all( mResponse %in% names(censusdata))){
# stop("your input for mResponse includes variables that are not present in the censusdata set.
# Means for those variables cannot be calculated")
# }
# if(!all( mResponse %in% names(survey))){
# warning("your input for mResponse includes variables that are not present in the surveydata set")
# }
#
# means_from_census <- cbind(subset(censusdata, select = vars_for_mean_calculation), location_survey)
# means_from_census <- try(by(data = means_from_census,
# INDICES = location_survey, FUN = mean))
# if(class(means_from_census) == "try-error"){
# stop("your input for mResponse corresponds to variables in the censusdata set that are not numeric.
# Means for those variables cannot be calculated")
# }
#
# surveydata <- merge(surveydata, means_from_census, by = paste(varname_location_survey)))
# var.names <- paste(vars_for_mean_calculation, "_meanCensus", sep = "")
#
#
#
# # for-loop that determines if a varaible is metric or not and then calculates
# # the mean or median respectively and names the variables too
# for (i in 1:length(vars_for_mean_calculation)) {
# numeric_or_factor[i] <- is.numeric(censusdata[i])
# # first we select the variable we want from our input and connect it to the
# # census dataset for the mean or median calculation
# varcheck <- censusdata[,which(model.split[i] == colnames(censusdata))]
# #eval(parse(text=paste("censusdata$", model.split[i], sep = "")))
# # if statement that determines if mean or median as computet and saved in a vector
# if(is.numeric(varcheck) || is.integer(varcheck)){
# var.names[i] <- paste(model.split[i], ".Cmean" , sep = "")
# location.mean <- aggregate(as.formula(paste(model.split[i], "~", location_survey)), censusdata, mean)
# # aggregrate is actually pretty slow
# # there are faster options but they require additional packages
# # se here: https://stackoverflow.com/questions/11562656/calculate-the-mean-by-group
# names(location.mean)[2] <- paste(names(location.mean)[2], ".Cmean" , sep = "")
# location.mean[,1] <- as.factor(location.mean[,1])
#
# surveydata <- merge(surveydata, location.mean) #, by = eval(location_survey))
# } else if(is.factor(varcheck)){
# #var.means[,i] <- rep(median(varcheck), nrow(surveydata)) # median
# var.names[i] <- paste(model.split[i], ".Cmedian" , sep = "")
# location.median <- aggregate(as.formula(paste(model.split[i], "~", location_survey)), censusdata, median)
#
# names(location.median)[2] <- paste(names(location.median)[2], ".Cmedian" , sep = "")
# location.median[,1] <- as.factor(location.median[,1])
# surveydata <- merge(surveydata, location.median)
# }
# }
# #colnames(var.means) <- var.names # defines the new variable names
# #######------------------------------------------------########
# ### this could be sped up by using data.table's rbindlist() ###
# #surveydata <- cbind(surveydata, var.means)
# #######------------------------------------------------########
#
# # the last step is now to recreate tha part that gets pasted into the model
# # possible function for this:
# # https://stevencarlislewalker.wordpress.com/2012/08/06/merging-combining-adding-together-two-formula-objects-in-r/
# model.in.characters <- as.character(model)
#
# #model <- paste( model , paste(model.split, collapse = " + "), sep = " + ") # old version
# modelx <- paste(model.in.characters[3] , paste(var.names, collapse = " + "), sep = " + ")
# model.new <- paste(model.in.characters[2], modelx, sep = " ~ ")
# # we should check if this hyperparametrisation works!!
# return(list(model.new, surveydata))
# }
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.