variable_predictions_funs.R

## More general simulation function
### Default: Continuous outcome 2 and 1 categorical predictors
### form : specify model using formula

linearsim <- function(nHH=1000, perHH=1, form=~1+x1+x2+x3
	, hhSD=2, noiseSD=1, addnoiseGLM=FALSE, betas=NULL
	, pgausian=list(p=2, fun=rnorm, mean=0, sd=1)
	, pcat=list(p=1, fun=sample, nlevels=2, labels=NULL, prob=NULL, replace=TRUE)
	, noutcomes=1
	, beta0=log(runif(noutcomes))
	, separatelatent=FALSE
	, blatent=1
	, mulatent=0
	, sdlatent=1
	, vnames=NULL
	, link_scale=FALSE) {
	
	Terms <- attr(terms(form), "term.labels")
	nbetas <- length(Terms)
	if (noutcomes==1) {
		if (!is.null(betas) & length(betas) != (nbetas+1))
			stop(paste0("betas should a vector of length ", nbetas+1))
	} else {
		if (!is.null(betas) & length(betas) != (nbetas))
			stop(paste0("betas should a vector of length ", nbetas+1))
	}

	p <- pgausian$p + pcat$p
	nvars <- length(all.vars(form))

	if (p != nvars) {
		stop(paste0("\nNumber of predictors in form should be same as sum of p in pgausian and pcat: ", nvars, " != ", p))
	}
	
	# TODO: warn users if there's intercept in the multiple outcome
	check_intercept <- attr(terms(form), "intercept")
	if (noutcomes>1) {
		if (check_intercept>=1) {
			form <- reformulate(c(all.vars(form), -1))
			betas <- betas[-1]
		}
		if (length(beta0) != noutcomes) stop("length of beta0 should be the same as noutcomes")
	}

	if (noutcomes<1)stop("Number of outcome variables should be  >= 1")
	if (perHH < 1)stop("perHH >= 1")
	N <- nHH * perHH

	pg <- pgausian$p
	mu <- pgausian$mean
	if (is.null(mu)) mu <- rep(0, pg)
	.sd <- pgausian$sd
	if(is.null(.sd)) sd <- 1

	pgausian$p <- NULL
	gfun <- pgausian$fun
	if (is.null(gfun)) gfun <- rnorm
	pgausian$fun <- NULL
	pgausian$n <- N
	if (length(mu)==1) {
		X <- replicate(pg, do.call(gfun, pgausian))
	} else {
		X <- list()
		if (length(mu)>pg)stop("p in pgausian should be greater than or equals to the length of mean")
		for (m in 1:length(mu)) {
			if(length(.sd)<length(mu)).sd <- 1 else .sd <- .sd[[m]]
			pgausian$mean <- mu[[m]]
			pgausian$sd <- .sd
			X[[m]] <- do.call(gfun, pgausian)
		}
		X <- do.call("cbind", X)
	}
	
	cp <- pcat$p
	pcat$p <- NULL
	if (cp > 0) {
		nlevels <- pcat$nlevels
		if (is.null(nlevels)) {
			nlevels <- rep(2, cp)
			pcat$labels <- NULL
			pcat$prob <- NULL
			pcat$replace <- TRUE
		}
		if (length(nlevels)!=cp)
			stop("\n p in pcat must be equal to the length of nlevels")
		Xcat <- list()
		cfun <- pcat$fun
		if (is.null(cfun)) cfun <- sample
		labels_temp <- pcat$labels
		prob_temp <- pcat$prob
		for (nl in 1:cp) {
			if(is.list(labels)) {
				labels <- labels_temp[[nl]]
				prob <- prob_temp[[nl]]
			} else {
				labels <- labels_temp
				prob <- prob_temp
			}
			if (nlevels[[nl]]<2)stop("Levels of categorical variable must be >1")
			if (is.null(labels)) {
				levs <- sample(LETTERS, nlevels[[nl]], replace=FALSE)
			} else {
				levs <- labels
				if (is.list(levs)) levs <- levs[[nl]]
			}
			if (length(levs) != nlevels[[nl]])
				stop("nlevels must be equal to the length of labels")
			pcat$labels <- NULL
			pcat$x <- levs
			if(is.list(prob)) prob <- prob[[nl]]
			if (is.null(prob)) {
				prob <- rep(1/nlevels[[nl]], nlevels[[nl]])
			}
			if (length(prob) != length(levs))
				stop("length of prob must be equal to the length of labels")

			pcat$fun <- NULL
			pcat$prob <- prob
			pcat$nlevels <- NULL
			pcat$size <- N
			Xcat[[nl]] <- do.call(cfun, pcat)
		}
		Xcat <- do.call("cbind", Xcat)
		X <- cbind.data.frame(X, Xcat)
	}
	colnames(X) <- all.vars(form)
	
	## Model matrix
	df <- as.data.frame(X)
	
	## Generate random effects
	### Probably better way to do this in base R
	if (perHH==1) {
		hhid <- 1:N
		hhRE <- rep(0, N)
	} else {
		hhid <- rep(1:nHH, each=perHH)
		if (length(hhSD)==1) hhSD <- rep(hhSD, noutcomes)
		names(hhSD) <- paste0("hhRE", 1:noutcomes)
		sdfun <- function(){hhSD}
		df <- (df
			%>% mutate(hhid=hhid)
			%>% group_by(hhid)
			%>% mutate(!!!sdfun())
#			%>% mutate_(.dots=hhSD)
			%>% mutate_at(names(hhSD), function(x)rnorm(1, 0, x))
			%>% ungroup()
		)
		hhid <- pull(df, hhid)
		hhRE <- select(df, !!names(hhSD))
		df <- select(df, -!!c("hhid", names(hhSD)))
	}

	X <- model.matrix(form, df)
	
	if (is.null(betas)) betas <- log(runif(NCOL(X), 0, 1))

	lp <- as.vector(X %*% betas)
	if (noutcomes==1) {
		if (perHH==1) {
			hhRE <- hhRE 
		} else {
			hhRE <- pull(hhRE, "hhRE1")
		}
		eta <- lp + hhRE
		if (link_scale) {
			df$y <- rnorm(N, mean=eta, sd=noiseSD) 
		} else {
			if (addnoiseGLM) {
				eta <- eta + rnorm(N, 0, noiseSD)
			}
			df$y <- rbinom(N, 1, plogis(eta))
		}
		ynames <- "y"
	} else {
		if (separatelatent) {
			if (length(mulatent)==1) mulatent <- rep(mulatent, noutcomes)
			if (length(sdlatent)==1) sdlatent <- rep(sdlatent, noutcomes)
			if (length(blatent)==1) blatent <- rep(blatent, noutcomes)
		}
		latentvar <- blatent*rnorm(N, mulatent, sdlatent)
		out <- lapply(1:noutcomes, function(i){
			if (separatelatent) {
				latentvar <- blatent[[i]]*rnorm(N, mulatent[[i]], sdlatent[[i]])
			} 
			hhRE <- pull(hhRE, paste0("hhRE", i))
			eta <- beta0[[i]] + lp + hhRE + latentvar
			if (link_scale) {
				y <- rnorm(N, mean=eta, sd=noiseSD) 
			} else {
				y <- rbinom(N, 1, plogis(eta))
			}
			return(y)
		})
		out <- do.call("cbind", out)
		df <- cbind.data.frame(df, out)
		ynames <- paste0("y", 1:noutcomes)
	}
	
	
	if (is.null(vnames)) {
		colnames(df) <- c(all.vars(form), ynames)
	} else {
		colnames(df) <- vnames
	}
	df$hhid <- hhid
	
	return(list(data = df, betas = betas, formula=form))
}


## Generate binned observations
binfun <- function(mod, focal, bins=50, groups=NULL, ...) {
	if (!is.null(groups)) {
		bins_all <- c(groups, "bin")
	} else {
		bins_all <- "bin"
	}
	mf <- model.frame(mod)
	check_df <- (mf
		%>% arrange_at(focal)
		%>% mutate(bin=ceiling(row_number()*bins/nrow(.)))
		%>% group_by_at(bins_all)
		%>% summarise_all(mean)
		%>% mutate(model="binned")
	)
	return(check_df)
}

## Compare several varpred objects

comparevarpred <- function(vlist, lnames=NULL, plotit=FALSE, addmarginals=FALSE, margindex, ...) {
	if (!is.list(vlist))stop("vlist should be a list of objects of class varpred")
	nobjs <- length(vlist)
	if (!is.null(lnames)) {
		if (nobjs==1) lnames <- rep(lnames, nobjs)
	} 
	preds <- lapply(1:nobjs, function(v){
		pp <- vlist[[v]]$preds
		if (!is.null(lnames)) {
			pp$.varpred <- lnames[[v]]
		}
		return(pp)
	})
	preds <- do.call("rbind", preds)
	if (addmarginals) {
		if (missing(margindex))stop("Specify margindex of vlist objects to compute marginals")
		marg_df <- lapply(margindex, function(i){
			pp <- vlist[[i]]$preds
			df <- data.frame(muy=mean(pp$fit)
				, mux=mean(pp[[attr(pp, "x.var")]])
			)
			if (!is.null(lnames)) {
				df$.varpred <- lnames[[i]]
			}
			return(df)
		})
		marg_df <- do.call("rbind", marg_df)
	}
	
	out <- vlist[[1]]
	out$preds <- preds
	if (plotit) {
		add_args <- list(...)
		if (length(add_args)) {
			out <- list(out)
			out[names(add_args)] <- add_args
			p <- do.call(plot, out)
		} else {
			p <- plot(out)
		}
		if (addmarginals) {
			p <- (p
				+ geom_hline(data=marg_df, aes(yintercept=muy), lty=2, colour="grey")
				+ geom_vline(data=marg_df, aes(xintercept=mux), lty=2, colour="grey")
			)
		}
		return(p)
	} else {
		return(out)
	}
}


## Compare predictions from vareffects, emmeans, effects and margins
## funs: emmean; varpred; ...
combinepreds <- function(mod, funs, focal, x.var, x.var.factor=FALSE, plotit=TRUE, print.head=FALSE, ...){
	if (length(focal)>1) {
		focal <- union(focal[focal %in% x.var], focal)
		focal_temp <- c("xvar", focal[!focal %in% x.var])
		spec <- as.formula(paste0("~", paste0(focal, collapse=":")))
	} else {
		spec <- as.formula(paste0("~", focal))
		focal_temp <- "xvar"
	}
	args <- list(mod, spec=spec
		, focal.predictors=focal
		, focal_predictors=focal
		, x.var=x.var
	)
	add_args <- list(...)
	if (length(add_args)) args[names(add_args)] <- add_args
	
	if (!is.null(add_args$at) & ("Effect" %in% funs)) args$xlevels <- add_args$at

	out <- sapply(funs, function(f){
		est <- do.call(f, args)
		if (inherits(est, c("emmeans", "emmGrid"))) {
			.nn <- est@misc$inv.lbl
			if (is.null(.nn)) .nn <- est@misc$estName
			est <- as.data.frame(est)
			oldn <- c(focal, .nn
				, grep("\\.CL|\\.LCL|\\.UCL|lower\\.|upper\\.", colnames(est), value=TRUE)
			)
			newn <- c(focal_temp, "fit", "lwr", "upr")
			colnames(est)[colnames(est) %in% oldn] <-  newn
		} else if (inherits(est, "eff")) {
			est <- as.data.frame(est)
			oldn <- c(focal, "lower", "upper")
			newn <- c(focal_temp, "lwr", "upr")
			colnames(est)[colnames(est) %in% oldn] <-  newn
		} else {
			est <- as.data.frame(est)
			colnames(est)[colnames(est) %in% x.var] <- "xvar"
		}
		est <- est[, c(focal_temp, c("fit", "lwr", "upr"))]
		est$model <- f
		return(est)
	}, simplify = FALSE)
	out <- do.call("rbind", out)
	rownames(out) <- NULL
	class(out) <- c("jdeffects", "data.frame")
	if (print.head){
		head(out)
	}
	if (plotit) {
		cols <- rainbow(length(funs))
#		colfun <- colorRampPalette(c("black", "red"))
#		cols <- colfun(length(funs))
		names(cols) <- funs
		out <- list(out)
		out[names(add_args)] <- add_args
		p1 <- (do.call(vareffects:::plot.vareffects, out)
			+ scale_colour_manual(breaks = funs, values=cols)
#			+ scale_linetype_manual(values=1:length(funs), breaks=funs)
			+ labs(y="Predictions", x=x.var, colour="Method")
			+ theme(legend.position="bottom")
		)
		if (!x.var.factor) {
			pred_prop_df <- (out[[1]]
				%>% group_by_at(c("model", focal[!focal %in% x.var]))
				%>% summarize(fit=mean(fit))
				%>% data.frame()
			)
			print(pred_prop_df)
			out <- p1 + geom_hline(data=pred_prop_df, aes(yintercept=fit, colour=model, linetype=model))
		} else {
			out <- p1
		}
	}
	return(out)
}


saveEnvironment()
mac-theobio/effects documentation built on July 6, 2023, 4:19 a.m.