Nothing
#' Create an optimal design for measuring the slope divided by the intercept
#'
#' @param n The number of experimental runs.
#' @param xmin The minimum value of the independent variable.
#' @param xmax The maximum value of the independent variable.
#' @param theta0 The guess of the true value of the slope / intercept.
#' @param f_hetero Specification of heteroskedasticity: the h(x) which relates the value of the
#' independent variable to the variance in the response around the line at that place
#' or the proportional variance at that point. If \code{NULL}, homoskedasticity is
#' assumed (this is the default behavior).
#' @param MaxIter For the heteroskedastic design, a Nelder-Mead search is used (via the function \code{fminbnd}).
#' This is the \code{MaxIter} value for the search. Default is \code{6000}. Lower if \code{n} is high.
#' @param MaxFunEvals For the heteroskedastic design, a Nelder-Mead search is used (via the function \code{fminbnd}).
#' This is the \code{MaxFunEvals} value for the search. Default is \code{6000}. Lower if \code{n} is high.
#' @param TolFun For the heteroskedastic design, a Nelder-Mead search is used (via the function \code{fminbnd}).
#' This is the \code{TolFun} value for the search. Default is \code{1e-6}. Increase for faster execution.
#' @param NUM_RAND_STARTS For the heteroskedastic design, a Nelder-Mead search is used (via the function \code{fminbnd}).
#' The Nelder-Mead search must be given a starting location. Our implementation uses many
#' starting locations. This parameter controls the number of additional random starting
#' locations in the space \code{[xmin, xmax]}. Default is \code{50}.
#'
#' @return An n-vector of x-values which specifies the optimal design
#'
#' @author Adam Kapelner
#' @examples
#' xmin = 5 / 15
#' xmax = 19 / 1
#' n = 10
#' theta0 = 0.053
#' opt_homo_design = oed_for_slope_over_intercept(n, xmin, xmax, theta0)
#' table(opt_homo_design)
#' @export
oed_for_slope_over_intercept = function(n, xmin, xmax, theta0, f_hetero = NULL, MaxIter = 6000, MaxFunEvals = 6000, TolFun = 1e-6, NUM_RAND_STARTS = 50){
#throw errors here
if (is.null(f_hetero)){
oed_for_slope_over_intercept_homo(n, xmin, xmax, theta0)
} else {
stop("This function currently is unavailable since the neldermead package became archived. If the neldermead package is back on cran, please email me at kapelner@qc.cuny.edu and I will fix.")
# oed_for_slope_over_intercept_hetero(n, xmin, xmax, theta0, f_hetero, MaxIter, MaxFunEvals, TolFun, NUM_RAND_STARTS)
}
}
#' Plots a standard error estimate of thetahat (slope over intercept) over a range of possible theta0 values
#' in order to investigate robustness of the the initial theta0 guess.
#'
#' @param n The number of experimental runs.
#' @param xmin The minimum value of the independent variable.
#' @param xmax The maximum value of the independent variable.
#' @param theta The putative true value. This is used to see how much efficiency given up by designing it for \code{theta0}.
#' @param theta0 The guess used to construct the experimental design. Specify only if you wish to see this
#' value plotted. Default is \code{NULL}.
#' @param theta0_min Simulating over different guesses of theta0, this is the minimum guess.
#' @param theta0_max Simulating over different guesses of theta0, this is the maximum guess.
#' @param beta0 A guess to be used for the intercept. Defaults to \code{1}.
#' @param sigma A guess to be used for the homoskedastic variance of the measurement errors. If known accurately,
#' then the standard errors (i.e. the y-axis on the plot) will be accurate. Otherwise, the standard
#' errors are useful only when compared to each other in a relative sense. Defaults to \code{1}.
#' @param error_est The error metric for the estimates. The sample standard deviation (i.e. \code{sd})
#' is unstable at low sample sizes. The default is the 90 percentile minus the 10 percentile.
#' @param RES The number of points on the x-axis to simulate. Higher numbers will give smoother results. Default is \code{20}.
#' @param Nsim The number of models to be simulated for estimating the standard error at each value on the x-axis. Default is \code{1000}.
#' @param theta_logged Should the values of theta be logged? Default is \code{TRUE}.
#' @param error_pct Plot error as a percentage increase from minimum. Default is \code{TRUE}.
#' @param plot_rhos Plot an additional graph of rho by theta0. Default is \code{FALSE}.
#' @param ... Additional arguments passed to the \code{plot} function.
#'
#' @return A list with original parameters as well as data from the simulation
#'
#' @author Adam Kapelner
#' @export
#' @examples
#' \donttest{
#' xmin = 5 / 15
#' xmax = 19 / 1
#' n = 10
#' theta0 = 0.053
#' plot_info = err_vs_theta0_plot_for_homo_design(
#' n, xmin, xmax, theta0, theta0_min = 0.001, theta0_max = 1
#' )
#' }
err_vs_theta0_plot_for_homo_design = function(n, xmin, xmax, theta, theta0_min, theta0_max,
theta0 = NULL, beta0 = 1, sigma = 1, RES = 500, Nsim = 5000,
error_est = function(est){quantile(est, 0.99) - quantile(est, 0.01)}, #99%ile - 1%ile
theta_logged = TRUE, error_pct = TRUE, plot_rhos = FALSE, ...){
if (theta_logged){
theta0s = seq(log(theta0_min), log(theta0_max), length.out = RES) / log(10)
} else {
theta0s = seq(theta0_min, theta0_max, length.out = RES)
}
homo_designs = matrix(NA, nrow = 0, ncol = n)
for (i in 1 : RES){
homo_design = oed_for_slope_over_intercept_homo(n, xmin, xmax, ifelse(theta_logged, 10^(theta0s[i]), theta0s[i]))
homo_designs = rbind(homo_designs, homo_design)
}
unique_homo_designs = unique(homo_designs)
unique_homo_designs
unique_rhos = apply(unique_homo_designs, 1, function(row){sum(row == xmin) / n})
unique_error_ests = array(NA, nrow(unique_homo_designs))
for (d in 1 : nrow(unique_homo_designs)){
ests_opt = array(NA, Nsim)
xs_opt = unique_homo_designs[d, ]
for (nsim in 1 : Nsim){
ys_opt = beta0 + theta * beta0 * xs_opt + rnorm(n, 0, sigma)
mod_opt = lm(ys_opt ~ xs_opt)
ests_opt[nsim] = coef(mod_opt)[2] / coef(mod_opt)[1]
}
unique_error_ests[d] = error_est(ests_opt)
}
#now we need to realign the uniques
error_ests = array(NA, RES)
rhos = array(NA, RES)
for (i in 1 : RES){
homo_design = homo_designs[i, ]
#get index in the unique set
ind = apply(unique_homo_designs, 1, function(row){all(row == homo_design)})
error_ests[i] = unique_error_ests[ind]
rhos[i] = unique_rhos[ind]
}
if (plot_rhos){
oldpar <- par(no.readonly = TRUE) # code line i
on.exit(par(oldpar)) # code line i + 1
par(mfrow = c(1, 2))
}
if (error_pct){
error_ests_pct = (error_ests - min(error_ests)) / min(error_ests) * 100
plot(theta0s, error_ests_pct,
type = "l",
ylab = "% increase in optimal error of theta-hat",
xlab = ifelse(theta_logged, "log_10(theta0) hypothesis", "theta0 hypothesis"),
...)
} else {
plot(theta0s, error_ests,
type = "l",
ylab = "error of theta-hat",
xlab = ifelse(theta_logged, "log_10(theta0) hypothesis", "theta0 hypothesis"),
...)
}
if (!is.null(theta0)){
abline(v = ifelse(theta_logged, log(theta0) / log(10), theta0), col = "red")
}
abline(v = ifelse(theta_logged, log(theta) / log(10), theta), col = "green")
if (plot_rhos){
plot(theta0s, rhos,
type = "l",
ylab = "rho",
xlab = ifelse(theta_logged, "log_10(theta0) hypothesis", "theta0 hypothesis"),
...)
if (!is.null(theta0)){
abline(v = ifelse(theta_logged, log(theta0) / log(10), theta0), col = "red")
}
abline(v = ifelse(theta_logged, log(theta) / log(10), theta), col = "green")
}
#return the simulated results only if user cares
invisible(list(
n = n,
xmin = xmin,
xmax = xmax,
theta = theta,
theta_logged = theta_logged,
theta_vs_design = cbind(ifelse(rep(theta_logged, RES), 10^(theta0s), theta0s), homo_designs),
theta_vs_error_ests = cbind(ifelse(rep(theta_logged, RES), 10^(theta0s), theta0s), unique_error_ests)
))
}
#' A visualiation for comparing slope-divided-by-intercept estimates
#' for a number of designs
#'
#' @param xmin The minimum value of the independent variable.
#' @param xmax The maximum value of the independent variable.
#' @param designs A d x n matrix where each of the d rows is a design (the x values
#' used to run the experiment).
#' @param gen_resp A model for the response which takes the design as its parameter.
#' @param Nsim The number of estimates per design. Default is \code{1000}.
#' @param l_quantile_display The lowest quantile of the simulation estimates displayed. Default is \code{0.025}.
#' @param u_quantile_display The highest quantile of the simulation estimates displayed. Default is \code{0.975}.
#' @param error_est The error metric for the estimates. The sample standard deviation (i.e. \code{sd})
#' is unstable at low sample sizes. The default is the 90 percentile minus the 10 percentile.
#' @param num_digits_round The number of digits to round the error results. Default is 2.
#' @param draw_theta_at If the user wishes to draw a horizontal line marking theta (to checked biasedness)
#' it is specified here. The default is \code{NULL} with no line being drawn.
#' @param xlab_names Text for the x-grid labels. This vector's size should equal \code{lenth(designs)}.
#' @param ... Additional arguments passed to the \code{boxplot} function.
#'
#' @return A list with the simulated estimates and error estimates for each design.
#'
#' @author Adam Kapelner
#' @export
#' @examples
#' xmin = 5 / 15
#' xmax = 19 / 1
#' n = 10 #must be even for this demo
#' designs = rbind(
#' c(rep(xmin, n / 2), rep(xmax, n / 2)), #design A
#' seq(from = xmin, to = xmax, length.out = n) #design B
#' )
#' design_bakeoff_info = design_bakeoff(xmin, xmax, designs) #design A wins
design_bakeoff = function(xmin, xmax, designs,
gen_resp = function(xs){1 + 2 * xs + rnorm(length(xs), 0, 1)},
Nsim = 1000, l_quantile_display = 0.01, u_quantile_display = 0.99,
error_est = function(est){quantile(est, 0.99) - quantile(est, 0.01)}, #99%ile - 1%ile i.e the interdecile range
num_digits_round = 3,
draw_theta_at = NULL,
xlab_names = NULL,
...){
num_designs = nrow(designs)
n = ncol(designs)
ests = matrix(NA, nrow = Nsim, ncol = num_designs)
for (nsim in 1 : Nsim){
for (d in 1 : num_designs){
xs = designs[d, ]
ys = gen_resp(xs)
mod = lm(ys ~ xs)
ests[nsim, d] = coef(mod)[2] / coef(mod)[1]
}
}
error_ests = apply(ests, 2, error_est)
l = list()
for (d in 1 : num_designs){
l[[d]] = ests[, d]
}
#create a default for the name label on the x-axis
xlab_names = ifelse(rep(is.null(xlab_names), num_designs), (1 : num_designs), xlab_names)
#now add errors to it
xlab_names = paste(xlab_names, rep(" (", 2) , round(error_ests, num_digits_round), rep(")", 2), sep = "")
boxplot(l, ylim = quantile(c(ests), c(l_quantile_display, u_quantile_display)),
main = "Bakeoff: Error Estimates for Many Designs",
ylab = "theta-hat",
xlab = "Design (Error)",
names = xlab_names,
...)
if (!is.null(draw_theta_at)){
abline(h = draw_theta_at, col = "blue")
}
invisible(list(ests = l, error_ests = error_ests))
}
#' Report the results of the experiment as well as confidence intervals.
#'
#' @param xs The design
#' @param ys The measurements of the response
#' @param alpha \code{1 - alpha} is the confidence of the computed intervals. Default is \code{0.05}.
#' @param B For the confidence interval methods with an embedded bootstrap (or resampling), the number
#' of resamples (defaults to \code{1000}).
#'
#' @return A list object containing the estimate as well as confidence intervals and parameters.
#'
#' @author Adam Kapelner
#' @export
#' @examples
#' n = 10
#' xmin = 5 / 15
#' xmax = 19 / 1
#' xs = runif(n, xmin, xmax)
#' ys = 2 + 3 * xs + rnorm(n)
#' experimental_results_info = experimental_results(xs, ys)
experimental_results = function(xs, ys, alpha = 0.05, B = 1000){
mod = lm(ys ~ xs)
b0 = coef(mod)[1]
b1 = coef(mod)[2]
thetahat = b1 / b0
es = mod$residuals
s_e = summary(mod)$sigma
n = length(xs)
X = cbind(rep(1, n), xs)
XtXinv = solve(t(X) %*% X)
cis = data.frame(matrix(NA, nrow = 0, ncol = 2))
colnames(cis) = c("lower", "upper")
#now generate them outside
cis = rbind(
normal_approx_homo_ci(alpha, b0, b1, XtXinv, s_e),
normal_approx_hetero_ci(alpha, b0, b1, X, XtXinv, n, xs, es),
bayesian_weighting_ci(alpha, B, n, xs, ys),
param_homo_ci(alpha, B, b0, b1, s_e, n, xs),
nonparam_homo_ci(alpha, B, xs, ys, es),
nonparam_hetero_ci(alpha, B, n, xs, ys, es)
)
rownames(cis) = c(
"Normal Approx Homo (poor coverage)",
"Normal Approx Hetero (poor coverage)",
"Bayesian Weighting",
"Parametric Homo",
"Nonparametric Homo",
"Nonparametric Hetero"
)
list(
thetahat = thetahat,
cis = cis,
n = n,
alpha = alpha,
B = B
)
}
#' This is data for the PRV measurement of the k_H of Napthalene in water.
#' See Section 3 of our paper below for more information.
#'
#' @docType data
#' @keywords datasets
#' @name napth
#' @usage data(napth)
#' @format A data frame with 100 rows and 2 variables
#' @author Adam Kapelner \email{kapelner@@qc.cuny.edu}
#' @references \url{https://arxiv.org/abs/1604.03480}
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.