##
## Copyright 2016 Brent Kaplan
##
## This file is part of beezdemand.
##
## beezdemand is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, version 2.
##
## beezdemand is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with beezdemand. If not, see <http://www.gnu.org/licenses/gpl-2.0.html>.
##
## summary
## R script for analysis functions
##
##' Analyzes purchase task data
##'
##' @title FitCurves
##' @param dat data frame (long form) of purchase task data.
##' @param equation Character vector of length one. Accepts either "hs" for Hursh and Silberberg (2008) or "koff" for Koffarnus, Franck, Stein, and Bickel (2015).
##' @param k A numeric (or character) vector of length one. Reflects the range of consumption in log10 units. If none provided, k will be calculated based on the max/min of the entire sample + .5. If k = "ind", k will be calculated per individual using max/min + .5. If k = "fit", k will be a free parameter on an individual basis. If k = "range", k will be calculated based on the max/min of the entire sample + .5.
##' @param agg Character vector of length one accepts either "Mean" or "Pooled". If not NULL (default), data will be aggregrated appropriately and analyzed in the specified way.
##' @param detailed If TRUE, output will be a 3 element list including (1) dataframe of results, (2) list of model objects, (3) list of individual dataframes used in fitting. Default value is FALSE, which returns only the dataframe of results.
##' @param xcol The column name that should be treated as "x" data
##' @param ycol The column name that should be treated as "y" data
##' @param idcol The column name that should be treated as dataset identifier
##' @param groupcol The column name that should be treated as the groups
##' @param lobound Optional. A named vector of length 2 ("q0", "alpha") or 3 ("q0", "k", "alpha"), the latter length if k = "fit", specifying the lower bounds.
##' @param hibound Optional. A named vector of length 2 ("q0", "alpha") or 3 ("q0", "k", "alpha"), the latter length if k = "fit", specifying the upper bounds.
##' @param constrainq0 Optional. A number that will be used to constrain Q0 in the fitting process. Currently experimental and only works with a fixed k value.
##' @param startq0 Optional. A number that will be used to start Q0 in the fitting process. Currently experimental.
##' @param startalpha Optional. A number that will be used to start Alpha in the fitting process. Currently experimental.
##' @return If detailed == FALSE (default), a dataframe of results. If detailed == TRUE, a 3 element list consisting of (1) dataframe of results, (2) list of model objects, (3) list of individual dataframes used in fitting
##' @author Brent Kaplan <bkaplan.ku@@gmail.com> Shawn Gilroy <shawn.gilroy@@temple.edu>
##' @examples
##' ## Analyze using Hursh & Silberberg, 2008 equation with a k fixed to 2
##' FitCurves(apt[sample(apt$id, 5), ], "hs", k = 2)
##' @export
FitCurves <- function(dat, equation, k, agg = NULL, detailed = FALSE, xcol = "x",
ycol = "y", idcol = "id", groupcol = NULL, lobound, hibound,
constrainq0 = NULL, startq0 = NULL, startalpha = NULL) {
if (missing(dat)) stop("Need to provide a dataframe!", call. = FALSE)
origcols <- colnames(dat)
dat <- CheckCols(dat, xcol = xcol, ycol = ycol, idcol = idcol, groupcol = groupcol)
if (missing(equation)) stop("Need to specify an equation!", call. = FALSE)
equation <- tolower(equation)
if (!is.numeric(constrainq0) & !is.null(constrainq0)) stop("Q0 constraint must be a number", call. = FALSE)
if (equation == "linear") {
out <- FitCurves.linear(dat, equation, agg, detailed, xcol, ycol, idcol, groupcol)
return(out)
}
cnames <- c("id", "Equation", "Q0d", "K",
"Alpha", "R2", "Q0se", "Alphase", "N", "AbsSS", "SdRes", "Q0Low", "Q0High",
"AlphaLow", "AlphaHigh", "EV", "Omaxd", "Pmaxd", "Omaxa", "Pmaxa", "Notes")
if (!is.null(agg)) {
agg <- tolower(agg)
if (!any(c("mean", "pooled") %in% agg)) {
stop("No correct agg specified. Choose either 'Mean' or 'Pooled'")
} else if (agg == "mean") {
dat <- aggregate(y ~ x, data = dat, mean)
dat$id <- agg
} else if (agg == "pooled") {
tmpdat <- aggregate(y ~ x, data = dat, mean)
tmpdat$id <- agg
dat$id <- agg
}
}
if (any(dat$y %in% 0) && (equation == "hs")) {
warning("Zeros found in data not compatible with equation! Dropping zeros!", call. = FALSE)
dat <- dat[dat$y != 0, , drop = FALSE]
}
ps <- unique(dat$id)
ps <- as.character(ps)
np <- length(ps)
dfres <- data.frame(matrix(vector(),
np,
length(cnames),
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE)
fits <- vector(mode = "list", length = np)
adfs <- vector(mode = "list", length = np)
newdats <- vector(mode = "list", length = np)
if (!is.null(agg) && agg == "pooled") {
dfresempirical <- GetEmpirical(tmpdat)
} else {
dfresempirical <- GetEmpirical(dat)
}
kest <- FALSE
if (missing(k)) {
k <- GetK(dat)
message("No k value specified. Defaulting to empirical mean range +.5")
} else if (is.numeric(k)) {
k <- k
} else if (is.character(k)) {
if (k == "fit") {
kest <- "fit"
kstart <- GetK(dat)
} else if (k == "ind") {
kest <- "ind"
} else if (k == "share") {
k <- GetSharedK(dat, equation, sharecol = "id")
if (is.character(k)) {
warning(paste0(k, " Defaulting to empirical mean range +.5"))
k <- GetK(dat)
}
kest <- "share"
} else if (k == "range") {
k <- GetK(dat)
}
} else {
k <- GetK(dat)
warning("Defaulting to empirical mean range +.5")
}
# bounds for exponential/exponentiated
if (!kest == "fit") {
if (missing(lobound)) {
lower <- c(-Inf, -Inf)
names(lower) <- c("q0", "alpha")
} else {
if (!identical(tolower(names(lobound)), c("q0", "alpha"))) stop("lobound names should be q0 and alpha")
lower <- lobound
names(lower) <- tolower(names(lower))
}
if (missing(hibound)) {
upper <- c(Inf, Inf)
names(upper) <- c("q0", "alpha")
} else {
if (!identical(tolower(names(hibound)), c("q0", "alpha"))) stop("hibound names should be q0 and alpha")
upper <- hibound
names(upper) <- tolower(names(upper))
}
} else {
if (missing(lobound)) {
lower <- c(-Inf, -Inf, -Inf)
names(lower) <- c("q0", "k", "alpha")
} else {
if (!identical(tolower(names(lobound)), c("q0", "k", "alpha"))) stop("lobound names should be q0, k, and alpha")
lower <- lobound
names(lower) <- tolower(names(lower))
}
if (missing(hibound)) {
upper <- c(Inf, Inf, Inf)
names(upper) <- c("q0", "k", "alpha")
} else {
if (!identical(tolower(names(hibound)), c("q0", "k", "alpha"))) stop("hibound names should be q0, k, and alpha")
upper <- hibound
names(upper) <- tolower(names(upper))
}
}
## TODO: if groupcol is specified (or not), manufacture vector to loop (paste(agg, grps, sep = "-"))
fo <- switch(equation,
"hs" = (log(y)/log(10)) ~ (log(q0)/log(10)) + k * (exp(-alpha * q0 * x) - 1),
"koff" = y ~ q0 * 10^(k * (exp(-alpha * q0 * x) - 1)))
## loop to fit data
for (i in seq_len(np)) {
dfres[i, "id"] <- ps[i]
dfres[i, "Equation"] <- equation
adf <- NULL
adf <- dat[dat$id == ps[i], ]
if (nrow(adf) == 0) {
dfres[i, setdiff(colnames(dfres), c("id", "Equation", "N", "Notes"))] <- NA
dfres[i, "N"] <- 0
dfres[i, "Notes"] <- "No consumption"
next()
}
fit <- NULL
if (kest == "ind") {
k <- GetK(adf)
} else if (kest == "fit") {
k <- kstart
}
adf[, "k"] <- k
if (!is.null(constrainq0)) {
adf[, "q0"] <- constrainq0
}
if (is.null(startq0)) {
startq0 <- max(adf$y)
}
if (is.null(startalpha)) {
startalpha <- 0.01
}
if (!kest == "fit") {
if (is.null(constrainq0)) {
suppressWarnings(fit <- try(nlsr::wrapnlsr(
formula = fo,
start = list(q0 = startq0, alpha = startalpha),
lower = c(lower),
upper = c(upper),
data = adf),
silent = TRUE))
if (inherits(fit, what = "try-error") && nrow(adf) > 2) {
suppressWarnings(fit <- try(nlsr::nlxb(formula = fo,
start = list(q0 = startq0, alpha = startalpha),
lower = c(lower),
upper = c(upper),
data = adf),
silent = TRUE))
suppressWarnings(fit <- try(nls2::nls2(fo,
data = adf,
start = fit$coefficients,
algorithm = "brute-force")))
attributes(fit)$class <- if (fit$m$Rmat()[2,2] == 0) c("nls", "nls2", "error") else c("nls", "nls2")
}
} else if (!is.null(constrainq0)) {
suppressWarnings(fit <- try(nlsr::wrapnlsr(
formula = fo,
start = list(alpha = startalpha),
lower = c(lower[2]),
upper = c(upper[2]),
data = adf),
silent = TRUE))
if (inherits(fit, what = "try-error") && nrow(adf) > 2) {
suppressWarnings(fit <- try(nlsr::nlxb(formula = fo,
start = list(alpha = startalpha),
lower = c(lower[2]),
upper = c(upper[2]),
data = adf),
silent = TRUE))
suppressWarnings(fit <- try(nls2::nls2(fo,
data = adf,
start = fit$coefficients,
algorithm = "brute-force")))
attributes(fit)$class <- if (fit$m$Rmat()[2,2] == 0) c("nls", "nls2", "error") else c("nls", "nls2")
}
}
} else {
suppressWarnings(fit <- try(nlsr::wrapnlsr(
formula = fo,
start = list(q0 = startq0, k = kstart, alpha = startalpha),
lower = c(lower),
upper = c(upper),
data = adf),
silent = TRUE))
if (inherits(fit, what = "try-error") && nrow(adf) > 3) {
suppressWarnings(fit <- try(nlsr::nlxb(formula = fo,
start = list(q0 = startq0, k = kstart, alpha = startalpha),
lower = c(lower),
upper = c(upper),
data = adf),
silent = TRUE))
suppressWarnings(fit <- try(nls2::nls2(fo,
data = adf,
start = fit$coefficients,
algorithm = "brute-force")))
attributes(fit)$class <- if (fit$m$Rmat()[2,2] == 0) c("nls", "nls2", "error") else c("nls", "nls2")
}
}
fits[[i]] <- fit
adfs[[i]] <- adf
dfres[i, ] <- ExtractCoefs(ps[i], adf, fit, eq = equation,
cols = colnames(dfres), kest = kest,
constrainq0 = constrainq0)
newdat <- NULL
newdat <- data.frame("id" = rep(i, length.out = 10000),
"x" = seq(min(adf$x), max(adf$x), length.out = 10000),
"y" = NA)
#newdat$k <- if (!kest == "fit") k else dfres[i, "K"]
newdat$k <- dfres[i, "K"]
if (!inherits(fit, what = "try-error")) {
if (equation == "hs") {
newdat$y <- 10^predict(fit, newdata = newdat)
} else if (equation == "koff") {
newdat$y <- predict(fit, newdata = newdat)
}
}
newdats[[i]] <- newdat
}
dfres$Equation <- equation
dfres <- merge(dfresempirical, dfres, by = "id")
dfres <- dfres[match(ps, dfres$id), ]
rownames(dfres) <- 1:nrow(dfres)
names(fits) <- ps
names(adfs) <- ps
names(newdats) <- ps
if (detailed) {
return(list("dfres" = dfres,
"fits" = fits,
"newdats" = newdats,
"adfs" = adfs))
} else {
return(dfres)
}
}
##' @noRd
FitCurves.linear <- function(dat, equation, agg = NULL, detailed = FALSE, xcol = "x", ycol = "y", idcol = "id", groupcol = NULL) {
cnames <- c("id", "Equation", "L", "b", "a",
"R2", "Lse", "bse", "ase", "N", "AbsSS", "SdRes", "LLow", "LHigh",
"bLow", "bHigh", "aLow", "aHigh", "MeanElasticity",
"Omaxd", "Pmaxd", "Omaxa", "Pmaxa", "Notes")
if (!is.null(agg)) {
agg <- tolower(agg)
if (!any(c("mean", "pooled") %in% agg)) {
stop("No correct agg specified. Choose either 'Mean' or 'Pooled'")
} else if (agg == "mean") {
dat <- aggregate(y ~ x, data = dat, mean)
dat$id <- agg
} else if (agg == "pooled") {
tmpdat <- aggregate(y ~ x, data = dat, mean)
tmpdat$id <- agg
dat$id <- agg
}
}
if (any(dat$y %in% 0)) {
warning("Zeros found in data not compatible with equation! Dropping zeros!", call. = FALSE)
dat <- dat[dat$y != 0, , drop = FALSE]
}
if (any(dat$x %in% 0)) {
dat <- dat[dat$x != 0, , drop = FALSE]
}
ps <- unique(dat$id)
ps <- as.character(ps)
np <- length(ps)
dfres <- data.frame(matrix(vector(),
np,
length(cnames),
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE)
fits <- vector(mode = "list", length = np)
adfs <- vector(mode = "list", length = np)
newdats <- vector(mode = "list", length = np)
if (!is.null(agg) && agg == "pooled") {
dfresempirical <- GetEmpirical(tmpdat)
} else {
dfresempirical <- GetEmpirical(dat)
}
fo <- log(y) ~ log(l) + (b * log(x)) - a * x
## loop to fit data
for (i in seq_len(np)) {
dfres[i, "id"] <- ps[i]
dfres[i, "Equation"] <- equation
adf <- NULL
adf <- dat[dat$id == ps[i], ]
if (nrow(adf) == 0) {
dfres[i, setdiff(colnames(dfres), c("id", "Equation", "N", "Notes"))] <- NA
dfres[i, "N"] <- 0
dfres[i, "Notes"] <- "No consumption"
next()
}
fit <- NULL
fit <- try(nlsr::wrapnlsr(
formula = fo,
start = list(l = 1, b = 0, a = 0),
data = adf),
silent = TRUE)
fits[[i]] <- fit
adfs[[i]] <- adf
dfres[i, ] <- ExtractCoefs.linear(ps[i], adf, fit, eq = equation, cols = colnames(dfres))
newdat <- NULL
newdat <- data.frame("id" = rep(i, length.out = 10000),
"x" = seq(min(adf$x), max(adf$x), length.out = 10000),
"y" = NA)
if (!inherits(fit, what = "try-error")) {
newdat$y <- exp(predict(fit, newdata = newdat))
}
newdats[[i]] <- newdat
}
dfres$Equation <- equation
dfres <- merge(dfresempirical, dfres, by = "id")
dfres <- dfres[match(ps, dfres$id), ]
rownames(dfres) <- 1:nrow(dfres)
names(fits) <- ps
names(adfs) <- ps
names(newdats) <- ps
if (detailed) {
return(list("dfres" = dfres,
"fits" = fits,
"newdats" = newdats,
"adfs" = adfs))
} else {
return(dfres)
}
}
##' @noRd
ExtractCoefs.linear <- function(pid, adf, fit, eq, cols) {
dfrow <- data.frame(matrix(vector(),
1,
length(cols),
dimnames = list(c(), c(cols))), stringsAsFactors = FALSE)
dfrow[["id"]] <- pid
dfrow[1, "N"] <- nrow(adf)
dfrow[1, c("L", "b", "a")] <- as.numeric(coef(fit)[c("l", "b", "a")])
dfrow[1, c("Lse", "bse", "ase")] <- as.numeric(summary(fit)[[10]][c(1:3), 2])
dfrow[1, "R2"] <- 1.0 - (deviance(fit)/sum((log(adf$y) - mean(log(adf$y)))^2))
dfrow[1, c("LLow", "LHigh")] <- nlstools::confint2(fit)[c(1, 4)]
dfrow[1, c("bLow", "bHigh")] <- nlstools::confint2(fit)[c(2, 5)]
dfrow[1, c("aLow", "aHigh")] <- nlstools::confint2(fit)[c(3, 6)]
## Calculates mean elasticity based on individual range of x
pbar <- mean(unique(adf$x))
dfrow[1, "MeanElasticity"] <- dfrow[1, "b"] - (dfrow[1, "a"] * pbar)
dfrow[1, "Pmaxd"] <- (1 + dfrow[1, "b"])/dfrow[1, "a"]
dfrow[1, "Omaxd"] <- (dfrow[1, "L"] * dfrow[1, "Pmaxd"]^dfrow[1, "b"]) /
exp(dfrow[1, "a"] * dfrow[1, "Pmaxd"]) * dfrow[1, "Pmaxd"]
dfrow[1, "N"] <- nrow(adf)
dfrow[1, "AbsSS"] <- if (is.null(deviance(fit))) fit$m$deviance() else deviance(fit)
dfrow[1, "SdRes"] <- sqrt(dfrow[1, "AbsSS"]/(nrow(adf) - length(fit$m$getPars())))
dfrow[1, "Notes"] <- if ("nls2" %in% class(fit)) "wrapnls failed to converge, reverted to nlxb" else fit$convInfo$stopMessage
dfrow[1, "Notes"] <- trim.leading(dfrow[1, "Notes"])
dfrow
}
# Populates a single row of a dataframe consisting of important information from fits, etc.
# pid Participant id
# adf A data frame
# fit Fitted model object
# eq Equation specified
# cols Column names to populate the dataframe row
# kest Specification of k value
##' @noRd
ExtractCoefs <- function(pid, adf, fit, eq, cols, kest, constrainq0) {
dfrow <- data.frame(matrix(vector(),
1,
length(cols),
dimnames = list(c(), c(cols))), stringsAsFactors = FALSE)
dfrow[["id"]] <- pid
if (inherits(fit, what = "try-error")) {
dfrow[["Notes"]] <- fit[1]
dfrow[["Notes"]] <- strsplit(dfrow[1, "Notes"], "\n")[[1]][2]
} else {
dfrow[1, "N"] <- length(adf$k)
dfrow[1, "AbsSS"] <- if (is.null(deviance(fit))) fit$m$deviance() else deviance(fit)
dfrow[1, "SdRes"] <- sqrt(dfrow[1, "AbsSS"]/(length(adf$k) - length(fit$m$getPars())))
if (is.null(constrainq0)) {
dfrow[1, c("Q0d", "Alpha")] <- if (is.null(coef(fit))) fit$m$getPars()[c("q0", "alpha")] else as.numeric(coef(fit)[c("q0", "alpha")])
} else {
dfrow[1, c("Q0d")] <- min(adf$q0)
dfrow[1, c("Alpha")] <- if (is.null(coef(fit))) fit$m$getPars()[c("alpha")] else as.numeric(coef(fit)[c("alpha")])
}
dfrow[1, "Notes"] <- if ("nls2" %in% class(fit)) "wrapnls failed to converge, reverted to nlxb" else fit$convInfo$stopMessage
dfrow[1, "K"] <- if (kest == "fit") as.numeric(coef(fit)["k"]) else min(adf$k)
if (!inherits(fit, what = "error")) {
if (is.null(constrainq0)) {
dfrow[1, c("Q0se", "Alphase")] <- summary(fit)[[10]][c("q0", "alpha"), "Std. Error"]
dfrow[1, c("Q0Low", "Q0High")] <- nlstools::confint2(fit)["q0", ]
dfrow[1, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)["alpha", ]
} else {
dfrow[1, c("Q0se")] <- NA
dfrow[1, c("Alphase")] <- summary(fit)[[10]][c("alpha"), "Std. Error"]
dfrow[1, c("Q0Low", "Q0High")] <- NA
dfrow[1, c("AlphaLow", "AlphaHigh")] <- nlstools::confint2(fit)["alpha", ]
}
}
dfrow[1, "EV"] <- 1/(dfrow[1, "Alpha"] * (dfrow[1, "K"] ^ 1.5) * 100)
dfrow[1, "Pmaxd"] <- 1/(dfrow[1, "Q0d"] * dfrow[1, "Alpha"] *
(dfrow[1, "K"] ^ 1.5)) * (0.083 * dfrow[1, "K"] + 0.65)
dfrow[1, "Pmaxa"] <- beezdemand::GetAnalyticPmax(dfrow[1, "Alpha"], dfrow[1, "K"], dfrow[1, "Q0d"])
if (eq == "hs") {
dfrow[1, "R2"] <- if (!inherits(fit, what = "error")) 1.0 - (deviance(fit)/sum((log10(adf$y) - mean(log10(adf$y)))^2)) else NA
dfrow[1, "Omaxd"] <- (10^(log10(dfrow[1, "Q0d"]) +
(dfrow[1, "K"] *
(exp(-dfrow[1, "Alpha"] *
dfrow[1, "Q0d"] *
dfrow[1, "Pmaxd"]) - 1)))) * dfrow[1, "Pmaxd"]
dfrow[1, "Omaxa"] <- (10^(log10(dfrow[1, "Q0d"]) +
(dfrow[1, "K"] *
(exp(-dfrow[1, "Alpha"] *
dfrow[1, "Q0d"] *
dfrow[1, "Pmaxa"]) - 1)))) * dfrow[1, "Pmaxa"]
} else if (eq == "koff") {
dfrow[1, "R2"] <- if (!inherits(fit, what = "error")) 1.0 - (deviance(fit)/sum((adf$y - mean(adf$y))^2)) else NA
dfrow[1, "Omaxd"] <- (dfrow[1, "Q0d"] *
(10^(dfrow[1, "K"] *
(exp(-dfrow[1, "Alpha"] *
dfrow[1, "Q0d"] *
dfrow[1, "Pmaxd"]) - 1)))) * dfrow[1, "Pmaxd"]
dfrow[1, "Omaxa"] <- (dfrow[1, "Q0d"] *
(10^(dfrow[1, "K"] *
(exp(-dfrow[1, "Alpha"] *
dfrow[1, "Q0d"] *
dfrow[1, "Pmaxa"]) - 1)))) * dfrow[1, "Pmaxa"]
}
}
dfrow[1, "Notes"] <- trim.leading(dfrow[1, "Notes"])
dfrow
}
##' Fits curve to pooled data
##'
##' @title Fit Pooled Curves
##' @param dat data frame (long form) of purchase task data.
##' @param equation Character vector of length one. Accepts either "hs" for Hursh and Silberberg (2008) or "koff" for Koffarnus, Franck, Stein, and Bickel (2015).
##' @param k A numeric vector of length one. Reflects the range of consumption in log10 units. If none provided, k will be calculated based on the max/min of the entire sample. If k = "fit", k will be a free parameter
##' @param remq0e If TRUE, removes consumption and price where price == 0. Default value is FALSE
##' @param replfree Optionally replaces price == 0 with specified value. Note, if fitting using equation == "hs", and 0 is first price, 0 gets replaced by replfree. Default value is .01
##' @param rem0 If TRUE, removes all 0s in consumption data prior to analysis. Default value is FALSE.
##' @param nrepl Number of zeros to replace with replacement value (replnum). Can accept either a number or "all" if all zeros should be replaced. Default is to replace the first zero only.
##' @param replnum Value to replace zeros. Default is .01
##' @param plotcurves Boolean whether to create plot. If TRUE, a "plots/" directory is created one level above working directory. Default is FALSE.
##' @param method Character string of length 1. Accepts "Mean" to fit to mean data or "Pooled" to fit to pooled data
##' @param indpoints Boolean whether to plot individual points in gray. Default is TRUE.
##' @param vartext Character vector specifying indices to report on plots. Valid indices include "Q0d", "Alpha", "Q0e", "EV", "Pmaxe", "Omaxe", "Pmaxd", "Omaxd", "K", "Q0se", "Alphase", "R2", "AbsSS"
##' @return Data frame
##' @author Brent Kaplan <bkaplan.ku@@gmail.com>
##' @import graphics
##' @import grDevices
##' @examples
##' ## Fit aggregated data (mean only) using Hursh & Silberberg, 2008 equation with a k fixed at 2
##' FitMeanCurves(apt[sample(apt$id, 5), ], "hs", k = 2, method = "Mean")
##' @export
FitMeanCurves <- function(dat, equation, k, remq0e = FALSE, replfree = NULL, rem0 = FALSE, nrepl = NULL, replnum = NULL, plotcurves = FALSE, method = NULL, indpoints = TRUE, vartext = NULL) {
if (is.null(method) || !any(c("Mean", "Pooled") %in% method)) stop("No method specified. Choose either 'Mean' or 'Pooled'")
if ("p" %in% colnames(dat)) {
colnames(dat)[which(colnames(dat) == "p")] <- "id"
} else if (!"id" %in% colnames(dat)) {
stop("Make sure there is an id column in data")
}
if (plotcurves) {
outdir <- paste0(tempdir(), "/")
tobquote = NULL
if (!is.null(vartext)) {
dict <- data.frame(Name = c("Q0d", "Alpha", "Intensity", "EV", "Pmaxe",
"Omaxe", "Pmaxd", "Omaxd", "Pmaxa", "Omaxa",
"K", "Q0se", "Alphase", "R2", "AbsSS"),
Variable = c("Q[0[d]]", "alpha", "Intensity", "EV", "P[max[e]]",
"O[max[e]]", "P[max[d]]", "O[max[d]]", "P[max[a]]", "O[max[a]]",
"k", "Q[0[se]]", "alpha[se]", "R^2", "AbsSS"))
if (any(is.na(dict$Variable[match(vartext, dict$Name)]))) {
warning(paste0("Invalid parameter in vartext! I will go on but won't print any parameters. Try again with one of the following: ", dict$Name))
printvars <- FALSE
} else {
tobquote <- as.character(dict$Variable[match(vartext, dict$Name)])
printvars <- TRUE
}
} else {
printvars <- FALSE
}
}
dat <- dat[!is.na(dat$y), ]
if (equation == "hs" || equation == "koff") {
cnames <- c("id", "Equation", "Q0d", "K",
"R2", "Alpha", "Q0se", "Alphase", "N", "AbsSS", "SdRes", "Q0Low", "Q0High",
"AlphaLow", "AlphaHigh", "EV", "Omaxd", "Pmaxd", "Omaxa", "Pmaxa", "Notes")
} else if (equation == "linear") {
cnames <- c("id", "Equation", "L", "b", "a",
"R2", "Lse", "bse", "ase", "N", "AbsSS", "SdRes", "LLow", "LHigh",
"bLow", "bHigh", "aLow", "aHigh", "Elasticity", "MeanElasticity",
"Omaxd", "Pmaxd", "Notes")
}
dfres <- data.frame(matrix(vector(),
1,
length(cnames),
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE)
adf <- aggregate(y ~ x, data = dat, mean)
adf$id <- method
dfresempirical <- GetEmpirical(adf)
dfres[["id"]] <- method
dfres[["Equation"]] <- equation
## Transformations if specified
if (!is.null(nrepl) && !is.null(replnum)) {
dat <- ReplaceZeros(dat, nrepl = nrepl, replnum = replnum)
}
if (method == "Mean") {
dato <- dat
dat <- aggregate(y ~ x, data = dat, mean)
}
## If no k is provided, otherwise
## TODO: provide a character element to fit empirical max/min range
if (!equation == "linear") {
if (missing(k)) {
k <- GetK(dat)
kest <- FALSE
} else if (is.numeric(k)) {
k <- k
kest <- FALSE
} else if (k == "fit") {
kest <- "fit"
kstart <- GetK(dat)
} else {
k <- GetK(dat)
kest <- FALSE
}
if (kest == "fit") {
k <- kstart
} else {
dat[, "k"] <- k
}
}
if (remq0e) {
dat <- dat[dat$x != 0, ]
} else if (!is.null(replfree)) {
replfree <- if (is.numeric(replfree)) replfree else 0.01
dat[dat$x == 0, "x"] <- replfree
}
if (rem0 || equation == "hs") {
dat <- dat[dat$y != 0, ]
}
fo <- switch(equation,
"hs" = (log(y)/log(10)) ~ (log(q0)/log(10)) + k * (exp(-alpha * q0 * x) - 1),
"koff" = y ~ q0 * 10^(k * (exp(-alpha * q0 * x) - 1)),
"linear" = log(y) ~ log(l) + (b * log(x)) - a * x)
fit <- NULL
if (!equation == "linear") {
if (!kest == "fit") {
suppressWarnings(fit <- try(nlsr::wrapnlsr(
formula = fo,
start = list(q0 = 10, alpha = 0.01),
data = dat),
silent = TRUE))
} else {
suppressWarnings(fit <- try(nlsr::wrapnlsr(
formula = fo,
start = list(q0 = 10, k = kstart, alpha = 0.01),
data = dat),
silent = TRUE))
}
} else if (equation == "linear") {
fit <- try(nlsr::wrapnlsr(
formula = fo,
start = list(l = 1, b = 0, a = 0),
data = dat),
silent = TRUE)
}
if (inherits(fit, "try-error")) {
dfres[["Notes"]] <- fit[1]
dfres[["Notes"]] <- strsplit(dfres[["Notes"]], "\n")[[1]][2]
} else {
dfres[["N"]] <- length(dat$k)
dfres[["AbsSS"]] <- deviance(fit)
dfres[["SdRes"]] <- sqrt(deviance(fit)/df.residual(fit))
dfres[["Notes"]] <- fit$convInfo$stopMessage
if (equation == "linear") {
dfres[["L"]] <- as.numeric(coef(fit)["l"])
dfres[["b"]] <- as.numeric(coef(fit)["b"])
dfres[["a"]] <- as.numeric(coef(fit)["a"])
dfres[["Lse"]] <- as.numeric(summary(fit)[[10]][1, 2])
dfres[["bse"]] <- as.numeric(summary(fit)[[10]][2, 2])
dfres[["ase"]] <- as.numeric(summary(fit)[[10]][3, 2])
dfres[["R2"]] <- 1.0 - (deviance(fit)/sum((log(adf$y) - mean(log(adf$y)))^2))
dfres[["LLow"]] <- nlstools::confint2(fit)[1]
dfres[["LHigh"]] <- nlstools::confint2(fit)[4]
dfres[["bLow"]] <- nlstools::confint2(fit)[2]
dfres[["bHigh"]] <- nlstools::confint2(fit)[5]
dfres[["aLow"]] <- nlstools::confint2(fit)[3]
dfres[["aHigh"]] <- nlstools::confint2(fit)[6]
## Calculates mean elasticity based on individual range of x
pbar <- mean(unique(dat$x))
dfres[["MeanElasticity"]] <- dfres[["b"]] - (dfres[["a"]] * pbar)
dfres[["Pmaxd"]] <- (1 + dfres[["b"]])/dfres[["a"]]
dfres[["Omaxd"]] <- (dfres[["L"]] * dfres[["Pmaxd"]]^dfres[["b"]]) /
exp(dfres[["a"]] * dfres[["Pmaxd"]]) * dfres[["Pmaxd"]]
} else {
dfres[["K"]] <- if (kest == "fit") as.numeric(coef(fit)["k"]) else min(dat$k)
dfres[["Alpha"]] <- as.numeric(coef(fit)["alpha"])
dfres[["Q0d"]] <- as.numeric(coef(fit)["q0"])
dfres[["Q0se"]] <- summary(fit)[[10]][1, 2]
dfres[["Alphase"]] <- summary(fit)[[10]][2, 2]
dfres[["Q0Low"]] <- nlstools::confint2(fit)[1]
dfres[["Q0High"]] <- nlstools::confint2(fit)[3]
dfres[["AlphaLow"]] <- nlstools::confint2(fit)[2]
dfres[["AlphaHigh"]] <- nlstools::confint2(fit)[4]
dfres[["EV"]] <- 1/(dfres[["Alpha"]] * (dfres[["K"]] ^ 1.5) * 100)
dfres[["Pmaxd"]] <- 1/(dfres[["Q0d"]] * dfres[["Alpha"]] * (dfres[["K"]] ^ 1.5)) * (0.083 * dfres[["K"]] + 0.65)
dfres[["Pmaxa"]] <- beezdemand::GetAnalyticPmax(dfres[["Alpha"]], dfres[["K"]], dfres[["Q0d"]])
}
if (equation == "hs") {
dfres[["R2"]] <- 1.0 - (deviance(fit)/sum((log10(dat$y) - mean(log10(dat$y)))^2))
dfres[["Omaxd"]] <- (10^(log10(dfres[["Q0d"]]) +
(dfres[["K"]] *
(exp(-dfres[["Alpha"]] *
dfres[["Q0d"]] *
dfres[["Pmaxd"]]) - 1)))) * dfres[["Pmaxd"]]
dfres[["Omaxa"]] <- (10^(log10(dfres[["Q0d"]]) +
(dfres[["K"]] *
(exp(-dfres[["Alpha"]] *
dfres[["Q0d"]] *
dfres[["Pmaxa"]]) - 1)))) * dfres[["Pmaxa"]]
} else if (equation == "koff") {
dfres[["R2"]] <- 1.0 - (deviance(fit)/sum((dat$y - mean(dat$y))^2))
dfres[["Omaxd"]] <- (dfres[["Q0d"]] *
(10^(dfres[["K"]] *
(exp(-dfres[["Alpha"]] *
dfres[["Q0d"]] *
dfres[["Pmaxd"]]) - 1)))) * dfres[["Pmaxd"]]
dfres[["Omaxa"]] <- (dfres[["Q0d"]] *
(10^(dfres[["K"]] *
(exp(-dfres[["Alpha"]] *
dfres[["Q0d"]] *
dfres[["Pmaxa"]]) - 1)))) * dfres[["Pmaxa"]]
}
}
dfres <- merge(dfresempirical, dfres, by = "id")
if (plotcurves) {
majlabels <- c(".0000000001", ".000000001", ".00000001", ".0000001", ".000001", ".00001", ".0001", ".001", ".01", ".1", "1", "10", "100", "1000")
majticks <- exp(seq(log(.0000000001), log(1000), length.out = 14))
minticks <- minTicks(majticks)
datmean <- aggregate(y ~ x, data = dat, mean)
if (!inherits(fit, "try-error")) {
tempnew <- data.frame(x = seq(min(dat$x[dat$x > 0]), max(dat$x), length.out = 1000))
if (equation == "hs") {
tempnew$k = dfres[["K"]]
tempnew$y <- 10^(predict(fit, newdata = tempnew))
} else if (equation == "koff") {
tempnew$k = dfres[["K"]]
tempnew$y <- predict(fit, newdata = tempnew)
} else if (equation == "linear") {
tempnew$y <- exp^(predict(fit, newdata = tempnew))
}
tempnew$expend <- tempnew$x * tempnew$y
xmin <- min(c(tempnew$x[tempnew$x > 0], .1))
xmax <- max(tempnew$x)
if (indpoints && method == "Mean") {
ymin <- min(c(tempnew$y, dato$y[dato$y > 0], 1))
ymax <- min(c(1000, max(c(tempnew$y, dato$y)))) + 5
} else {
ymin <- min(c(tempnew$y, dat$y[dat$y > 0], 1))
ymax <- min(c(1000, max(c(tempnew$y, dat$y)))) + 5
}
pdf(file = paste0(outdir, method, ".pdf"))
par(mar = c(5, 4, 4, 4) + 0.3)
plot(tempnew$x, tempnew$y,
type = "n", log = "xy", yaxt = "n", xaxt = "n", bty = "l",
xlim = c(xmin, xmax),
ylim = c(ymin, ymax),
xlab = "Price", ylab = "Reported Consumption", main = method)
if (indpoints && method == "Pooled") {
points(dat$x, jitter(dat$y, .8), col = "gray", pch = 16, cex = .5)
} else if (indpoints && method == "Mean") {
points(dato$x, jitter(dato$y, .8), col = "gray", pch = 16, cex = .5)
}
points(datmean$x, datmean$y, pch = 16, cex = .9)
axis(1, majticks, labels = majlabels)
axis(1, minticks, labels = NA, tcl = -0.25, lwd = 0, lwd.ticks = 1)
axis(2, majticks, labels = majlabels, las = 1)
axis(2, minticks, labels = NA, tcl = -0.25, lwd = 0, lwd.ticks = 1)
lines(tempnew$y ~ tempnew$x, lwd = 1.5)
if (printvars) {
leg <- vector("expression", length(vartext))
for (j in seq_along(vartext)) {
tmp <- round(dfres[[vartext[j]]], 6)
leg[j] <- parse(text = paste(tobquote[[j]], " == ", tmp))
}
legend("bottomleft", legend = leg, xjust = 0, cex = .7)
}
graphics.off()
} else if (inherits(fit, "try-error")) {
warning(paste0("Unable to fit error: ", strsplit(fit[1], "\n")[[1]][2]))
xmin <- min(c(dat$x[dat$x > 0], .1))
xmax <- max(dat$x)
ymin <- min(c(dat$y, dat$y[dat$y > 0], 1))
ymax <- min(c(1000, max(dat$y))) + 5
pdf(file = paste0(outdir, method, ".pdf"))
par(mar = c(5, 4, 4, 4) + 0.3)
plot(dat$x, dat$y,
type = "n", log = "xy", yaxt = "n", xaxt = "n", bty = "l",
xlim = c(xmin, xmax),
ylim = c(ymin, ymax),
xlab = "Price", ylab = "Reported Consumption", main = method)
if (indpoints && method == "Pooled") {
points(dat$x, jitter(dat$y, .8), col = "gray", pch = 16, cex = .5)
} else if (indpoints && method == "Mean") {
points(dato$x, jitter(dato$y, .8), col = "gray", pch = 16, cex = .5)
}
points(datmean$x, datmean$y, pch = 16, cex = .9)
axis(1, majticks, labels = majlabels)
axis(1, minticks, labels = NA, tcl = -0.25, lwd = 0, lwd.ticks = 1)
axis(2, majticks, labels = majlabels, las = 1)
axis(2, minticks, labels = NA, tcl = -0.25, lwd = 0, lwd.ticks = 1)
graphics.off()
}
}
dfres
}
##' Extra Sum of Squares F-test
##'
##' One alpha better than individual alphas?
##' @title ExtraF
##' @param dat Long form data frame
##' @param equation "hs"
##' @param groups NULL for all. Character vector matching groups in groupcol
##' @param verbose If TRUE, prints all output including models
##' @param k User-defined k value; if missing will attempt to find shared k and then mean emprirical range (in log units)
##' @param compare Specify whether to compare alpha or Q0. Default is alpha
##' @param idcol The column name that should be treated as dataset identifier
##' @param xcol The column name that should be treated as "x" data
##' @param ycol The column name that should be treated as "y" data
##' @param groupcol The column name that should be treated as the groups
##' @param start_alpha Optional numeric to inform starting value for alpha
##' @return List of results and models
##' @author Brent Kaplan <bkaplan.ku@@gmail.com>
##' @import stats
##' @examples
##' ## Compare two groups using equation by Koffarnus et al., 2015 and a fixed k of 2
##' \donttest{
##' apt$group <- NA
##' apt[apt$id %in% sample(unique(apt$id), length(unique(apt$id))/2), "group"] <- "a"
##' apt$group[is.na(apt$group)] <- "b"
##' ExtraF(apt, "koff", k = 2, groupcol = "group")
##' }
##' @export
ExtraF <- function(dat, equation = "hs", groups = NULL, verbose = FALSE, k, compare = "alpha",
idcol = "id", xcol = "x", ycol = "y", groupcol = NULL, start_alpha = .001) {
if (missing(dat)) stop("Need to provide a dataframe!", call. = FALSE)
origcols <- colnames(dat)
#browser()
dat <- CheckCols(dat, xcol = xcol, ycol = ycol, idcol = idcol, groupcol = groupcol)
## change from subsetting groupcol to subsetting "group"
if (is.null(groups) && is.null(groupcol)) {
stop("No groups selected!")
##grps <- unique(dat$id)
} else if (length(groups) == 1) {
stop("Must specify more than 1 group!")
} else if (!is.null(groupcol) && !is.null(groups)) {
dat <- subset(dat, group %in% groups)
grps <- sort(groups)
} else if (!is.null(groups) && is.null(groupcol)) {
dat <- subset(dat, id %in% groups)
grps <- sort(groups)
} else if (is.null(groups) && !is.null(groupcol)) {
grps <- sort(unique(dat$group))
}
if (!any(colnames(dat) %in% "group")) {
dat$group <- dat$id
}
if (any(dat$y %in% 0) && (equation == "hs")) {
warning("Zeros found in data not compatible with equation! Dropping zeros!")
dat <- dat[dat$y != 0, , drop = FALSE]
}
## find best fit k to constrain later
if (!missing(k)) {
bfk <- k
} else {
## TODO: allow to specify id column in GetSharedK
#kdat <- dat
#colnames(kdat) <- c("old-id", "id", "x", "y")
bfk <- try(GetSharedK(dat = dat, equation = equation, sharecol = "group"), silent = TRUE)
if (inherits(bfk, "character") || inherits(bfk, "try-error")) {
message(bfk)
bfk <- GetK(dat)
}
}
## set references
j <- 1
for (i in grps) {
#dat[dat$id == i, "ref"] <- j
dat[dat$group == i, "ref"] <- j
j <- j+1
}
dat$ref <- as.factor(dat$ref)
## create contrasts
dat2 <- cbind(dat, model.matrix(~0 + ref, dat))
nparams <- length(unique(dat2$ref))
## dummy code alpha
if (compare == "q0") {
paramsalpha <- paste(sprintf("alpha%d*ref%d", 1:nparams, 1:nparams), collapse = "+")
startalpha <- paste(sprintf("alpha%d", 1:nparams))
startingvals <- as.vector(c(10, rep(start_alpha, length(startalpha))))
names(startingvals) <- c("q0", startalpha)
}
## dummy code q0
if (compare == "alpha") {
if (equation == "hs") {
paramslogq0 <- paste(sprintf("log(q0%d)/log(10)*ref%d", 1:nparams, 1:nparams), collapse = "+")
paramsq0 <- paste(sprintf("q0%d*ref%d", 1:nparams, 1:nparams), collapse = "+")
startq0 <- paste(sprintf("q0%d", 1:nparams))
} else if (equation == "koff") {
paramsq0 <- paste(sprintf("q0%d*ref%d", 1:nparams, 1:nparams), collapse = "+")
startq0 <- paste(sprintf("q0%d", 1:nparams))
}
startingvals <- as.vector(c(rep(10, length(startq0)), start_alpha))
names(startingvals) <- c(startq0, "alpha")
}
## fit simple model
if (compare == "alpha") {
if (equation == "hs") {
fu <- sprintf("log(y)/log(10) ~ (%s) + k * (exp(-(alpha) * (%s) * x)-1)", paramslogq0, paramsq0)
} else if (equation == "koff") {
fu <- sprintf("y ~ (%s) * 10 ^ (k * (exp(-(alpha) * (%s) * x) - 1))", paramsq0, paramsq0)
}
} else if (compare == "q0") {
if (equation == "hs") {
fu <- sprintf("log(y)/log(10) ~ q0 + k * (exp(-(%s) * q0 * x) - 1)", paramsalpha)
} else if (equation == "koff") {
fu <- sprintf("y ~ q0 * 10 ^ (k * (exp(-(%s) * q0 * x) - 1))", paramsalpha)
}
}
fu <- gsub("k", bfk, fu)
fu <- as.formula(fu)
fit <- NULL
fit <- try(nlsr::wrapnlsr(fu, data = dat2, start = c(startingvals)), silent = TRUE)
if (inherits(fit, "try-error")) stop("Unable to fit simple model!")
## fit complex model (q0 and alpha free, fixed k)
if (equation == "hs") {
fo <- "(log(y)/log(10)) ~ (log(q0)/log(10)) + k * (exp(-alpha * q0 * x) - 1)"
} else if (equation == "koff") {
fo <- "y ~ q0 * 10 ^ (k * (exp(-(alpha * q0 * x)) - 1))"
}
fo <- gsub("k", bfk, fo)
fo <- as.formula(fo)
## to hold predicted values
newdat <- data.frame("group" = rep(grps, each = 100000),
"x" = rep(seq(min(unique(dat$x)),
max(unique(dat$x)),
length.out = 100000), times = length(grps)),
"y" = NA)
## on group by group basis
lstfits <- vector(mode = "list", length = length(grps))
for (i in 1:length(grps)) {
#tmp <- subset(dat, id %in% i) ## groupcol %in% i
tmp <- subset(dat, group %in% grps[i])
lstfits[[i]] <- try(nlsr::wrapnlsr(formula = fo,
start = list(q0 = 10, alpha = 0.01),
data = tmp), silent = TRUE)
if (equation == "hs") {
newdat[newdat$group == grps[i], "y"] <- 10^predict(lstfits[[i]],
subset(newdat, group %in% grps[i], select = "x"))
} else if (equation == "koff") {
newdat[newdat$group == grps[i], "y"] <- predict(lstfits[[i]],
subset(newdat, group %in% grps[i], select = "x"))
}
}
ss1 <- sum(resid(fit)^2)
ss2 <- sum(sapply(sapply(sapply(lstfits, resid), function(x) x^2), sum))
df1 <- df.residual(fit)
df2 <- sum(sapply(lstfits, df.residual))
F <- ((ss1-ss2)/ss2)/((df1-df2)/df2)
pval <- 1 - pf(F, (df1-df2), df2)
critF <- qf(c(0.025, 0.975), (df1 - df2), df2)
print(paste0("Null hypothesis: ", if (compare == "alpha") "alpha" else "q0", " same for all data sets"))
print(paste0("Alternative hypothesis: ", if (compare == "alpha") "alpha" else "q0", " different for each data set"))
print(paste0("Conclusion: ", if(pval < .05) "reject" else "fail to reject", " the null hypothesis"))
print(paste0("F(", (df1-df2), ",", df2, ") = ", round(F, 4), ", p = ", round(pval, 4)))
cnames <- c("Group", "Q0d", "K",
"R2", "Alpha", "N", "AbsSS", "SdRes", "EV",
"Omaxd", "Pmaxd", "Omaxa", "Pmaxa", "Notes", "F-Test")
dfres <- data.frame(matrix(vector(),
(nparams * 2) + 2,
length(cnames),
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE)
dfres[1, "Group"] <- "Shared"
grps <- as.character(grps)
for (i in 2:(1+nparams)) {
dfres[i, "Group"] <- grps[(i-1)]
if (compare == "alpha") {
dfres[i, c("Q0d", "Alpha")] <- as.numeric(coef(fit)[c(startq0[i-1], "alpha")])
} else if (compare == "q0") {
dfres[i, c("Q0d", "Alpha")] <- as.numeric(coef(fit)[c("q0", startalpha[i-1])])
}
dfres[i, "K"] <- bfk
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100)
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0d"] * dfres[i, "Alpha"] *
(dfres[i, "K"] ^ 1.5)) * (0.083 * dfres[i, "K"] + 0.65)
dfres[i, "Pmaxa"] <- beezdemand::GetAnalyticPmax(dfres[i, "Alpha"], dfres[i, "K"], dfres[i, "Q0d"])
if (equation == "hs") {
dfres[i, "R2"] <- 1.0 - (deviance(fit)/sum((log10(dat2$y) - mean(log10(dat2$y)))^2))
dfres[i, "Omaxd"] <- (10^(log10(dfres[i, "Q0d"]) +
(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
dfres[i, "Omaxa"] <- (10^(log10(dfres[i, "Q0d"]) +
(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxa"]) - 1)))) * dfres[i, "Pmaxa"]
} else if (equation == "koff") {
dfres[i, "R2"] <- 1.0 -(deviance(fit)/sum((dat2$y - mean(dat2$y))^2))
dfres[i, "Omaxd"] <- (dfres[i, "Q0d"] *
(10^(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
dfres[i, "Omaxa"] <- (dfres[i, "Q0d"] *
(10^(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxa"]) - 1)))) * dfres[i, "Pmaxa"]
}
dfres[i, "N"] <- NROW(dat2$y)
dfres[i, "AbsSS"] <- deviance(fit)
dfres[i, "SdRes"] <- sqrt(deviance(fit)/df.residual(fit))
dfres[i, "Notes"] <- fit$convInfo$stopMessage
}
dfres[nparams+2, "Group"] <- "Not Shared"
j <- 1
for (i in (nparams+3):nrow(dfres)) {
tmp <- lstfits[[j]]
dfres[i, "Group"] <- grps[j]
dfres[i, c("Q0d", "Alpha")] <- as.numeric(coef(tmp)[c("q0", "alpha")])
dfres[i, "K"] <- bfk
dfres[i, "EV"] <- 1/(dfres[i, "Alpha"] * (dfres[i, "K"] ^ 1.5) * 100)
dfres[i, "Pmaxd"] <- 1/(dfres[i, "Q0d"] * dfres[i, "Alpha"] *
(dfres[i, "K"] ^ 1.5)) * (0.083 * dfres[i, "K"] + 0.65)
dfres[i, "Pmaxa"] <- beezdemand::GetAnalyticPmax(dfres[i, "Alpha"], dfres[i, "K"], dfres[i, "Q0d"])
if (equation == "hs") {
dfres[i, "R2"] <- 1.0 - (deviance(tmp)/sum((log10(subset(dat, group %in% grps[j])$y) -
mean(log10(subset(dat, group %in% grps[j])$y)))^2))
dfres[i, "Omaxd"] <- (10^(log10(dfres[i, "Q0d"]) +
(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
dfres[i, "Omaxa"] <- (10^(log10(dfres[i, "Q0d"]) +
(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxa"]) - 1)))) * dfres[i, "Pmaxa"]
} else if (equation == "koff") {
dfres[i, "R2"] <- 1.0 -(deviance(tmp)/sum((subset(dat, group %in% grps[j])$y -
mean(subset(dat, group %in% grps[j])$y))^2))
dfres[i, "Omaxd"] <- (dfres[i, "Q0d"] *
(10^(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxd"]) - 1)))) * dfres[i, "Pmaxd"]
dfres[i, "Omaxa"] <- (dfres[i, "Q0d"] *
(10^(dfres[i, "K"] *
(exp(-dfres[i, "Alpha"] *
dfres[i, "Q0d"] *
dfres[i, "Pmaxa"]) - 1)))) * dfres[i, "Pmaxa"]
}
dfres[i, "N"] <- NROW(subset(dat, group %in% grps[j])$y)
dfres[i, "AbsSS"] <- deviance(tmp)
dfres[i, "SdRes"] <- sqrt(deviance(tmp)/df.residual(tmp))
dfres[i, "Notes"] <- tmp$convInfo$stopMessage
j <- j+1
}
dfres[1, "F-test"] <- "Summary of F-test"
dfres[2, "F-test"] <- paste0("Conclusion: ", if(pval < .05) "reject" else "fail to reject",
" the null hypothesis")
dfres[3, "F-test"] <- paste0("F(", (df1-df2), ",", df2, ") = ", round(F, 4),
", p = ", round(pval, 4))
results <- list("ftest" = list("F" = F, "pval" = pval, "df1" = (df1 - df2), "df2" = df2),
"dfres" = dfres,
"newdat" = newdat,
"simpmodel" = fit,
"compmodels" = lstfits)
if (verbose) results else invisible(results)
}
# Calculate sum of squares for exponential demand
##' @noRd
getSumOfSquaresExponential <- function(presort, index, k, Y, X) {
projections <- (log(presort[index,]$Q0)/log(10)) + k * (exp(-presort[index,]$Alpha * presort[index,]$Q0 * X) - 1)
sqResidual <- (log(Y)/log(10) - projections)^2
sum(sqResidual)
}
# Calculate sum of squares for exponentiated demand
##' @noRd
getSumOfSquaresExponentiated <- function(presort, index, k, Y, X) {
projections <- presort[index,]$Q0 * 10^(k * (exp(-presort[index,]$Alpha * presort[index,]$Q0 * X) - 1))
sqResidual <- (Y - projections)^2
sum(sqResidual)
}
##' Finds shared k among selected datasets using global regression
##'
##' Uses global regression to fit a shared k among datasets. Assumes the dataset is in its final form. Used within FitCurves
##' @title Get Shared K
##' @param dat Dataframe (longform)
##' @param equation Character vector. Accepts either "hs" or "koff"
##' @param sharecol Character for column to find shared k. Default to "group" but can loop based on id.
##' @return Numeric value of shared k
##' @author Brent Kaplan <bkaplan.ku@@gmail.com> Shawn P Gilroy <shawn.gilroy@@temple.edu>
##' @examples
##' ## Find a shared k value across datasets indicated by id
##' \donttest{
##' GetSharedK(apt, "hs", sharecol = "id")
##' }
##' @export
GetSharedK <- function(dat, equation, sharecol = "group") {
if (length(unique(dat[, sharecol])) == 1) {
stop("Cannot find a shared k value with only one dataset!", call. = FALSE)
}
## get rid of NAs
dat <- dat[!is.na(dat$y), ]
# get rid of zeroes early on if HS
if (equation == "hs") {
# dat <- dat[dat$x != 0, ]
dat <- dat[dat$y != 0, ]
}
j <- 1
for (i in unique(dat[, sharecol])) {
# get rid of rows with only one or two data points
if (nrow(dat[dat[[sharecol]] == i,]) < 3) {
dat <- dat[dat[[sharecol]] != i,]
next
}
dat[dat[, sharecol] == i, "ref"] <- j
j <- j+1
}
dat$ref <- as.factor(dat$ref)
## create contrasts
dat2 <- cbind(dat, model.matrix(~0 + ref, dat))
nparams <- length(unique(dat2$ref))
if (equation == "hs") {
paramslogq0 <- paste(sprintf("log(q0%d)/log(10)*ref%d", 1:nparams, 1:nparams), collapse = "+")
paramsalpha <- paste(sprintf("alpha%d*ref%d", 1:nparams, 1:nparams), collapse = "+")
paramsq0 <- paste(sprintf("q0%d*ref%d", 1:nparams, 1:nparams), collapse = "+")
startq0 <- paste(sprintf("q0%d", 1:nparams))
startalpha <- paste(sprintf("alpha%d", 1:nparams))
# Domains
minQ <- if (min(dat$y) > 0) min(dat$y) else 0.01
startQ <- seq(log(minQ), log(max(dat$y) * 1.5), length.out = 10)
startA <- seq(0.98, 1.02, length.out = 10)
startK <- seq(log(0.5), log(log(max(dat$y)) + 1), length.out = 10)
startQ <- exp(startQ)
startA <- log(startA)
startK <- exp(startK)
# Pre-sorts
presort <- expand.grid(Q0 = startQ,
Alpha = startA)
presort$sumSquares <- NA
savedStartValues <- data.frame(id = integer(),
Q0 = double(),
Alpha = double(),
SSR = double(),
stringsAsFactors = FALSE)
bestSS <- NA
currentK <- NA
currentData <- NA
bestFrame <- data.frame()
message("Beginning search for best-starting k")
for (k in startK) {
# message(sprintf("Scanning for starting values... %s of %s (K = %s)",
# match(k, startK), length(startK), k))
currentK <- k
for (j in unique(dat$ref)) {
currentData <- dat[dat$ref == j, ]
for (i in 1:nrow(presort)) {
presort[i, ]$sumSquares <- getSumOfSquaresExponential(presort, # Values to check
i, # Index of values
currentK,
currentData$y,
currentData$x)
}
presort <- presort[order(presort[,"sumSquares"]),]
savedStartValues[as.numeric(j),"Q0"] <- presort[1,]$Q0
savedStartValues[as.numeric(j),"Alpha"] <- presort[1,]$Alpha
savedStartValues[as.numeric(j),"SSR"] <- presort[1,]$sumSquares
savedStartValues[as.numeric(j),"K"] <- currentK
savedStartValues[as.numeric(j),"id"] <- j
}
if (is.na(bestSS) || sum(savedStartValues$SSR) < bestSS) {
# message(sprintf("Improvement: K at %s = err: %s", currentK, sum(savedStartValues$SSR)))
bestSS <- sum(savedStartValues$SSR)
bestFrame <- data.frame(Q0 = savedStartValues$Q0,
Alpha = savedStartValues$Alpha,
K = savedStartValues$K)
}
presort$sumSquares <- NA
}
message(sprintf("Best k fround at %s = err: %s", bestFrame$K[1], bestSS))
vecStartQ0 <- bestFrame$Q0
vecStartAlpha <- bestFrame$Alpha
vecStartK <- bestFrame$K
startingvals <- as.vector(c(vecStartQ0, vecStartAlpha, mean(vecStartK)))
names(startingvals) <- c(startq0, startalpha, "k")
minvals <- as.vector(c(rep(0.01, length(startq0)), rep(-Inf, length(startalpha)), 0.5))
names(minvals) <- c(startq0, startalpha, "k")
maxvals <- as.vector(c(rep(Inf, length(startq0)), rep(Inf, length(startalpha)), (log(max(dat$y)) + 0.5) * 2))
names(maxvals) <- c(startq0, startalpha, "k")
fo <- sprintf("log(y)/log(10) ~ (%s) + k * (exp(-(%s) * (%s) * x)-1)", paramslogq0, paramsalpha, paramsq0)
fo <- as.formula(fo)
message("Searching for shared K, this can take a while...")
fit <- NULL
fit <- nlsr::nlxb(fo, data = dat2,
start = c(startingvals),
lower = c(minvals),
upper = c(maxvals),
# control = nls.control(maxiter = 30,
# tol = 1e-05,
# warnOnly = TRUE,
# minFactor = 1/1024),
trace = FALSE)
if (!inherits(fit, "try-error")) {
sharedk <- fit$coefficients["k"]
return(sharedk)
} else {
sharedk <- "Unable to find a shared k."
return(sharedk)
}
} else if (equation == "koff") {
paramsq0 <- paste(sprintf("q0%d*ref%d", 1:nparams, 1:nparams), collapse = "+")
paramsalpha <- paste(sprintf("alpha%d*ref%d", 1:nparams, 1:nparams), collapse = "+")
startq0 <- paste(sprintf("q0%d", 1:nparams))
startalpha <- paste(sprintf("alpha%d", 1:nparams))
# Domains
minQ <- if (min(dat$y) > 0) min(dat$y) else 0.01
startQ <- seq(log(minQ), log(max(dat$y) * 1.5), length.out = 10)
startA <- seq(0.98, 1.02, length.out = 10)
startK <- seq(log(0.5), log(log(max(dat$y)) + 0.5), length.out = 10)
startQ <- exp(startQ)
startA <- log(startA)
startK <- exp(startK)
# Pre-sorts
presort <- expand.grid(Q0 = startQ,
Alpha = startA)
presort$sumSquares <- NA
savedStartValues <- data.frame(id = integer(),
Q0 = double(),
Alpha = double(),
SSR = double(),
stringsAsFactors = FALSE)
bestSS <- NA
currentK <- NA
currentData <- NA
bestFrame <- data.frame()
message("Beginning search for best-starting k")
for (k in startK) {
# message(sprintf("Scanning for starting values... %s of %s (K = %s)",
# match(k, startK), length(startK), k))
currentK <- k
for (j in unique(dat$ref)) {
currentData <- dat[dat$ref == j, ]
for (i in 1:nrow(presort)) {
presort[i, ]$sumSquares <- getSumOfSquaresExponentiated(presort, # Values to check
i, # Index of values
currentK,
currentData$y,
currentData$x)
}
presort <- presort[order(presort[,"sumSquares"]),]
savedStartValues[as.numeric(j),"Q0"] <- presort[1,]$Q0
savedStartValues[as.numeric(j),"Alpha"] <- presort[1,]$Alpha
savedStartValues[as.numeric(j),"SSR"] <- presort[1,]$sumSquares
savedStartValues[as.numeric(j),"K"] <- currentK
savedStartValues[as.numeric(j),"id"] <- j
}
if (is.na(bestSS) || sum(savedStartValues$SSR) < bestSS) {
# message(sprintf("Improvement: K at %s = err: %s", currentK, sum(savedStartValues$SSR)))
bestSS <- sum(savedStartValues$SSR)
bestFrame <- data.frame(Q0 = savedStartValues$Q0,
Alpha = savedStartValues$Alpha,
K = savedStartValues$K)
}
presort$sumSquares <- NA
}
message(sprintf("Best k fround at %s = err: %s", bestFrame$K[1], bestSS))
vecStartQ0 <- bestFrame$Q0
vecStartAlpha <- bestFrame$Alpha
vecStartK <- bestFrame$K
startingvals <- as.vector(c(vecStartQ0, vecStartAlpha, mean(vecStartK)))
names(startingvals) <- c(startq0, startalpha, "k")
minvals <- as.vector(c(rep(0, length(startq0)), rep(-Inf, length(startalpha)), 0.5))
names(minvals) <- c(startq0, startalpha, "k")
maxvals <- as.vector(c(rep(Inf, length(startq0)), rep(Inf, length(startalpha)), (log(max(dat$y)) + 0.5) * 2))
names(maxvals) <- c(startq0, startalpha, "k")
fo <- sprintf("y ~ (%s) * 10^(k * (exp(-(%s) * (%s) * x) - 1))", paramsq0, paramsalpha, paramsq0)
fo <- as.formula(fo)
message("Searching for shared K, this can take a while...")
fit <- NULL
fit <- nlsr::nlxb(fo, data = dat2,
start = c(startingvals),
lower = c(minvals),
upper = c(maxvals),
# control = nls.control(maxiter = 30,
# tol = 1e-05,
# warnOnly = TRUE,
# minFactor = 1/1024),
trace = FALSE)
if (!inherits(fit, "try-error")) {
sharedk <- fit$coefficients["k"]
return(sharedk)
} else {
sharedk <- "Unable to find a shared k."
return(sharedk)
}
}
}
##' Calculates a k value by looking for the max/min consumption across entire dataset and adds .5 to that range
##'
##' Will look for maximum/minimum greater zero
##' @title Get K
##' @param dat Dataframe (long form)
##' @param mnrange Boolean for whether k should be calculated based on the mean range + .5
##' @return Numeric
##' @author Brent Kaplan <bkaplan.ku@@gmail.com>
##' @examples
##' GetK(apt)
##' @export
GetK <- function(dat, mnrange = TRUE) {
if (mnrange) {
dat1 <- aggregate(y ~ x, dat, mean)
(log10(max(dat1$y[dat1$y > 0], na.rm = TRUE)) - log10(min(dat1$y[dat1$y > 0], na.rm = TRUE))) + .5
} else {
(log10(max(dat$y[dat$y > 0], na.rm = TRUE)) - log10(min(dat$y[dat$y > 0], na.rm = TRUE))) + .5
}
}
##' ...
##'
##' ...
##' @title Get pmax
##' @param Alpha alpha parameter
##' @param K k parameter ( > lower limit )
##' @param Q0 Q0
##' @return Numeric
##' @author Shawn Gilroy <sgilroy1@@lsu.edu>
##' @export
GetAnalyticPmax <- function(Alpha, K, Q0) {
if (K <= exp(1)/log(10)) {
return (beezdemand::GetAnalyticPmaxFallback(K, Alpha, Q0));
} else {
return (-beezdemand::lambertW(z = -1/log((10^K))) / (Alpha * Q0));
}
}
##' Fallback method for Analytic Pmax
##'
##' Derivative-based optimization strategy
##' @title Analytic Pmax Fallback
##' @param K_ k parameter
##' @param A_ alpha parameter
##' @param Q0_ q0 parameter
##' @return numeric
##' @author Shawn Gilroy <sgilroy1@@lsu.edu>
##' @export
GetAnalyticPmaxFallback <- function(K_, A_, Q0_) {
result <- optimx::optimx(par = c((1/(Q0_ * A_ * K_^1.5)) * (0.083 * K_ + 0.65)),
fn = function(par, data) {
abs((log((10^data$K)) * (-data$A * data$Q0 * par[1] * exp(-data$A * data$Q0 * par[1]))) + 1)
},
data = data.frame(Q0 = Q0_,
A = A_,
K = K_),
method = c("BFGS"),
control=list(maxit=2500))
return(result$p1)
}
##' Calculates empirical measures for purchase task data
##'
##' Will calculate and return the following empirical measures: Intensity, BP0, BP1, Omax, and Pmax
##' @title GetEmpirical
##' @param dat data frame (long form) of purchase task data.
##' @param idcol The column name that should be treated as dataset identifier
##' @param xcol The column name that should be treated as "x" data
##' @param ycol The column name that should be treated as "y" data
##' @return Data frame of empirical measures
##' @author Brent Kaplan <bkaplan.ku@@gmail.com>
##' @examples
##' ## Obtain empirical measures
##' GetEmpirical(apt)
##' @export
GetEmpirical <- function(dat, xcol = "x", ycol = "y", idcol = "id") {
dat <- CheckCols(dat, xcol = xcol, ycol = ycol, idcol = idcol)
ps <- unique(dat$id)
ps <- as.character(ps)
np <- length(ps)
cnames <- c("id", "Intensity", "BP0", "BP1", "Omaxe", "Pmaxe")
dfres <- data.frame(matrix(vector(), np, length(cnames),
dimnames = list(c(), c(cnames))), stringsAsFactors = FALSE)
for (i in seq_len(np)) {
dfres[i, "id"] <- ps[i]
adf <- NULL
adf <- dat[dat$id == ps[i], ]
if (any(duplicated(adf$x))) {
stop(paste0("Duplicates found where id = ", ps[i], ". Check data and rerun."))
}
adf[, "expend"] <- adf$x * adf$y
## Find empirical measures
dfres[i, "Intensity"] <- adf[which(adf$x == min(adf$x), arr.ind = TRUE), "y"]
if (0 %in% adf$y) {
for (j in nrow(adf):1) {
if (adf$y[j] == 0) {
next
} else {
dfres[i, "BP0"] <- adf$x[j + 1]
break
}
}
} else {
dfres[i, "BP0"] <- NA
}
dfres[i, "BP1"] <- if (sum(adf$y) > 0) max(adf[adf$y != 0, "x"]) else NA
dfres[i, "Omaxe"] <- max(adf$expend)
dfres[i, "Pmaxe"] <- if (dfres[i, "Omaxe"] == 0) 0 else adf[max(which(adf$expend == max(adf$expend))), "x"]
}
dfres
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.