Nothing
#' \code{maxle} returns expression of log-likelihood (LL) of joint normal distribution.
#' @importFrom stats as.formula deriv logLik na.omit nls pnorm qnorm rnorm var AIC BIC
#' @param cheqs0 Strings defining equations of errors. Systems of Nonlinear Regressions (SNR) variant.
#' @param fixed_term if \code{TRUE} fixed term -(k/2)*log(2*pi) (k number of equations) is included
#' @return List with LL expressions of joint normal distribution, first element is string with
#' expression for derivative calculations, the second - string for evaluation.
#' @examples
#' # normal distribution
#' eq_c <- c("Tw ~ ((((PH) + (tw)) * (ta - Tc + 2) + (1 + (tw)) * (Ec/w - 2/w) -(1 + (PH))) +
#' sqrt((((PH) + (tw)) * (ta - Tc + 2) + (1 +(tw)) * (Ec/w - 2/w) - (1 + (PH)))^2 - 4 * (1 + (PH) +
#' (tw)) *(-(PH) * (ta - Tc + 2) + (1 - (tw) * (ta - Tc + 2)) * (2/w -Ec/w))))/(2 * (1 + (PH) +
#' (tw)))",
#' "Tf1 ~ (th1) * (ta - (((((PH) + (tw)) * (ta - Tc + 2) + (1 + (tw)) *(Ec/w - 2/w) - (1 + (PH))) +
#' sqrt((((PH) + (tw)) * (ta -Tc + 2) + (1 + (tw)) * (Ec/w - 2/w) - (1 + (PH)))^2 - 4 *(1 + (PH) +
#' (tw)) * (-(PH) * (ta - Tc + 2) + (1 - (tw) *(ta - Tc + 2)) * (2/w - Ec/w))))/(2 * (1 + (PH) +
#' (tw)))) -Tc + 2) - 1",
#' "Ef1 ~ (ph1)/(PH) * (w * (((((PH) + (tw)) * (ta - Tc + 2) + (1 +(tw)) * (Ec/w - 2/w) -
#' (1 + (PH))) + sqrt((((PH) + (tw)) *(ta - Tc + 2) + (1 + (tw)) * (Ec/w - 2/w) - (1 + (PH)))^2 -4 *
#' (1 + (PH) + (tw)) * (-(PH) * (ta - Tc + 2) + (1 - (tw) *(ta - Tc + 2)) * (2/w - Ec/w))))/(2 *
#' (1 + (PH) + (tw)))) -Ec + 2) - 1")
#' parl <- c("tw","PH","th1","ph1")
#' para_cont <- get_par(parl, eq_c)
#' cheqs0 <- para_cont$cheqs0
#' res <- maxle(cheqs0=cheqs0)
#' @export
maxle <- function(cheqs0, fixed_term = TRUE)
{
. <- NULL
npar <- get_npar(cheqs0)
neq <- length(cheqs0)
n1 <- 1:neq
if(neq>4){
rez <- ff_generate4maxle(neq)
}else{
rez <- datmaxle[[neq]]
}
names(rez$expr) <- c('dets', 'invs', 'expt')
detS <- rez$expr$dets
invS <- rez$expr$invs
expt <- rez$expr$expt # ratsimp(transpose(Y) . invs . Y)
# replace eps with equation expressions for residuals
exptr <- expt
y <- n1 %>% paste0('eps[', ., ']')
yr <- y %>% gsub('[[]', '[[]', .) %>% gsub('[]]$', '[]]', .)
repdat <- data.frame(old = yr, new = paste0('(', unlist(cheqs0), ')'), stringsAsFactors = FALSE)
yyr <- replace_par_wrap(repdat , exptr)
# should constant "(-neqt/2)*log(2*pi)" added, this is fixed to optimization solution is not affected
fixedp <- if (fixed_term == TRUE) {
paste0('(-', neq, '/2)*log(2*pi)-(1/2)*log(', detS, ')')
} else{
paste0('-(1/2)*log(', detS, ')')
}
#for derivatives
rez <- yyr %>% paste0('-1/2*(', ., ')') %>% paste0(fixedp, ' ', .)
reze <- formula2string(rez)
list(expr = reze, formula = rez)
}
#' \code{maxle_p} returns expression of partitioned log-likelihood.
#' f(y1,y2,..,yn)=f(y1)f(y2|y1)f(y3|y2y1)...f(yn|y1..y(n-1))
#' @param cheqs0 Strings defining equations of errors.
#' @param fixed_term if \code{TRUE} fixed term -(k/2)*log(2*pi) (k number of equations) is included
#' @param version2 another formulation of log-likelihood
#' @return List. First element is expression of joint distribution for derivatives,
#' second for evaluation, third latex, fourth marginal distributions for each variable.
#' @examples
#' # joint normal distribution
#' eq_c <- c("Tw ~ ((((PH) + (tw)) * (ta - Tc + 2) + (1 + (tw)) * (Ec/w - 2/w) -(1 + (PH))) +
#' sqrt((((PH) + (tw)) * (ta - Tc + 2) + (1 +(tw)) * (Ec/w - 2/w) - (1 + (PH)))^2 - 4 * (1 + (PH) +
#' (tw)) *(-(PH) * (ta - Tc + 2) + (1 - (tw) * (ta - Tc + 2)) * (2/w -Ec/w))))/(2 * (1 + (PH) +
#' (tw)))",
#' "Tf1 ~ (th1) * (ta - (((((PH) + (tw)) * (ta - Tc + 2) + (1 + (tw)) *(Ec/w - 2/w) - (1 + (PH))) +
#' sqrt((((PH) + (tw)) * (ta -Tc + 2) + (1 + (tw)) * (Ec/w - 2/w) - (1 + (PH)))^2 - 4 *(1 + (PH) +
#' (tw)) * (-(PH) * (ta - Tc + 2) + (1 - (tw) *(ta - Tc + 2)) * (2/w - Ec/w))))/(2 * (1 + (PH) +
#' (tw)))) -Tc + 2) - 1",
#' "Ef1 ~ (ph1)/(PH) * (w * (((((PH) + (tw)) * (ta - Tc + 2) + (1 +(tw)) * (Ec/w - 2/w) - (1 +
#' (PH))) + sqrt((((PH) + (tw)) *(ta - Tc + 2) + (1 + (tw)) * (Ec/w - 2/w) - (1 + (PH)))^2 -4 *
#' (1 + (PH) + (tw)) * (-(PH) * (ta - Tc + 2) + (1 - (tw) *(ta - Tc + 2)) * (2/w - Ec/w))))/(2 *
#' (1 + (PH) + (tw)))) -Ec + 2) - 1")
#' parl <- c("tw","PH","th1","ph1")
#' para_cont <- get_par(parl, eq_c)
#' cheqs0 <- para_cont$cheqs0
#' res <- maxle_p(cheqs0=cheqs0)
#' @export
maxle_p <- function(cheqs0, fixed_term = TRUE, version2=TRUE)
{
neqt <- length(cheqs0)
n1 <- 1:neqt
# get conditional means and variances
# normal partitioned distribution
if(version2){
ff_generate4maxle_p_all <- ff_generate4maxle_p_v2
expr_ll_norm_all <- expr_ll_norm_v2
}else{
ff_generate4maxle_p_all <- ff_generate4maxle_p
expr_ll_norm_all <- expr_ll_norm
}
if(neqt>4){
outl <- c()
for(j in n1){
sdv <- rep(NA, j)
mv <- rep(0, j)
outl <- c(outl, list(cond_expr(j, sdv, mv)))
}
aa <- ff_generate4maxle_p_all(outl, neqt, fixed_term )
}else{
aa <- expr_ll_norm_all[[which(names(expr_ll_norm_all) == paste0("ll_", neqt, "_", fixed_term))]]
}
llphi <- gsub('%(pi)', '\\1', aa$expr[-(neqt + 1)])# replace pi expression
llphi <- paste0('wc_', n1, '*(', llphi, ')')# add weights for cont. eq
#insert expressions of equations
repdat <- data.frame(old = paste0('eps', n1, ''), new = paste0('(', unlist(cheqs0), ')'),
stringsAsFactors = FALSE)
#formula for each eq. with weights
llphi <- replace_par_wrap(repdat, llphi)
#joint LL of all eq.
rez <- paste0(llphi, collapse = '+')
reze <- rez %>% formula2string
list(exprs = reze, formula = rez, latex = aa$latex, formulap = llphi)
}
#' \code{mlsem} returns expression of log-likelihood for joint normal distribution,
#' for maximum likelihood (ML), Simultaneous Equations Models (SEM) variant.
#' @param cheqs0 Strings defining equations of errors.
#' @param fixed_term if \code{TRUE} fixed term -(k/2)*log(2*pi) (k number of equations) is included
#' @return List with LL expressions of joint normal distribution, first element is string with
#' expression for derivative calculations, the second - string for evaluation.
#' @examples
#' # normal distribution
#' eq_c <- c("Tw ~ ((((PH) + (tw)) * (ta - Tc) + Ec/w * (1 + (tw)) + sqrt((Ec/w *(1 + (tw)) +
#' (ta - Tc) * ((PH) + (tw)))^2 - 4 * Ec/w * (ta -Tc) * (tw) * (1 + (PH) + (tw))))/(2 * (1 + (PH)
#' + (tw))))",
#' "Tf1 ~ (th1) *(Tw - Tc)",
#' "Ef1 ~ (ph1)/(PH) * (w * Tw - Ec)")
#' parl <- c("tw","PH","th1","ph1")
#' para_cont <- get_par(parl, eq_c)
#' cheqs0 <- para_cont$cheqs0
#' res <- mlsem(cheqs0, fixed_term=FALSE)
#' @export
mlsem <- function(cheqs0, fixed_term = TRUE)
{
. <- NULL
npar <- get_npar(cheqs0)
neq <- length(cheqs0)
ht <- cheqs0 %>% formula2string
#nnpar <- para_cont$parn%>%formula2string
endog <- cheqs0 %>% gsub("-.*", "", .) %>% trimws
Jt <- lapply(ht, function(x)
x %>% parse(text = .) %>%
deriv(., namevec = endog) %>%
extract_attr_deriv(., 'grad'))
if(neq > 4){
rez <- ff_generate4mlsem(neq)
}else{
rez <- datmlsem[[neq]]
}
names(rez$expr) <- c('dets', 'invs', 'expt', 'detJ')
detS <- rez$expr$dets
detJ <- rez$expr$detJ
invS <- rez$expr$invs
expt <- rez$expr$expt
n1 <- 1:neq
exptr <- expt
y <- n1 %>% paste0('eps[', ., ']')
yr <- y %>% gsub('[[]', '[[]', .) %>% gsub('[]]$', '[]]', .)
repdat <- data.frame(old = yr, new = paste0('(', unlist(cheqs0), ')'), stringsAsFactors = FALSE)
yyr <- replace_par_wrap(repdat, exptr)
#form Jacobian
dd <- n1 %>% expand.grid(., .) %>% apply(., 1, function(x)
paste0(x, collapse = ',')) %>%
paste0('deriv', '[', . , ']') %>% sort
ddr <- dd %>% gsub('[[]', '[[]', .) %>% gsub('[]]$', '[]]', .)
repdat <- data.frame(old = ddr, new = paste0('(', unlist(Jt), ')'), stringsAsFactors = FALSE)
detJr <- replace_par_wrap(repdat, detJ)
fixedp <- if(fixed_term == TRUE){
paste0('(-', length(cheqs0), '/2)*log(2*pi)-(1/2)*log(', detS, ')', '+log(', detJr, ')')
}else{
paste0('-(1/2)*log(', detS, ')', '+log(', detJr, ')')
}
#for derivatives
rez <- yyr %>% paste0('-1/2*(', ., ')') %>% paste0(fixedp, ' ', .)
reze <- formula2string(rez)
list(expr = reze, formula = rez)
}
#' Internal helper function for MNlogitf
#' @keywords internal
help_MNlogitf <- function(exps, transform = FALSE, separatenmm = FALSE,
neq = NULL, weight_disc = FALSE)
{
. <- NULL
#return LL expression and string
exp1 <- exps %>% paste0('log(', ., ')')
n1 <- 1:neq
if(transform == FALSE){
exps <- n1 %>% paste0('avl_', .) %>% paste0(., ' * ', exps)
sume <- exps %>% paste0(., collapse = '+')
sume <- paste0( paste0('chc_', n1), ' * ',paste0('avl_', n1), ' * ( ', paste0(exp1,
'- log(', sume, ')'),')')
}else{
sume <- exp1%>%paste0('avl_', n1, ' * ',.)%>%paste0('chc_', n1, ' * ',.)
}
if(weight_disc==TRUE){
sume <- n1 %>% paste0('wd_', ., '*(', sume, ')')
}
loglik <- if(separatenmm == FALSE){
paste0(sume, collapse = '+')
}else{
sume
}
loglike <- loglik %>% gsub('\\[(\\d{1,10})\\]', '\\1', .)
list(loglik = loglik, loglike = loglike)
}
#' Internal helper function for MNdogitf
#' @keywords internal
help_MNdogitf <- function(exps, transform = FALSE, separatenmm = FALSE, neq = NULL,
ppardogit=NULL, weight_disc = FALSE)
{
. <- NULL
#return LL expression and string
#exp1 <- exps #%>% paste0('log(', ., ')')
#exps <- n1 %>% paste0('avl_', .) %>% paste0(., ' * ', exps)
sume <- n1 %>% paste0('avl_', .) %>% paste0(., ' * ', exps) %>% paste0(., collapse = '+')
exp1 <- paste0(exps, " + ", ppardogit, " * (", sume,")")%>% paste0('log(', ., ')')
n1 <- 1:neq
if(transform == FALSE){
pjis <- ppardogit %>%
paste0(., collapse = ' + ') %>% paste0('(1 + ', ., ')')
sume <- paste0( paste0('chc_', n1), ' * ',paste0('avl_', n1), ' * ( ', paste0(exp1,
'-log(', pjis,')',
'- log(', sume, ')'),')')
}else{
sume <- exp1%>%paste0('avl_', n1, ' * ',.)%>%paste0('chc_', n1, ' * ',.)
}
if(weight_disc==TRUE){
sume <- n1 %>% paste0('wd_', ., '*(', sume, ')')
}
loglik <- if(separatenmm == FALSE){
paste0(sume, collapse = '+')
}else{
sume
}
loglike <- loglik %>% gsub('\\[(\\d{1,10})\\]', '\\1', .)
list(loglik = loglik, loglike = loglike)
}
#' \code{MNlogitf} or \code{MNdogitf} returns log-likelihood(LL) expression for discrete equations of "logit" or "dogit" model.
#' @param ffor Discrete choice equations.
#' @param transform if \code{TRUE}, quantile transformation (normal) is applied.
#' @param separatenmm if \code{TRUE}, equation specific LL is calculated
#' @param weight_disc if \code{TRUE}, equations will include equation specific weights.
#' @describeIn MNlogitf returns log-likelihood(LL) expression for discrete equations of "logit" model.
#' @return
#' \item{formula}{simplified LL for each equation or joint}
#' \item{probs}{probability expression unsimplified}
#' \item{expr}{LL string simplified }
#' \item{formulat}{unsimplified log(P), only if transform is TRUE}
#' \item{probt}{unsimplified P with quantile transformation qnorm(ifelse(P)), only if transform is TRUE}
#' \item{expreteso}{unsimplified pnorm((qnorm(P)-mean)/sd), only if transform is TRUE}
#' \item{probte}{qnorm(P)}
#' \item{tval}{If quantile transformation is applied}
#' \item{sume}{Denominator of logit probability}
#' @examples
#' eq_d <- c("ASC1 * 1 + B11_dur * dur_1" , "ASC2 * 1 + B12_dur * dur_2",
#' "ASC3 * 1 + B13_dur * dur_3 + B20_cost * cost_3 + B53_parkman * PbAvl_3",
#' "ASC4 * 1 + B14_dur * dur_4 + B20_cost * cost_4 + B34_serv * servIdx_4 + B44_stop * stopUs1R1_4")
#' parl <- c(paste0("ASC", 1:4), paste0("B1", 1:4, "_dur"), "B20_cost", "B53_parkman", "B34_serv",
#' "B44_stop")
#' disc_par <- get_par(parl, eq_d)
#' ffor <- disc_par$cheqs0
#' res_l <- MNlogitf(ffor, separatenmm=FALSE, transform=FALSE)
#' res_d <- MNdogitf(ffor, separatenmm=FALSE, transform=FALSE)
#' @export
MNlogitf <- function(ffor, transform = FALSE, separatenmm = FALSE, weight_disc = FALSE)
{
. <- NULL
exps <- ffor %>% paste0('exp(', ., ')')
neq <- ffor %>% length
n1 <- 1:neq
probs <- n1 %>% paste0('avl_', .) %>% paste0(., ' * ', exps)
sume <- probs %>% paste0(., collapse = '+')
#expression for prob
probs <- probs %>% paste0(., '/(', sume, ')')
environment(help_MNlogitf) <- environment()
#if quantile transformation has to be done for probabilities
#if(transform==TRUE){
#apply quantile transformation
probt <- probs %>%
paste0('qnorm(ifelse((', .,')==0, 1e-100, ifelse((', .,')==1,1-1e-16,(', .,'))))')
#get cumulative distribution values for the transformed probabilities
mean_sd_part <- if(transform==TRUE) n1%>%
paste0('- meanqt[',.,'])/sqrt(covqt[',.,'])') else ")"
expret <- paste0('pnorm((', probt , mean_sd_part, ')')
#########w/o ifelse, for derivatives
#probabilities
probte <- probs %>% paste0('qnorm(', ., ')') #quantile expression of probability
#transformed probabilities
#difference from probt in the construction of expression, probt is a simpliefied version of probte
expreteso <- paste0('pnorm((', probte , mean_sd_part, ')')
#another LL expression with ifelse
expret1 <- expret%>%paste0('ifelse(', ., ' == 0, 1e-100, ifelse(', .,
' == 1, 1-1e-16, ', ., '))')
expst <- help_MNlogitf(expret1, transform = TRUE, separatenmm = separatenmm, neq = neq,
weight_disc = weight_disc)
#LL expression without quantile transformation
exps <- help_MNlogitf(exps, transform = FALSE, separatenmm = separatenmm, neq = neq,
weight_disc = weight_disc)
exprs <- if(length(exps$loglike) > 1){
paste0(exps$loglike, collapse = ' + ')
}else{
exps$loglike
}
#probability string
#probse <- gsub('\\[(\\d{1,10})\\]', '\\1', probs)
#exps simplified log(P)
#exps$loglike - for each equation separatenmm LL string expression
#exps$loglik - exps$loglike but as an object which can be evaluated
#probs - probability expression unsimplified P
#exprs - LL string simplified
#expst unsimplified log(P)
#expst$loglike - for each equation separatenmm LL string expression
#expst$loglik - expst$loglike but as an object which can be evaluated
#probt unsimplified P with quantile transformation qnorm(ifelse(P))
#exprst - LL string unsimplified with ifelse
#expret pnorm((qnorm(ifelse(P))-mean)/sd)
#exprete LL unsimplified
#expretes for each eq LL separatenmm unsimplified
#expreteso P unsimplified pnorm((qnorm(P)-mean)/sd)
#probte - qnorm(P)
list(formula = exps$loglik, probs = probs, expr = exprs,
formulat = expst$loglik, probt = probt,
expreteso = expreteso, probte = probte, tval = transform, sume = sume)
}
#'
#' @describeIn MNlogitf returns log-likelihood(LL) expression for discrete equations of "dogit" model.
#' @param ppardogit "dogit" parameters only used in MNdogitf
#' @export
MNdogitf <- function(ffor, transform = FALSE, separatenmm = FALSE,
weight_disc = FALSE, ppardogit = NULL)
{
. <- NULL
exps <- ffor %>% paste0('exp(', ., ')')
neq <- ffor %>% length
n1 <- 1:neq
probs <- n1 %>% paste0('avl_', .) %>% paste0(., ' * ', exps)
sume <- probs %>% paste0(., collapse = '+')
pjis <- c(ppardogit) %>%
paste0(., collapse = ' + ') %>% paste0('(1 + ', ., ')')
#expression for prob dogit
probst1 <- probs %>% paste0(., ' + ', ppardogit, ' * (', sume, ')')
probst2 <- paste0('', pjis,' * (', sume, ')')
probs <- paste0('(', probst1, ')/(', probst2, ')')
#probs <- probs %>% paste0(., '/(', sume, ')')
environment(help_MNdogitf) <- environment()
#if quantile transformation has to be done for probabilities
#if(transform==TRUE){
#apply quantile transformation
probt <- probs %>%
paste0('qnorm(ifelse((', .,')==0, 1e-100, ifelse((', .,')==1,1-1e-16,(', .,'))))')
#get cumulative distribution values for the transformed probabilities
mean_sd_part <- if(transform==TRUE) n1%>%
paste0('- meanqt[',.,'])/sqrt(covqt[',.,'])') else ")"
expret <- paste0('pnorm((', probt , mean_sd_part, ')')
#########w/o ifelse, for derivatives
#probabilities
probte <- probs %>% paste0('qnorm(', ., ')') #quantile expression of probability
#transformed probabilities
#difference from probt in the construction of expression, probt is a simpliefied version of probte
expreteso <- paste0('pnorm((', probte , mean_sd_part, ')')
#another LL expression with ifelse
expret1 <- expret%>%paste0('ifelse(', ., ' == 0, 1e-100, ifelse(', .,
' == 1, 1-1e-16, ', ., '))')
expst <- help_MNdogitf(expret1, transform = TRUE,
separatenmm = separatenmm, neq = neq, ppardogit=ppardogit,
weight_disc= weight_disc)
#LL expression without quantile transformation
exps <- help_MNdogitf(exps, transform = FALSE,
separatenmm = separatenmm, neq = neq, ppardogit=ppardogit,
weight_disc= weight_disc)
exprs <- if(length(exps$loglike) > 1){
paste0(exps$loglike, collapse = ' + ')
}else{
exps$loglike
}
#probability string
#probse <- gsub('\\[(\\d{1,10})\\]', '\\1', probs)
#exps simplified log(P)
#exps$loglike - for each equation separatenmm LL string expression
#exps$loglik - exps$loglike but as an object which can be evaluated
#probs - probability expression unsimplified P
#exprs - LL string simplified
#expst unsimplified log(P)
#expst$loglike - for each equation separatenmm LL string expression
#expst$loglik - expst$loglike but as an object which can be evaluated
#probt unsimplified P with quantile transformation qnorm(ifelse(P))
#exprst - LL string unsimplified with ifelse
#expret pnorm((qnorm(ifelse(P))-mean)/sd)
#exprete LL unsimplified
#expretes for each eq LL separatenmm unsimplified
#expreteso P unsimplified pnorm((qnorm(P)-mean)/sd)
#probte - qnorm(P)
list(formula = exps$loglik, probs = probs, expr = exprs,
formulat = expst$loglik, probt = probt,
expreteso = expreteso, probte = probte, tval = transform, sume = sume)
}
#' \code{f_create} creates functions for log-likelihood of different models.
#' @param mn Expression, can be a list of equations.
#' @param data Name of the data frame with which the function will be evaluated.
#' @param fixed Integer, which parameter is fixed to be 0.
#' @param cheqs0 If continuous are supplied, include the expressions of errors.
#' @param probt Expressions of un-simplified probabilities with quantile transformation(qnorm(ifelse(P))).
#' @param tformula unsimplified log(P)
#' @param hessian Adds lines to check the Hessian, hessian should be the name of hessian function.
#' @param transform if \code{TRUE}, adds lines to check conditional means
#' @param separatenmm if \code{TRUE}, separate log-likelihood for each equations is produced.
#' @param sume Expression of summed likelihoods.
#' @return Function.
#' @examples
#' eq_d <- c("ASC1 * 1 + B11_dur * dur_1" , "ASC2 * 1 + B12_dur * dur_2",
#' "ASC3 * 1 + B13_dur * dur_3 + B20_cost * cost_3 + B53_parkman * PbAvl_3",
#' "ASC4 * 1 + B14_dur * dur_4 + B20_cost * cost_4 + B34_serv * servIdx_4 + B44_stop * stopUs1R1_4")
#' parl <- c(paste0("ASC", 1:4), paste0("B1", 1:4, "_dur"), "B20_cost", "B53_parkman", "B34_serv",
#' "B44_stop")
#' obj <- get_par(parl, eq_d)
#' ffor <- obj$cheqs0
#' res <- MNlogitf(ffor, separatenmm=FALSE, transform=FALSE)
#' ff <- f_create(res$formula, data="data", fixed=1)
#' @export
f_create <- function(mn, data, fixed = 0, cheqs0 = NULL, separatenmm = FALSE, probt = NULL,
tformula = NULL, hessian = NULL, transform = TRUE, sume = NULL)
{
. <- NULL
ffunc <- function(par){}
neq <- length(mn)
n1 <- 1:neq
llf1 <- f_help(mn, data)
#if for quantile transformation
if(!is.null(tformula)){
llf1t <- f_help(tformula, data)
llf1t <- c(' if(transform==TRUE){',
gsub('^ ', ' ', llf1t),
' }else{', gsub('^ ', ' ', llf1), ' }')
llf1 <- llf1t
}
#add errors from cont. equations
t22 <- cheqs0 %>% paste0('"', ., '"') %>% paste0(., collapse = ", ") %>%
paste0(' err_eq <- c(', ., ')')
t2 <- paste0(' err <- sapply(err_eq, function(x) eval(parse(text=x),', data, '))')
t2 <- if(is.null(cheqs0)) t2 else c(t22, t2)
t3 <- ' sigma <- cov(err, use = "pairwise.complete.obs")'
#if expressions include correlations instead of covariances
if(any(grepl('sigma\\[', mn))){
if(any(grepl('rho\\[', mn))){
t3 <- c(t3, ' rho <- cor(err, use = "pairwise.complete.obs")',
' sigma <- sqrt(diag(sigma))')
}else{
t3 <- c(t3, ' sigma <- sqrt(diag(sigma))')
# does this happen that only variances and not correlations are used??
}
}
t4 <- ' return(res)'
#if some parameters is set to be equal to 0, for discrete model
if(fixed > 0){
t0 <- if(fixed == 1)' par <- c(0, par)' else paste0(' par <- c(par[1:', (fixed - 1),
'], 0, par[', fixed, ':length(par)])')
}else{
t0 <- ''
}
if(any(grepl('sigma\\[', mn))){
tt <- c(t0, t2, t3)
}else{
tt <- c(t0)
}
#if cond. moments need to be included
if(!is.null(probt)){
neq <- length(probt)
t5 <- n1 %>% paste0(' meanqt', ., " <- mean(eval(parse(text='", probt[.], "'), ", data,
'), na.rm=FALSE)')
t5 <- c(t5, n1 %>% paste0('meanqt', ., collapse = ', ') %>% paste0(' meanqt <- c(', ., ')'))
t6 <- n1%>%paste0(' covqt', ., " <- var(eval(parse(text='", probt[.], "'), ", data,
'), na.rm=FALSE)')
t6 <- c(t6, n1 %>%paste0('covqt', ., collapse = ', ') %>% paste0(' covqt <- c(', ., ')') )
tt0 <- ' if(transform == TRUE){'
tt <- c(tt[1], tt0, t5, t6, if (length(tt) > 1) tt[2:length(tt)], ' }')
}
#if transform is TRUE check if cond. means have defined values
if(transform==TRUE){
t5 <- paste0(
'if(transform==TRUE) if(any(meanqt%in%c(NaN, Inf, -Inf))|ifelse(any(is.na(covqt)), TRUE, any(covqt < 0.001))) res <- if(separatenmm==TRUE) rep(-Inf, ',
length(mn), ') else -Inf')
}else{
t5 <- ''
}
# include check for hessian function
if(!is.null(hessian)){
t05 <- c(paste0('if(check_hess==TRUE){', ' hess <- try(qr.solve(-', hessian, '(par[',
if(fixed!=0) -fixed else "", '])), silent=TRUE)'),
'if(any(class(hess)%in%"try-error")) hess <- matrix(-0.01,1,1)', '}else{',
'hess <- matrix(0.01,1,1)}')
t15 <- c('nrepp <- 1',
paste0('nrepp <- if(methodopt%in%c("BHHH")) nrow(' ,data ,') else nrepp'),
paste0('nrepp <- if(separatenmm==TRUE)',length(mn), ' else nrepp'),
paste0('if(any(diag(hess)<0)) res <- rep(-Inf, nrepp)'))
t5<-c(t5, t05, t15)
}
tffunc <- c('{', tt, llf1, t5, t4, '}')
if(!is.null(sume)){
tffunc %<>% gsub(sume, "dalyba", ., fixed = TRUE)
xs <- paste0(" dalyba <- eval(parse(text='",sume, "'), ", data,")")
xs1 <- paste0(" dalyba <- eval(parse(text='replace(dalyba, dalyba==Inf, 99999999999)'), ", data,")")
xs2 <- paste0(" dalyba <- eval(parse(text='replace(dalyba, dalyba==-Inf, -99999999999)'), ", data,")")
tffunc <- c(tffunc[1:2], xs, xs1, xs2, tffunc[3:length(tffunc)])
}else{
#ts <- NULL
}
body(ffunc) <- parse(text = tffunc)
ffunc
}
#' Internal helper function
#' @keywords internal
#' @param mn expression to be converted to a function
#' @param data name of data frame
f_help <- function(mn, data)
{
. <- NULL
neq <- length(mn)
n1 <- 1:neq
if(neq > 1){
#for partial conditional distributions
llf0 <- n1 %>% paste0(' res', . , " <- eval(parse(text='", mn, "'), ", data, ')')
llf1 <- c(' if(methodopt%in%c("BHHH")){',
n1 %>% paste0(' res', ., ' <- res', .),
' }else{',
n1 %>% paste0(' res', ., ' <- sum(res', ., ', na.rm=FALSE)'),
' }')
llf1 <- c(llf1, n1 %>% paste0('res', ., collapse = ', ') %>% paste0(' res <- cbind(', ., ')'))
llf1 <- c(llf1, c(' if(separatenmm==FALSE){',' res <- rowSums(res, na.rm=FALSE)', ' }'))
llf1 <- c(llf0, llf1)
}else{
llf0 <- paste0(" res <- eval(parse(text='", mn , "'), ", data, ')')
llf00 <- "res <- replace(res, res%in%-Inf, -999999999999)"
llf1 <- 'res <- if(methodopt%in%c("BHHH")) res else sum(res, na.rm=FALSE)'
llf1 <- c(llf0, llf00, llf1)#, ' if(separatenmm==FALSE){ }')
}
llf1
}
#' \code{grad_hess_eval} forms function of gradient and Hessian of log-likelihood produced
#' by \code{f_create}.
#' @param mn Expression, can be a list of equations.
#' @param parnl Names of parameters.
#' @param hessian if \code{TRUE}, returns hessian function, otherwise gradient.
#' @param fixed Integer, which parameter is fixed to be 0.
#' @param data Name of the data frame with which the function will be evaluated.
#' @param cheqs0 If continuous are supplied, include the expressions of errors.
#' @return A function for evaluation of gradient or Hessian.
#' @examples
#' eq_d <- c("ASC1 * 1 + B11_dur * dur_1" , "ASC2 * 1 + B12_dur * dur_2",
#' "ASC3 * 1 + B13_dur * dur_3 + B20_cost * cost_3 + B53_parkman * PbAvl_3",
#' "ASC4 * 1 + B14_dur * dur_4 + B20_cost * cost_4 + B34_serv * servIdx_4 + B44_stop * stopUs1R1_4")
#' parl <- c(paste0("ASC", 1:4), paste0("B1", 1:4, "_dur"), "B20_cost", "B53_parkman", "B34_serv",
#' "B44_stop")
#' disc_par <- get_par(parl, eq_d)
#' ffor <- disc_par$cheqs0
#' parld <- disc_par$parld
#' res <- MNlogitf(ffor, separatenmm=FALSE, transform=FALSE)
#' parnl <- paste0("par", 1:length(parld))
#' gf <- grad_hess_eval (res, parnl, data="data", fixed=1)
#' hf <- grad_hess_eval (res, parnl, data="data", fixed=1, hessian=TRUE)
#' @export
grad_hess_eval <- function(mn, parnl, hessian = FALSE, fixed = 0, data = '', cheqs0 = NULL)
{
. <- NULL
#get derivatives
gradMN <- deriv(parse(text = mn$expr) , parnl, hessian = hessian, function.arg = TRUE)
mnf <- deparse(gradMN)
#if line ends with , combine with previous line
mnfo <- mnf
ind <- grep(', $', mnfo)
while(length(ind) > 0){
mnfo[ind[1]] <- paste0(mnfo[ind[1]], mnfo[ind[1] + 1])
mnfo <- mnfo[-(ind[1] + 1)]
ind <- grep(', $', mnfo)
}
mnf <- mnfo
llf1 <- string2formula(mnf)
llf1 %<>% gsub('expression\\(', '', .)
#remove first line
llf1 <- llf1[!grepl('^function|(par\\[\\d{1,10}\\], par\\[\\d{1,10}\\])', llf1)]
llf1 %<>% gsub('(})(\\))$', '\\1', .)
ffunc <- function(par){}
textv <- paste0('llf1 <- c(', paste0("'", llf1[2:(length(llf1) - 1)], "'", collapse = ', '), ')')
#insert parameter vector
inspar <- if(fixed == 1)' par <- c(0, par)' else if(fixed > 1) paste0(' par <- c(par[1:',
(fixed -1), '], 0, par[', fixed,
':length(par)])') else ''
#add errors from cont. eq
t22 <- cheqs0 %>% paste0('"', ., '"') %>% paste0(., collapse = ", ") %>%
paste0(' err_eq <- c(', ., ')')
t2 <- paste0(' err <- sapply(err_eq, function(x) eval(parse(text=x),', data, '))')
t2 <- if(is.null(cheqs0)) t2 else c(t22, t2)
t3 <- ' sigma <- cov(err, use = "pairwise.complete.obs")'
if(any(grepl('sigma\\[', textv))){
if(any(grepl('rho\\[', textv))){
t3 <- c(t3, ' rho <- cor(err, use = "pairwise.complete.obs")',
' sigma <- sqrt(diag(sigma))')
}else{
t3 <- c(t3, ' sigma <- sqrt(diag(sigma))')
# does this happen that only covariances and not correlations are used??
}
}
#if correlation(rho) is used instead of covariance
if(any(grepl('sigma\\[', mn$formula))){
tt <- c(t2, t3)
}else{
tt <- ''
}
llf1 <- c('{',tt, inspar, textv, paste0(' res <- eval(parse(text=llf1), ',data, ')'))
#add summation of gradient and hessian
if(hessian){
llf1 <- c(llf1, ' res <- attributes(res)$hessian',
' res <- if(methodopt%in%c("BHHH"))res else apply(res, 3, function(x)colSums(x, na.rm=FALSE))',
if(fixed>0)paste0(' if(methodopt%in%c("BHHH")) return(res[,-', fixed, ',-', fixed,
']) else return(res[-', fixed, ',-', fixed, '])') else ' return(res)')
}else{
llf1 <- c(llf1, ' res <- attributes(res)$gradient',
' res <- if(methodopt%in%c("BHHH"))res else apply(res, 2, function(x)sum(x, na.rm=FALSE))',
if(fixed > 0) paste0(' if(methodopt%in%c("BHHH")) return(res[, -',fixed,
']) else return(res[-', fixed, '])') else ' return(res)')
}
llf1 <- c(llf1, '}')
llf1 <- parse(text = llf1)
body(ffunc) <- llf1
ffunc
}
#' \code{add_variable} adds columns to the data matrix
#' @param data data.frame, if \code{dname=="chc"} columns "chc_i" has to be in the data.
#' @param dname if \code{dname=="chc"} (dummy for chosen alternative) dummy for the choice
#' alternative added, if "weights" weights added
#' @param weights Matrix with weights to be added to the data
#' @return data.frame
#' @examples
#' chc <- c(1,2,1,4,3,1,4)
#' data <- data.frame(choice=chc, x=rnorm(length(chc)), y=rnorm(length(chc)))
#' add_variable(data, dname="chc")
#' ww <- c(1,1,1,2,2,2,3)
#' add_variable(data, dname="weights", weights=ww)
#' @export
add_variable <- function(data = data, dname = "chc", weights = NULL)
{
. <- NULL
#check if all arguments supplied
mf <- match.call()
varl <- paste0(match.call()[[1]], '%>%args%>%as.list') %>% parse(text = .) %>% eval
no_def <- varl[varl == ""] %>% names#no default values
m <- match(no_def, mf %>% names, 0L)
if(any(m==0)){
paste0(no_def[m == 0], collapse = ", ") %>% paste0("No value for: ", .) %>%
stop
}
if(!any(grepl(paste0(dname, "_"), names(data)))){
if(dname=="chc"){
misdata <- matrix(0, ncol = length(unique(data[, "choice"])), nrow = nrow(data))
misdata <- data.frame(choice = data[, "choice"], misdata, stringsAsFactors = FALSE)
names(misdata)[-1] <- paste0(paste0(dname, "_"), unique(data[, "choice"]))
for(i in unique(data[,"choice"])){
misdata[misdata$choice == i, paste0(paste0(dname, "_"), i)] <- 1
}
misdata <- misdata[-1]
data <- cbind(data, misdata)
data <- data[, c(names(data)[1:3], sort(names(data)[-(1:3)]))]
}
if(dname == "weights" & !is.null(weights)){
data <- cbind(data, weights)
}
}
return(data)
}
#' \code{get_start_cont} get starting values for continuous equations.
#' @import systemfit
#' @import DEoptim
#' @import maxLik
#' @import AER
#' @param of Function to me optimized.
#' @param hf Hessian function.
#' @param datcont Data-set used in the optimization.
#' @param eq_c Equations that will be estimated with \code{nlsystemfit}.
#' @param par_c Names of parameters.
#' @param startvals Supplied starting values.
#' @param DEoptim_run if \code{TRUE}, \code{DEoptim} is run for optimization.
#' @param deconst Absolute value of lower and upper bounds for \code{DEoptim}.
#' @return Starting values for continuous equations.
#' @keywords internal
get_start_cont <- function(of, hf, datcont, eq_c, par_c, startvals = NULL, DEoptim_run = TRUE,
deconst = 2)
{
. <- NULL
#check if all arguments supplied
mf <- match.call()
varl <- paste0(match.call()[[1]],'%>%args%>%as.list')%>%parse(text=.)%>%eval
no_def <- varl[varl==""]%>%names#no default values
m <- match(no_def, mf%>%names, 0L)
if(any(m==0)){
paste0(no_def[m==0], collapse = ", ")%>%paste0("No value for: ", .)%>%stop
}
npar <- length(par_c)
#function for DEoptim
of_DEoptim <- function(par){
hfr <- try(diag(solve(-hf(par))), silent=TRUE)
if((any(class(hfr)%in%"try-error")|any(hfr%in%c(Inf, -Inf, NA, NaN)))){
res <- Inf
}else{
if(any(hfr<0)){
res <- Inf
}else{
if(is.null(hf)){
hfr1 <- try(maxLik(of, start = par, method="BFGS", iterlim=10000), silent=TRUE)
}else{
hfr1 <- try(maxLik(of, hess = hf, start = par, method="BFGS", iterlim=10000), silent=TRUE)
}
if(any(class(hfr1)%in%"try-error") )browser()
res <- -of(par)
}
}
res
}
startv <- rep(0.45, npar)
names(startv) <- par_c
if(DEoptim_run==TRUE){
a <- DEoptim(of_DEoptim, lower=rep(-deconst, npar),upper=rep(deconst, npar),
control=list(trace=FALSE, VTR=10000000))
}
startv <- if(!is.null(startvals)) startvals else startv
start1 <- startv
a1 <- try(nlsystemfit(method="SUR", eqns=lapply(eq_c, as.formula), startvals = start1,
data=datcont), silent=TRUE)
if(any(class(a1)%in%"try-error")){
a <- DEoptim(of_DEoptim, lower=rep(-deconst, npar),upper=rep(deconst, npar),
control=list(trace=FALSE, VTR=10000000))
startv <- a$optim$bestmem
names(startv ) <- par_c
start1 <- startv
a1 <- try(nlsystemfit(method="SUR", eqns=lapply(eq_c, as.formula), startvals = start1,
data=datcont), silent=TRUE)
if(any(class(a1)%in%"try-error")){
a1 <- c()
a1$b <- startv
}
}else{
}
aL1 <- suppressWarnings(of_DEoptim(a1$b))
if(DEoptim_run==TRUE){
aL <- of_DEoptim(a$optim$bestmem)
aa <- if(aL < aL1) a$optim$bestmem else a1$b
}else{
aa <- a1$b
}
aa
}
#' \code{prepare_data} prepare data for the estimation.
#' @import data.table
#' @importFrom abind abind
#' @importFrom tidyr spread gather unite separate
#' @param data \code{data.frame}
#' @param choice Name of variable with modes.
#' @param dummy Name of variable indicating, if the mode was chosen.
#' @param PeID Name of variable with individual identification numbers.
#' @param WeID Name of variable with trip identification.
#' @param type Type of data. If "long", then modifications are done.
#' @param mode_spec_var Used if format "long", mode specific variables.
#' @param avl if \code{TRUE}, includes dummies for mode availability.
#' @param chc if \code{TRUE}, includes dummies for choice of mode.
#' @param wc if \code{TRUE}, creates weights 1 for continuous equations.
#' @param wd if \code{TRUE}, creates weights 1 for discrete equations.
#' @param nc Integer, number of continuous equations.
#' @param weights Data matrix with weights,
#' column names have to be $wc_i$(continuous), $wd_i$(discrete).
#' @param weight_paths if \code{TRUE}, weight according to number of trips per person, discrete part.
#' @param weight_paths_cont if \code{TRUE}, weight continuous part.
#' @param mode_factors if choice is not factor or numeric, this is important to supply.
#' @return data.frame used for modeling.
#' @examples
#' data("TravelMode", package = "AER")
#' mode_spec_var <- c("wait", "vcost", "travel", "gcost")
#' res <- prepare_data(TravelMode, choice="mode", dummy="choice", PeID="individual", WeID="",
#' type="long", mode_spec_var =mode_spec_var, nc=3)
#' @export
prepare_data <- function(data, choice = "", dummy = "", PeID = "", WeID = "", type = "",
mode_spec_var = "", avl = TRUE, chc = TRUE, wc = TRUE, wd = TRUE, nc = 0,
weights = NULL, weight_paths = FALSE, weight_paths_cont = FALSE,
mode_factors = NULL)
{
. <- NULL
choicen <- NULL
npaths <- NULL
npaths_cont <- NULL
data_weight <- NULL
mode_code <- c()
if(choice!=""){
if(!is.null(mode_factors)){
data$choicen <- data[, choice]%>%factor(., levels=mode_factors)%>%as.numeric
}else{
data$choicen <- data[, choice]%>%as.factor%>%as.numeric
}
mode_code <- data[, c(choice, "choicen")] %>% data.frame%>%unique
if(!is.null(mode_factors)){
cat("Used mode coding:", "\n")
print(mode_code[order(mode_code$choicen),])
}
data[, choice] <- NULL
}
if(dummy!=""){
data$dummy <- if(any(class(data[, dummy])%in%
c("character", "factor", "logical"))) data[, dummy]%>%
as.factor %>% as.numeric - 1 else data[, dummy]
if(length(data$dummy %>% unique)==1){
data$dummy <- 1
}
data[, dummy] <- NULL
}
data$PeID <- if(!any(names(data)%in%"PeID")&PeID==""){
1:nrow(data)
} else{
xx <- data[, c(PeID, "PeID")[c(PeID, "PeID")%in%names(data)]]
if(is.null(ncol(xx))){
xx
}else{
if(ncol(xx)==2){
xx[, 1]
}else{
browser()
}
}
}
if(PeID!=""&PeID!="PeID")data[, PeID] <- NULL
data$WeID <- if(!any(names(data)%in%"WeID")&WeID=="") {1
} else{
xx <- data[, c(WeID, "WeID")[c(WeID, "WeID")%in%names(data)]]
if(is.null(ncol(xx))){
xx
}else{
if(ncol(xx)==2){
xx[, 1]
} else{
browser()
}
}
}
if(WeID!=""&WeID!="WeID")data[, WeID] <- NULL
if(type=="long"){
const_var <- names(data)[!names(data)%in%c("PeID", "WeID", mode_spec_var, "choicen", "dummy")]
ctext <- const_var%>%paste0(., collapse=' + ')
ctext <- if(ctext!="") paste0(" + ", ctext) else ctext
vtext <- mode_spec_var%>%paste0('"',., '"')%>%paste0(., collapse=', ')
setDT(data)
choiced <- data[, .(PeID, WeID, dummy, choicen)]
choiced <- choiced[dummy==1,]
choiced[, dummy:=NULL]
data <- paste0('dcast(data, PeID + WeID ', ctext,' ~ choicen, value.var=c(',vtext,'))')%>%
parse(text=.)%>%eval
data <- merge(data, choiced, all=TRUE, by=c("PeID","WeID"))
#setnames(data, "choicen", "choice")
data <- data.frame(data, stringsAsFactors = FALSE)
}
data <- data.table(data, stringsAsFactors = FALSE)
if(weight_paths_cont==FALSE|weight_paths==TRUE){
if(!any(names(data)%in%"npaths_cont")) data[, "npaths_cont"] <- 1
}
if(weight_paths==FALSE){
if(!any(names(data)%in%"npaths")) data[, "npaths"] <- 1
}
if(!any(names(data)%in%"npaths")) data[, npaths:=length(WeID), by="PeID"]
if(!any(names(data)%in%"npaths_cont")) data[, npaths_cont:=length(WeID), by="PeID"]
data <- data.frame(data, stringsAsFactors = FALSE)
if(chc==TRUE&!any(grepl("^chc_", names(data)))){
xx <- try(setnames(data, "choicen", "choice"), silent = TRUE)
data <- add_variable(data, "chc")
if(!any(class(xx)%in%"try-error"))setnames(data, "choice", "choicen")
}
if(is.null(mode_code)&any(names(data)%in%"choice")){
mode_code <- data.frame(choicen=1:length(unique(data[, "choice"])))
}
if(avl==TRUE&!any(grepl("^avl_", names(data)))){
avld <- matrix(1, nrow=nrow(data), ncol=nrow(mode_code))
colnames(avld) <- paste0("avl_", mode_code$choicen)
data <- cbind(data, avld)
}
if(!is.null(weights)){
data <- add_variable(data = data, dname = "weights", weights = data_weight)
}
if(wc==TRUE&!any(grepl("^wc_", names(data)))&nc!=0){
avld <- matrix(1, nrow=nrow(data), ncol=nc)
colnames(avld) <- paste0("wc_", 1:nc)
data <- add_variable(data, dname = "weights",weights = avld)
}
if(wd==TRUE&!any(grepl("^wd_", names(data)))){
avld <- matrix(1, nrow=nrow(data), ncol=nrow(mode_code))
colnames(avld) <- paste0("wd_",mode_code$choicen)
data <- add_variable(data, dname = "weights",weights = avld)
}
data
}
#' Function for finding starting values for nls equations with errors
#' @keywords internal
generate_disc_coefs <- function(coefs, eqest, i = 1, type_select = "smallest")
{
. <- NULL
para <- NULL
parai <- NULL
value <- NULL
inder <- which(lapply(coefs, class)%in%"try-error")
if(length(inder)==0) {
inder <- 1
mci <- coefs[[1]]
}else{
mci <- coefs[-inder][[1]]
}
for(ij in 1:length(inder)){
#print(paste0("nls for equation: ", inder[ij], " produced an error."))
get_par_sep <- eqest[[inder[ij]]] %>%deparse() %>% paste0(., collapse="") %>%
gsub(".*~", "", .) %>%trimws %>% gsub("I\\(", "", .) %>%
gsub("\\)$", "", .) %>% strsplit( . ,"\\+|\\*|\\^|\\*\\*|\\-") %>%unlist %>%
gsub("\\)|\\(", "", .) %>%
trimws() %>% grep("^par\\d{1,10}", ., value=TRUE)
if(length(mci)!=length(get_par_sep)){
all_coef <- unlist(coefs[-inder])
acd <- data.table(para = all_coef %>% names %>% gsub(".*\\.", "", .),
value = all_coef)
acd <- acd[order(para)]
acd[, parai := gsub("par", "", para) %>% as.numeric()]
mci <- lapply(get_par_sep, function(x){
xx <- acd[para==x, value][i]
indxx <- gsub("par", "", x) %>% as.numeric()
if(type_select == "smallest"){
if(is.na(xx)){
xx <- acd[parai<indxx, value][i]
}
if(is.na(xx)){
xx <- acd[parai>indxx, value][i]
}
}else if(type_select == "biggest"){
if(is.na(xx)){
xx <- acd[parai>indxx, value][i]
}
if(is.na(xx)){
xx <- acd[parai<indxx, value][i]
}
}else{
#set.seed(1234)
xx <- sample(acd[, value], 1)
}
xx
})
mci <- unlist(mci)
}
names(mci) <- get_par_sep
coefs[[inder[ij]]] <- mci
}
coefs
}
#' \code{get_start_disc} get starting values for discrete choice model.
#' @import AER
#' @import mlogit
#' @param MNf1 Function to me optimized
#' @param hff Hessian function
#' @param eqest Equations that will be estimated with nls
#' @param sdat Data-set used for in the optimization
#' @param ppardogit "dogit" parameters, NULL for "logit"
#' @param type_select method for filling missing starting values. Default is "smallest"(closest by the index to the left value).
#' "biggest" takes closest by index to the right value and "random" - samples randomly values.
#' @return Starting values for discrete equations.
#' @keywords internal
get_start_disc <- function(MNf1, hff, eqest, sdat, ppardogit = NULL, type_select = "smallest")
{
. <- NULL
#set.seed(123124)
#check if all arguments supplied
mf <- match.call()
varl <- paste0(match.call()[[1]],'%>%args%>%as.list')%>%parse(text=.)%>%eval
no_def <- varl[varl==""]%>%names#no default values
m <- match(no_def, mf%>%names, 0L)
if(any(m==0)){
paste0(no_def[m==0], collapse = ", ")%>%paste0("No value for: ", .)%>%stop
}
coefs <- suppressWarnings(lapply(eqest, function(m) try(summary(nls(m, sdat))$coef[,1], silent = TRUE)))
# if error for come equations write down the values of other equations
coefs_o <- coefs
if(any(lapply(coefs, class)%in%"try-error")){
coefs <- generate_disc_coefs(coefs, eqest, i=1, type_select=type_select)
# only works if the same number of parameters
}
help_start <- help_get_disc_start(coefs_o, coefs, hff, MNf1, eqest, ppardogit, i=1)
help_start_o <- help_start
gind <- help_start[[3]]
if(sum(gind)==0){
coefs <- generate_disc_coefs(coefs_o, eqest, i=1, type_select="biggest")
help_start <- help_get_disc_start(coefs_o, coefs, hff, MNf1, eqest, ppardogit,
type_select = "biggest", i=1)
gind <- help_start[[3]]
if(sum(gind)==0){
coefs <- generate_disc_coefs(coefs_o, eqest, i=1, type_select="random")
help_start <- help_get_disc_start(coefs_o, coefs, hff, MNf1, eqest, ppardogit,
type_select = "random", i=1)
gind <- help_start[[3]]
}
if(sum(gind)==0){
help_start <- help_start_o
}
}
goodv <- help_start[[1]]
valf <- help_start[[2]]
gind <- help_start[[3]]
startv <- help_start[[4]]
if(length(goodv)>1&sum(gind)>0){
goodv <- goodv[which.min(unlist(valf[gind]))]
}else if(length(goodv)==0){
print("No good starting values automatically found: can not invert the Hessian.")
print("Optimization will be longer. You can stop the program and input starting values by yourself or wait.")
goodv <- startv
}
goodv <- goodv[[1]][,2]
names(goodv) <- startv[[1]][,1]
goodv
}
#' @keywords internal
help_get_disc_start <- function(coefs_o, coefs, hff, MNf1, eqest, ppardogit,
type_select="smallest", i=1)
{
. <- NULL
coefs <- data.frame(parlp=gsub(".*[.]","",names(unlist(coefs))), stv=unlist(coefs),
stringsAsFactors = FALSE,row.names = NULL)
coefs[, "parlp"] <- coefs[, "parlp"] %>% gsub(".*~", "", .) %>%trimws %>% gsub("I\\(", "", .) %>%
gsub("\\)$", "", .) %>% gsub("\\s.*", "", .) %>% gsub("\\*.*", "", .)
coefs <- coefs[order(nchar(coefs$parlp), coefs$parlp),]
duplc <- coefs[duplicated(coefs$parlp),"parlp"]%>%unique
# if no same coefficients
if(length(duplc)==0){
startv <- list(coefs)
}else{
duf <- table(coefs$parlp)[duplc]
unc <- coefs[!coefs$parlp%in%duplc,]
sac <- coefs[coefs$parlp%in%duplc,]
sac$ind <- paste0(sac$parlp,"_", unlist(lapply(duf, function(i)1:i)))
inder <- TRUE
ninder <- length(duf)
nmodes <- length(eqest)
# might be to many combinations, try removing the possibilities
while(inder&any(duf>=2)){
poscomb <- choose(length(sac$ind), ninder)
if(poscomb<=10000){
#possible combs
inder <- FALSE
combn <- try(combn(sac$ind, ninder), silent = TRUE)
}else{
# remove 2 par from each block
for(ij in sac$parlp %>% unique){
xx <- sac[sac$parlp==ij,]
if(nrow(xx)>=2){
xx <- xx[sample(1:nrow(xx), 2), ]
sac <- rbind(sac[sac$parlp!=ij,], xx)
}
}
}
duf <- sac$parlp %>% table
}
combnn <- combn
indc <- apply(combnn, 2, function(x){
xx <- unlist(strsplit(x,split = "_"))%>%table
if(any(xx[grep("par", names(xx))]>1)) FALSE else TRUE
})
combnn <- combnn[,indc]%>%matrix(., ncol=sum(indc))
if(length(combnn)==0){
combnn <- matrix(sac[,"ind"], nrow=1)
}
startv <- apply(combnn, 2, function(x){
xx <- rbind(unc, sac[sac$ind%in%x, 1:2])
xx[order(nchar(xx$parlp), xx$parlp),]
})
if(length(startv)>20){
startv <- startv[sample(1:length(startv), 20)]
}
}
# add for each alternative if dogit 1/n, ((1 + par[8] + par[9] + par[10] + par[11]) * (dalyba)
if(!is.null(ppardogit)){
startv <- lapply(startv, function(x){
xd <- data.frame(parlp=ppardogit %>% gsub("\\[|\\]", "", .), stv=1/(length(ppardogit)))
rbind(x, xd)
})
}
valf <- lapply(startv,function(par) sum(MNf1(par[-1,2])))
valh <- lapply(startv,function(par) try( tryCatch({
qr.solve(-hff(par[-1,2]))},
error = function(err){
r <- solve(-hff(par[-1,2]))
if(any(is.na(diag(r)))){
diag(r) <- -1
}
return(r)}),
silent = TRUE))
gind <- !sapply(valh, function(hes){
if(any(class(hes)%in%"try-error")) hes <- matrix(-1)
any(diag(hes)<0)
} )
goodv <- startv[gind]
if(length(goodv)>1){
goodv <- goodv[which.min(unlist(valf[gind]))]
res_goodv <- list(goodv, valf, gind, startv)
}else if(length(goodv)==0&i<=length(coefs_o)){
#print("No good starting values automatically found: can not invert the Hessian.")
#print("Starting another combination")
i <- i+1
coefs <- generate_disc_coefs(coefs_o,eqest, i, type_select = type_select)
res_goodv <- help_get_disc_start(coefs_o, coefs, hff, MNf1, eqest, ppardogit, type_select, i)
}else{
goodv <- startv
res_goodv <- list(goodv, valf, gind, startv)
}
return(res_goodv)
}
#' @keywords internal
parf <- function(fname)
{
. <- NULL
paste0("args(",fname,")%>%as.list%>%names%>%paste0('# @param ', ., '\n')%>%cat")%>%
parse(text=.)%>%eval
}
#' \code{get_start} get starting values for discrete or continuous choice model.
#' @param eq_c Continuous equations errors.
#' @param eq_d Discrete equations.
#' @param data \code{data.frame} is used in the optimization.
#' @param part Type of estimation: "joint", "cont", "disc".
#' @param datan Name of \code{data.frame} used in the optimization.
#' @param fixed_term if \code{TRUE}, includes fixed term in log-likelihood.
#' @param weight_paths if \code{TRUE}, weights paths of the whole system.
#' @param weight_paths_cont if \code{TRUE}, weight paths only in continuous part.
#' @param data_weight \code{data.frame} with weights for continuous and discrete equations,
#' same dim as \code{data}.
#' @param par_c Names of parameters in continuous equations.
#' @param par_d Names of parameters in discrete equations.
#' @param startvals Starting values, can be also \code{NULL}.
#' @param DEoptim_run if \code{TRUE}, runs DEoptim for the optimization.
#' @param hessian Name of hessian function.
#' @param best_method if \code{TRUE}, try all possible optimization methods and
#' choose the one with the smallest likelihood.
#' @param transform if \code{TRUE}, quantile transformation is applied.
#' @param MNtypef "dogit" or "logit".
#' @param pardogit "dogit" parameters.
#' @param opt_method optimization method to use.
#' @param numerical_deriv if \code{TRUE}, numerical derivatives are calculated in nmm function
#' @return Starting values for discrete or continuous blocks.
#' @examples
#' # Example of discrete choice model
#' data("TravelMode", package = "AER")
#' eq_d <- c("ASC1 * 1 + B2_t * travel_1 + B3_v * vcost_1" ,
#' "ASC2 * 1 + B2_t * travel_2 + B3_v * vcost_2",
#' "ASC3 * 1 + B2_t * travel_3 + B3_v * vcost_3",
#' "ASC4 * 1 + B2_t * travel_4 + B3_v * vcost_4")
#' parl <- c(paste0("ASC", 1:4), "B2_t", "B3_v")
#' obj <- get_par(parl, eq_d)
#'
#' mode_spec_var <- c("wait", "vcost", "travel", "gcost")
#' data <- TravelMode
#' data$wait <- as.numeric(data$wait)
#' data[data$wait==0,"wait"] <- 0.000001 # add a small number to 0
#' data$travel <- as.numeric(data$travel)
#' data[data$travel==0,"travel"] <- 0.000001
#' data$vcost <- as.numeric(data$vcost)
#' data[data$vcost==0,"vcost"] <- 0.000001
#' data <- prepare_data(data, choice="mode", dummy="choice", PeID="individual", WeID="",
#' type="long", mode_spec_var =mode_spec_var, wc=FALSE)
#' stv <- get_start(eq_d=eq_d, data=data, datan="data", part="disc", par_d = parl,
#' transform = FALSE)
#'
#' #example, system of equations
#' data("CreditCard", package="AER")
#' cdat <- CreditCard
#' cdat$income2 <- cdat$income^2
#' cdat$d_selfemp <- as.numeric(cdat$selfemp)
#' eq_c <- c("expenditure ~ b1*age + b2*income + b3*income2",
#' "income ~ a1*age + a2*d_selfemp + a3*dependents + a4*majorcards")
#' parl <- c(paste0("b", 1:3), paste0("a", 1:4))
#' para_cont <- get_par(parl, eq_c)
#' cheqs0 <- para_cont$cheqs0
#' \donttest{
#' stv <- get_start(eq_c = eq_c, data=cdat, datan="cdat", part="cont", par_c=parl)
#' }
#' @export
get_start <- function(eq_c = NULL, eq_d = NULL, data = NULL, part = 'joint', datan = 'data',
fixed_term = FALSE, weight_paths = TRUE, weight_paths_cont = FALSE,
data_weight = NULL, par_c = NULL, par_d = NULL, best_method = FALSE,
startvals = NULL, DEoptim_run = FALSE, hessian = NULL, transform = TRUE,
MNtypef="logit", pardogit = NULL, opt_method = "BFGS",
numerical_deriv = FALSE)
{
. <- NULL
#set.seed(1134252)
if(!is.null(eq_c)){
para_cont <- get_par(par_c, eq_c)
cheqs0 <- para_cont$cheqs0
}else{
cheqs0 <- NULL
}
if(!is.null(eq_d)){
para_disc <- get_par(par_d, eq_d)
ffor <- para_disc$cheqs0
}else{
ffor <- NULL
}
indp <- !part%in%c("cont")
data <- prepare_data(data, avl= indp, chc=indp, wd=indp, nc=length(cheqs0), wc=!indp)
methodopt <- "BFGS"
if(!is.null(cheqs0)&part!='disc'){
npar <- length(par_c)
datac <- 'datcont'
# version 1
mn <- maxle_p(cheqs0 = cheqs0, fixed_term = fixed_term)
varcont <- strsplit(mn$formulap%>%gsub('_', 'XXX', .),'[[:punct:]]')%>%unlist%>%unique%>%
trimws%>%gsub('XXX', '_', .)
ncont <- c('PeID',paste0('wc_', 1:length(cheqs0)),names(data)[names(data)%in%varcont])%>%unique
datcont <- data[, ncont]
datcont <- unique(datcont)
of <- f_create(mn$formula,data = datac, cheqs0=cheqs0, transform = FALSE)
hf <- grad_hess_eval(mn = mn, parnl = paste0('par',1:npar), hessian = TRUE, data=datac,
cheqs0=cheqs0)
environment(of) <- environment(hf) <- environment()
s_cont <- get_start_cont(of, hf, datcont, eq_c, par_c, startvals=startvals,
DEoptim_run=DEoptim_run)
# version 2
mn <- maxle_p(cheqs0 = cheqs0, fixed_term = fixed_term, version2 = TRUE)
of <- f_create(mn$formula,data = datac, cheqs0=cheqs0, transform = FALSE)
hf <- grad_hess_eval(mn = mn, parnl = paste0('par',1:npar), hessian = TRUE, data=datac,
cheqs0=cheqs0)
environment(of) <- environment(hf) <- environment()
s_cont2 <- get_start_cont(of, hf, datcont, eq_c, par_c, startvals=startvals,
DEoptim_run=DEoptim_run)
v1 <- of(s_cont)
v2 <- of(s_cont2)
res_cont <- nmm(data = datcont, start_v=s_cont, eq_c=eq_c, par_c=par_c,
eq_type = "cont", opt_method = opt_method,
best_method = best_method, hessian=hessian,
numerical_deriv = numerical_deriv)
if(v1!=v2){
res_cont_v2 <- nmm(data = datcont, start_v=s_cont2, eq_c=eq_c, par_c=par_c,
eq_type = "cont", opt_method = opt_method,
best_method = best_method, hessian=hessian,
numerical_deriv = numerical_deriv)
if(res_cont_v2$maximum>res_cont$maximum){
res_cont <- res_cont_v2
}
}
s_cont <- res_cont$estimate
}else{
s_cont <- NULL
}
if(!is.null(ffor)&part!='cont'){
methodopt <- 'NA'
#transform <- TRUE
fforp <- gsub("\\[|\\]","", ffor)
eqest <- sapply(paste0(paste0("chc_", 1:length(fforp))," ~ ", "I(",fforp,")" ), as.formula)
if(MNtypef=="logit"){
mnf1 <- MNlogitf(ffor, transform = transform, separatenmm = TRUE, weight_disc = TRUE)
ppardogit <- NULL
}else{
ppardogit <- c(paste0('par[',(length(para_disc$parld)+1):(length(para_disc$parld)+length(pardogit)),']'))
mnf1 <- MNdogitf(ffor, transform = transform, separatenmm = TRUE,
weight_disc = TRUE, ppardogit)
}
datad <- 'datdisc'
vardisc <- strsplit(mnf1$formula%>%gsub('_', 'XXX', .),'[[:punct:]]')%>%unlist%>%unique%>%
trimws%>%gsub('XXX', '_', .)
ndisc <- c('PeID','WeID','choice',names(data)[names(data)%in%vardisc])
datdisc<- data[, names(data)[names(data)%in%ndisc]]
datdisc <- unique(datdisc)
MNf1 <- f_create(mn=mnf1$formula, data=datad, cheqs0=cheqs0, fixed=1, probt=mnf1$probt,
tformula=mnf1$formulat, separatenmm = TRUE, sume=mnf1$sume)
dnpar <- ffor%>%get_npar
if(!is.null(pardogit)){
dnpar <- dnpar+length(pardogit)
}
hf <- grad_hess_eval(mn=mnf1, parnl = paste0('par', 1:dnpar),
fixed=1, hessian = TRUE, data=datad, cheqs0=cheqs0)
environment(MNf1) <- environment(hf) <- environment()
separatenmm <- FALSE
s_disc <- get_start_disc(MNf1, hf, eqest, datdisc, ppardogit=ppardogit)#calc hessian gradient
if(!is.null(pardogit)){
names(s_disc) <- c(para_disc$parld, pardogit)
}else{
names(s_disc) <- para_disc$parld
}
s_disc <- s_disc[-1]
res_disc <- nmm(data=datdisc, start_v = s_disc, eq_d=eq_d, eq_type = "disc",
weight_paths = FALSE, par_d = par_d, opt_method = opt_method,
best_method = FALSE, hessian=hessian, MNtypef = MNtypef,
numerical_deriv = numerical_deriv)
s_disc <- res_disc$estimate
}else{
s_disc <- NULL
}
s_joint <- c(s_cont, s_disc)
s_joint
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.