#' Robust Mixture Regression with Thresholding-Embedded EM Algorithm for Penalized Estimation
#'
#' A robust mixture regression model that simultaneously conducts outlier detection and
#' robust parameter estimation. It uses a sparse, case-specific, and scale-dependent
#' mean-shift mixture model parameterization (Yu et al., 2017):
#' \deqn{f(y_i|\boldsymbol{x}_i,\boldsymbol{\theta},\boldsymbol{\gamma}_i) = \sum_{j=1}^C\pi_j\phi(y_i;\boldsymbol{x}_i^{\top}\boldsymbol{\beta}_j+\gamma_{ij}\sigma_j,\sigma_j^2),}
#' \eqn{i=1,\cdots,n}, where \eqn{C} is the number of components in the model,
#' \eqn{\boldsymbol{\theta}=(\pi_1,\boldsymbol{\beta}_1,\sigma_1,...,\pi_C,\boldsymbol{\beta}_C,\sigma_C)^{\top}}
#' is the parameter to estimate,
#' and \eqn{\boldsymbol{\gamma}_i=(\gamma_{i1},...,\gamma_{iC})^{\top}} is a vector of mean-shift parameters for the \eqn{i}th observation.
#'
#' @usage
#' mixregRM2(x, y, C = 2, ini = NULL, nstart = 20, tol = 1e-02, maxiter = 50,
#' method = c("HARD", "SOFT"), sigma.const = 0.001, lambda = 0.001)
#'
#' @details
#' The parameters are estimated by maximizing the corresponding penalized log-likelihood function using an EM algorithm.
#' The thresholding rule involves the estimation of \eqn{\gamma_{ij}} corresponding to different penalties:
#' * Soft threshold: \eqn{\hat{\gamma}_{ij} = sgn(\epsilon_{ij})(|\epsilon_{ij}|-\lambda_{ij}^*)_{+}}, corresponding to the \eqn{l_1} penalty.
#' * Hard threshold: \eqn{\hat{\gamma}_{ij} = \epsilon_{ij}I(|\epsilon_{ij}|>\lambda_{ij}^*)}, corresponding to the \eqn{l_0} penalty.
#'
#' Here, \eqn{\epsilon_{ij} = (y_i-\boldsymbol{x}_i^{\top}\boldsymbol{\beta}_j)/\sigma_j} and
#' \eqn{(\cdot)_{+}=\max(\cdot,0)}. Also, \eqn{\lambda_{ij}^*} is taken as \eqn{\lambda/p_{ij}^{(k+1)}} for soft threshold and
#' \eqn{\lambda/\sqrt{p_{ij}^{(k+1)}}} for hard threshold.
#'
#' @param x an n by p data matrix where n is the number of observations and p is the number of explanatory variables.
#' The intercept term will automatically be added to the data.
#' @param y an n-dimensional vector of response variable.
#' @param C number of mixture components. Default is 2.
#' @param ini initial values for the parameters. Default is NULL, which obtains the initial values
#' using the \code{\link{mixreg}} function. It can be a list with the form of \code{list(pi, beta, sigma, gamma)}, where
#' \code{pi} is a vector of C mixing proportions,
#' \code{beta} is a C by (p + 1) matrix for regression coefficients of C components,
#' \code{sigma} is a vector of C standard deviations, and
#' \code{gamma} is an n-dimensional vector of mean shift values.
#' @param nstart number of initializations to try. Default is 20.
#' @param tol stopping criterion (threshold value) for the EM algorithm. Default is 1e-02.
#' @param maxiter maximum number of iterations for the EM algorithm. Default is 50.
#' @param method character, determining which threshold method to use: \code{HARD} or \code{SOFT}.
#' Default is \code{HARD}. See details.
#' @param sigma.const constraint on the ratio of minimum and maximum values of sigma. Default is 0.001.
#' @param lambda tuning parameter in the penalty term. Default is 0.001. It can be chosen based on BIC. See Yu et al. (2017) for more details.
#'
#' @return A list containing the following elements:
#' \item{pi}{C-dimensional vector of estimated mixing proportions.}
#' \item{beta}{C by (p + 1) matrix of estimated regression coefficients.}
#' \item{sigma}{C-dimensional vector of estimated standard deviations.}
#' \item{gamma}{n-dimensional vector of estimated mean shift values.}
#' \item{posterior}{n by C matrix of posterior probabilities of each observation belonging to each component.}
#' \item{run}{total number of iterations after convergence.}
#'
#' @seealso \code{\link{mixreg}} for initial value calculation.
#'
#' @references
#' Yu, C., Yao, W., and Chen, K. (2017). A new method for robust mixture regression.
#' Canadian Journal of Statistics, 45(1), 77-94.
#'
#' @importFrom stats optim
#' @export
#' @examples
#' data(tone)
#' y = tone$tuned
#' x = tone$stretchratio
#' k = 160
#' x[151:k] = 0
#' y[151:k] = 5
#' \donttest{est_RM2 = mixregRM2(x, y, lambda = 1)}
mixregRM2 <- function(x, y, C = 2, ini = NULL, nstart = 20, tol = 1e-02, maxiter = 50,
method = c("HARD", "SOFT"), sigma.const = 0.001, lambda = 0.001){
#SK-- Used functions: mixreg, hard_plh (or soft_plh), hard_ipod (or soft_ipod)
# Match arguments
method <- match.arg(method)
f_plh <- switch(method, "HARD" = hard_plh, "SOFT" = soft_plh)
f_ipod <- switch(method, "HARD" = hard_ipod, "SOFT" = soft_ipod)
outer.conv <- inner.conv <- tol
runnum1 <- runnum2 <- maxiter
m = C
# Add intercept variable --SK 9/3/2023
#if (addintercept){
X = cbind(1, x)
#}
n = length(y)
p = ncol(X)
y = matrix(y)
# Add default value for pi, beta, sigma, gamma Xin Shen
# If no initial value is supplied, estimate it by mixture regression with a Gaussian assumption.
# If the output of mixreg is updated to be more user friendly, be sure to update here as well.
## SK 08/31/2023
if(is.null(ini)){
ini = mixreg(x, y, m, nstart, tol) #SK mixreg(x, y, m, nstart)
beta0 = t(ini$beta)
sig0 = matrix(ini$sigma)
pi0 = matrix(ini$pi)
gamma0 = matrix(0, nrow = n)
} else {
pi0 = ini$pi
beta0 = ini$beta
sig0 = ini$sigma
gamma0 = ini$gamma
}
pi1 = as.matrix(pi0)
beta1 = as.matrix(beta0)
sig1 = as.matrix(sig0)
gamma1 = gamma0
###--- tuning lambda
#if(is.null(lambda)){
#
#}
###---
out_lh = f_plh(y = y, X = X, pi = pi1, beta = beta1, sigma = sig1, gamma = gamma1, lambda = lambda)
dif = 1
obj <- vector()
run = 0
restart = 0
## EM loop
while (dif > outer.conv & run < runnum1 & restart < 10){
out_lh0 = out_lh
run = run + 1
oldgamma = gamma1
oldpi = pi1
oldbeta = beta1
oldsig = sig1
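# E-step: compute posterior probabilities of component membership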
pk = matrix(rep(0, m * n), nrow = n)
for(j in seq(m)){
pk[, j] = oldpi[j, ] * pmax(10^(-300), stats::dnorm(y - oldgamma * oldsig[j, ] - X %*% oldbeta[, j], mean = 0, sd = oldsig[j, ]))
}
p_ij = pk/matrix(rep(apply(pk, 1, sum), m), nrow = n)
n_p = apply(p_ij, 2, sum)
# M-step
pi2 = oldpi
beta2 = oldbeta
sig2 = oldsig
gamma2 = oldgamma
inner_lh = f_plh(y = y, X = X, pi = oldpi, beta = oldbeta, sigma = oldsig, gamma = oldgamma, lambda = lambda)
run1 = 0
dif1 = 1
while(dif1 > inner.conv & run1 < runnum2){
inner_lh0 = inner_lh
run1 = run1 + 1
prepi = pi2
prebeta = beta2
presig = sig2
pregamma = gamma2
pi = matrix(rep(0, m), nrow = m)
beta = matrix(rep(0, p * m), nrow = p)
sig = matrix(rep(0, m), nrow = m)
# update gamma first
gamma = matrix(c(rep(0, n)), nrow = n)
for(i in seq(n)){
gamma[i] = f_ipod(theta = ((p_ij[i,]) %*% ((y[i, ] - (X %*% beta2)[i, ])/(sig2))), lambda = lambda)
}
gamma2 = as.matrix(gamma, nrow = n)
for(k in seq(m)){
pi[k] = n_p[k]/n
wt = diag(p_ij[, k])
beta[, k] = MASS::ginv(t(X) %*% wt %*% X) %*% t(X) %*% wt %*% (y - gamma2 * presig[k, ])
}
sig = stats::optim(presig[1, ], y = y, X = X, beta = beta, p_ij = p_ij, gamma = gamma2, fn = sigmaf,
method = "BFGS", hessian = FALSE, m = m)$par
pi2 = as.matrix(pi, nrow = m)
beta2 = as.matrix(beta, nrow = p)
sig2 = matrix(c(rep(sig, m)), nrow = m)
# check if there is extreme value
if(min(sig2)/max(sig2) < sigma.const){
warning('One of the sigmas is approaching 0; restarting with another initial value.')
# generate one new initial value from mixrege
ini_est = mixreg(x, y, m, 1, tol) #SK mixreg(x, y, m, 1)
beta_new = t(ini_est$beta)
sig_new = matrix(ini_est$sigma)
pi_new = matrix(ini_est$pi)
gamma_new = matrix(0, nrow = n)
out1 = list(pi = pi_new, beta = beta_new, sig = sig_new, gamma = gamma_new, run = 0)
out_lh0 = 0
obj <- vector()
restart = restart + 1
break
}
# sig2[sig2<sigma.const] <- 0.5 ##truncate the extremely small sigma
# sig2[sig2>3] <- 1
# updated loglikelihood in the inner-loop
inner_lh = f_plh(y = y, X = X, pi = pi2, beta = beta2, sigma = sig2, gamma = gamma2, lambda = lambda)
dif1 = inner_lh - inner_lh0
#if(dif1 < -0.1 & sigma.flag==FALSE) message(paste("Warning: Inner loop decreases. DIFF = ", round(dif1,4)," Sig.truncated = ", sigma.flag, sep=" "))
####
if(dif1 > (-1e-10) & dif1 < 0){
dif1 = 0
}
out1 = list(pi = pi2, beta = beta2, sig = sig2, gamma = gamma2, run = run1)
}
pi1 = out1$pi
beta1 = out1$beta
sig1 = out1$sig
gamma1 = out1$gamma
out_lh = f_plh(y = y, X = X, pi = pi1, beta = beta1, sigma = sig1, gamma = gamma1, lambda = lambda)
obj[run] <- out_lh
dif = out_lh - out_lh0
if(dif < 0 & sum((beta1 - beta2)^2) > 1e-2){
warning(paste("Outer loop decreases. DIFF =", dif, sep =" "))
}
if(dif> (-1e-10) & dif < 0){
dif = 0
}
}
# check if the algorithm converged
cov = TRUE
if(run >= runnum1){
warning('The algorithm didn\'t converge; stopped after reaching the maximum number of iterations.')
cov = FALSE
}
if(restart >= 10){
warning('The algorithm didn\'t converge; stopped after too many restarts.')
cov = FALSE
}
out = list(pi = as.vector(pi1), beta = t(beta1), sigma = as.vector(sig1), gamma = as.vector(gamma1),
posterior = p_ij, run = run) #SK dropped cov, diff, and dif from the returned list
return(out)
}
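# A minimal usage sketch (not run), mirroring the example above: nonzero
# entries of the estimated mean-shift vector gamma flag the observations the
# penalized fit treats as outliers (cf. Yu et al., 2017).
if (FALSE) {
  data(tone)
  y <- tone$tuned
  x <- tone$stretchratio
  x[151:160] <- 0
  y[151:160] <- 5
  est <- mixregRM2(x, y, lambda = 1)
  which(est$gamma != 0)  # indices of the flagged outliers
}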
##############################################
## mixreg based on multiple initial values
##############################################
#----------------------------------------------------------------------------------------------
# Programming Note:
# This function is used to estimate the initial values of parameters in mixregRM2 function.
# If any update is made about the output of this function, make sure to update corresponding part in mixregRM2.
#----------------------------------------------------------------------------------------------
#' MLE of Mixture Regression with Normal Errors
#'
#' `mixreg' provides the maximum likelihood estimates (MLE) of a mixture of linear regression models with normal errors.
#' The result from this function can be used as initial values for the \code{\link{mixregRM2}} function.
#'
#' @usage
#' mixreg(x, y, C = 2, nstart = 20, tol = 1e-05)
#'
#' @param x an n by p data matrix where n is the number of observations and p is the number of explanatory variables.
#' The intercept term will automatically be added to the data.
#' @param y an n-dimensional vector of response variable.
#' @param C number of mixture components. Default is 2.
#' @param nstart number of initializations to try. Default is 20.
#' @param tol stopping criterion (threshold value) for the EM algorithm. Default is 1e-05.
#'
#' @return A list containing the following elements:
#' \item{pi}{C-dimensional vector of estimated mixing proportions.}
#' \item{beta}{C by (p + 1) matrix of estimated regression coefficients.}
#' \item{sigma}{C-dimensional vector of estimated standard deviations.}
#' \item{lik}{final log-likelihood.}
#' \item{run}{total number of iterations after convergence.}
#'
#' @seealso \code{\link{mixregRM2}}
#'
#' @export
#' @examples
#' data(tone)
#' y = tone$tuned
#' x = tone$stretchratio
#' k = 160
#' x[151:k] = 0
#' y[151:k] = 5
#' est = mixreg(x, y, 2, nstart = 1)
mixreg <- function(x, y, C = 2, nstart = 20, tol = 1e-05){
#SK-- Used functions: mixregone
k = C
n = length(y)
x = matrix(x, nrow = n)
a = dim(x)
p = a[2] + 1
n1 = 2 * p
lh = rep(0, nstart)
bet = matrix(rep(0, k * p), nrow = k)
sig = 0
for(j in seq(k)){
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/k, k)
sig = sig/n1/k
est = mixregone(x, y, bet, sig, pr, k, tol) #SK mixregone(x, y, bet, sig, pr, k)
lh[1] = est$lik
for(i in seq(nstart - 1)){
bet = matrix(rep(0, k * p), nrow = k)
sig = 0
for(j in seq(k)){
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/k, k)
sig = sig/n1/k
temp = mixregone(x, y, bet, sig, pr, k, tol) #SK mixregone(x, y, bet, sig, pr, k)
lh[i + 1] = temp$lik
if(lh[i + 1] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])){
est = temp
}
}
#SK est$inilikelihoodseq = lh
#colnames(out$theta) <- c("beta.intercept", "beta.slope", "sigma", "prop")
npar <- ncol(est$theta)
out <- list(pi = est$theta[, npar],
beta = est$theta[, - c(npar - 1, npar)],
sigma = est$theta[, npar - 1],
lik = est$lik,
run = est$run)
return(out)
}
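# A minimal sketch (not run), assuming x and y as in the examples: passing
# mixreg() output to mixregRM2() as the initial value. Inside mixregRM2() the
# coefficient matrix supplied via ini is used with one column per component,
# so the C by (p + 1) matrix from mixreg() is transposed here; gamma starts
# at zero for all n observations.
if (FALSE) {
  init <- mixreg(x, y, C = 2, nstart = 1)
  ini <- list(pi = init$pi, beta = t(init$beta), sigma = init$sigma,
              gamma = rep(0, length(y)))
  est <- mixregRM2(x, y, C = 2, ini = ini, lambda = 1)
}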
#' Robust EM Algorithm For Mixture of Linear Regression Based on Bisquare Function
#'
#' `mixregBisq' is used to robustly estimate the parameters of a mixture regression model
#' using the bisquare function based on multiple initial values (Bai et al., 2012). The solution is the mode
#' of the solutions obtained from all initial values.
#'
#' @usage
#' mixregBisq(x, y, C = 2, nstart = 20)
#'
#' @param x an n by p data matrix where n is the number of observations and p is the number of explanatory variables.
#' The intercept term will automatically be added to the data.
#' @param y an n-dimensional vector of response variable.
#' @param C number of mixture components. Default is 2.
#' @param nstart number of initializations to try. Default is 20.
#'
#' @return A list containing the following elements:
#' \item{pi}{C-dimensional vector of estimated mixing proportions.}
#' \item{beta}{C by (p + 1) matrix of estimated regression coefficients.}
#' \item{sigma}{C-dimensional vector of estimated standard deviations.}
#'
#' @references
#' Bai, X., Yao, W., and Boyer, J. E. (2012). Robust fitting of mixture regression models.
#' Computational Statistics & Data Analysis, 56(7), 2347-2359.
#'
#' @export
#' @examples
#' data(tone)
#' y = tone$tuned
#' x = tone$stretchratio
#' k = 160
#' x[151:k] = 0
#' y[151:k] = 5
#' est_bi = mixregBisq(x, y, 2, nstart = 20)
mixregBisq <- function(x, y, C = 2, nstart = 20){
#SK-- Used functions: permutations, mixregBisqone
# theta: estimated parameters matrix for the best estimator (the estimated value with most initial values converge to),
# the columns are beta(intercept, slopes), sigma, proportion for each component
# estall: all estimated parameters
# uniqueest: unique estimated parameters
# countuniqueest: number of unique estimated parameters
# uniqueestindex: which initial values gives the unique estimators
# bestindex: index of initial value gives the best estimated parameter
# estindex: matrix of each initial value converge to which set of estimated parameters.
m = C
n = length(y)
x = matrix(x, nrow = n)
a = dim(x)
p = a[2] + 1
n1 = 2 * p
perm = permutations(m, m)
sig = 0
ind1 = c()
bet = matrix(rep(0, m * p), nrow = m)
for(j in seq(m)){
ind1 = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind1, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind1]
sig = sig + sum((y[ind1] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = max(sig/n1/m)
est = mixregBisqone(x, y, bet, sig, pr, m)
lenpar = length(c(est$theta))
theta = matrix(rep(0, lenpar * (nstart)), ncol = lenpar)
theta[1, ] = c(est$theta)
minsig = 10^(-1) * sig
trimbet = matrix(theta[1, 1:(p * m)], nrow = m)
trimbet = matrix(rep(matrix(t(trimbet), ncol = p * m, byrow = TRUE), gamma(m + 1)), ncol = p * m, byrow = TRUE)
ind = matrix(rep(0, nstart), nrow = 1)
ind[1] = 1
numsol = 1
solindex = 1
sol = matrix(theta[1, ], nrow = 1)
for(i in 2:nstart){
sig = 0
ind1 = c()
for(j in seq(m)){
ind1 = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind1, ])
bet[j, ]= MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind1]
sig = sig + sum((y[ind1] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = max(sig/n1/m, minsig)
est = mixregBisqone(x, y, bet, sig, pr, m)
theta[i, ] = est$theta
temp = matrix(theta[i, 1:(p * m)], nrow = m)
temp = matrix(t(temp[t(perm), ]), ncol = p * m, byrow = TRUE)
dif = apply((trimbet - temp)^2, 1, sum)
temp1 = which(dif == min(dif))
theta[i, ] = c(c(matrix(temp[temp1[1], ], nrow = m, byrow = TRUE)), theta[i, p * m + perm[temp1[1], ]], theta[i, p * m + m + perm[temp1[1], ]])
dif = apply((matrix(rep(theta[i, 1:(p * m)], numsol), nrow = numsol, byrow = TRUE) - sol[, 1:(p * m)])^2, 1, sum)
if(min(dif) > 0.1){
sol = rbind(sol, theta[i, ])
numsol = numsol + 1
solindex = c(solindex, i)
ind = rbind(ind, rep(0, nstart))
ind[numsol, i] = 1
} else {
ind1 = which(dif == min(dif))
ind[ind1, i] = 1
}
}
num = apply(ind, 1, sum)
ind1 = order(- num)
bestindex = ind1
for(j in seq(numsol)){
if(min(sol[ind1[j], (p * m + m + 1):(p * m + 2 * m)]) > 0.05){
index = 1
est = matrix(sol[ind1[j], ], nrow = m)
for(l in seq(m - 1)){
temp = matrix(rep(est[l, 1:p], m - l), nrow = m - l, byrow = TRUE) - est[(l + 1):m, 1:p]
temp = matrix(temp, nrow = m - l)
dif = apply(temp^2, 1, sum)
if(min(dif) < 0.1){
index = 0
break
}
}
if(index == 1){
bestindex = ind1[j]
break
}
}
}
est = sol[bestindex[1], ]
# out=list(theta=matrix(est,nrow=m),estall=theta, uniqueest=sol, countuniqueest=num,uniqueestindex=solindex, bestindex= solindex[bestindex],estindex=ind);out }
#SK-- 9/4/2023
#out = list(theta = matrix(est, nrow = m))
#colnames(out$theta) <- c("beta.intercept", "beta.slope", "sigma", "prop")
est = list(theta = matrix(est, nrow = m))
npar <- ncol(est$theta)
out <- list(pi = est$theta[, npar],
beta = est$theta[, - c(npar - 1, npar)],
sigma = est$theta[, npar - 1])
return(out)
}
#' Robust Regression Estimator Using Trimmed Likelihood
#'
#' `mixregTrim' is used for robust regression estimation of a mixture model using the trimmed likelihood estimator
#' (Neykov et al., 2007). It trims the data to reduce the impact of outliers on the model.
#'
#' @usage
#' mixregTrim(x, y, C = 2, keep = 0.95, nstart = 20)
#'
#' @param x an n by p data matrix where n is the number of observations and p is the number of explanatory variables.
#' The intercept term will automatically be added to the data.
#' @param y an n-dimensional vector of response variable.
#' @param C number of mixture components. Default is 2.
#' @param keep proportion of data to be kept after trimming, ranging from 0 to 1. Default is 0.95.
#' @param nstart number of initializations to try. Default is 20.
#'
#' @return A list containing the following elements:
#' \item{pi}{C-dimensional vector of estimated mixing proportions.}
#' \item{beta}{C by (p + 1) matrix of estimated regression coefficients.}
#' \item{sigma}{C-dimensional vector of estimated standard deviations.}
#' \item{lik}{final log-likelihood.}
#'
#' @references
#' Neykov, N., Filzmoser, P., Dimova, R., and Neytchev, P. (2007). Robust fitting of mixtures using
#' the trimmed likelihood estimator. Computational Statistics & Data Analysis, 52(1), 299-308.
#'
#' @export
#' @examples
#' data(tone)
#' y = tone$tuned
#' x = tone$stretchratio
#' k = 160
#' x[151:k] = 0
#' y[151:k] = 5
#' est_TLE = mixregTrim(x, y, 2, 0.95, nstart = 1)
mixregTrim <- function(x, y, C = 2, keep = 0.95, nstart = 20){
#SK-- Used functions: mixregTrimone
k = C
n = length(y)
x = matrix(x, nrow = n)
a = dim(x)
p = a[2] + 1
n1 = 2 * p
bet = matrix(rep(0, k * p), nrow = k)
sig = 0
for(j in seq(k)){
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/k, k)
sig = sig/n1/k
lh = rep(0, nstart)
est = mixregTrimone(x, y, k, keep, bet, sig, pr)
lh[1] = est$lik
for(i in seq(nstart - 1)){
sig = 0
for(j in seq(k)){
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/k, k)
sig = sig/n1/k
temp = mixregTrimone(x, y, k, keep, bet, sig, pr)
lh[i + 1] = temp$lik
if(lh[i + 1] > est$lik){
est = temp
}
}
#colnames(est$theta) <- c("beta.intercept", "beta.slope", "sigma", "prop")
# est=list(theta=est$theta,finallikelihood=est$likelihood,likelihoodseq=lh)
#est = list(theta = est$theta, lik = est$lik)
#est
#SK-- 9/4/2023
#est = list(theta = matrix(est, nrow = m), lik = est$lik)
npar <- ncol(est$theta)
out <- list(pi = est$theta[, npar],
beta = est$theta[, - c(npar - 1, npar)],
sigma = est$theta[, npar - 1],
lik = est$lik)
return(out)
}
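# A minimal sketch (not run), assuming x and y as in the examples: with
# keep = 0.90 the fit uses the round(0.90 * n) observations with the highest
# current mixture density and refits until the trimmed log-likelihood
# stabilizes (see mixregTrimone below).
if (FALSE) {
  est_trim <- mixregTrim(x, y, C = 2, keep = 0.90, nstart = 5)
  est_trim$lik  # trimmed log-likelihood of the final fit
}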
#' Robust Mixture Regression with T-distribution
#'
#' `mixregT' provides robust estimation for a mixture of linear regression models
#' by assuming that the error terms follow the t-distribution (Yao et al., 2014). The degrees of freedom
#' are adaptively estimated.
#'
#' @usage
#' mixregT(x, y, C = 2, maxdf = 30, nstart = 20, tol = 1e-05)
#'
#' @param x an n by p data matrix where n is the number of observations and p is the number of explanatory variables.
#' The intercept term will automatically be added to the data.
#' @param y an n-dimensional vector of response variable.
#' @param C number of mixture components. Default is 2.
#' @param maxdf maximum degrees of freedom for the t-distribution. Default is 30.
#' @param nstart number of initializations to try. Default is 20.
#' @param tol threshold value (stopping criterion) for the EM algorithm. Default is 1e-05.
#'
#' @return A list containing the following elements:
#' \item{pi}{C-dimensional vector of estimated mixing proportions.}
#' \item{beta}{C by (p + 1) matrix of estimated regression coefficients.}
#' \item{sigma}{C-dimensional vector of estimated standard deviations.}
#' \item{lik}{final log-likelihood.}
#' \item{df}{estimated degrees of freedom of the t-distribution.}
#' \item{run}{total number of iterations after convergence.}
#'
#' @seealso \code{\link{mixregLap}} for robust estimation with Laplace distribution.
#'
#' @references
#' Yao, W., Wei, Y., and Yu, C. (2014). Robust mixture regression using the t-distribution.
#' Computational Statistics & Data Analysis, 71, 116-127.
#'
#' @importFrom robustbase covMcd
#' @export
#' @examples
#' data(tone)
#' y = tone$tuned
#' x = tone$stretchratio
#' k = 160
#' x[151:k] = 0
#' y[151:k] = 5
#' \donttest{est_t = mixregT(x, y, 2, nstart = 20, tol = 0.1)}
mixregT <- function(x, y, C = 2, maxdf = 30, nstart = 20, tol = 1e-05){
# degreerange: range of degrees of freedom used to estimate the parameters
# vlikelihoodseq: all likelihoods associated with the degrees of freedom
# lh: likelihood list for all initial values
m = C
n = length(y)
x = matrix(x, nrow = n)
a = dim(x)
p = a[2] + 1
n1 = 2 * p
bet = matrix(rep(0, m * p), nrow = m)
sig = 0
for(j in seq(m)){
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig/n1/m
lh = rep(0, nstart)
est = mixregTv(x, y, bet, sig, pr, m, maxdf, tol)
lh[1] = est$lik
for(i in seq(nstart - 1)){
sig = 0
for(j in seq(m)){
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig/n1/m
temp = mixregTv(x, y, bet, sig, pr, m, maxdf, tol)
lh[i + 1] = temp$lik
if(lh[i + 1] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])){
est = temp
}
}
est$degreerange = c(1, maxdf)
if(maxdf < 30 | tol > 10^(-5)){
bet = est$theta[, 1:p]
sig = est$theta[, p + 1]
pr = est$theta[, p + 2]
est = mixregTv(x, y, bet, sig[1], pr, m, 30, 10^(-5))
}
#colnames(est$theta) <- c("beta.intercept", "beta.slope", "sigma", "prop")
#est$inilikelihoodseq = lh
#SK-- 9/4/2023
npar <- ncol(est$theta)
out <- list(pi = est$theta[, npar],
beta = est$theta[, - c(npar - 1, npar)],
sigma = est$theta[, npar - 1],
lik = est$lik,
df = est$df,
run = est$run)
#out = list(theta = est$theta, lik = est$lik, df = est$df, run = est$run)
return(out)
}
#-------------------------------------------------------------------------------
# Internal functions
#-------------------------------------------------------------------------------
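# bisquare() is Tukey's biweight psi function; the default tuning constant
# k = 4.685 gives roughly 95% asymptotic efficiency under normal errors.
# biscalew() is the companion weight function used in the scale (sigma) updates.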
bisquare <- function(t, k = 4.685){
out = t * pmax(0, (1 - (t/k)^2))^2
return(out)
}
biscalew <- function(t){
t[which(t == 0)] = min(t[which(t != 0)])/10
out = pmin(1 - (1 - t^2/1.56^2)^3, 1)/t^2
return(out)
}
########################### EM algorithm to fit the mixture of linear regression
# mixregone estimates the mixture regression parameters by MLE based on ONE initial value
mixregone <- function(x, y, bet, sig, pr, m = 2, tol = 1e-05) {
run <- 0
n <- length(y)
X <- cbind(rep(1, n), x)
if (length(sig) > 1) { # the case when the variance is unequal
r <- matrix(rep(0, m * n), nrow = n)
pk <- r
lh <- 0
for (j in seq(m)) {
r[, j] <- y - X %*% bet[j, ]
lh <- lh + pr[j] * stats::dnorm(r[, j], 0, sig[j])
}
lh <- sum(log(lh))
# E-steps
repeat {
prest <- c(bet, sig, pr)
run <- run + 1
plh <- lh
for (j in seq(m)) {
pk[, j] <- pr[j] * pmax(10^(-300), stats::dnorm(r[, j], 0, sig[j]))
}
pk <- pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
# M-step
np <- apply(pk, 2, sum)
pr <- np / n
lh <- 0
for (j in seq(m)) {
w <- diag(pk[, j])
bet[j, ] <- MASS::ginv(t(X) %*% w %*% X) %*% t(X) %*% w %*% y
r[, j] <- y - X %*% bet[j, ]
sig[j] <- sqrt(t(pk[, j]) %*% (r[, j]^2) / np[j])
lh <- lh + pr[j] * stats::dnorm(r[, j], 0, sig[j])
}
lh <- sum(log(lh))
dif <- lh - plh
#if (dif < 10^(-5) | run > 500) { SK
if (dif < tol | run > 500) {
break
}
}
} else { # the case when the variance is equal
r <- matrix(rep(0, m * n), nrow = n)
pk <- r
lh <- 0
for (j in seq(m)) {
r[, j] <- y - X %*% bet[j, ]
lh <- lh + pr[j] * stats::dnorm(r[, j], 0, sig)
}
lh <- sum(log(lh))
# E-steps
repeat {
prest <- c(bet, sig, pr)
run <- run + 1
plh <- lh
for (j in seq(m)) {
pk[, j] <- pr[j] * pmax(10^(-300), stats::dnorm(r[, j], 0, sig))
}
pk <- pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
# M-step
np <- apply(pk, 2, sum)
pr <- np / n
for (j in seq(m)) {
w <- diag(pk[, j])
bet[j, ] <- MASS::ginv(t(X) %*% w %*% X) %*% t(X) %*% w %*% y
r[, j] <- y - X %*% bet[j, ]
}
sig <- sqrt(sum(pk * (r^2)) / n)
lh <- 0
for (j in seq(m)) {
lh <- lh + pr[j] * stats::dnorm(r[, j], 0, sig)
}
lh <- sum(log(lh))
dif <- lh - plh
#if (dif < 10^(-5) | run > 500) { SK
if (dif < tol | run > 500) {
break
}
}
sig <- sig * rep(1, m)
}
est <- list(theta = matrix(c(bet, sig, pr), nrow = m), lik = lh, run = run)#SK, diflh = dif)
est
}
########################### Robust EM algorithm to fit the mixture of linear regression based on bisquare function
#' Robust EM algorithm to fit the mixture of linear regression based on bisquare function with one initial value
#'
#' mixregBisqone estimates the mixture regression parameters robustly using the bisquare function based on one initial value.
#' @param x explanatory variables matrix with one observation per row
#' @param y response variable vector
#' @param bet initial value of beta
#' @param sig initial value of sigma
#' @param pr initial value of proportion
#' @param m number of components, default is 2
#'
#' @return A list containing the following elements:
#'
#' \item{theta}{estimated parameters matrix, the columns are beta(intercept, slopes), sigma, proportion for each component.}
#' \item{run}{number of iterations to converge.}
#' @noRd
mixregBisqone <- function(x, y, bet, sig, pr, m = 2) {
#SK-- Used functions: bisquare, biscalew
run <- 0
tol <- 10^(-4) * max(abs(c(bet, sig, pr)))
n <- length(y)
X <- cbind(rep(1, n), x)
p <- dim(X)[2]
if (length(sig) > 1) {
r <- matrix(rep(0, m * n), nrow = n)
pk <- r
for (j in seq(m))
r[, j] <- (y - X %*% bet[j, ]) / sig[j]
# E-steps
repeat {
prest <- c(sig, bet, pr)
run <- run + 1
for (j in seq(m)) {
pk[, j] <- pr[j] * pmax(10^(-300), stats::dnorm(r[, j], 0, 1)) / sig[j]
}
pk <- pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
# M-step
np <- apply(pk, 2, sum)
pr <- np / n
r[which(r == 0)] <- min(r[which(r != 0)]) / 10
for (j in seq(m)) {
w <- diag(pk[, j] * bisquare(r[, j]) / r[, j])
bet[j, ] <- solve(t(X) %*% w %*% X + 10^(-10) * diag(rep(1, p))) %*% t(X) %*% w %*% y
r[, j] <- (y - X %*% bet[j, ]) / sig[j]
sig[j] <- sqrt(sum(r[, j]^2 * sig[j]^2 * pk[, j] * biscalew(r[, j])) / np[j] / 0.5)
}
dif <- max(abs(c(sig, bet, pr) - prest))
if (dif < tol | run > 500) {
break
}
}
} else {
r <- matrix(rep(0, m * n), nrow = n)
pk <- r
for (j in seq(m))
r[, j] <- (y - X %*% bet[j, ]) / sig
# E-steps
repeat {
prest <- c(sig, bet, pr)
run <- run + 1
for (j in seq(m)) {
pk[, j] <- pr[j] * pmax(10^(-300), stats::dnorm(r[, j], 0, 1)) / sig
}
pk <- pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
# M-step
np <- apply(pk, 2, sum)
pr <- np / n
r[which(r == 0)] <- min(r[which(r != 0)]) / 10
for (j in seq(m)) {
w <- diag(pk[, j] * bisquare(r[, j]) / r[, j])
bet[j, ] <- solve(t(X) %*% w %*% X + 10^(-10) * diag(rep(1, p))) %*% t(X) %*% w %*% y
r[, j] <- (y - X %*% bet[j, ]) / sig
}
sig <- sqrt(sum(pk * (r^2 * sig[1]^2) * biscalew(r)) / n / 0.5)
dif <- max(abs(c(sig, bet, pr) - prest))
if (dif < tol | run > 500) {
break
}
}
sig <- rep(sig, m)
}
theta <- matrix(c(bet, sig, pr), nrow = m)
est <- list(theta = theta, difpar = dif, run = run)
est
}
########################### Trimmed likelihood estimator
# mixregTrimone uses the trimmed likelihood estimator based on ONE initial value.
mixregTrimone <- function(x, y, k = 2, keep = 0.95, bet, sig, pr) {
#SK-- Used functions: mixregone
n <- length(y)
n1 <- round(n * keep)
x <- matrix(x, nrow = n)
a <- dim(x)
p <- a[2] + 1
X <- cbind(rep(1, n), x)
if (dim(bet)[2] == k) {
bet <- t(bet)
}
lh <- 0
for (i in seq(k)) {
lh <- lh + pr[i] * stats::dnorm(y - X %*% bet[i, ], 0, sig[1])
}
ind <- order(-lh)
run <- 0
tol <- 10^(-4)
obj <- sum(log(lh[ind[1:n1]]))
repeat {
pobj <- obj
run <- run + 1
x1 <- x[ind[1:n1], ]
y1 <- y[ind[1:n1]]
fit <- mixregone(x1, y1, bet, sig, pr, k)
fit <- fit$theta
bet <- matrix(fit[1:(p * k)], nrow = k)
sig <- fit[p * k + 1]
pr <- fit[(p * k + k + 1):(p * k + 2 * k)]
lh <- 0
for (i in seq(k)) {
lh <- lh + pr[i] * stats::dnorm(y - X %*% bet[i, ], 0, sig[1])
}
ind <- order(-lh)
obj <- sum(log(lh[ind[1:n1]]))
dif <- obj - pobj
if (dif < tol | run > 50) {
break
}
}
if (length(sig) < 2) {
sig <- rep(sig, k)
}
theta <- matrix(c(bet, sig, pr), nrow = k)
# theta=matrix(c(bet,sig,pr),nrow=m) # m is not working, updated with k by Xin Shen
est <- list(theta = theta, lik = obj, diflikelihood = dif, run = run)
est
}
########################### Robust EM algorithm to fit the mixture of linear regression using t-distribution
# Definition of t density
dent <- function(y, mu, sig, v) {
est = gamma((v + 1) / 2) * sig^(-1) / ((pi * v)^(1 / 2) * gamma(v / 2) * (1 + (y - mu)^2 / (sig^2 * v))^(0.5 * (v + 1)))
est
}
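# Sanity-check sketch (not run): dent() is the location-scale t density, so it
# should agree with stats::dt() after standardizing and dividing by the scale.
if (FALSE) {
  y0 <- seq(-3, 3, by = 0.5)
  all.equal(dent(y0, mu = 0, sig = 2, v = 5),
            stats::dt(y0 / 2, df = 5) / 2)  # TRUE
}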
# mixregTonev estimates the mixture regression parameters robustly assuming the error distribution is t-distribution with fixed degrees of freedom
mixregTonev <- function(x, y, bet, sig, pr, v = 3, m = 2, tol = 10^(-5)) {
#SK-- Used functions: dent
run = 0
n = length(y)
if (length(v) == 1) {
v = rep(v, m)
}
X = cbind(rep(1, n), x)
lh = -10^10
a = dim(x)
p = a[2] + 1
r = matrix(rep(0, m * n), nrow = n)
pk = r
u = r
logu = r
if (length(sig) > 1) { # the component variances are different
for (j in seq(m)) {
r[, j] = (y - X %*% bet[j, ]) / sig[j]
}
# E-steps
repeat {
run = run + 1
prelh = lh
for (j in seq(m)) {
pk[, j] = pr[j] * pmax(10^(-300), dent(r[, j] * sig[j], 0, sig[j], v[j]))
u[, j] = (v[j] + 1) / (v[j] + r[, j]^2)
# logu[, j] = log(u[, j]) + (digamma((v[j] + 1) / 2) - log((v[j] + 1) / 2))
}
lh = sum(log(apply(pk, 1, sum)))
pk = pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
dif = lh - prelh
if (dif < tol | run > 500) {
break
}
# M-step
np = apply(pk, 2, sum)
pr = np / n
for (j in seq(m)) {
w = diag(pk[, j] * u[, j])
bet[j, ] = MASS::ginv(t(X) %*% w %*% X) %*% t(X) %*% w %*% y
sig[j] = sqrt(sum((y - X %*% bet[j, ])^2 * pk[, j] * u[, j]) / sum(pk[, j]))
r[, j] = (y - X %*% bet[j, ]) / sig[j]
}
}
} else {
for (j in seq(m)) {
r[, j] = (y - X %*% bet[j, ]) / sig
}
# E-steps
repeat {
run = run + 1
prelh = lh
for (j in seq(m)) {
pk[, j] = pr[j] * dent(y, X %*% bet[j, ], sig, v[j])
u[, j] = (v[j] + 1) / (v[j] + r[, j]^2)
# logu[, j] = log(u[, j]) + (digamma((v[j] + 1) / 2) - log((v[j] + 1) / 2))
}
lh = sum(log(apply(pk, 1, sum)))
pk = pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
dif = lh - prelh
if (dif < tol | run > 500) {
break
}
# M-step
np = apply(pk, 2, sum)
pr = np / n
for (j in seq(m)) {
w = diag(pk[, j] * u[, j])
bet[j, ] = MASS::ginv(t(X) %*% w %*% X) %*% t(X) %*% w %*% y
r[, j] = y - X %*% bet[j, ]
}
sig = sqrt(sum(pk * r^2 * u) / sum(pk))
r = r / sig
}
sig = sig * rep(1, m)
}
theta = matrix(c(bet, sig, pr), nrow = m)
est = list(theta = theta, lik = lh, df = v, dif = dif, run = run)
est
}
#' Robust Mixture Regression Based on T-distribution
#'
#' mixregTv adaptively estimates the mixture regression parameters robustly, assuming the error distribution is the t-distribution.
#' @param x explanatory variables matrix with rows for each observation
#' @param y response variable vector
#' @param bet initial values for beta
#' @param sig initial value for sigma; use a vector if the component variances differ, otherwise a scalar is fine.
#' @param pr initial value of the proportion for each component, a vector
#' @param m number of components, default is 2
#' @param maxdf maximum degrees of freedom, default is 15
#' @param tol stopping criterion, default is 10^(-5)
#'
#'
#' @noRd
#'
# mixregTv estimates the mixture regression parameters robustly assuming the error distribution is t-distribution with varying degrees of freedom
mixregTv <- function(x, y, bet, sig, pr, m = 2, maxdf = 15, tol = 10^(-5)) {
#SK-- Used functions: mixregone, mixregTonev
# theta: estimated parameters beta(intercept, slopes), sigma, proportion for each component
# likelihood: likelihood of the estimated parameter
# dif: difference of likelihood of the last iteration and second to last iteration
# run: number of iterations to converge
# df: estimated degree of the t distribution
# degreerange: all degrees of freedom used to estimate the parameters
# vlikelihoodseq: all likelihoods associated with degrees of freedom
est = mixregone(x, y, bet, sig, pr, m)
fv = 0
a = dim(x)
p = a[2] + 1
lh = rep(0, maxdf)
lh[1] = est$lik
for (v in 1:maxdf) {
temp = mixregTonev(x, y, bet, sig, pr, v, m, tol)
lh[v + 1] = temp$lik
if (lh[v + 1] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])) {
est = temp
fv = v
}
}
est = list(theta = est$theta, lik = est$lik, dif = est$dif, run = est$run)
est$df = fv
est$degreerange = c(1, maxdf)
est$vlikelihoodseq = lh
est
}
# Function to generate permutations
permutations <- function(n, r, v = 1:n) {
if (r == 1) {
matrix(v, n, 1)
} else if (n == 1) {
matrix(v, 1, r)
} else {
X <- NULL
for (i in 1:n) {
X <- rbind(X, cbind(v[i], permutations(n - 1, r - 1, v[-i])))
}
X
}
}
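# Sketch: permutations(m, m) enumerates all m! orderings of 1:m; mixregBisq()
# uses it to match component labels across different initial values.
if (FALSE) {
  permutations(2, 2)
  #      [,1] [,2]
  # [1,]    1    2
  # [2,]    2    1
}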
###########################
hard_penalty <- function(gamma, lambda) {
((lambda^2) / 2) * (gamma != 0)
}
hard_ipod <- function(theta, lambda) {
theta * (abs(theta) > lambda)
}
hard_plh <- function(y, X, pi, beta, sigma, gamma, lambda, m = 2) {
lh = 0
for (r in seq(m)) {
lh = lh + pi[r, ] * pmax(10^(-300), stats::dnorm(y - gamma * sigma[r, ] - X %*% beta[, r], mean = 0, sd = sigma[r, ]))
}
## the objective function = log-likelihood - hard penalty
sum(log(lh)) - sum(hard_penalty(gamma = gamma, lambda = lambda))
}
soft_penalty <- function(gamma, lambda) {
abs(gamma) * lambda
}
soft_ipod <- function(theta, lambda) {
a = abs(theta) - lambda
ifelse(a < 0, 0, sign(theta) * a)
}
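# Sketch: the two thresholding rules from the mixregRM2() details. Hard
# thresholding (l_0 penalty) keeps or kills; soft thresholding (l_1 penalty)
# also shrinks the survivors toward zero.
if (FALSE) {
  eps <- c(-2.0, -0.6, 0.3, 1.8)
  hard_ipod(eps, lambda = 1)  # -2.0  0.0  0.0  1.8
  soft_ipod(eps, lambda = 1)  # -1.0  0.0  0.0  0.8
}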
soft_plh <- function(y, X, pi, beta, sigma, gamma, lambda, m = 2) {
lh = 0
for (r in seq(m)) {
lh = lh + pi[r, ] * pmax(10^(-300), stats::dnorm(y - gamma * sigma[r, ] - X %*% beta[, r], mean = 0, sd = sigma[r, ]))
}
## the objective function = log-likelihood - soft penalty
sum(log(lh)) - sum(soft_penalty(gamma = gamma, lambda = lambda))
}
## objective function for sigma estimation
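## sigmaf() is, up to an additive constant, the negative expected complete-data
## log-likelihood as a function of the common sigma; mixregRM2() minimizes it
## with stats::optim() in the M-step.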
sigmaf <- function(sig, y, X, p_ij, beta, gamma, m) {
func = 0
for (r in seq(m)) {
func = func + 0.5 * sum(p_ij[, r] * log(sig^2)) + (t(p_ij[, r]) %*% (y - gamma * sig - X %*% beta[, r])^2) / (2 * sig^2)
}
func
}
#-------------------------------------------------------------------------------
# Keep unused functions (SK, 08/31/2023)
#-------------------------------------------------------------------------------
#library(gregmisc)
#library(robustbase)
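# huberpsi() is Huber's psi function; the standard tuning constant k = 1.345
# gives roughly 95% asymptotic efficiency under normal errors.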
huberpsi <- function(t, k = 1.345){
out = pmax(- k, pmin(k, t))
return(out)
}
mixregTw <- function(x, y, m = 2, maxdf = 10, nstart = 20, tol = 10^(-5)) {
#SK-- Used functions: mixregT
n = length(y)
x = matrix(x, nrow = n)
w = robustbase::covMcd(x)
w = w$mcd.wt
x = x[w == 1, ]
y = y[w == 1]
est = mixregT(x, y, m, maxdf, nstart, tol)
est
}
########################### mixregt with unknown degrees of freedom
#%%start from one initial value
# mixregTonev1 estimates the mixture regression parameters robustly assuming the error distribution is t-distribution with varying degrees of freedom
mixregTonev1 <- function(x, y, bet, sig, pr, m = 2, v = 3, maxdf = 30, tol = 10^(-5)) {
#SK-- Used functions: dent
run = 0
n = length(y)
if (length(v) == 1) {
v = rep(v, m)
}
X = cbind(rep(1, n), x)
lh = -10^10
a = dim(x)
p = a[2] + 1
r = matrix(rep(0, m * n), nrow = n)
pk = r
u = r
logu = r
if (length(sig) > 1) {
for (j in seq(m))
r[, j] = (y - X %*% bet[j, ]) / sig[j]
# E-steps
repeat {
# prest=c(sig,bet,pr)
run = run + 1
prelh = lh
for (j in seq(m)) {
pk[, j] = pr[j] * pmax(10^(-300), dent(r[, j] * sig[j], 0, sig[j], v[j]))
u[, j] = (v[j] + 1) / (v[j] + r[, j]^2)
# logu[, j] = log(u[, j]) + (digamma((v[j] + 1) / 2) - log((v[j] + 1) / 2))
}
lh = sum(log(apply(pk, 1, sum)))
pk = pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
dif = lh - prelh
if (dif < tol | run > 500) {
break
}
# M-step
np = apply(pk, 2, sum)
pr = np / n
for (j in seq(m)) {
w = diag(pk[, j] * u[, j])
bet[j, ] = MASS::ginv(t(X) %*% w %*% X) %*% t(X) %*% w %*% y
sig[j] = sqrt(sum((y - X %*% bet[j, ])^2 * pk[, j] * u[, j]) / sum(pk[, j]))
r[, j] = (y - X %*% bet[j, ]) / sig[j]
q = rep(0, maxdf)
for (k in seq(maxdf)) {
q[k] = -log(gamma(0.5 * k)) + 0.5 * k * log(0.5 * k) + 0.5 * k * (sum((log(u[, j]) - u[, j]) * pk[, j]) / np[j] + digamma(0.5 * (v[j] + 1)) - log(0.5 * (v[j] + 1)))
}
ordq = order(-q)
v[j] = ordq[1]
}
}
} else {
for (j in seq(m))
r[, j] = (y - X %*% bet[j, ]) / sig
# E-steps
repeat {
run = run + 1
prelh = lh
for (j in seq(m)) {
pk[, j] = pr[j] * dent(y, X %*% bet[j, ], sig, v[j])
u[, j] = (v[j] + 1) / (v[j] + r[, j]^2)
}
lh = sum(log(apply(pk, 1, sum)))
pk = pk / matrix(rep(apply(pk, 1, sum), m), nrow = n)
dif = lh - prelh
if (dif < tol | run > 500) {
break
}
# M-step
np = apply(pk, 2, sum)
pr = np / n
for (j in seq(m)) {
w = diag(pk[, j] * u[, j])
bet[j, ] = MASS::ginv(t(X) %*% w %*% X) %*% t(X) %*% w %*% y
r[, j] = (y - X %*% bet[j, ]) / sig
q = rep(0, maxdf)
for (k in seq(maxdf)) {
q[k] = -log(gamma(0.5 * k)) + 0.5 * k * log(0.5 * k) + 0.5 * k * (sum((log(u[, j]) - u[, j]) * pk[, j]) / np[j] + digamma(0.5 * (v[j] + 1)) - log(0.5 * (v[j] + 1)))
}
ordq = order(-q)
v[j] = ordq[1]
}
sig = sqrt(sum(pk * r^2 * u) / sum(pk))
r = r / sig
}
sig = sig * rep(1, m)
}
theta = matrix(c(bet, sig, pr), nrow = m)
est = list(theta = theta, df = v, lik = lh, degreerange = c(1, maxdf), run = run, dif = dif)
est
}
# Start from multiple degrees
mixregTone <- function(x, y, bet, sig, pr, m = 2, maxdf = 30, tol = 10^(-5), numdeg = 100) {
#SK-- Used functions: mixregTonev1, mixregTonev
lh = rep(0, numdeg)
u = stats::runif(m, 0, 1)
u = pmax(1, round(maxdf * u))
est = mixregTonev1(x, y, bet, sig, pr, m, u, maxdf, tol)
lh[1] = est$lik
a = dim(x)
p = a[2] + 1
for (v in seq(numdeg)) {
u = stats::runif(m, 0, 1)
u = pmax(1, round(maxdf * u))
temp = mixregTonev1(x, y, bet, sig, pr, m, u, maxdf, tol)
lh[v + 1] = temp$lik
if (lh[v + 1] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])) {
est = temp
}
}
if (max(est$df) == maxdf & maxdf < 30) {
est$df[est$df == maxdf] = 30
temp = mixregTonev(x, y, bet, sig, pr, est$df, m, tol)
lh[v + 2] = temp$lik
if (lh[v + 2] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])) {
est = temp
}
}
est$vlikelihoodseq = lh
est
}
# Start from multiple initial values
mixregTueqv <- function(x, y, m = 2, maxdf = 30, nstart = 20, tol = 10^(-5)) {
#SK-- Used functions: mixregTone
n = length(y)
x = matrix(x, nrow = n)
a = dim(x)
p = a[2] + 1
n1 = 2 * p
bet = matrix(rep(0, m * p), nrow = m)
sig = 0
for (j in seq(m)) {
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig / n1 / m
lh = rep(0, nstart)
est = mixregTone(x, y, bet, sig, pr, m, maxdf, tol)
lh[1] = est$lik
for (i in seq(nstart - 1)) {
sig = 0
for (j in seq(m)) {
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig / n1 / m
temp = mixregTone(x, y, bet, sig, pr, m, maxdf, tol)
lh[i + 1] = temp$lik
if (lh[i + 1] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])) {
est = temp
}
}
if (maxdf < 30 | tol > 10^(-5)) {
bet = est$theta[, 1:p]
sig = est$theta[, p + 1]
pr = est$theta[, p + 2]
est = mixregTone(x, y, bet, sig[1], pr, m, 30, 10^(-5))
}
est$inilikelihoodseq = lh
est
}
mixregTwueqv <- function(x, y, m = 2, maxdf = 30, nstart = 20, tol = 10^(-5)) {
#SK-- Used functions: mixregTueqv
n = length(y)
x = matrix(x, nrow = n)
w = robustbase::covMcd(x)
w = w$mcd.wt
x = x[w == 1, ]
y = y[w == 1]
est = mixregTueqv(x, y, m, maxdf, nstart, tol)
est
}
# mixregTv1 estimates the mixture regression parameters robustly assuming the error distribution is
# t-distribution with given degrees of freedom v, based on multiple initial values
mixregTv1 <- function(x, y, m = 2, v, nstart = 20, tol = 10^(-5)) {
#SK-- Used functions: mixregTonev
n = length(y)
x = matrix(x, nrow = n)
a = dim(x)
p = a[2] + 1
n1 = 2 * p
bet = matrix(rep(0, m * p), nrow = m)
sig = 0
for (j in seq(m)) {
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig / n1 / m
lh = rep(0, nstart)
est = mixregTonev(x, y, bet, sig, pr, v, m, tol)
lh[1] = est$lik
for (i in seq(nstart - 1)) {
sig = 0
for (j in seq(m)) {
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig / n1 / m
temp = mixregTonev(x, y, bet, sig, pr, v, m, tol)
lh[i + 1] = temp$lik
if (lh[i + 1] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])) {
est = temp
}
}
if (tol > 10^(-5)) {
bet = est$theta[, 1:p]
sig = est$theta[, p + 1]
pr = est$theta[, p + 2]
est = mixregTonev(x, y, bet, sig[1], pr, v, m, 10^(-5))
}
est$inilikelihoodseq = lh
est
}
mixregTueqv1 <- function(x, y, m = 2, v, nstart = 20, tol = 10^(-5)) {
#SK-- Used functions: mixregTonev1
n = length(y)
x = matrix(x, nrow = n)
a = dim(x)
p = a[2] + 1
n1 = 2 * p
bet = matrix(rep(0, m * p), nrow = m)
sig = 0
for (j in seq(m)) {
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig / n1 / m
lh = rep(0, nstart)
est = mixregTonev1(x, y, bet, sig, pr, m, v, tol)
lh[1] = est$lik
for (i in seq(nstart - 1)) {
sig = 0
for (j in seq(m)) {
ind = sample(1:n, n1)
X = cbind(rep(1, n1), x[ind, ])
bet[j, ] = MASS::ginv(t(X) %*% X) %*% t(X) %*% y[ind]
sig = sig + sum((y[ind] - X %*% bet[j, ])^2)
}
pr = rep(1/m, m)
sig = sig / n1 / m
temp = mixregTonev1(x, y, bet, sig, pr, m, v, tol)
lh[i + 1] = temp$lik
if (lh[i + 1] > est$lik & min(temp$theta[, p + 2]) > min(0.05, est$theta[, p + 2])) {
est = temp
}
}
if (tol > 10^(-5)) {
bet = est$theta[, 1:p]
sig = est$theta[, p + 1]
pr = est$theta[, p + 2]
est = mixregTonev1(x, y, bet, sig[1], pr, m, v, 10^(-5))
}
est$inilikelihoodseq = lh
est
}