Nothing
# K_alpha matrix required for calculating s.e.
#' @useDynLib robustSFA, .registration = TRUE
#' @importFrom Rcpp evalCpp
K_alpha<-function(y,x,theta,alpha,h_scale=1e-6) {
n <- length(y)
p <- length(theta)
dh <- diag(abs(theta * h_scale))
K <- matrix(0, p, p)
for(i in 1:n) {
deriv_H_alpha<-numeric(p)
H_alpha0<-H_alpha(y[i],x[i,],theta,alpha)
for(j in 1:p) {
deriv_H_alpha[j]<-(H_alpha(y[i],x[i,],theta+dh[,j],alpha)-H_alpha0)/dh[j,j]
}
K<-K+deriv_H_alpha%*%t(deriv_H_alpha)
}
return(K)
}
# J_alpha matrix required for calculating s.e.
#' @useDynLib robustSFA, .registration = TRUE
#' @importFrom Rcpp evalCpp
J_alpha <- function(y, x, theta, alpha, h_scale = 1e-6) {
n <- length(y)
p <- length(theta)
h <- diag(abs(theta * h_scale))
hessian_sum <- matrix(0, p, p)
for (i in 1:n) {
H_alpha_i <- function(theta) {
H_alpha(y[i], x[i, ], theta, alpha)
}
hessian_i <- matrix(0, p, p)
for (j in 1:p) {
for (k in j:p) {
theta_jk1 <- theta; theta_jk1[j] <- theta_jk1[j] + h[j, j]; theta_jk1[k] <- theta_jk1[k] + h[k, k]
theta_jk2 <- theta; theta_jk2[j] <- theta_jk2[j] + h[j, j]
theta_jk3 <- theta; theta_jk3[k] <- theta_jk3[k] + h[k, k]
hessian_value <- (
H_alpha_i(theta_jk1) - H_alpha_i(theta_jk2) - H_alpha_i(theta_jk3) + H_alpha_i(theta)
) / (h[j, j] * h[k, k])
hessian_i[j, k] <- hessian_value
if (j != k) {
hessian_i[k, j] <- hessian_value
}
}
}
hessian_sum <- hessian_sum + hessian_i
}
return(hessian_sum)
}
# function returning standard errors
se_mdpde<-function(Y,X,theta,alpha) {
n<-length(Y)
K<-K_alpha(Y,X,theta,alpha)
J<-J_alpha(Y,X,theta,alpha)
epsilon <- 1e-8
J <- J + epsilon * diag(nrow(J))
var<-solve(J)%*%K%*%solve(J)
result <- suppressWarnings(sqrt(diag(var)))
if (any(is.nan(result))) {
warning("Outlying observation(s) may be included in the data.")
result[is.nan(result)] <- NA
}
return(result)
}
rsfa <- function(formula, data = NULL,alpha=0,se=TRUE) {
if (!is.numeric(alpha) || alpha < 0) {
stop("`alpha` must be a non-negative numeric value.")
}
call <- match.call()
call$se <- NULL
if (!is.null(data)) {
formula <- as.formula(formula)
mf <- model.frame(formula, data = data)
y <- model.response(mf)
X <- model.matrix(formula, mf)
} else {
y <- eval(formula[[2]], parent.frame())
X <- model.matrix(formula, parent.frame())
}
variable_names <- colnames(X)
if(alpha==0) {
obj_f <- function(params) {
-obj_f0_cpp(params, y, X)
}
} else {
obj_f <- function(params) {
obj_f_alpha_cpp(params, y, X, alpha)
}
}
fit_lm<-lm(y~0+X)
ini_sigmaSq<-var(fit_lm$res)
ini<-c(coef(fit_lm),ini_sigmaSq,1)
lower_bounds <- c(rep(-Inf, length(ini) - 2), 1e-6, 1e-6)
upper_bounds <- c(rep(Inf, length(ini) - 2), 10*ini_sigmaSq, 20)
result <- nlminb(
start = ini,
objective = obj_f,
lower = lower_bounds,
upper = upper_bounds
)
estimates <- result$par
if (result$convergence != 0) {
stop("Optimization failed: ", result$message)
}
estimates <- result$par
names(estimates) <- c(variable_names, "sigmaSq", "lambda")
if(se) {
se<-se_mdpde(y,X,estimates,alpha)
names(se)<- names(estimates)
} else {
se<-NULL
}
p<-length(estimates)
res<-y-X%*%estimates[1:(p-2)]
sigma2<-estimates[p-1]; lambda<-estimates[p]
sigma2_v<-sigma2/(1+lambda^2)
sigma2_u<-sigma2-sigma2_v
mu_star<--res*sigma2_u/sigma2
sigma_star<-sigma2_u*sigma2_v/sigma2
pnorm1<-pnorm(mu_star/sigma_star-sigma_star)
pnorm2<-pnorm(mu_star/sigma_star)
if(any(pnorm2==0)) epsilon<-1e-10
else epsilon<-0
term1<-(pnorm1+epsilon)/(pnorm2+epsilon)
term2<-exp(-mu_star+0.5*sigma_star^2)
TE<-pmin(term1*term2,1)
result <- list(
call = call,
coefficients = estimates,
se=se,
residuals=res,
efficiencies=TE,
sigma2_V=sigma2_v,
sigma2_U=sigma2_u,
alpha = alpha
)
class(result) <- "rsfa"
return(result)
}
print.rsfa <- function(x,...) {
cat("Call:\n")
print(x$call)
alpha<-x$alpha
if(alpha==0) cat("\nMaximum likelihood estimates:\n")
else cat("\nRobust (MDPD) estimates with alpha =",alpha,":\n")
print(x$coefficients)
}
coef.rsfa <- function(object,...) {
object$coefficients
}
summary.rsfa <- function(object,...) {
estimates <- object$coefficients
se <- object$se
z_values <- estimates / se
p_values <- 2 * (1 - pnorm(abs(z_values)))
signif_codes <- cut(p_values,
breaks = c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
labels = c("***", "**", "*", ".", " "))
results <- data.frame(
Estimate = round(estimates, 4),
`Std. Error` = round(se, 4),
`z value` = round(z_values, 3),
`Pr(>|z|)` = ifelse(p_values < 2e-16, "<2e-16", format.pval(p_values, digits = 3)),
Signif = sprintf("%-3s", as.character(signif_codes))
)
colnames(results) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)", "")
cat("Call:\n")
print(object$call)
alpha <- object$alpha
if(alpha==0) cat("\nMaximum likelihood estimation results:\n")
else cat("\nRobust (MDPD) estimation results:\n")
print(results, right = TRUE, row.names = TRUE)
cat("---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
cat("\nmean efficiency:",round(mean(object$efficiencies),4),"\n")
}
efficiency <- function(object, ...) {
if (inherits(object, "rsfa")) {
TE <- object$efficiencies
if (any(TE==1)) {
warning("Obs ", paste(which(TE == 1), collapse = ", "),
" may represent high-performing producer(s) or potential outlier(s).") }
TE
}
else {
stop("Object must be of class 'rsfa'.")
}
}
bootstrap_test <- function(formula, data=NULL, alpha0, alpha1, B = 99) {
verbose <- TRUE
if(alpha0==alpha1) {
1
} else {
similarity <- function(T0, T1) {
p<-length(T0)
sigma2_0<-T0[p-1]; lambda_0<-T0[p]
sigma2_1<-T1[p-1]; lambda_1<-T1[p]
abs(sigma2_1-sigma2_0)/sigma2_0+abs(lambda_1-lambda_0)/lambda_0
}
if (!is.null(data)) {
formula <- as.formula(formula)
mf <- model.frame(formula, data = data)
y <- model.response(mf)
X <- model.matrix(formula, mf)
} else {
y <- eval(formula[[2]], parent.frame())
X <- model.matrix(formula, parent.frame())
}
T0<-coef(rsfa(formula, data=data, alpha0, se=FALSE))
T1<-coef(rsfa(formula, data=data, alpha1, se=FALSE))
p<-length(T0)
beta_T0<-T0[1:(p-2)]
sigma2<-T0[p-1]; lambda<-T0[p]
sigma2_v<-sigma2/(1+lambda^2)
sigma2_u<-sigma2-sigma2_v
n <- length(y)
sim_star <- numeric(B)
p <- ncol(X)
formula_str <- paste("y_star ~", paste(paste0("X[,", 2:p, "]"), collapse = " + "))
model_formula <- as.formula(formula_str)
if(verbose) cat("Progress:\n")
pb <- txtProgressBar(min = 0, max = B, style = 3)
for (b in 1:B) {
V_star<-rnorm(n,0,sqrt(sigma2_v))
U_star<-rtruncnorm(n,0,Inf,0,sqrt(sigma2_u))
y_star <- X%*%beta_T0+V_star-U_star
T0_b<-coef(rsfa(model_formula,alpha=alpha0, se=F))
T1_b<-coef(rsfa(model_formula,alpha=alpha1, se=F))
sim_star[b] <- similarity(T0_b, T1_b)
setTxtProgressBar(pb, b)
}
close(pb)
sim<-similarity(T0,T1)
mean(c(sim_star,sim) >= sim)
}
}
optimal_alpha <- function(formula, data=NULL, base_alpha=.5,B = 99, s_level = 0.1) {
ALPHA <- seq(0.05, base_alpha, by = 0.05)
verbose <- TRUE
if(verbose) cat("Comparing MDPD estimate with alpha = ", base_alpha, " and ML estimate.\n")
p_val<-bootstrap_test(formula,data,base_alpha,0,B)
if(verbose) cat("P-value:",p_val,"\n")
if(p_val>s_level) {
if(verbose) cat("MLE (alpha=0) is recommended!\n")
} else {
i<-1
while(p_val<=s_level) {
if(verbose) cat("Comparing MDPD estimates with alpha =",base_alpha,"and alpha =",ALPHA[i], ".\n")
p_val<-bootstrap_test(formula,data,base_alpha,ALPHA[i],B)
if(verbose) cat("P-value:",p_val,"\n")
if(p_val>s_level) {
if(verbose) cat("MDPDE with alpha=",ALPHA[i] ,"is recommended!\n")
stop
}
i<-i+1
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.