R/robustSFM.R

Defines functions optimal_alpha bootstrap_test efficiency summary.rsfa coef.rsfa print.rsfa rsfa se_mdpde J_alpha K_alpha

Documented in bootstrap_test coef.rsfa efficiency optimal_alpha print.rsfa rsfa summary.rsfa

# K_alpha matrix required for calculating s.e.
#' @useDynLib robustSFA, .registration = TRUE
#' @importFrom Rcpp evalCpp
K_alpha<-function(y,x,theta,alpha,h_scale=1e-6) {
  n <- length(y)  
  p <- length(theta)  
  dh <- diag(abs(theta * h_scale))
  
  K <- matrix(0, p, p)
  for(i in 1:n) {
    deriv_H_alpha<-numeric(p)
    H_alpha0<-H_alpha(y[i],x[i,],theta,alpha)
    for(j in 1:p) {
      deriv_H_alpha[j]<-(H_alpha(y[i],x[i,],theta+dh[,j],alpha)-H_alpha0)/dh[j,j]
    }
    K<-K+deriv_H_alpha%*%t(deriv_H_alpha)
  }
  return(K)
}

# J_alpha matrix required for calculating s.e.
#' @useDynLib robustSFA, .registration = TRUE
#' @importFrom Rcpp evalCpp
J_alpha <- function(y, x, theta, alpha, h_scale = 1e-6) {
  n <- length(y)  
  p <- length(theta)  
  h <- diag(abs(theta * h_scale))
  
  hessian_sum <- matrix(0, p, p)
  
  for (i in 1:n) {
    H_alpha_i <- function(theta) {
      H_alpha(y[i], x[i, ], theta, alpha)
    }
    hessian_i <- matrix(0, p, p)
    for (j in 1:p) {
      for (k in j:p) {
        theta_jk1 <- theta; theta_jk1[j] <- theta_jk1[j] + h[j, j]; theta_jk1[k] <- theta_jk1[k] + h[k, k]
        theta_jk2 <- theta; theta_jk2[j] <- theta_jk2[j] + h[j, j]
        theta_jk3 <- theta; theta_jk3[k] <- theta_jk3[k] + h[k, k]
        
        hessian_value <- (
          H_alpha_i(theta_jk1) - H_alpha_i(theta_jk2) - H_alpha_i(theta_jk3) + H_alpha_i(theta)
        ) / (h[j, j] * h[k, k])
        
        hessian_i[j, k] <- hessian_value
        if (j != k) {
        hessian_i[k, j] <- hessian_value
        }
      }
    }
    
    hessian_sum <- hessian_sum + hessian_i
  }
  
  return(hessian_sum)
}


# function returning standard errors
se_mdpde<-function(Y,X,theta,alpha) {
  n<-length(Y)
  K<-K_alpha(Y,X,theta,alpha)
  J<-J_alpha(Y,X,theta,alpha)
  epsilon <- 1e-8
  J <- J + epsilon * diag(nrow(J))
  
  var<-solve(J)%*%K%*%solve(J)
  result <- suppressWarnings(sqrt(diag(var)))
  
  if (any(is.nan(result))) {
    warning("Outlying observation(s) may be included in the data.")
    result[is.nan(result)] <- NA  
  }
  
  return(result)
}




rsfa <- function(formula, data = NULL,alpha=0,se=TRUE) {
  if (!is.numeric(alpha) || alpha < 0) {
    stop("`alpha` must be a non-negative numeric value.")
  }
  
  call <- match.call()
  call$se <- NULL

  if (!is.null(data)) {
    formula <- as.formula(formula)
    mf <- model.frame(formula, data = data)
    y <- model.response(mf)
    X <- model.matrix(formula, mf)  
  } else {
    y <- eval(formula[[2]], parent.frame())
    X <- model.matrix(formula, parent.frame())
  }

  variable_names <- colnames(X)

  if(alpha==0) {
    obj_f <- function(params) {
      -obj_f0_cpp(params, y, X)    
    }
  } else {
    obj_f <- function(params) {
      obj_f_alpha_cpp(params, y, X, alpha) 
    }
  }
  
  fit_lm<-lm(y~0+X)
  ini_sigmaSq<-var(fit_lm$res)
  ini<-c(coef(fit_lm),ini_sigmaSq,1)
    
  lower_bounds <- c(rep(-Inf, length(ini) - 2), 1e-6, 1e-6)
  upper_bounds <- c(rep(Inf, length(ini) - 2), 10*ini_sigmaSq, 20)
  
  result <- nlminb(
    start = ini,            
    objective = obj_f,      
    lower = lower_bounds,   
    upper = upper_bounds
  )
  estimates <- result$par
  
  if (result$convergence != 0) {
    stop("Optimization failed: ", result$message)
  }
  
  estimates <- result$par
  names(estimates) <- c(variable_names, "sigmaSq", "lambda")
  
  if(se) {
    se<-se_mdpde(y,X,estimates,alpha)
    names(se)<- names(estimates)
  } else {
      se<-NULL
  }
  
  p<-length(estimates)
  res<-y-X%*%estimates[1:(p-2)]
 
  sigma2<-estimates[p-1]; lambda<-estimates[p]
  sigma2_v<-sigma2/(1+lambda^2)
  sigma2_u<-sigma2-sigma2_v
  mu_star<--res*sigma2_u/sigma2
  sigma_star<-sigma2_u*sigma2_v/sigma2
  
  pnorm1<-pnorm(mu_star/sigma_star-sigma_star)
  pnorm2<-pnorm(mu_star/sigma_star)
  if(any(pnorm2==0))  epsilon<-1e-10
  else epsilon<-0
  term1<-(pnorm1+epsilon)/(pnorm2+epsilon)
  term2<-exp(-mu_star+0.5*sigma_star^2)
  TE<-pmin(term1*term2,1) 


  result <- list(
    call = call,
    coefficients = estimates,
    se=se,
    residuals=res,
    efficiencies=TE,
    sigma2_V=sigma2_v,
    sigma2_U=sigma2_u,
    alpha = alpha
  )
  
  class(result) <- "rsfa"
  return(result)
}


print.rsfa <- function(x,...) {
  cat("Call:\n")
  print(x$call)
  alpha<-x$alpha
  if(alpha==0) cat("\nMaximum likelihood estimates:\n")
  else cat("\nRobust (MDPD) estimates with alpha =",alpha,":\n")
  print(x$coefficients)
}


coef.rsfa <- function(object,...) {
  object$coefficients  
}



summary.rsfa <- function(object,...) {
  estimates <- object$coefficients
  se <- object$se
  z_values <- estimates / se
  p_values <- 2 * (1 - pnorm(abs(z_values)))
  signif_codes <- cut(p_values,
                      breaks = c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
                      labels = c("***", "**", "*", ".", " "))
  
  results <- data.frame(
    Estimate = round(estimates, 4),
    `Std. Error` = round(se, 4),
    `z value` = round(z_values, 3),
    `Pr(>|z|)` = ifelse(p_values < 2e-16, "<2e-16", format.pval(p_values, digits = 3)),
    Signif = sprintf("%-3s", as.character(signif_codes))
  )
  
  colnames(results) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)", "")
  
  cat("Call:\n")
  print(object$call)
  
  alpha <- object$alpha
  if(alpha==0) cat("\nMaximum likelihood estimation results:\n")
  else cat("\nRobust (MDPD) estimation results:\n")
  print(results, right = TRUE, row.names = TRUE)
  cat("---\nSignif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
  cat("\nmean efficiency:",round(mean(object$efficiencies),4),"\n")
}


efficiency <- function(object, ...) {
  if (inherits(object, "rsfa")) { 
    TE <- object$efficiencies
    if (any(TE==1)) {
      warning("Obs ", paste(which(TE == 1), collapse = ", "), 
              " may represent high-performing producer(s) or potential outlier(s).")    }
    TE
  } 
  else {
    stop("Object must be of class 'rsfa'.")
  }
}

bootstrap_test <- function(formula, data=NULL, alpha0, alpha1, B = 99) {
 
    verbose <- TRUE
    if(alpha0==alpha1) {
        1
    } else {
    
    similarity <- function(T0, T1) {
      p<-length(T0)
      sigma2_0<-T0[p-1]; lambda_0<-T0[p]
      sigma2_1<-T1[p-1]; lambda_1<-T1[p]
      abs(sigma2_1-sigma2_0)/sigma2_0+abs(lambda_1-lambda_0)/lambda_0
    }
    
    if (!is.null(data)) {
      formula <- as.formula(formula)
      mf <- model.frame(formula, data = data)
      y <- model.response(mf)
      X <- model.matrix(formula, mf)  
    } else {
      y <- eval(formula[[2]], parent.frame())
      X <- model.matrix(formula, parent.frame())
    }
    
    T0<-coef(rsfa(formula, data=data, alpha0, se=FALSE))
    T1<-coef(rsfa(formula, data=data, alpha1, se=FALSE))
    
    p<-length(T0)
    beta_T0<-T0[1:(p-2)]
    sigma2<-T0[p-1]; lambda<-T0[p]
    sigma2_v<-sigma2/(1+lambda^2)
    sigma2_u<-sigma2-sigma2_v
    
    n <- length(y)  
    sim_star <- numeric(B)  
    
    p <- ncol(X)  
    formula_str <- paste("y_star ~", paste(paste0("X[,", 2:p, "]"), collapse = " + "))
    model_formula <- as.formula(formula_str)
    
    if(verbose) cat("Progress:\n")
    pb <- txtProgressBar(min = 0, max = B, style = 3)
    
    for (b in 1:B) {
      V_star<-rnorm(n,0,sqrt(sigma2_v))
      U_star<-rtruncnorm(n,0,Inf,0,sqrt(sigma2_u))
      y_star <- X%*%beta_T0+V_star-U_star
    
      T0_b<-coef(rsfa(model_formula,alpha=alpha0, se=F))
      T1_b<-coef(rsfa(model_formula,alpha=alpha1, se=F))
      sim_star[b] <- similarity(T0_b, T1_b)
      setTxtProgressBar(pb, b)
    }
    close(pb)
    
    sim<-similarity(T0,T1) 
    mean(c(sim_star,sim) >= sim)
  }
}

optimal_alpha <- function(formula, data=NULL, base_alpha=.5,B = 99, s_level = 0.1) {
  
  ALPHA <- seq(0.05, base_alpha, by = 0.05)  
  verbose <- TRUE
  if(verbose) cat("Comparing MDPD estimate with alpha = ", base_alpha, " and ML estimate.\n")
  p_val<-bootstrap_test(formula,data,base_alpha,0,B)
  if(verbose) cat("P-value:",p_val,"\n")
  if(p_val>s_level) { 
    if(verbose) cat("MLE (alpha=0) is recommended!\n")
  } else {
    i<-1
    while(p_val<=s_level) {
      if(verbose) cat("Comparing MDPD estimates with alpha =",base_alpha,"and alpha =",ALPHA[i], ".\n")
      p_val<-bootstrap_test(formula,data,base_alpha,ALPHA[i],B)
      if(verbose) cat("P-value:",p_val,"\n")
      if(p_val>s_level) {
        if(verbose) cat("MDPDE with alpha=",ALPHA[i] ,"is recommended!\n")
        stop
      }
      i<-i+1
    }
  }
} 
 

Try the robustSFA package in your browser

Any scripts or data that you put into this service are public.

robustSFA documentation built on April 3, 2025, 6:12 p.m.