knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 )
The qDEA package implements quantile Data Envelopment Analysis (qDEA), an extension of traditional DEA that allows for a pre-specified proportion of observations to lie outside the production frontier. This approach provides robust efficiency estimation in the presence of outliers or measurement error.
Traditional DEA assumes all observations should be on or below the production frontier. In contrast, qDEA allows a proportion α (alpha) of observations to lie outside the frontier, making the method more robust to outliers while maintaining computational efficiency through linear programming.
The qDEA method is based on:
# Install from CRAN install.packages("qDEA") # Load the package library(qDEA)
# For vignette building library(qDEA)
We'll start with the simplest case using the CST11 dataset from Cooper, Seiford, and Tone (2006).
# Load example data data(CST11) head(CST11) # Prepare input and output matrices X <- as.matrix(CST11$EMPLOYEES) Y <- as.matrix(CST11$SALES_EJOR) # Run output-oriented DEA with constant returns to scale result_dea <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0) # Standard DEA (no outliers allowed) # View efficiency scores result_dea$effvals
Efficiency scores range from 0 to 1, where 1 indicates the unit is on the efficient frontier.
Now let's allow one observation (12.5% of 8 observations) to be outside the frontier:
# Run qDEA allowing one outlier qout <- 1/nrow(X) # Proportion = 1/8 = 0.125 result_qdea <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = qout) # Compare DEA and qDEA efficiency scores comparison <- data.frame( Store = CST11$STORE, DEA = round(result_dea$effvals, 3), qDEA = round(result_qdea$effvalsq, 3), Difference = round(result_qdea$effvalsq - result_dea$effvals, 3) ) print(comparison)
Notice how qDEA scores can exceed 1.0, as some units are now allowed to be "super-efficient" relative to the tighter frontier.
Let's examine a more complex example with multiple inputs:
# Load two-input, one-output data data(CST21) head(CST21) # Prepare matrices X <- as.matrix(CST21[, c("EMPLOYEES", "FLOOR_AREA")]) Y <- as.matrix(CST21$SALES) # Input-oriented DEA with VRS result_vrs <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 1/nrow(X)) # Display results data.frame( Store = CST21$STORE, Employees = CST21$EMPLOYEES, Floor_Area = CST21$FLOOR_AREA, Sales = CST21$SALES, Efficiency = round(result_vrs$effvalsq, 3) )
# Load one-input, two-output data data(CST12) head(CST12) # Prepare matrices X <- as.matrix(CST12$EMPLOYEES) Y <- as.matrix(CST12[, c("CUSTOMERS", "SALES")]) # Output-oriented qDEA result_outputs <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.15) # Allow 15% outliers # Results data.frame( Store = CST12$STORE, Employees = CST12$EMPLOYEES, Customers = CST12$CUSTOMERS, Sales = CST12$SALES, DEA_Eff = round(result_outputs$effvals, 3), qDEA_Eff = round(result_outputs$effvalsq, 3) )
A realistic example using hospital data:
# Load hospital data data(CST22) head(CST22) # Prepare matrices X <- as.matrix(CST22[, c("DOCTORS", "NURSES")]) Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")]) # Run qDEA with 10% outliers allowed result_hospital <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10) # Create results table hospital_results <- data.frame( Hospital = CST22$HOSPITAL, Doctors = CST22$DOCTORS, Nurses = CST22$NURSES, Out_Patients = CST22$OUT_PATIENTS, In_Patients = CST22$IN_PATIENTS, Efficiency = round(result_hospital$effvalsq, 3) ) print(hospital_results) # Identify efficient hospitals efficient <- hospital_results$Hospital[hospital_results$Efficiency >= 0.99] cat("\nEfficient hospitals:", paste(efficient, collapse = ", "))
The qDEA package supports multiple orientations:
Minimize inputs while maintaining output levels:
result_in <- qDEA(X = X, Y = Y, orient = "in", RTS = "CRS", qout = 0.10) cat("Input-oriented efficiencies:\n") print(round(result_in$effvalsq, 3))
Maximize outputs while maintaining input levels:
result_out <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.10) cat("Output-oriented efficiencies:\n") print(round(result_out$effvalsq, 3))
Minimize inputs and maximize outputs simultaneously:
result_graph <- qDEA(X = X, Y = Y, orient = "inout", RTS = "VRS", qout = 0.10) cat("Graph-oriented distances:\n") print(round(result_graph$distvalsq, 3))
Compare different returns to scale assumptions:
# Constant Returns to Scale (CRS) result_crs <- qDEA(X = X, Y = Y, orient = "in", RTS = "CRS", qout = 0.10) # Variable Returns to Scale (VRS) result_vrs_comp <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10) # Decreasing Returns to Scale (DRS) result_drs <- qDEA(X = X, Y = Y, orient = "in", RTS = "DRS", qout = 0.10) # Increasing Returns to Scale (IRS) result_irs <- qDEA(X = X, Y = Y, orient = "in", RTS = "IRS", qout = 0.10) # Compare results rts_comparison <- data.frame( Hospital = CST22$HOSPITAL, CRS = round(result_crs$effvalsq, 3), VRS = round(result_vrs_comp$effvalsq, 3), DRS = round(result_drs$effvalsq, 3), IRS = round(result_irs$effvalsq, 3) ) print(rts_comparison)
VRS efficiency is always ≥ CRS efficiency, as VRS is a less restrictive assumption.
Bootstrap methods can correct for bias in efficiency estimates:
# Run qDEA with bootstrap (100 replications) # Note: This takes longer to run result_boot <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10, nboot = 100, seedval = 12345) # Access bias-corrected estimates bias_corrected <- result_boot$BOOT_DATA$effvalsq.bc # Compare original and bias-corrected comparison_boot <- data.frame( Hospital = CST22$HOSPITAL, Original = round(result_boot$effvalsq, 3), Bias_Corrected = round(bias_corrected, 3), Bias = round(result_boot$effvalsq - bias_corrected, 3) ) print(comparison_boot)
Identify which units serve as benchmarks for inefficient units:
# Run qDEA to get peer information data(CST11) X <- as.matrix(CST11$EMPLOYEES) Y <- as.matrix(CST11$SALES_EJOR) result_peers <- qDEA(X = X, Y = Y, orient = "out", RTS = "VRS", qout = 0.10) # Access peer information peers <- result_peers$PEER_DATA$PEERSq print(peers)
The peers dataframe shows which efficient units (and with what weights) form the benchmark for each inefficient unit.
Calculate target input/output levels for inefficient units:
# Run with projection calculation result_proj <- qDEA(X = X, Y = Y, orient = "out", RTS = "VRS", qout = 0.10, getproject = TRUE) # Access projected values X_target <- result_proj$PROJ_DATA$X0HATq Y_target <- result_proj$PROJ_DATA$Y0HATq # Compare actual vs. target projection_comparison <- data.frame( Store = CST11$STORE, Actual_Input = X[,1], Target_Input = round(X_target[,1], 1), Actual_Output = Y[,1], Target_Output = round(Y_target[,1], 1), Efficiency = round(result_proj$effvalsq, 3) ) print(projection_comparison)
For more precise qDEA estimates, use iterative refinement:
# Run with multiple iterations result_iter <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.15, nqiter = 5) # Allow up to 5 iterations # Check number of iterations used cat("Iterations completed:", result_iter$LP_DATA$qiter, "\n") # Check actual proportion of outliers cat("Actual outlier proportion:", round(result_iter$LP_DATA$qhat, 4), "\n")
The algorithm iteratively adjusts the frontier until the proportion of outliers converges to the specified qout value.
Higher qout values make the frontier more robust but may be too permissive.
When in doubt, start with VRS as it's the most general assumption.
The main function qDEA() returns a list with several components:
result <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.10) # Main efficiency scores result$effvals # DEA efficiency scores result$effvalsq # qDEA efficiency scores result$distvals # DEA distance measures result$distvalsq # qDEA distance measures # Input data (for reference) result$INPUT_DATA # Bootstrap data (if nboot > 0) result$BOOT_DATA # Peer information result$PEER_DATA # Projected values (if getproject = TRUE) result$PROJ_DATA # LP solver information result$LP_DATA
Here's a complete analysis workflow:
# Load data data(CST22) # Prepare data X <- as.matrix(CST22[, c("DOCTORS", "NURSES")]) Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")]) # Run comprehensive analysis result_complete <- qDEA( X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10, nqiter = 3, getproject = TRUE ) # Create comprehensive results table complete_results <- data.frame( Hospital = CST22$HOSPITAL, Doctors = CST22$DOCTORS, Nurses = CST22$NURSES, Out_Patients = CST22$OUT_PATIENTS, In_Patients = CST22$IN_PATIENTS, DEA_Eff = round(result_complete$effvals, 3), qDEA_Eff = round(result_complete$effvalsq, 3), Target_Doctors = round(result_complete$PROJ_DATA$X0HATq[,1], 1), Target_Nurses = round(result_complete$PROJ_DATA$X0HATq[,2], 1) ) print(complete_results) # Summary statistics cat("\n=== Summary Statistics ===\n") cat("Mean DEA efficiency:", round(mean(result_complete$effvals), 3), "\n") cat("Mean qDEA efficiency:", round(mean(result_complete$effvalsq), 3), "\n") cat("Efficient units (qDEA ≥ 0.99):", sum(result_complete$effvalsq >= 0.99), "out of", nrow(X), "\n") # Potential input reductions input_savings <- cbind( X - result_complete$PROJ_DATA$X0HATq ) colnames(input_savings) <- c("Doctor_Savings", "Nurse_Savings") cat("\nTotal potential input reductions:\n") cat("Doctors:", round(sum(input_savings[,1]), 1), "\n") cat("Nurses:", round(sum(input_savings[,2]), 1), "\n")
Here's how to visualize efficiency scores:
# Create efficiency comparison plot data(CST22) X <- as.matrix(CST22[, c("DOCTORS", "NURSES")]) Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")]) result_viz <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10) # Prepare data for plotting plot_data <- data.frame( Hospital = CST22$HOSPITAL, DEA = result_viz$effvals, qDEA = result_viz$effvalsq ) # Simple bar plot barplot( t(as.matrix(plot_data[, c("DEA", "qDEA")])), beside = TRUE, names.arg = plot_data$Hospital, col = c("skyblue", "coral"), main = "Hospital Efficiency: DEA vs qDEA", ylab = "Efficiency Score", xlab = "Hospital", legend.text = c("DEA", "qDEA"), args.legend = list(x = "topright") ) abline(h = 1, lty = 2, col = "red")
Cause: Dataset may be too small or all units are on the frontier.
Solution: Try a larger dataset or verify data quality.
Cause: Possible outliers or data quality issues.
Solution: - Increase qout to allow more outliers - Verify data for errors - Consider whether VRS is more appropriate than CRS
Cause: Large dataset or too many bootstrap replications.
Solution: - Start with fewer replications (nboot = 100) - Process a subset of units using dmulist parameter - Run on a more powerful computer
Cooper, W.W., Seiford, L.M., and Tone, K. (2006). Introduction to Data Envelopment Analysis and Its Uses. Springer, New York.
Atwood, J., and Shaik, S. (2020). Theory and Statistical Properties of Quantile Data Envelopment Analysis. European Journal of Operational Research, 286:649-661. https://doi.org/10.1016/j.ejor.2020.03.054
Atwood, J., and Shaik, S. (2018). Quantile DEA: Estimating qDEA-alpha Efficiency Estimates with Conventional Linear Programming. In Productivity and Inequality. Springer Press. https://doi.org/10.1007/978-3-319-68678-3_4
Simar, L., and Wilson, P.W. (2011). Two-stage DEA: caveat emptor. Journal of Productivity Analysis, 36:205-218.
For questions or issues with the qDEA package:
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.