R/selectTransform.R

Defines functions selectTransform

Documented in selectTransform

selectTransform <- function(data, alpha = 0.05,
                             na.rm = TRUE, verbose = TRUE) {
  
  METHOD <- "Data-Driven Selection of the Optimal Transformation Method"
  DNAME <- deparse(substitute(data))
  
  quiet <- function(expr) {
    suppressWarnings(suppressMessages(expr))
  }
  
  if (na.rm) {
    data <- data[!is.na(data)]
  }
  
  if (!is.numeric(data)) {
    stop("The response must be a numeric variable.")
  }
  
  if (length(data) < 3) {
    stop("'data' must contain at least 3 observations.")
  }
  
  if (!requireNamespace("Transform", quietly = TRUE)) {
    stop("The 'Transform' package is required.")
  }
  
  out <- NULL
  
  transform_methods <- data.frame(
    Method = c(
      "Box-Cox",
      "Manly",
      "Modulus",
      "Bickel-Doksum",
      "Yeo-Johnson",
      "Dual",
      "Gpower",
      "Log-shift",
      "Square-root shift",
      "Log",
      "Reciprocal",
      "Glog",
      "Neglog"
    ),
    Function = c(
      "bcTransform",
      "mnTransform",
      "mdTransform",
      "bdTransform",
      "yjTransform",
      "dlTransform",
      "gpTransform",
      "lsTransform",
      "ssTransform",
      "lgTransform",
      "rpTransform",
      "glTransform",
      "nlTransform"
    ),
    stringsAsFactors = FALSE
  )
  
  lambda_free_methods <- c(
    "lgTransform",
    "rpTransform",
    "glTransform",
    "nlTransform"
  )
  
  for (k in 1:nrow(transform_methods)) {
    
    method_name <- transform_methods$Method[k]
    function_name <- transform_methods$Function[k]
    
    fit <- tryCatch({
      quiet({
        if (function_name == "bcTransform") {
          Transform::bcTransform(
            data,
            lambda = seq(-3, 3, 0.01),
            lambda2 = NULL,
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "mnTransform") {
          Transform::mnTransform(
            data,
            lambda = seq(-3, 3, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "mdTransform") {
          Transform::mdTransform(
            data,
            lambda = seq(-3, 3, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "bdTransform") {
          Transform::bdTransform(
            data,
            lambda = seq(0.01, 6, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "yjTransform") {
          Transform::yjTransform(
            data,
            lambda = seq(-3, 3, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "dlTransform") {
          Transform::dlTransform(
            data,
            lambda = seq(0, 6, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "gpTransform") {
          Transform::gpTransform(
            data,
            lambda = seq(-3, 3, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "lsTransform") {
          Transform::lsTransform(
            data,
            lambda = seq(-3, 3, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "ssTransform") {
          Transform::ssTransform(
            data,
            lambda = seq(-3, 3, 0.01),
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "lgTransform") {
          Transform::lgTransform(
            data,
            lambda2 = NULL,
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "rpTransform") {
          Transform::rpTransform(
            data,
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "glTransform") {
          Transform::glTransform(
            data,
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
          
        } else if (function_name == "nlTransform") {
          Transform::nlTransform(
            data,
            plot = FALSE,
            alpha = alpha,
            verbose = FALSE
          )
        }
      })
    }, error = function(e) NULL)
    
    statistic <- NA_real_
    p.value <- NA_real_
    lambda_hat <- NA_real_
    
    if (!is.null(fit)) {
      
      lambda_names <- c(
        "lambda.hat",
        "lambda_hat",
        "lambda",
        "lambda.est",
        "lambda_est",
        "Lambda",
        "lambdaHat"
      )
      
      for (lnm in lambda_names) {
        if (!is.null(fit[[lnm]])) {
          if (length(fit[[lnm]]) == 1 && is.numeric(fit[[lnm]])) {
            lambda_hat <- as.numeric(fit[[lnm]])
            break
          }
        }
      }
      
      transformed_data <- NULL
      
      possible_names <- c(
        "transformed.data",
        "transformed_data",
        "tf.data",
        "tdata",
        "data.t",
        "newdata",
        "y"
      )
      
      for (nm in possible_names) {
        if (!is.null(fit[[nm]])) {
          transformed_data <- fit[[nm]]
          break
        }
      }
      
      if (is.null(transformed_data)) {
        numeric_parts <- fit[sapply(fit, is.numeric)]
        numeric_parts <- numeric_parts[
          sapply(numeric_parts, length) == length(data)
        ]
        
        if (length(numeric_parts) > 0) {
          transformed_data <- numeric_parts[[1]]
        }
      }
      
      if (!is.null(transformed_data)) {
        sw <- shapiro.test(transformed_data)
        statistic <- as.numeric(sw$statistic)
        p.value <- sw$p.value
      }
    }
    
    lambda_print <- if (function_name %in% lambda_free_methods) {
      "NA"
    } else if (is.na(lambda_hat)) {
      "NA"
    } else {
      sprintf("%.2f", lambda_hat)
    }
    
    out <- rbind(
      out,
      data.frame(
        Method = method_name,
        Function = paste0(function_name, "(...)"),
        lambda.hat = lambda_hat,
        lambda = lambda_print,
        statistic = statistic,
        p.value = p.value,
        stringsAsFactors = FALSE
      )
    )
  }
  
  out$statistic[is.na(out$statistic)] <- 0
  out$p.value[is.na(out$p.value)] <- 0
  
  df_sorted <- out[order(-out$statistic, -out$p.value), ]
  
  max_stat <- max(df_sorted$statistic)
  max_p_among_best <- max(round(df_sorted$p.value[df_sorted$statistic == max_stat], 4))
  
  if (verbose == TRUE) {
    
    fmt_header <- "%-22s | %-20s | %-10s | %-10s | %-12s | %-15s\n"
    fmt_row    <- "%-22s | %-20s | %-10s | %-10.4f | %-12.4f | %-15s\n"
    
    header_txt <- sprintf(
      fmt_header,
      "Method",
      "Function call",
      "Lambda",
      "SW stat.",
      "P-value",
      "Evaluation"
    )
    
    line_width <- max(
      nchar(sub("\n$", "", header_txt)),
      nchar(METHOD)
    )
    
    cat(strrep("=", line_width), "\n", sep = "")
    cat(METHOD, "\n")
    cat(strrep("-", line_width), "\n", sep = "")
    
    cat(header_txt)
    
    cat(strrep("-", line_width), "\n", sep = "")
    
    for (i in 1:nrow(df_sorted)) {
      
      eval_str <- "-"
      
      if (df_sorted$statistic[i] == max_stat) {
        if (round(df_sorted$p.value[i], 4) == max_p_among_best) {
          eval_str <- "Suggested *"
        }
      }
      
      cat(sprintf(
        fmt_row,
        df_sorted$Method[i],
        df_sorted$Function[i],
        df_sorted$lambda[i],
        df_sorted$statistic[i],
        df_sorted$p.value[i],
        eval_str
      ))
    }
    
    cat(strrep("=", line_width), "\n", sep = "")
    cat("* Suggested method yielding the maximum Shapiro-Wilk test statistic.\n")
  }
  
  result <- df_sorted[, c(
    "Method",
    "Function",
    "lambda",
    "statistic",
    "p.value"
  )]
  
  return(invisible(result))
}

Try the Transform package in your browser

Any scripts or data that you put into this service are public.

Transform documentation built on June 5, 2026, 5:08 p.m.