Report Mahalanobis Distance"
In cmahalanobis: Calculate Distance Measures for a Given List of Data Frames with Factors

# Assuming R Markdown package is installed

knitr::opts_chunk$set(echo = FALSE)

cmahalanobis <- function(dataset, formula, plot = TRUE, plot_title = "Mahalanobis Distance Between Groups") {
  # Verify that the input is a data frame
  if (!is.data.frame(dataset)) {
    stop("The input must be a dataframe")
  }

  # Extract the response and predictor variables from the formula
  response <- all.vars(formula)[1]
  predictors <- all.vars(formula)[-1]

  # Split the data into groups based on the response variable
  groups <- split(dataset, dataset[[response]])

  # Select only numeric variables in each group
  groups <- lapply(groups, function(df) {
    df <- df[, sapply(df, is.numeric)]  # Select only numeric columns
    return(df)
  })

  # Replace missing values with arithmetic mean in each dataframe into the list 
  groups <- lapply(groups, impute_with_mean)

  # Obtain the number of groups
  n <- length(groups)

  # Precalculate means and covariances
  means <- lapply(groups, colMeans)
  covariances <- lapply(groups, function(df) {
    cov_matrix <- cov(df)
    n <- nrow(cov_matrix)
    reg <- 0.01  # Valore di regolarizzazione
    cov_matrix <- cov_matrix + diag(reg, nrow = n)
    return(cov_matrix)
  })

  # Create a empty matrix to store distances
  distances <- matrix(0, nrow = n, ncol = n)

  # Calculate Mahalanobis distance between each pair of groups
  for (i in 1:n) {
    mean_i <- means[[i]]
    cov_i <- covariances[[i]]
    for (j in 1:n) {
      if (i != j) {
        distances[i, j] <- mean(mahalanobis(x = groups[[j]], center = mean_i, cov = cov_i))
      }
    }
  }


  # If plot is TRUE, call the "plot_mahalanobis_distances" function and print the plot
  if (plot) {
    print(plot_mahalanobis_distances(distances, plot_title))
  }

  # Return a list containing distances and optionally p_values
  return(list(distances = distances))
}

impute_with_mean <- function(df) {
  # Calculate mean for each columns, ignoring missing values
  means <- colMeans(df, na.rm = TRUE)

  # Replace missing values with the corresponding mean
  for (i in 1:ncol(df)) {
    df[is.na(df[, i]), i] <- means[i]
  }

  return(df)
}

# Auxiliary function to print the Mahalanobis distances plot 
plot_mahalanobis_distances <- function(distances, plot_title) {
  requireNamespace("ggplot2")
  requireNamespace("reshape2")
  output_df <- as.data.frame(distances)
  output_df$Species <- rownames(output_df)

  output_df_long <- reshape2::melt(output_df, id.vars = "Species")
  colnames(output_df_long) <- c("Species", "Comparison", "Distance")

  ggplot2::ggplot(output_df_long, ggplot2::aes(x = Species, y = Distance, fill = Comparison)) +
    ggplot2::geom_bar(stat = "identity", position = "dodge") +
    ggplot2::labs(title = plot_title, x = "Species", y = "Distance", fill = "Comparison") +
    ggplot2::theme_minimal()
}

plot_p_values <- function(p_values, plot_title = "p_values heatmap") {
 requireNamespace("ggplot2")
  requireNamespace("reshape2")
  output_df <- as.data.frame(p_values)
  output_df$Species <- rownames(output_df)

  output_df_long <- reshape2::melt(output_df, id.vars = "Species")
  colnames(output_df_long) <- c("Species", "Comparison", "Distance")

  ggplot2::ggplot(output_df_long, ggplot2::aes(x = Species, y = Distance, fill = Comparison)) +
    ggplot2::geom_bar(stat = "identity", position = "dodge") +
    ggplot2::labs(title = plot_title, x = "Species", y = "Distance", fill = "Comparison") +
    ggplot2::theme_minimal()
}

pvaluescmaha <- function(dataset, formula, pvalue.method = "chisq", num.permutations = 100, num.bootstraps = 10, plot = TRUE) {
  # Verify that the input is a dataframe
  if (!is.data.frame(dataset)) {
    stop("The input must be a dataframe")
  }

  # Extract the name of response variable by the formula
  response <- all.vars(formula)[1]

  # Verify that the response variable exists in the dataset
  if (!response %in% names(dataset)) {
    stop(paste("Response variable", response, "is absent in the dataset"))
  }

  # Calculate Mahalanobis distances using "cmahalanobis" function
  mahalanobis_results <- cmahalanobis(dataset, formula, plot = FALSE)
  distances <- mahalanobis_results$distances

  # Obtain groups number
  n <- nrow(distances)

  # Initialize p_values matrix
  p_values <- matrix(NA, nrow = n, ncol = n)

  # Calculate p_values
  for (i in 1:n) {
    df <- ncol(dataset) - 1  # Degrees of freedom (adjusted for covariance matrix estimate)
    for (j in 1:n) {
      if (i != j) {
        # Choose p_values method calculation based user input
        if (pvalue.method == "chisq") {
          p_values[i, j] <- pchisq(distances[i, j], df, lower.tail = FALSE, log.p = TRUE)
        } else if (pvalue.method == "permutation") {
          # Permutation test
          observed_distance <- distances[i, j]
          permutation_distances <- replicate(num.permutations, {
            # Permute labels group and recalculate the distance
            permuted_data <- dataset
            permuted_data[[response]] <- sample(dataset[[response]])
            permuted_results <- cmahalanobis(permuted_data, formula, plot = FALSE)
            permuted_distances <- permuted_results$distances
            return(permuted_distances[i, j])
          })
          p_values[i, j] <- mean(permutation_distances >= observed_distance)
        } else if (pvalue.method == "bootstrap") {
          # Bootstrap
          observed_distance <- distances[i, j]
          bootstrap_distances <- replicate(num.bootstraps, {
            # Extract a repetition sample
            bootstrap_sample <- sample(nrow(dataset), replace = TRUE)
            bootstrap_data <- dataset[bootstrap_sample, ]
            bootstrap_results <- cmahalanobis(bootstrap_data, formula, plot = FALSE)
            bootstrap_distance <- bootstrap_results$distances[i, j]
            return(bootstrap_distance)
          })
          p_values[i, j] <- mean(abs(bootstrap_distances) >= abs(observed_distance))
        } else {
          stop("p_values method calculation not supported. Use 'chisq', 'permutation' or 'bootstrap'.")
        }
      }
    }
  }

  # Plot the heatmap if plot is TRUE
  if (plot) {
    requireNamespace("ggplot2")
    requireNamespace("reshape2")
    p_values_melt <- reshape2::melt(p_values, na.rm = TRUE)
    colnames(p_values_melt) <- c("Var1", "Var2", "value")

    print(ggplot2::ggplot(data = p_values_melt, ggplot2::aes(x = Var1, y = Var2, fill = value)) +
            ggplot2::geom_tile() +
            ggplot2::scale_fill_gradient(low = "white", high = "red") +
            ggplot2::labs(title = "p_values heatmap", x = "Gruppo 1", y = "Gruppo 2", fill = "p_value") +
            ggplot2::theme_minimal() +
            ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)))
  }

  return(p_values)
}

distances <- params$distances  # Replace with your actual distances data
p_values <- params$p_values    # Replace with your actual p_values data


if (!is.null(distances) && !all(is.na(distances))) {
  knitr::kable(distances, caption = "Mahalanobis Distances")
} else {
  print("No distances available")
}

if (!is.null(p_values) && !all(is.na(p_values))) {
   knitr::kable(p_values, caption = "p_values")
} else {
  print("No p_values available")
}

if (!is.null(p_values) && !all(is.na(p_values))) {
  plot_p_values(p_values, "P_Values")
} else {
  print("No p_values available")
}

if (!is.null(distances) && !all(is.na(distances))) {
  plot_mahalanobis_distances(distances, "Mahalanobis Distance Between Groups")
} else {
  print("No distances available for plotting")
}

Any scripts or data that you put into this service are public.

cmahalanobis documentation built on April 3, 2025, 8:51 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

cmahalanobis
Calculate Distance Measures for a Given List of Data Frames with Factors

Report Mahalanobis Distance"
In cmahalanobis: Calculate Distance Measures for a Given List of Data Frames with Factors

Try the cmahalanobis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cmahalanobis Calculate Distance Measures for a Given List of Data Frames with Factors

Report Mahalanobis Distance" In cmahalanobis: Calculate Distance Measures for a Given List of Data Frames with Factors

Try the cmahalanobis package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

cmahalanobis
Calculate Distance Measures for a Given List of Data Frames with Factors

Report Mahalanobis Distance"
In cmahalanobis: Calculate Distance Measures for a Given List of Data Frames with Factors