R/MLP_varIMP_permute.R

Defines functions MLP_varIMP_permute

Documented in MLP_varIMP_permute

MLP_varIMP_permute=function(
  optmodel,
  feature_names = NULL,
  train_y = NULL,
  train_x = NULL,
  smaller_is_better = NULL,
  type = c("difference", "ratio"),
  nsim = 1,
  sample_size = NULL,
  sample_frac = NULL,
  verbose = FALSE,
  progress = "none",
  parallel = FALSE,
  paropts = NULL,
  ...
) {
  require(caret)
  # Compute baseline metric for comparison
  baseline <- as.data.frame(t(caret::postResample(
    pred = keras::predict_on_batch(optmodel, train_x),
    obs = train_y)))
  
  # Type of comparison
  type <- match.arg(type)
  `%compare%` <- if (type == "difference") {
    `-`
  } else {
    `/`
  }
  
  # Construct VIP scores
  #
  # Loop through each feature and do the following:
  #
  #   1. make a copy of the training data;
  #   2. permute the values of the original feature;
  #   3. get new predictions based on permuted data set;
  #   4. record difference in accuracy.
  
  
  permute_columns <- function(x, columns = NULL) {
    if (is.null(columns)) {
      stop("No columns specified for permutation.")
    }
    x[, columns] <- x[sample(nrow(x)), columns]
    x
  }
  
  
  #
  sort_importance_scores <- function(x, decreasing) {
    x[order(x$Importance, decreasing = decreasing), ]
  }
  #unlist
  MLP_varIMP <- replicate(nsim,(plyr::llply(feature_names, .progress = "none",
                                            .parallel = parallel, .paropts = paropts,
                                            .fun = function(x) {
                                              if (verbose && !parallel) {
                                                message("Computing variable importance for ", x, "...")
                                              }
                                              if (!is.null(sample_size)) {
                                                ids <- sample(length(train_y), size = sample_size, replace = FALSE)
                                                train_x <- train_x[ids, ]
                                                train_y <- train_y[ids]
                                              }
                                              # train_x_permuted <- train_x  # make copy
                                              # train_x_permuted[[x]] <- sample(train_x_permuted[[x]])  # permute values
                                              train_x_permuted <- permute_columns(train_x, columns = x)
                                              ###caret::postResample(pred, data1[,1]) give three values: RMSE Rsquared, MAE
                                             # x_tensor=kerasR::expand_dims(train_x_permuted,axis = 2) 
                                              permuted <- as.data.frame(t(caret::postResample(
                                                pred = keras::predict_on_batch(optmodel, train_x_permuted),
                                                obs = train_y)))
                                              # if (smaller_is_better) {
                                              #   permuted %compare% baseline  # e.g., RMSE
                                              # } else {
                                              #    baseline %compare% permuted  # e.g., R-squared
                                              # }
                                            })))
  
  # Construct tibble of variable importance scores
  #tib <- tibble::tibble(
  #  "Variable" = feature_names,
  #  "Importance" = apply(MLP_varIMP, MARGIN = 1, FUN = mean)
  #)
  #if (nsim > 1) {
  #  tib$StDev <- apply(MLP_varIMP, MARGIN = 1, FUN = stats::sd)
  #}
  
  # Add all nsim scores as an attribute
  #if (nsim > 1 && keep) {
  # rownames(MLP_varIMP) <- feature_names
  #  colnames(MLP_varIMP) <- paste0("permutation_", seq_len(ncol(MLP_varIMP)))
  #  attr(tib, which = "raw_scores") <- MLP_varIMP
  #}
  
  # Add variable importance type attribute
  #attr(tib, which = "type") <- "permutation"
  
  # Add "vip" class
  #class(tib) <- c("MLP_IMP", class(tib))
  
  # Return results
  #tib#

  varimp1= apply(MLP_varIMP, MARGIN = 1, function(x) do.call(cbind,x))
  varimp2=do.call(rbind,varimp1)
  ## now this is a array 1000, 30 dataframe
  nm <- colnames(varimp2)
  RMSE=varimp2[,grepl("^RMSE", nm)]
  Rsquared=varimp2[,grepl("^Rsquared", nm)]
  MAE=varimp2[,grepl("^MAE", nm)]
  
  RMSE=as.data.frame(rowMeans(RMSE,na.rm = TRUE))
  Rsquared=as.data.frame(rowMeans(Rsquared,na.rm = TRUE))
  MAE=as.data.frame(rowMeans(MAE,na.rm = TRUE))
  rownames(RMSE)=feature_names
  rownames(Rsquared)=feature_names
  rownames(MAE)=feature_names
  MLP_SNPsIMP=cbind(RMSE,Rsquared,MAE)
  Decrease_acc=lapply(1:3, function(i) (baseline[,i]-MLP_SNPsIMP[,i]))
  MLP_Decrease_acc=do.call(cbind,Decrease_acc)
  return(list(MLP_Decrease_acc=MLP_Decrease_acc,MLP_SNPsIMP=MLP_SNPsIMP,baseline=baseline))
  
}
xinghuq/DeepGenomeScan documentation built on Sept. 20, 2022, 8:46 a.m.