macam: A collection of miscellaneous R functions for ecology

Documented in s_map_mdr uic_across

#' @title Perform UIC across data.frame
#' @description \code{uic_across} Perform UIC for a target variable and multiple causal variables
#' @param block Data.frame contains time series data.
#' @param effect_var Character or Numeric. Column name or index of the effect variable.
#' @param E_range Numeric. Embedding dimensions that will be tested.
#' @param tp_range Numeric. `tp` tested for UIC.
#' @param tau Numeric. `tau` tested for UIC.
#' @param num_surr Numeric. The number of surrogate data generated to compute p-value.
#' @param alpha Numeric. the significant level to determine the embedding dimension of reference model (i.e., `E0`). If `alpha = NULL`, `E0` is set to `E - 1`. If `0 < alpha < 1` `E0` depends on the model results with lower embedding dimensions. Default is 0.05.
#' @param fdr Logical. If `TRUE`, calculate False Discovery Rate using `stats::p.adjust()` and BH method.
#' @param silent Logical. if `TRUE`, progress message will not be shown.
#' @return A data.frame that contains UIC results
#' @details
#' \itemize{
#'  \item{Osada & Ushio (2020) rUIC:Unified Information-theoretic Causality for R. https://doi.org/10.5281/zenodo.5163234}
#' }
#' @export
uic_across <- function(block,
                       effect_var,
                       E_range = 0:10,
                       tp_range = -4:0,
                       tau = 1,
                       num_surr = 1000,
                       alpha = 0.05,
                       fdr = FALSE,
                       #random_seed = 1234,
                       silent = FALSE) {
  # Set random seed
  #set.seed(random_seed)

  # Retrieve colnames
  x_names <- colnames(block)

  # Check input arguments
  if (!all(unique(x_names) == x_names)) stop("\"block\" should have unique column names.")
  if (!is.data.frame(block)) stop("\"block\" should be data.frame.")
  if (!is.numeric(E_range) | !is.numeric(tp_range)) stop("\"E_range\" and \"tp_range\" should be numeric.")
  if (is.numeric(effect_var)) effect_var <- x_names[effect_var]

  # ---------------------------------------------------- #
  # Preparation for UIC
  # ---------------------------------------------------- #
  # Rearrange the column order
  block <- dplyr::select(block, tidyselect::all_of(effect_var), dplyr::everything())

  # ---------------------------------------------------- #
  # Identify causal relationship using rUIC
  # ---------------------------------------------------- #
  ## Perform UIC for all pairs
    # ---------------------------------------------------- #
    # uic.optimal()
    # ---------------------------------------------------- #
  for (y_i in x_names[x_names != effect_var]) {
    time_start <- proc.time()
    # Testing the effect of "y_i" on "effect_var" using uic.optimal()
    uic_xy <- rUIC::uic.optimal(block, lib_var = effect_var, tar_var = y_i, E = E_range, tau = tau, tp = tp_range, num_surr = num_surr, alpha = alpha) %>%
      dplyr::mutate(effect_var = effect_var, cause_var = y_i)
    # Combine results
    if (y_i != x_names[x_names != effect_var][1]) { uic_res <- rbind(uic_res, uic_xy) } else { uic_res <- uic_xy }
    # Output message
    time_used <- (proc.time() - time_start)[3]
    if (!silent) { message(sprintf("Effects from %s to %s tested by UIC: %.2f sec elapsed", y_i, effect_var, time_used)) }
  }

  if (fdr) {
    # Add FDR ("BH" method)
    uic_res$fdr <- stats::p.adjust(uic_res$pval, method = "BH")
  }

  # Message
  if (!silent) {
    message("By using `uic_across()` the effect variable is recorded in the column named `effect_var`, while potential causal variables are recorded in the column named `cause_var`.")
    message("Please note that these column names are used in the subsequent analysis if you use `make_block_mvd()`.")
  }

  # Return results
  return(as.data.frame(uic_res))
}



#' @title Generate block for MDR S-map
#' @description \code{make_block_mvd} Generate data.frame for MDR S-map
#' @param block Data.frame contains time series data.
#' @param uic_res Data.frame contains UIC results. Usually this is the output of `uic_across()`, but custom data.frame may be usable.
#' @param effect_var Character or Numeric. Column name or index of the effect variable.
#' @param E_effect_var Numeric. Optimal embedding dimension of the effect variable.
#' @param cause_var_colname Character. A column name for causal variables in `uic_res`.
#' @param th_var_colname Character. A column name for a threshold to choose causal variables in `uic_res`.
#' @param include_var Character. `all_significant`, `strongest_only`, or `tp0_only`. If `all_significant`, all significantly influencing variables are used for the embedding. If `strongest_only`, tp with the strongest influence for each variable is used. If `tp0_only`, only variables with no time-delay are used.
#' @param p_threshold Numeric. Rows with pval larger than `p_threshold` is removed.
#' @param tp_adjust NULL or numeric. Adjust tp values.
#' @param sort_tp Logical. If TRUE, `block_mvd` is sorted according to `tp` for each causal variable.
#' @param silent Logical. if `TRUE`, progress message will not be shown.
#' @return Embedded time series will be returned.
#' @export
make_block_mvd <- function (block,
                            uic_res,
                            effect_var,
                            E_effect_var,
                            cause_var_colname = "cause_var",
                            th_var_colname = "pval",
                            p_threshold = 0.050,
                            include_var = "strongest_only",
                            tp_adjust = NULL,
                            sort_tp = TRUE,
                            silent = FALSE) {
  # Retrieve colnames
  x_names <- colnames(block)

  # Check colnames to be used (and critical) in the analysis
  if (is.numeric(effect_var)) effect_var <- x_names[effect_var]
  if (!(effect_var %in% x_names)) stop("No 'effect_var' in the block!")
  if (!(cause_var_colname %in% colnames(uic_res))) stop("No 'cause_var_colname' in `uic_res`! Please specify the correct colname for causal variables")
  if (!("tp" %in% colnames(uic_res))) stop("'tp' column is required for `uic_res`.")
  if (!("te" %in% colnames(uic_res))) stop("'te' column is required for `uic_res`.")
  if (!(th_var_colname %in% colnames(uic_res))) stop(sprintf("'%s' column is required for `uic_res`.", th_var_colname))

  # Check arguments
  if (!all(unique(x_names) == x_names)) stop("\"block\" should have unique column names.")
  if (!is.data.frame(block)) stop("\"block\" should be data.frame.")
  if (!is.numeric(p_threshold)) stop("\"p_threshold\" should be numeric.")
  if (!include_var %in% c("all_significant", "strongest_only", "tp0_only")) stop("\"include_var\" should be \"all_significant\", \"strongest_only\", or \"tp0_only\".")
  if (E_effect_var < 1) stop("\"max_delay_self\" should be >= 1.")

  # Preparation
  if (include_var == "tp0_only") {
    block_mvd <- data.frame(block[,effect_var])
    colnames(block_mvd) <- sprintf("%s_tp0", effect_var)
  } else {
    block_mvd <- data.frame(rEDM::make_block(block[,effect_var], max_lag = E_effect_var)[,-1])
    colnames(block_mvd) <- sprintf("%s_tp%s", effect_var, 0:(-(E_effect_var-1)))
  }
  # Pre-screening (typically, p & tp)
  if (!silent) message(sprintf("UIC results with `tp` <= 0 and `pval` <= %s are kept for further analyses.", p_threshold))
  uic_res <- uic_res[uic_res$pval <= p_threshold & uic_res$tp <= 0,]
  if (nrow(uic_res) < 1) stop("No significant causal variables were detected. Please use the univariate S-map.")
  if (!is.null(tp_adjust)) {
    uic_res$tp <- uic_res$tp + tp_adjust
  }

  # Sort block columns according to tp for each causal variable
  if (sort_tp) {
    sort_id <- 0
    for (col_i in unique(uic_res[,cause_var_colname])) {
      sort_id_i <- order(-uic_res[uic_res[,cause_var_colname] == col_i,"tp"])
      sort_id <- c(sort_id, max(sort_id) + sort_id_i)
    }
    sort_id <- sort_id[-1]; uic_res <- uic_res[sort_id,]
  }

  # ---------------------------------------------------- #
  # Select variables to be included in block
  # ---------------------------------------------------- #
  if (include_var == "all_significant") {
    # ---------------------------------------------------- #
    # Select all significant variables
    # ---------------------------------------------------- #
    for (i in 1:nrow(uic_res)) {
      if(uic_res$tp[i] <= 0) {
        block_new <- dplyr::lag(block[,uic_res[i, cause_var_colname]], n = abs(uic_res[i,"tp"]))
      } else {
        block_new <- dplyr::lead(block[,uic_res[i, cause_var_colname]], n = abs(uic_res[i,"tp"]))
      }
      block_new <- data.frame(block_new)
      colnames(block_new) <- sprintf("%s_tp%s", uic_res[i, cause_var_colname], uic_res[i,"tp"])
      block_mvd <- cbind(block_mvd, block_new)
    }
  } else if (include_var == "strongest_only") {
    # ---------------------------------------------------- #
    # Select tp with the strongest influence from each causal variable
    # (te is used as a criterion)
    # ---------------------------------------------------- #
    for (cause_i in unique(uic_res[,cause_var_colname])) {
      uic_res_tmp <- uic_res[uic_res[,cause_var_colname] == cause_i,]
      if (!exists("uic_res_new")) {
        uic_res_new <- uic_res_tmp[which.max(uic_res_tmp[,"te"]),]
      } else {
        uic_res_new <- rbind(uic_res_new, uic_res_tmp[which.max(uic_res_tmp[,"te"]),])
      }
    }
    # Replace "uic_res"
    uic_res <- uic_res_new
    #uic_res <- uic_res %>% dplyr::group_by(.data$cause_var) %>% dplyr::filter(.data$te == max(.data$te))
    for (i in 1:nrow(uic_res)) {
      if(uic_res$tp[i] <= 0) {
        block_new <- dplyr::lag(block[,uic_res[i, cause_var_colname]], n = abs(uic_res[i,"tp"]))
      } else {
        block_new <- dplyr::lead(block[,uic_res[i, cause_var_colname]], n = abs(uic_res[i,"tp"]))
      }
      block_new <- data.frame(block_new)
      colnames(block_new) <- sprintf("%s_tp%s", uic_res[i,cause_var_colname], uic_res[i,"tp"])
      block_mvd <- cbind(block_mvd, block_new)
    }
  } else if (include_var == "tp0_only") {
    # ---------------------------------------------------- #
    # Select causal variables with tp = 0
    # ---------------------------------------------------- #
    block_new <- block[,unique(uic_res[,cause_var_colname])]
    block_new <- data.frame(block_new)
    colnames(block_new) <- sprintf("%s_tp0", unique(uic_res[,cause_var_colname]))
    block_mvd <- cbind(block_mvd, block_new)
  }
  # Return results
  return(block_mvd)
}



#' @title Computing multiview distance
#' @description \code{compute_mvd} Compute multiview distance
#' @param block_mvd Data.frame contains time series data. The first column should be the target column.
#' @param effect_var Character or Numeric. Column name or index of the effect variable.
#' @param E Numeric. Optimal embedding dimension of `effect_var`
#' @param tp Numeric. Forecasting time ahead.
#' @param lib Numeric vector. Library indices.
#' @param pred Numeric vector. Prediction indices.
#' @param make_block_method Character. If `naive`, the multivariate data.frame (`block_mvd`) is directly used to calculate the multiview distance. If `rEDM`, `rEDM::make_block()` is used to add the time-delayed ordinate for each variable, which make the method equivalent to Chang et al. (2021).
#' @param make_block_max_lag Numeric. `max_lag` in `rEDM::make_block()`. This argument will be used only if `make_block_method = "rEDM"`.
#' @param n_ssr Numeric. The total number of embeddings examined.
#' @param k Numeric. The number of embeddings used to calculate ensemble distance.
#' @param random_seed Numeric. Random seed.
#' @param distance_only Logical. if `TRUE`, only distance matrix is returned.
#' @param silent Logical. if `TRUE`, progress message will not be shown.
#' @return A distance matrix and other information (if `distance_only = FALSE`).
#' @details
#' \itemize{
#'  \item{Chang et al. (2021) Ecology Letters. https://doi.org/10.1111/ele.13897}
#' }
#' @export
compute_mvd <- function (block_mvd, effect_var, E,
                         tp = 1,
                         lib = c(1, nrow(block_mvd)),
                         pred = lib,
                         make_block_method = "naive", # "rEDM"
                         make_block_max_lag = E,
                         n_ssr = 10000, k = floor(sqrt(n_ssr)),
                         random_seed = 1234,
                         distance_only = TRUE,
                         silent = FALSE) {
  # Set random seed
  set.seed(random_seed)
  # Retrieve colnames
  x_names <- colnames(block_mvd)

  # Check input
  if (!(E %% 1 == 0) | E < 1) stop("\"E\" should be a natural number.")
  if (is.numeric(effect_var)) effect_var <- x_names[effect_var]
  if (colnames(block_mvd)[1] != sprintf("%s_tp0", effect_var)) stop("The first column should be the target variable with tp = 0!")
  if (!(make_block_method %in% c("naive", "rEDM"))) stop("\"make_block_method\" should be \"naive\" or \"rEDM\".")


  # ---------------------------------------------------- #
  # Generate an embedding list
  # ---------------------------------------------------- #
  if (E == 1) {
    embedding_list <- matrix(1, ncol = 1, nrow = 1)
  } else {
    if (make_block_method == "naive") {
      # ---------------------------------------------------- #
      # Generate embedding list using sample() function
      # ---------------------------------------------------- #
      valid_idx <- 2:length(x_names)
    } else if (make_block_method == "rEDM") {
      # ---------------------------------------------------- #
      # ! Recommended only if "block_mvd" is generated by specifying "tp0_only" in make_block_mvd()
      # Make a new block using rEDM::make_block()
      # ---------------------------------------------------- #
      block_redm <- rEDM::make_block(block_mvd, max_lag = make_block_max_lag)[,-1]
      block_mvd <- block_redm
      valid_idx <- 2:ncol(block_mvd)
    }
    # Check the number of possible embeddings
    if (choose(length(valid_idx), E-1) >= n_ssr) {
      # Output message
      if (!silent) message(sprintf("The performance of %s embeddings (n_ssr) will be evaluated from %s potential embeddings", n_ssr, choose(length(valid_idx), E-1)))
      # Randomly select embeddings
      emb_list0 <- matrix(0, nrow = 1, ncol = E-1); i <- 1
      while (nrow(emb_list0) < n_ssr + 1) {
        i <- i + 1; set.seed(random_seed + i)
        emb_list0_tmp <- matrix(sort(sample(valid_idx, E-1, replace = FALSE)), nrow = 1)
        emb_dif <- t(t(emb_list0) - c(emb_list0_tmp)) # Subtract a vector from matrix
        if(all(!apply(emb_dif == 0, 1, all))) emb_list0 <- rbind(emb_list0, emb_list0_tmp)
      }
      # Add target variable
      emb_list1 <- cbind(matrix(1, ncol = 1, nrow = nrow(emb_list0)-1), emb_list0[-1,])
      # Sort emb_list1
      emb_list1 <- emb_list1[do.call(order, as.list(as.data.frame(emb_list1))),]
      embedding_list <- emb_list1
    } else {
      if (!silent) message("The number of potential embeddings is smaller than \"n_srr\". Thus, the performance of all potential embeddings will be evalulated.")
      emb_list0 <- t(utils::combn(valid_idx, E-1, simplify = TRUE))
      emb_list1 <- cbind(matrix(1, ncol = 1, nrow = nrow(emb_list0)), emb_list0)
      embedding_list <- emb_list1
    }
  }
  nrow_res <- nrow(embedding_list)

  # ---------------------------------------------------- #
  # Main loop of randomized simplex projections
  # ---------------------------------------------------- #
  for (i in 1:nrow_res) {
    ## Choose embedding ids (id = 1 is always a target column)
    embedding_idx_i <- embedding_list[i,]
    ## Perform simplex projection
    rand_embed_res_i <- rEDM::block_lnlp(block_mvd[,embedding_idx_i],
                                         lib = lib,
                                         pred = pred,
                                         method = "simplex",
                                         silent = TRUE,
                                         tp = tp,
                                         target_column = 1)
    # Add information
    rand_embed_res_i$embedding <- stringr::str_sub(stringr::str_c(embedding_idx_i, ",", collapse = " "), end = -2)
    # Merge results
    if (i == 1) { rand_embed_res <- rand_embed_res_i } else { rand_embed_res <- rbind(rand_embed_res, rand_embed_res_i)}
  }

  ## Remove rho <= 0 because it is meaningless prediction.
  rand_embed_res <- rand_embed_res[rand_embed_res[,"rho"] > 0,]
  if (!silent) message("Embeddings with rho < 0 were removed.")
  if (nrow(rand_embed_res) == 0) stop("Cannot find embeddings with positive prediction skill. Reconsider parameters.")

  ## Sort the result based on "evaluated_by"
  rand_embed_res <- rand_embed_res[order(rand_embed_res[,"rho"], decreasing = T),]
  if (k <= nrow(rand_embed_res)) {
    # Output message
    if (!silent) message(sprintf("Top %s embeddings (k) will be used from %s embeddings (rho > 0 in n_ssr embeddings)", k, nrow(rand_embed_res)))
    top_embed_res <- rand_embed_res[1:k,]
  } else {
    if (!silent) message("The total number of valid embeddings is smaller than \"k\". Thus, all valid embeddings will be used to calculate the multiview distance.")
    top_embed_res <- rand_embed_res
  }
  # Rename row names
  rownames(top_embed_res) <- 1:nrow(top_embed_res)

  # ---------------------------------------------------- #
  # Calculate ensemble weights for S-map
  # ---------------------------------------------------- #
  # Calculate a weight for each embedding
  w_multi <- top_embed_res[,"rho"]/sum(top_embed_res[,"rho"])

  # "multiview_dist" will be used to calculate S-map weights
  multiview_dist <- matrix(0, ncol = nrow(block_mvd), nrow = nrow(block_mvd))
  for (i in 1:nrow(top_embed_res)) {
    top_embed_ids <- stringr::str_split(top_embed_res$embedding[i], pattern = ",")[[1]] %>%
      as.numeric
    multiview_dist <- multiview_dist + w_multi[i] * as.matrix(stats::dist(block_mvd[,top_embed_ids]))
  }

  # ---------------------------------------------------- #
  # Return results
  # ---------------------------------------------------- #
  if (distance_only) {
    return(multiview_dist)
  } else {
    all_res <- list(multiview_dist); names(all_res) <- "multiview_dist"
    all_res$embeddings <- rand_embed_res
    all_res$top_embeddings <- top_embed_res
    if (is.null(random_seed)) random_seed <- "NULL"
    all_res$parms <- data.frame(E = E, tp = tp, n_ssr = nrow(rand_embed_res),
                                k = nrow(top_embed_res), random_seed = random_seed)
    return(all_res)
  }
}


#' @title A function for MDR S-map
#' @description \code{s_map_mdr} performs MDR S-map introduced in Chang et al. (2021) Ecology Letters.
#' @param block_mvd Data.frame contains time series data. The first column should be the target column.
#' @param dist_w Matrix. Multiview disntance matrix. Usually this is the output of `compute_mvd()`.
#' @param lib Numeric vector. Library indices.
#' @param pred Numeric vector. Prediction indices.
#' @param tp Numeric. Forecasting time ahead.
#' @param theta Numeric. Weighing function for S-map.
#' @param weight_method Specify weighing method for `dist_w`. Default is `sqrt`, which is the same for Chang et al. (2021).
#' @param regularized Logical If `TRUE`, regularized S-map will be performed. If `FALSE`, the normal S-map will be performed. Please use `rEDM::s_map` function.
#' @param lambda Numeric. Specify the strength of penalty in the regularization.
#' @param alpha Numeric. `alpha = 0` is the ridge regression, `alpha = 1` is the lasso regression, and `0 < alpha < 1` is an elastic net.
#' @param glmnet_parallel Logical. If TRUE, the computation will be parallel (currently, experimental).
#' @param save_smap_coefficients Logical. If `TRUE`, S-map coefficients will be saved.
#' @param random_seed Numeric. Random seed.
#' @return A list that contains predictions, statistics, and S-map coefficients (if `save_smap_coefficients = TRUE`)
#' @details
#' \itemize{
#'  \item{Chang, C.-W., Miki, T., Ushio, M., Ke, P.-J., Lu, H.-P., Shiah, F.-K. & et al. (2021) Reconstructing large interaction networks from empirical time series data. Ecology Letters, 24, 2763– 2774. https://doi.org/10.1111/ele.13897}
#' }
#' @export
s_map_mdr <- function(block_mvd,
                      dist_w,
                      lib = c(1, nrow(block_mvd)), pred = lib,
                      tp = 1, theta = 8,
                      weight_method = "sqrt",
                      regularized = FALSE,
                      lambda = 0, alpha = 0,
                      glmnet_parallel = FALSE,
                      save_smap_coefficients = FALSE,
                      random_seed = 1234) {
  # Check input
  if (!(nrow(block_mvd) == nrow(dist_w))) stop(" \"dist_w\" should have the same size with \"block_mvd\"")
  if (!is.null(weight_method) & weight_method != "sqrt") stop("Specify the valid option for \"weight_method\": NULL or \"sqrt\"")
  if (weight_method == "sqrt") dist_w = sqrt(dist_w)

  # ---------------------------------------------------- #
  # Perform MDR S-map using macam::extended_lnlp
  # ---------------------------------------------------- #
  mdr_res <- macam::extended_lnlp(block_mvd,
                                  lib = lib,
                                  pred = pred,
                                  target_column = 1,
                                  tp = tp,
                                  dist_w = dist_w,
                                  theta = theta,
                                  regularized = regularized,
                                  lambda = lambda,
                                  alpha = alpha,
                                  glmnet_parallel = glmnet_parallel,
                                  save_smap_coefficients = save_smap_coefficients,
                                  random_seed = random_seed)
  # Return results
  return(mdr_res)
}



#' @title A all-in-one wrapper function for MDR S-map
#' @description \code{s_map_mdr_all} A all-in-one wrapper function for MDR S-map. Detect causality between variables, construct data.frame for multiview embedding, compute multiview distance, and perform MDR S-map. For fine-tuning, use the step-by-step functions such as `compute_mvd()` etc..
#' @param block Data.frame contains time series data. The first column should be the target column.
#' @param effect_var Character or Numeric. Column name or index of the effect variable.
#' @param lib Numeric vector. Library indices. If `NULL`, `c(1, nrow(block_mvd))` will be used as `lib`.
#' @param pred Numeric vector. Prediction indices.
#' @param tp Numeric. Forecasting time ahead.
#' @param E_range Numeric. Embedding dimensions that will be tested.
#' @param tp_range Numeric. `tp` tested for UIC.
#' @param n_ssr Numeric. The total number of embeddings examined.
#' @param k Numeric. The number of embeddings used to calculate ensemble distance.
#' @param theta Numeric. Weighing function for S-map.
#' @param regularized Logical If `TRUE`, regularized S-map will be performed. If `FALSE`, the normal S-map will be performed. Please use `rEDM::s_map` function.
#' @param lambda Numeric. Specify the strength of penalty in the regularization.
#' @param alpha Numeric. `alpha = 0` is the ridge regression, `alpha = 1` is the lasso regression, and `0 < alpha < 1` is an elastic net.
#' @param glmnet_parallel Logical. If TRUE, the computation will be parallel (currently, experimental).
#' @param save_smap_coefficients Logical. If `TRUE`, S-map coefficients will be saved.
#' @param silent Logical. if `TRUE`, progress message will not be shown.
#' @param random_seed Numeric. Random seed.
#' @return A list that contains predictions, statistics, and S-map coefficients (if `save_smap_coefficients = TRUE`)
#' @export
s_map_mdr_all <- function (block,
                           effect_var,
                           lib = NULL,
                           pred = lib,
                           tp = 1,
                           E_range = 0:10,
                           tp_range = -4:0,
                           n_ssr = 100,
                           k = 10,
                           theta = 0,
                           regularized = FALSE,
                           lambda = 0,
                           alpha = 0,
                           glmnet_parallel = FALSE,
                           save_smap_coefficients = TRUE,
                           silent = FALSE,
                           random_seed = 1234) {
  # Step 1: Determine optimal embedding dimension
  simp_res <- rUIC::simplex(block, lib_var = effect_var, E = E_range, tau = 1, tp = 1)
  Ex <- simp_res[which.min(simp_res$rmse),"E"]

  # Step 2: Perform UIC to detect causality
  uic_res <- uic_across(block, effect_var, E_range = E_range, tp_range = tp_range, silent = silent)

  # Step 3: Make block to calculate multiview distance
  block_mvd <- make_block_mvd(block, uic_res, effect_var, E_effect_var = Ex, include_var = "tp0_only")

  # Step. 4: Compute multiview distance
  multiview_dist <- compute_mvd(block_mvd, effect_var, E = Ex,
                                make_block_method = "rEDM",
                                make_block_max_lag = Ex,
                                n_ssr = n_ssr, k = k,
                                tp = tp, random_seed = random_seed)

  # Use "c(1, nrow(block_mvd))" as "lib" if "lib" is not specified
  if (is.null(lib)) {
    lib <- c(1, nrow(block_mvd))
    pred <- lib
  }

  # Step. 5: Do MDR S-map
  mdr_res <- s_map_mdr(block_mvd,
                       dist_w = multiview_dist,
                       lib = lib,
                       pred = pred,
                       tp = tp,
                       theta = theta,
                       regularized = regularized,
                       lambda = lambda,
                       alpha = alpha,
                       glmnet_parallel = glmnet_parallel,
                       save_smap_coefficients = save_smap_coefficients,
                       random_seed = random_seed)

  return(mdr_res)
}



# #' @title `make_block` function from rEDM v0.7.5
# #' @description \code{make_block} generates a lagged block with the appropriate max_lag and tau, while respecting lib (by inserting NANs, when trying to lag past lib regions)
# #' @param block a data.frame or matrix where each column is a time series
# #' @param t Numeric. The time index for the block.
# #' @param max_lag The total number of lags to include for each variable.
# #' @param tau The lag to use for time delay embedding.
# #' @param lib A 2-column matrix (or 2-element vector) where each row specifies the first and last *rows* of the time series to use for attractor reconstruction.
# #' @param restrict_to_lib Whether to restrict the final lagged block to just the rows specified in lib (if lib exists).
# #' @return A data.frame with the lagged columns and a time column. If the original block had columns X, Y, Z and max_lag = 3, then the returned data.frame will have columns TIME, X, X_1, X_2, Y, Y_1, Y_2, Z, Z_1, Z_2.
# #' @details
# #' \itemize{
# #'  \item{Ye et al. (2018) Applications of Empirical Dynamic Modeling from Time Series. https://github.com/ha0ye/rEDM}
# #' }
# #' @export
# make_block <- function (block, t = NULL, max_lag = 3, tau = 1, lib = NULL,
#           restrict_to_lib = TRUE) {
#   if (is.vector(block)) block <- matrix(block, ncol = 1)
#   num_vars <- NCOL(block)
#   num_rows <- NROW(block)
#
#   if (!is.null(lib)) {
#     if (is.vector(lib)) lib <- matrix(lib, ncol = 2, byrow = TRUE)
#   }
#   output <- matrix(NA, nrow = num_rows, ncol = 1 + num_vars * max_lag)
#   col_names <- character(1 + num_vars * max_lag)
#   if (is.null(t)) output[, 1] <- 1:num_rows else output[, 1] <- t
#   col_names[1] <- "time"
#   col_index <- 2
#   if (is.null(colnames(block))) colnames(block) <- paste0("col", seq_len(num_vars))
#
#   for (j in 1:num_vars) {
#     ts <- block[, j]
#     if (is.list(ts)) {
#       ts <- unlist(ts)
#     }
#     output[, col_index] <- ts
#     col_names[col_index] <- colnames(block)[j]
#     col_index <- col_index + 1
#     if (max_lag > 1) {
#       for (i in 1:(max_lag - 1)) {
#         ts <- c(rep_len(NA, tau), ts[1:(num_rows - tau)])
#         if (!is.null(lib)) {
#           for (k in seq_len(NROW(lib))) {
#             ts[lib[k, 1] - 1 + (1:tau)] <- NA
#           }
#         }
#         output[, col_index] <- ts
#         col_names[col_index] <- paste0(colnames(block)[j], "_", i * tau)
#         col_index <- col_index + 1
#       }
#     }
#   }
#   if (!is.null(lib) && restrict_to_lib) {
#     row_idx <- sort(unique(do.call(c, mapply(seq, lib[, 1], lib[, 2], SIMPLIFY = FALSE))))
#     output <- output[row_idx, ]
#   }
#   output <- data.frame(output)
#   names(output) <- col_names
#   return(output)
# }