R/operations.R

Defines functions unary COS SIN EXP LOG SQRE INV SQRT ABS binary MINUS_ABS DIVD MULTI MINUS ADD dataprocessing

dataprocessing <- function(data) {
  data$X <- as.matrix(data$X)

  # Remove X cols with duplicated data
  temp <- round(data$X, digits = 6)
  dup_idx <- duplicated(temp, MARGIN = 2)
  data$X <- as.matrix(data$X[, !dup_idx])
  data$head <- data$head[!dup_idx]
  if (!is.null(data$unit)) data$unit <- data$unit[!dup_idx]

  # Remove columns with -Inf, Inf
  inf_idx <- apply(data$X, 2, function(x) any(abs(x) == Inf))
  data$X <- as.matrix(data$X[, !inf_idx])
  data$head <- data$head[!inf_idx]
  if (!is.null(data$unit)) data$unit <- data$unit[!inf_idx]

  # Remove columns without variability
  s <- apply(data$X, 2, function(x) sd(x))
  no_var_idx <- (s == 0)
  data$X <- as.matrix(data$X[, !no_var_idx])
  data$head <- data$head[!no_var_idx]
  if (!is.null(data$unit)) data$unit <- data$unit[!no_var_idx]
  colnames(data$X) <- data$head

  return(data)
}


##### Binary Operations #####
ADD <- function(dat) {
  X <- dat$X
  head_in <- dat$head
  dimen_in <- dat$unit

  n <- nrow(X)
  p <- ncol(X)
  X_tmp <- matrix(0, nrow = n, ncol = choose(p, 2))
  head <- c()
  unit <- list()
  count <- 1

  if (is.null(dimen_in)) {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        X_tmp[, count] <- X[, i] + X[, j]
        head[count] <- paste0("(", head_in[i], "+", head_in[j], ")")
        count <- count + 1
      }
    }
    unit <- NULL
  } else {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        dimen_i <- dimen_in[[i]]
        dimen_j <- dimen_in[[j]]
        dimen_i <- dimen_i[order(names(dimen_i))]
        dimen_j <- dimen_j[order(names(dimen_j))]
        if (setequal(names(dimen_i), names(dimen_j)) && isTRUE(dimen_i == dimen_j)) {
          X_tmp[, count] <- X[, i] + X[, j]
          head[count] <- paste0("(", head_in[i], "+", head_in[j], ")")
          unit[[count]] <- dimen_i
        } else {
          head[count] <- "empty"
          unit[[count]] <- "empty"
        }
        count <- count + 1
      }
    }

    # Remove non-physical descriptors
    idx_empty <- (head == "empty")
    X_tmp <- as.matrix(X_tmp[, !idx_empty])
    head <- head[!idx_empty]
    unit <- unit[!idx_empty]
  }

  data_out <- list(X = as.matrix(X_tmp),
                   head = head,
                   unit = unit)
  return(data_out)
}

MINUS <- function(dat) {
  X <- dat$X
  head_in <- dat$head
  dimen_in <- dat$unit

  n <- nrow(X)
  p <- ncol(X)
  X_tmp <- matrix(0, nrow = n, ncol = choose(p, 2))
  head <- c()
  unit <- list()
  count <- 1

  if (is.null(dimen_in)) {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        X_tmp[, count] <- X[, i] - X[, j]
        head[count] <- paste0("(", head_in[i], "-", head_in[j], ")")
        count <- count + 1
      }
    }
    unit <- NULL
  } else {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        dimen_i <- dimen_in[[i]]
        dimen_j <- dimen_in[[j]]
        dimen_i <- dimen_i[order(names(dimen_i))]
        dimen_j <- dimen_j[order(names(dimen_j))]
        if (setequal(names(dimen_i), names(dimen_j)) && isTRUE(dimen_i == dimen_j)) {
          X_tmp[, count] <- X[, i] - X[, j]
          head[count] <- paste0("(", head_in[i], "-", head_in[j], ")")
          unit[[count]] <- dimen_i
        } else {
          head[count] <- "empty"
          unit[[count]] <- "empty"
        }
        count <- count + 1
      }
    }

    # Remove non-physical descriptors
    idx_empty <- (head == "empty")
    X_tmp <- as.matrix(X_tmp[, !idx_empty])
    head <- head[!idx_empty]
    unit <- unit[!idx_empty]
  }

  data_out <- list(X = as.matrix(X_tmp),
                   head = head,
                   unit = unit)
  return(data_out)
}

MULTI <- function(dat) {
  X <- dat$X
  head_in <- dat$head
  dimen_in <- dat$unit

  n <- nrow(X)
  p <- ncol(X)
  X_tmp <- matrix(0, nrow = n, ncol = choose(p, 2))
  head <- c()
  unit <- list()
  count <- 1

  if (is.null(dimen_in)) {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        X_tmp[, count] <- X[, i] * X[, j]
        head[count] <- paste0("(", head_in[i], "*", head_in[j], ")")
        count <- count + 1
      }
    }
    unit <- NULL
  } else {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        X_tmp[, count] <- X[, i] * X[, j]
        head[count] <- paste0("(", head_in[i], "*", head_in[j], ")")

        dimen_i <- dimen_in[[i]]
        dimen_j <- dimen_in[[j]]
        dimen_i <- dimen_i[order(names(dimen_i))]
        dimen_j <- dimen_j[order(names(dimen_j))]
        names <- union(names(dimen_i), names(dimen_j))
        names_i <- setdiff(names, names(dimen_i))
        names_j <- setdiff(names, names(dimen_j))
        if (length(names_i) > 0) {
          dimen_i[names_i] <- 0
          dimen_i <- dimen_i[order(names(dimen_i))]
        }
        if (length(names_j) > 0) {
          dimen_j[names_j] <- 0
          dimen_j <- dimen_j[order(names(dimen_j))]
        }
        unit[[count]] <- dimen_i + dimen_j
        count <- count + 1
      }
    }
  }

  data_out <- list(X = as.matrix(X_tmp),
                   head = head,
                   unit = unit)
  return(data_out)
}

DIVD <- function(dat) {
  X <- dat$X
  head_in <- dat$head
  dimen_in <- dat$unit

  n <- nrow(X)
  p <- ncol(X)
  X_tmp_1 <- matrix(0, nrow = n, ncol = choose(p, 2))
  X_tmp_2 <- matrix(0, nrow = n, ncol = choose(p, 2))
  head_1 <- head_2 <- c()
  dimen_1 <- dimen_2 <- list()
  count <- 1

  if (is.null(dimen_in)) {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        if (all(X[, j] != 0)) {
          X_tmp_1[, count] <- X[, i] / X[, j]
          head_1[count] <- paste0("(", head_in[i], "/", head_in[j], ")")
        } else {
          head_1[count] <- "empty"
        }
        if (all(X[, i] != 0)) {
          X_tmp_2[, count] <- X[, j] / X[, i]
          head_2[count] <- paste0("(", head_in[j], "/", head_in[i], ")")
        } else {
          head_2[count] <- "empty"
        }
        count <- count + 1
      }
    }
    idx_empty_1 <- (head_1 == "empty")
    idx_empty_2 <- (head_2 == "empty")

    # Remove non-physical descriptors
    X_tmp_1 <- as.matrix(X_tmp_1[, !idx_empty_1])
    head_1 <- head_1[!idx_empty_1]
    X_tmp_2 <- as.matrix(X_tmp_2[, !idx_empty_2])
    head_2 <- head_2[!idx_empty_2]

    dimen_1 <- dimen_2 <- NULL
  } else {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        dimen_i <- dimen_in[[i]]
        dimen_j <- dimen_in[[j]]
        dimen_i <- dimen_i[order(names(dimen_i))]
        dimen_j <- dimen_j[order(names(dimen_j))]

        if (all(X[, j] != 0)) {
          X_tmp_1[, count] <- X[, i] / X[, j]
          head_1[count] <- paste0("(", head_in[i], "/", head_in[j], ")")

          names <- union(names(dimen_i), names(dimen_j))
          names_i <- setdiff(names, names(dimen_i))
          names_j <- setdiff(names, names(dimen_j))
          if (length(names_i) > 0) {
            dimen_i[names_i] <- 0
            dimen_i <- dimen_i[order(names(dimen_i))]
          }
          if (length(names_j) > 0) {
            dimen_j[names_j] <- 0
            dimen_j <- dimen_j[order(names(dimen_j))]
          }
          dimen_1[[count]] <- dimen_i - dimen_j
        } else {
          head_1[count] <- "empty"
          dimen_1[[count]] <- "empty"
        }

        if (all(X[, i] != 0)) {
          X_tmp_2[, count] <- X[, j] / X[, i]
          head_2[count] <- paste0("(", head_in[j], "/", head_in[i], ")")

          names <- union(names(dimen_i), names(dimen_j))
          names_i <- setdiff(names, names(dimen_i))
          names_j <- setdiff(names, names(dimen_j))
          if (length(names_i) > 0) {
            dimen_i[names_i] <- 0
            dimen_i <- dimen_i[order(names(dimen_i))]
          }
          if (length(names_j) > 0) {
            dimen_j[names_j] <- 0
            dimen_j <- dimen_j[order(names(dimen_j))]
          }
          dimen_2[[count]] <- dimen_j - dimen_i
        } else {
          head_2[count] <- "empty"
          dimen_2[[count]] <- "empty"
        }
        count <- count + 1
      }
    }

    idx_empty_1 <- (head_1 == "empty")
    idx_empty_2 <- (head_2 == "empty")

    # Remove non-physical descriptors
    X_tmp_1 <- as.matrix(X_tmp_1[, !idx_empty_1])
    head_1 <- head_1[!idx_empty_1]
    dimen_1 <- dimen_1[!idx_empty_1]

    X_tmp_2 <- as.matrix(X_tmp_2[, !idx_empty_2])
    head_2 <- head_2[!idx_empty_2]
    dimen_2 <- dimen_2[!idx_empty_2]
  }

  X_out <- cbind(X_tmp_1, X_tmp_2)
  head_out <- c(head_1, head_2)
  dimen_out <- c(dimen_1, dimen_2)

  data_out <- list(X = as.matrix(X_out),
                   head = head_out,
                   unit = dimen_out)
  return(data_out)
}

MINUS_ABS <- function(dat) {
  X <- dat$X
  head_in <- dat$head
  dimen_in <- dat$unit

  n <- nrow(X)
  p <- ncol(X)
  X_tmp <- matrix(0, nrow = n, ncol = choose(p, 2))
  head <- c()
  unit <- list()
  count <- 1

  if (is.null(dimen_in)) {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        X_tmp[, count] <- abs(X[, i] - X[, j])
        head[count] <- paste0("|", head_in[i], "-", head_in[j], "|")
        count <- count + 1
      }
    }
    unit <- NULL
  } else {
    for (i in 1:(p - 1)) {
      for (j in (i + 1):p) {
        dimen_i <- dimen_in[[i]]
        dimen_j <- dimen_in[[j]]
        dimen_i <- dimen_i[order(names(dimen_i))]
        dimen_j <- dimen_j[order(names(dimen_j))]
        if (setequal(names(dimen_i), names(dimen_j)) && setequal(dimen_i, dimen_j)) {
          X_tmp[, count] <- abs(X[, i] - X[, j])
          head[count] <- paste0("|", head_in[i], "-", head_in[j], "|")
          unit[[count]] <- dimen_i
        } else {
          head[count] <- "empty"
          unit[[count]] <- "empty"
        }
        count <- count + 1
      }
    }

    # Remove non-physical descriptors
    idx_empty <- (head == "empty")
    X_tmp <- as.matrix(X_tmp[, !idx_empty])
    head <- head[!idx_empty]
    unit <- unit[!idx_empty]
  }

  data_out <- list(X = as.matrix(X_tmp),
                   head = head,
                   unit = unit)
  return(data_out)
}

binary <- function(data, sin_cos) {
  p <- ncol(data$X)
  if (p < 2) {
    message("X has less than 2 columns. Need at least 2 columns to perform binary operations!")
    data$error <- TRUE
    return(data)
  } else {
    # Binary operations
    data_add <- ADD(data)
    data_minus <- MINUS(data)
    data_multi <- MULTI(data)
    data_divd <- DIVD(data)

    dat_tmp <- list()
    dat_tmp$X <- cbind(data_add$X, data_minus$X, data_multi$X, data_divd$X)
    dat_tmp$head <- c(data_add$head, data_minus$head, data_multi$head, data_divd$head)
    dat_tmp$unit <- c(data_add$unit, data_minus$unit, data_multi$unit, data_divd$unit)

    # Combine datasets
    if (sin_cos == FALSE) {
      data_abs <- ABS(dat_tmp)
      data$X <- cbind(dat_tmp$X, data_abs$X)
      data$head <- c(dat_tmp$head, data_abs$head)
      data$unit <- c(dat_tmp$unit, data_abs$unit)
    } else{
      data_abs_minus <- MINUS_ABS(data)
      data$X <- cbind(dat_tmp$X, data_abs_minus$X)
      data$head <- c(dat_tmp$head, data_abs_minus$head)
      data$unit <- c(dat_tmp$unit, data_abs_minus$unit)
    }
    colnames(data$X) <- unname(data$head)

    # Remove redundant descriptors
    data <- dataprocessing(data)
    return(data)
  }
}

##### Unary Operations #####
ABS <- function(dat) {
  dat$X <- apply(dat$X, 2, function(x) abs(x))
  dat$head <- unname(sapply(dat$head, function(x) paste0("abs(", x, ")")))

  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

SQRT <- function(dat, apply_pos_opt_on_neg_x) {
  # Record which columns has negative values
  neg_col <- apply(dat$X, 2, function(x) any(x < 0))

  if (apply_pos_opt_on_neg_x) {
    dat$X <- suppressWarnings(apply(dat$X, 2, function(x) sqrt(abs(x))))
    dat$head <- unname(sapply(dat$head, function(x) paste0(x, "^0.5")))
    if (any(neg_col)) dat$head[neg_col] <- unname(sapply(dat$head[neg_col], function(x) paste0("abs(", x, ")^0.5")))
    if (!is.null(dat$unit)) dat$unit <- lapply(dat$unit, function(x) 0*x)
  } else {
    if (sum(neg_col) == ncol(dat$X)) {
      return(list(X = NULL, head = NULL, unit = NULL))
    } else {
      dat$X <- suppressWarnings(apply(as.matrix(dat$X[, !neg_col]), 2, function(x) sqrt(abs(x))))
      dat$head <- unname(sapply(dat$head[!neg_col], function(x) paste0(x, "^0.5")))
      if (!is.null(dat$unit)) dat$unit <- dat$unit[!neg_col]
    }
  }
  dat$X <- as.matrix(dat$X)
  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

INV <- function(dat) {
  # Remove columns containing 0
  zero_idx <- apply(dat$X, 2, function(x) any(x == 0))

  dat$X <- suppressWarnings(apply(as.matrix(dat$X[, !zero_idx]), 2, function(x) x^(-1)))
  dat$head <- unname(sapply(dat$head[!zero_idx], function(x) paste0(x, "^(-1)")))
  if (!is.null(dat$unit)) dat$unit <- lapply(dat$unit[!zero_idx], function(x) -x)

  dat$X <- as.matrix(dat$X)
  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

SQRE <- function(dat) {
  dat$X <- apply(dat$X, 2, function(x) x^2)
  dat$head <- unname(sapply(dat$head, function(x) paste0(x, "^2")))
  if (!is.null(dat$unit)) dat$unit <- lapply(dat$unit, function(x) 2 * x)

  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

LOG <- function(dat, apply_pos_opt_on_neg_x) {
  # Record which columns has negative values
  neg_col <- apply(dat$X, 2, function(x) any(x < 0))

  if (apply_pos_opt_on_neg_x) {
    dat$X <- suppressWarnings(apply(dat$X, 2, function(x) log(abs(x))))
    dat$head <- unname(sapply(dat$head, function(x) paste0("log(", x, ")")))
    if (any(neg_col)) dat$head[neg_col] <- unname(sapply(dat$head[neg_col], function(x) paste0("log(abs(", x, "))")))
    if (!is.null(dat$unit)) dat$unit <- lapply(dat$unit, function(x) 0*x)
  } else {
    if (sum(neg_col) == ncol(dat$X)) {
      return(list(X = NULL, head = NULL, unit = NULL))
    } else {
      dat$X <- suppressWarnings(apply(as.matrix(dat$X[, !neg_col]), 2, function(x) log(abs(x))))
      dat$head <- unname(sapply(dat$head[!neg_col], function(x) paste0("log(", x, ")")))
      if (!is.null(dat$unit)) dat$unit <- dat$unit[!neg_col]
    }
  }

  # Remove NAs in case of log(0)
  NA_col <- apply(as.matrix(dat$X), 2, anyNA)
  dat$X <- as.matrix(dat$X[, !NA_col])
  dat$head <- dat$head[!NA_col]
  if (!is.null(dat$unit)) dat$unit <- dat$unit[!NA_col]

  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

EXP <- function(dat) {
  dat$X <- apply(dat$X, 2, function(x) exp(x))
  dat$head <- unname(sapply(dat$head, function(x) paste0("exp(", x, ")")))
  if (!is.null(dat$unit)) dat$unit <- lapply(dat$unit, function(x) 0*x)

  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

SIN <- function(dat) {
  dat$X <- apply(dat$X, 2, function(x) sin(pi * x))
  dat$head <- unname(sapply(dat$head, function(x) paste0("sin(pi*", x, ")")))
  if (!is.null(dat$unit)) dat$unit <- lapply(dat$unit, function(x) 0*x)

  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

COS <- function(dat) {
  dat$X <- apply(dat$X, 2, function(x) cos(pi * x))
  dat$head <- unname(sapply(dat$head, function(x) paste0("cos(pi*", x, ")")))
  if (!is.null(dat$unit)) dat$unit <- lapply(dat$unit, function(x) 0*x)

  data_out <- list(X = dat$X,
                   head = dat$head,
                   unit = dat$unit)
  return(data_out)
}

unary <- function(data, sin_cos, apply_pos_opt_on_neg_x) {
  p <- ncol(data$X)
  if (p < 1) {
    stop("X has zero column. Need at least 1 column to perform unary operations!")
  } else {
    # unary operations
    data_abs <- ABS(data)
    data_sqrt <- SQRT(data, apply_pos_opt_on_neg_x)
    data_inv <- INV(data)
    data_sqre <- SQRE(data)
    data_log <- LOG(data, apply_pos_opt_on_neg_x)
    data_exp <- EXP(data)

    if(sin_cos == TRUE){
      data_sin <- SIN(data)
      data_cos <- COS(data)

      # Combine datasets
      data$X <- cbind(data$X, data_sqrt$X, data_sqre$X,
                      data_log$X, data_exp$X, data_sin$X,
                      data_cos$X, data_inv$X, data_abs$X)
      data$head <- c(data$head, data_sqrt$head, data_sqre$head,
                     data_log$head, data_exp$head, data_sin$head,
                     data_cos$head, data_inv$head, data_abs$head)
      data$unit <- c(data$unit, data_sqrt$unit, data_sqre$unit,
                      data_log$unit, data_exp$unit, data_sin$unit,
                      data_cos$unit, data_inv$unit, data_abs$unit)
    } else{
      # Combine datasets
      data$X <- cbind(data$X, data_abs$X, data_sqrt$X,
                      data_inv$X, data_sqre$X, data_log$X,
                      data_exp$X)
      data$head <- c(data$head, data_abs$head, data_sqrt$head,
                     data_inv$head, data_sqre$head, data_log$head,
                     data_exp$head)
      data$unit <- c(data$unit, data_abs$unit, data_sqrt$unit,
                      data_inv$unit, data_sqre$unit, data_log$unit,
                      data_exp$unit)
    }

    colnames(data$X) <- unname(data$head)

    # Remove redundant descriptors
    data <- dataprocessing(data)
    return(data)
  }
}

Try the iBART package in your browser

Any scripts or data that you put into this service are public.

iBART documentation built on Nov. 15, 2023, 1:09 a.m.