# R/auxiliar.R In BioCor: Functional similarities

```# combinadic ####
#' i-th combination of n elements taken from r
#'
#' Function similar to combn but for larger vectors. To avoid allocating a big
#' vector with all the combinations each one can be computed with this
#' function.
#' @param n Elements to extract the combination from
#' @param r Number of elements per combination
#' @param i ith combination
#' @return The combination ith of the elements
#' @export
#' @examples
#' #Output of all combinations
#' combn(LETTERS[1:5], 2)
#' # Otuput of the second combination
#' @author Joshua Ulrich
#' @references
#' 4494469/2886003}
combinadic <- function(n, r, i) {

# http://msdn.microsoft.com/en-us/library/aa289166(VS.71).aspx
n0 <- length(n)
if (i < 1L | i > choose(n0, r)) {
stop("'i' must be 0 < i <= n0!/(n0-r)!")
}
largestV <- function(n, r, i) {
v <- n # Adjusted for one-based indexing
while (choose(v,r) >= i) { # Adjusted for one-based indexing
v <- v - 1L
}
return(v)
}

res <- rep(NA,r)
for (j in 1L:r) {
res[j] <- largestV(n0, r, i)
i <- i - choose(res[j], r)
n0 <- res[j]
r <- r - 1L
}
res <- res + 1L
res <- n[res]
return(res)
}

# seq2mat ####
#' Transforms a vector to a symmetric matrix
#'
#' Fills a matrix of \code{ncol = length(x)} and \code{nrow = length(x)} with
#' the values in \code{dat} and setting the diagonal to 1.
#'
#' \code{dat} should be at least \code{choose(length(x), 2)} of length. It
#' assumes that the data provided comes from using the row and column id to
#' obtain it.
#' @param x names of columns and rows, used to define the size of the matrix
#' @param dat Data to fill with the matrix with except the diagonal.
#' @return A square matrix with the diagonal set to 1 and \code{dat} on the
#' upper and lower triangle with the columns ids and row ids from x.
#' @examples
#' seq2mat(LETTERS[1:5], 1:10)
#' seq2mat(LETTERS[1:5], seq(from = 0.1, to = 1, by = 0.1))
#' @export
#' @author Lluís Revilla
seq2mat <- function(x, dat) {
if (length(dat) != choose(length(x), 2L)) {
stop("Data is not enough big to populate the matrix")
}
out <- matrix(ncol = length(x), nrow = length(x))
out[upper.tri(out)] <- unlist(dat, use.names = TRUE)
out[lower.tri(out)] <- t(out)[lower.tri(t(out))]
diag(out) <- 1L
rownames(out) <- colnames(out) <- x
return(out)
}

# duplicateIndices ####
#' Finds the indices of the duplicated events of a vector
#'
#' Finds the indices of duplicated elements in the vector given.
#'
#' For each duplication it can return a list or if all the duplication events
#' are of the same length it returns a matrix, where each column is duplicated.
#' @param vec Vector of identifiers presumably duplicated
#' @return The format is determined by the simplify2array
#' @export
#' @author Lluís Revilla
#' @examples
#' duplicateIndices(c("52", "52", "53", "55")) # One repeated element
#' duplicateIndices(c("52", "52", "53", "55", "55")) # Repeated elements
#' duplicateIndices(c("52", "55", "53", "55", "52")) # Mixed repeated elements
duplicateIndices <- function(vec) {
if (!is.character(vec)) {
stop("Expected a list of characters to find duplicates on it")
}
sapply(unique(vec[duplicated(vec)]), function(x){
b <- 1:length(vec)
b[vec == x]}, simplify = FALSE)
}

# removeDup ####
#' Remove duplicated rows and columns
#'
#' Given the indices of the duplicated entries remove the columns and rows
#' until just one is left, it keeps the duplicated with the highest absolute
#' mean value.
#'
#' @param cor_mat List of matrices
#' @param dupli List of indices with duplicated entries
#' @return A matrix with only one of the columns and rows duplicated
#' @export
#' @author Lluís Revilla
#' @seealso \code{\link{duplicateIndices}} to obtain the list of indices with
#' duplicated entries.
#' @examples
#' a <- seq2mat(c("52", "52", "53", "55"), runif(choose(4, 2)))
#' b <- seq2mat(c("52", "52", "53", "55"), runif(choose(4, 2)))
#' mat <- list("kegg" = a, "react" = b)
#' mat
#' dupli <- duplicateIndices(rownames(a))
#' remat <- removeDup(mat, dupli)
#' remat
removeDup <- function(cor_mat, dupli) {
if (!all(sapply(cor_mat, isSymmetric))) {
stop("All the matrices of mat should be symmetric and with the same ",
"column names and rownames")
}
cor_mat <- Map(function(mat, x = dupli) {
rem.colum <- sapply(x, function(y, m) {
mean.column <- apply(m[, y], 2L, mean, na.rm = TRUE)
i <- which.max(abs(mean.column))
setdiff(y, y[i])
}, m = mat)

mat[-rem.colum, -rem.colum]
}, cor_mat)
return(cor_mat)
}

#' Invert a list
#'
#' Calculate the pathways per gene of list
#'
#' @param x A list with genes as names and names of pathways as values of the
#' list
#' @return The number of pathways each gene has.
#' @author Lluís Revilla
#' @keywords internal
inverseList <- function(x) {
stopifnot(length(names(x)) == length(x))
stopifnot(all(sapply(x, function(x){is.character(x) || is.na(x)})))
genes <- unlist(x, use.names = FALSE)
pathways <- rep(names(x), lengths(x))
split(pathways, genes)
}
```

## Try the BioCor package in your browser

Any scripts or data that you put into this service are public.

BioCor documentation built on Nov. 1, 2018, 3:34 a.m.