# R/aboutLoop.R In fionarhuang/TreeSummarizedExperiment: TreeSummarizedExperiment: a S4 Class for Data with Tree Structures

#### Documented in detectLoopresolveLoop

```#' Detect loops
#' \code{detectLoop} detects loops
#'
#' @param tax_tab a data frame where columns store hierarchical levels. The
#'   columns from the left to the right correspond nodes from the root to the
#'   leaf.
#' @importFrom dplyr '%>%' mutate_if
#' @export
#' @author Ruizhu Huang
#' @return a data frame
#' @examples
#'
#' df <- data.frame(A = rep("a", 8),
#'                  B = rep (c("b1", "b2", "b3", "b4"), each = 2),
#'                  C = paste0("c", c(1, 2, 2, 3:7)),
#'                  D = paste0("d", 1:8))
#'
#' # The result means that a loop is caused by 'b1' and 'b2' in column 'B' and
#' # 'c2' in column 'C' (a-b1-c2; a-b2-c2)
#' detectLoop(tax_tab = df)
#'
#' df <- data.frame(R1 = rep("A", 6),
#'                      R2 = c("B1", rep("B2", 4), "B3"),
#'                      R3 = c("C1", "C2", "C3", NA, NA, NA),
#'                      R4 = c("D1", "D2", "D3", NA, NA, NA),
#'                      R5 = paste0("E", 1:6))
#' detectLoop(tax_tab = df)
#'
#' df <- data.frame(R1 = rep("A", 7),
#'                      R2 = c("B1", rep("B2", 4), "B3", "B3"),
#'                      R3 = c("C1", "C2", "C3", "", "", "", ""),
#'                      R4 = c("D1", "D2", "D3", "", "", "", ""),
#'                      R5 = paste0("E", 1:7))
#' detectLoop(tax_tab = df)
#'
#' df <- data.frame(R1 = rep("A", 7),
#'                      R2 = c("B1", rep("B2", 4), "B3", "B3"),
#'                      R3 = c("C1", "C2", "C3", NA, NA, NA, NA),
#'                      R4 = c("D1", "D2", "D3", NA, NA, NA, NA),
#'                      R5 = paste0("E", 1:7))
#' detectLoop(tax_tab = df)

detectLoop <- function(tax_tab){

tax_tab <- tax_tab %>%
mutate_if(is.factor, as.character)

nc <- ncol(tax_tab)
cnam <- colnames(tax_tab)

tab_list <- lapply(seq_len(nc-1), function(x){
xx <- table(tax_tab[, x], tax_tab[, x+1], useNA = "always")
sx <- colSums(xx > 0)
nx <- names(sx)[sx > 1]
if (length(nx)) {
warning("Detected ", cnam[x+1], " from different ",
cnam[x], " : ", paste(nx, collapse = " "))

ssx <- xx[, sx>1, drop = FALSE]
ssx <- ssx[rowSums(ssx) > 0, , drop = FALSE]
ind <- which(ssx > 0, arr.ind = TRUE)

data.frame(parent = rownames(ssx)[ind[, "row"]],
child = colnames(ssx)[ind[, "col"]],
parent_column = cnam[x],
child_column = cnam[x+1])
} else {
NULL
}

})

out <- do.call(rbind, tab_list)
data.frame(lapply(out, as.character), stringsAsFactors = FALSE)
}

#' Resolve loops
#' \code{resolveLoop} resolve loops by adding suffix to the child node. The
#' suffix is "_i" where 'i' is a number. Please see examples.
#'
#' @param tax_tab a data frame where columns store hierarchical levels. The
#'   columns from the left to the right correspond nodes from the root to the
#'   leaf.
#' @importFrom dplyr '%>%' mutate_if group_by mutate row_number
#' @importFrom rlang .data
#' @export
#' @author Ruizhu Huang
#' @return a data frame
#' @examples
#' # example 1
#' df <- data.frame(A = rep("a", 8),
#'                  B = rep (c("b1", "b2", "b3", "b4"), each = 2),
#'                  C = paste0("c", c(1, 2, 2, 3:7)),
#'                  D = paste0("d", 1:8))
#'
#' # The result means that a loop is caused by 'b1' and 'b2' in column 'B' and
#' # 'c2' in column 'C' (a-b1-c2; a-b2-c2)
#' resolveLoop(tax_tab = df)
#'
#' # example 2
#' taxTab <- data.frame(R1 = rep("A", 5),
#'                      R2 = c("B1", rep("B2", 3), ""),
#'                      R3 = c("C1", "C2", "C3", "", ""),
#'                      R4 = c("D1", "D2", "D3", "", ""),
#'                      R5 = paste0("E", 1:5))
#'
#' resolveLoop(tax_tab = taxTab)
#' # example 3
#' taxTab <- data.frame(R1 = rep("A", 6),
#'                      R2 = c("B1", rep("B2", 4), ""),
#'                      R3 = c("C1", "C2", "C3", "", "", ""),
#'                      R4 = c("D1", "D2", "D3", "", "", ""),
#'                      R5 = paste0("E", 1:6))
#'
#' resolveLoop(tax_tab = taxTab)
#'
#'  # example 3
#' taxTab <- data.frame(
#'             R1 = rep("A", 5),
#'             R2 = c("B1", rep("B2", 3), "B3"),
#'             R3 = c("C1", "C2", "C3", NA, NA),
#'             R4 = c("D1", "D2", "D3", NA, NA),
#'             R5 = paste0("E", 1:5))
#' resolveLoop(tax_tab = taxTab)
#'
#'
resolveLoop <- function(tax_tab) {
# solve the loop by adding suffix
tax_s <- .solveLoop(tax_tab)

# test whether all loops are solved
loop_s <- detectLoop(tax_tab = tax_s)
while (nrow(loop_s)) {
tax_s <- .solveLoop(tax_s)
loop_s <- detectLoop(tax_tab = tax_s)
}
return(tax_s)
}

.solveLoop <- function(tax_tab){
# change factor to character
tax_tab <- tax_tab %>%
mutate_if(is.factor, as.character)

df <- detectLoop(tax_tab = tax_tab)
if (nrow(df)) {
df <- df %>% group_by(.data\$child) %>% mutate(count = row_number())
df_list <- split(df, seq(nrow(df)))

tax_x <- tax_tab
for (i in seq_along(df_list)){
df_i <- df_list[[i]]
ind_c <- tax_x[[df_i\$child_column]] %in% df_i\$child
ind_p <- tax_x[[df_i\$parent_column]] == df_i\$parent
ind <- which(ind_p & ind_c)
tax_x[ind, df_i\$child_column] <- paste(tax_x[ind, df_i\$child_column],
df_i\$count, sep = "_")
}
tax_x
} else {
tax_tab
}

}
```
fionarhuang/TreeSummarizedExperiment documentation built on Aug. 18, 2021, 12:15 p.m.