R/checkInfoGlength.R

Defines functions checkInfoGlength

Documented in checkInfoGlength

#' Check the info and glength files
#'
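#' Compare the feature ids of the count matrix (its row names) with the ids in the first column of the annotation (\code{info}) and gene length (\code{glength}) tables: duplicated ids raise an error, a differing number of features is reported, and when the numbers match the ids found on one side only are printed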
#'
#' @param counts numeric \code{matrix} of count data
#' @param info \code{data.frame} containing gene ids in the first column and further information in the following
#' @param glength \code{data.frame} containing gene ids in the first column and their length in the second
#' @author Hugo Varet

# created July 10th, 2019

# counts <- matrix(rnorm(100), ncol=10)
# rownames(counts) <- letters[1:10]
# info <- data.frame(id=letters[1:9], name=LETTERS[1:9])
# info2 <- data.frame(id=letters[1:10], name=LETTERS[1:10])
# info3 <- data.frame(id=letters[3:12], name=LETTERS[2:11])
# info4 <- data.frame(id=letters[c(3:12, 3, 4)], name=LETTERS[c(2:11, 12, 13)])
# glength <- data.frame(id=letters[1:9], name=LETTERS[1:9])
# glength2 <- data.frame(id=letters[1:10], name=LETTERS[1:10])
# glength3 <- data.frame(id=letters[3:12], name=LETTERS[2:11])
# checkInfoGlength(counts, info, glength)
# checkInfoGlength(counts, info2, glength2)
# checkInfoGlength(counts, info3, glength2)
# checkInfoGlength(counts, info4, glength2)

checkInfoGlength <- function(counts, info=NULL, glength=NULL){
  # Console report comparing the row names of `counts` with the ids stored in
  # the first column of `info` and of `glength`. Each table is optional (NULL
  # skips it). Duplicated ids in a table abort with an error; otherwise either
  # the size mismatch or the ids present on one side only are printed with cat().

  # ---- annotation table ----
  if (!is.null(info)){
    dupInfo <- duplicated(info[,1])
    if (any(dupInfo)){
      stop("Duplicated ids in info: ", paste(unique(info[dupInfo, 1]), collapse=", "))
    }
    if (nrow(counts) == nrow(info)){
      # same number of features: list the ids that appear on one side only
      # (the list is printed empty when both sides agree)
      onlyInCounts <- setdiff(rownames(counts), info[,1])
      cat("Features in counts but not in info:", paste0(onlyInCounts, collapse=", "), "\n")
      onlyInInfo <- setdiff(info[,1], rownames(counts))
      cat("Features in info but not in counts:", paste0(onlyInInfo, collapse=", "), "\n")
    } else{
      # sizes differ: only the two sizes are reported, not the ids
      cat("Different number of features between counts and info:", nrow(counts), "vs", nrow(info), "\n")
    }
  }

  # ---- gene length table ----
  if (!is.null(glength)){
    dupGlength <- duplicated(glength[,1])
    if (any(dupGlength)){
      stop("Duplicated ids in glength: ", paste(unique(glength[dupGlength, 1]), collapse=", "))
    }
    if (nrow(counts) == nrow(glength)){
      # same number of features: list the ids that appear on one side only
      onlyInCounts <- setdiff(rownames(counts), glength[,1])
      cat("Features in counts but not in glength:", paste0(onlyInCounts, collapse=", "), "\n")
      onlyInGlength <- setdiff(glength[,1], rownames(counts))
      cat("Features in glength but not in counts:", paste0(onlyInGlength, collapse=", "), "\n")
    } else{
      # sizes differ: only the two sizes are reported, not the ids
      cat("Different number of features between counts and glength:", nrow(counts), "vs", nrow(glength), "\n")
    }
  }
}
biomics-pasteur-fr/RNADiff documentation built on Aug. 27, 2020, 12:44 a.m.