# (web-extraction residue, not part of the source) Nothing
#' Filter for unique elements
#'
#' This function aims to identify and remove duplicated elements in a list and maintain the list-structure in the output.
#' \code{filtSizeUniq} filters 'lst' (list of character-vectors or character-vector) for elements being unique
#' (within 'lst' and, if 'ref' is given, also with respect to 'ref') and for their number of characters.
#' Elements shorter than 'minSize' or longer than 'maxSize' are removed. Eg, in proteomics this helps removing peptide sequences which would not be measured/detected any way.
#'
#' @param lst list of character-vectors or character-vector
#' @param ref (character) optional alternative 'reference', if not \code{NULL} used in addition to 'lst' for considering elements of 'lst' as unique
#' @param minSize (integer) minimum number of characters, if \code{NULL} set to 0
#' @param maxSize (integer) maximum number of characters, if \code{NULL} set to 40
#' @param filtUnique (logical) if \code{TRUE} return unique-only character-strings
#' @param byProt (logical) if \code{TRUE} organize output as list (by names of input, eg protein-names) - if 'lst' was named list
#' @param inclEmpty (logical) optional including empty list-elements when all elements have been filtered away - if 'lst' was named list
#' @param silent (logical) suppress messages
#' @param debug (logical) additional messages for debugging
#' @param callFrom (character) allow easier tracking of messages produced
#' @return If \code{byProt=TRUE} (and 'lst' is a named list): a named list with the retained elements of each entry of 'lst'
#'   (entries without any retained element are appended as empty character-vectors if \code{inclEmpty=TRUE}).
#'   Otherwise: the filtered character-vector, or - if \code{filtUnique=TRUE} and redundant elements were found -
#'   a list with the elements \code{unique}, \code{allRedund} and \code{firstOfRed}.
#' @seealso \code{\link{correctToUnique}}, \code{\link[base]{unique}}, \code{\link[base]{duplicated}}
#' @examples
#' filtSizeUniq(list(A="a",B=c("b","bb","c"),D=c("dd","d","ddd","c")),filtUnique=TRUE,minSize=NULL)
#' # input: c and dd are repeated
#' filtSizeUniq(list(A="a",B=c("b","bb","c"),D=c("dd","d","ddd","c")),ref=c(letters[c(1:26,1:3)],
#' "dd","dd","bb","ddd"),filtUnique=TRUE,minSize=NULL) # a,b,c,dd repeated
#' @export
filtSizeUniq <- function(lst, ref=NULL, minSize=6, maxSize=36, filtUnique=TRUE, byProt=TRUE, inclEmpty=TRUE, silent=FALSE, debug=FALSE, callFrom=NULL) {
  ## filter protein sequences for size/length and for unique
  fxNa <- .composeCallName(callFrom, newNa="filtSizeUniq")
  if(!isTRUE(silent)) silent <- FALSE
  if(isTRUE(debug)) silent <- FALSE else debug <- FALSE
  hasNames <- !is.null(names(lst))
  ## organizing by entry is only possible with a named list
  if(!is.list(lst) || !hasNames) { byProt <- FALSE; inclEmpty <- FALSE }
  if(hasNames) {
    ## append '.' as separator so that unlist() names elements like 'B.1', 'B.2' (instead of 'B1', 'B2')
    nTrailDot <- length(grep("\\.$", names(utils::head(lst))))      # check for attached tailing '.'
    if(nTrailDot <= min(2, length(lst))) names(lst) <- paste0(names(lst), ".")
  }
  iniPro <- sub("\\.$", "", names(lst))                              # entry-names without the separator
  pep <- unlist(lst)
  ## entries of length 1 are named 'A.' by unlist() : always remove the tailing separator
  if(!is.null(names(pep))) names(pep) <- sub("\\.$", "", names(pep))
  ## keep track of the entry each element belongs to (avoids re-parsing names, which fails for names like 'NP_1.1')
  grp <- NULL
  if(byProt) {
    nPerEntry <- lengths(lst)
    grp <- if(length(pep) == sum(nPerEntry)) rep(iniPro, nPerEntry) else sub("\\.[[:digit:]]+$", "", names(pep))
  }
  nPep <- length(pep)
  nAA <- nchar(pep)
  if(length(minSize) <1) minSize <- 0
  if(length(maxSize) <1) { maxSize <- 40
    if(!silent) message(fxNa," can't understant 'maxSize', setting to default=40") }
  ## filter by size  (which() also drops NA, thus no NA can reach a condition)
  inSize <- which(nAA >= minSize & nAA <= maxSize)
  if(length(inSize) < nPep) {
    pep <- if(length(inSize) <1) NULL else pep[inSize]
    if(byProt) grp <- grp[inSize]
    if(!silent) message(fxNa,nPep - length(pep)," out of ",nPep," peptides beyond range (",minSize,"-",maxSize,")") }
  ## filter unique /redundant
  uniPep <- pep                                                     # elements used when organizing by entry
  if(filtUnique) {
    nPe2 <- length(pep)
    ## 'ref' only serves for flagging elements of 'lst' as redundant, it never gets returned
    pool <- if(length(ref) >0) c(pep, unique(unlist(ref))) else pep
    iPep <- seq_len(nPe2)
    chDup <- duplicated(pool, fromLast=FALSE)[iPep]                 # repeated, not the 1st occurrence
    chDu2 <- duplicated(pool, fromLast=TRUE)[iPep]                  # repeated, not the last occurrence
    isUni <- !chDup & !chDu2
    if(!all(isUni)) {
      uniPep <- pep[which(isUni)]
      if(byProt) grp <- grp[which(isUni)]
      redund <- pep[which(!isUni)]
      if(!silent) message(fxNa,length(redund)," out of ",nPe2," peptides redundant")
      ## without organizing by entry the redundant elements are reported, too
      if(!byProt) pep <- list(unique=uniPep, allRedund=redund, firstOfRed=pep[which(chDu2 & !chDup)])
    }
  }
  ## organize by entry (eg protein)
  if(byProt) {
    if(is.null(uniPep)) uniPep <- character()
    pep <- split(uniPep, factor(grp))                              # always a plain named list, sorted by name
    if(inclEmpty) {
      missPro <- iniPro[which(!iniPro %in% names(pep))]
      if(length(missPro) >0) pep <- c(pep, structure(rep(list(character()), length(missPro)), names=missPro))
    }
  }
  pep }
#' Filter for size
#'
#' This function keeps only those elements of 'x' whose number of characters (as counted by \code{nchar})
#' lies between 'minSize' and 'maxSize' (both limits included).
#'
#' @param x main input (vector whose elements get filtered by their number of characters)
#' @param minSize (integer) minimum number of characters
#' @param maxSize (integer) maximum number of characters
#' @return elements of 'x' with a number of characters within the given range
#' @seealso \code{\link{filtSizeUniq}}; \code{\link{correctToUnique}}, \code{\link[base]{nchar}}
#' @examples
#' aa <- c("a", "abcde", "abcdefg")
#' .filtSize(aa, minSize=5, maxSize=6)
#' @export
.filtSize <- function(x, minSize=5, maxSize=36) {
  ## filter by size (no of characters)
  len <- nchar(x)
  tooShort <- len < minSize
  tooLong <- len > maxSize
  ## which() drops elements where the length could not be determined (NA)
  x[which(!(tooShort | tooLong))]
}
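# (web-extraction residue, not part of the source) Any scripts or data that you put into this service are public.
# (web-extraction residue, not part of the source) Add the following code to your website.
# (web-extraction residue, not part of the source) For more information on customizing the embed code, read Embedding Snippets.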