Nothing
#' Remove or rename enumerator tag/name (or remove entire enumerator) from tailing enumerators
#'
#' @description
#' This function allows indentifying, removing or renaming enumerator tag/name (or remove entire enumerator) from tailing enumerators (eg 'abc_No1' to 'abc_1').
#' A panel of potential candidates as combination of separator-symbols and separtor text/words will be tested to find if one matches all data.
#' In case the main input is a matrix, all columns will be tested independently to find the first column where one specific combination of separator-symbols and separtor text/words is found.
#' Several options exist for the output, the combination of separator-symbols and separtor text/words may be included, too.
#'
#' @details
#' Please note, that checking a variety of different separator text-word and separator-symbols may give an important number of combinations to check.
#' In particular, when automatic trimming of separator text-words is added (eg \code{incl="trim2"}), the complexity of associated searches increases quickly.
#' Thus, with large data-sets restricting the content of the arguments \code{nameEnum}, \code{sepEnum} and (in particular) \code{newSep} to the most probable terms/options
#' is suggested to help reducing demands on memory and CPU.
#'
#' In case the input \code{dat} is a matrix and multiple different numerator-types are found, only the first colum (from the left) will be treated.
#' If you which to remove/subsitute mutiple types of enumerators the function \code{rmEnumeratorName} must be run independently, see last example below.
#'
#' @param dat (character vecor or matrix) main input
#' @param nameEnum (character) potential enumerator-names
#' @param sepEnum (character) potential separators for enumerator-names
#' @param newSep (character) potential enumerator-names
#' @param incl (character) options to include further variants of the enumerator-names, use \code{"rmEnum"} for completely removing enumerator tag/name and digits
#' for differentr options of trimming names/tags from \code{nameEnum} one may use \code{anyCase}, \code{trim3} (trimming down to max 3 letters),
#' \code{trim2} (trimming to max 2 letters) or \code{trim1} (trimming down to single letter);
#' \code{trim0} works like \code{trim1} but also includes ' ', ie no enumerator tag/name in front of the digit(s)
#' @param silent (logical) suppress messages
#' @param debug (logical) display additional messages for debugging
#' @param callFrom (character) allow easier tracking of messages produced
#' @return This function returns a corrected vector (or matrix), or a list if \code{incl="rmEnumL"} containing $dat (corrected data),
#' $pattern (the combination of separator-symbols and separtor text/words found), and if input is matrix $column (which column of the input was identified and treated)
#' @seealso when the exact pattern is known \code{\link[base]{grep}} and \code{sub} may allow direct manipulations much faster
#' @examples
#' xx <- c("hg_Re1","hjRe2_Re2","hk-Re3_Re33")
#' rmEnumeratorName(xx)
#' rmEnumeratorName(xx, newSep="--")
#' rmEnumeratorName(xx, incl="anyCase")
#'
#' xy <- cbind(a=11:13, b=c("11#11","2_No2","333_samp333"), c=xx)
#' rmEnumeratorName(xy)
#' rmEnumeratorName(xy,incl=c("anyCase","trim2","rmEnumL"))
#'
#' xz <- cbind(a=11:13, b=c("23#11","4#2","567#333"), c=xx)
#' apply(xz, 2, rmEnumeratorName, sepEnum=c("","_"), newSep="_", silent=TRUE)
#'
#' @export
rmEnumeratorName <- function(dat, nameEnum=c("Number","No","#","Replicate","Sample"), sepEnum=c(" ","-","_"), newSep="", incl=c("anyCase","trim2"), silent=FALSE, debug=FALSE, callFrom=NULL) {
## remove or rename enumerator tag/name (or remove entire enumerator) from tailing enumerators (eg 'abc_No1' to 'abc_1'), only if found present in all instances
## dat (character voector or matrix)
## return character vector of same length as initial
## return-options : 1) repl/no EnumName 2) wo any enum, no digits 3) both
fxNa <- .composeCallName(callFrom, newNa="rmEnumeratorName")
if(isTRUE(debug)) silent <- FALSE
if(!isTRUE(silent)) silent <- FALSE
out <- dat
datOK <- length(dat) >0
if(datOK) {
ch1 <- if(length(dim(dat)) >0) colSums(!matrix(grepl("..[[:digit:]]+$", as.matrix(dat)), ncol=ncol(dat))) <1 else all(grepl("..[[:digit:]]+$", as.character(dat)))
datOK <- any(ch1, na.rm=TRUE)
useCol <- if(length(dim(dat)) >0) which(ch1) else NULL }
if(length(nameEnum) >0) { nameEnum <- naOmit(nameEnum)}
if(length(nameEnum) <1) { nameEnum <- c("Number","Replicate")
if(!silent) message(fxNa,"Empty or Invalid entry for 'sepEnum', setting to default") }
if(length(sepEnum) >0) { sepEnum <- naOmit(sepEnum)
chDu <- duplicated(nameEnum)
if(any(chDu)) nameEnum <- nameEnum[which(!chDu)]
}
if(length(sepEnum) <1) { sepEnum <- c(" ","-","_")
if(!silent) message(fxNa,"Empty or Invalid entry for 'sepEnum', setting to default") }
if(length(incl) <1) incl <- NA
if(length(newSep) >0) { newSep <- naOmit(newSep)[1]}
if(length(newSep) <1) { newSep <- ""
if(!silent) message(fxNa,"Empty or Invalid entry for 'newSep', setting to default") }
if(datOK) {
## prepare enumerator-patterns to test
chDu <- duplicated(nameEnum)
if(any(chDu)) nameEnum <- nameEnum[which(!chDu)]
if("anyCase" %in% incl) nameEnum <- unique(c(nameEnum, tolower(nameEnum), toupper(nameEnum)))
if("trim3" %in% incl) {tmp <- 3:max(nchar(nameEnum)); nameEnum <- unique(substring(rep(nameEnum, each=length(tmp)), 1, rep(3:max(nchar(nameEnum)), length(nameEnum)))) } else {
if("trim2" %in% incl) {tmp <- 2:max(nchar(nameEnum)); nameEnum <- unique(substring(rep(nameEnum, each=length(tmp)), 1, rep(2:max(nchar(nameEnum)), length(nameEnum)))) } else {
if("trim1" %in% incl || "trim0" %in% incl) {tmp <- 1:max(nchar(nameEnum)); nameEnum <- unique(substring(rep(nameEnum, each=length(tmp)), 1, rep(1:max(nchar(nameEnum)), length(nameEnum)))) }}}
if("trim0" %in% incl) {nameEnum <- unique(c(nameEnum, ""))
sepEnum <- unique(c(sepEnum, ""))}
chDu <- duplicated(sepEnum)
if(any(chDu)) sepEnum <- sepEnum[which(!chDu)]
nameEnum <- paste0(rep(sepEnum, length(nameEnum)), rep(nameEnum, each=length(sepEnum)),"[[:digit:]]+$")
if(debug) message(fxNa,"Ready to test ",length(nameEnum)," types of enumerators")
## main
if(length(nameEnum) >1) nameEnum <- nameEnum[order(nchar(nameEnum), decreasing=TRUE)] # sort to prefer using longest version
chEnum <- if(length(dim(dat)) ==2) apply(dat[,useCol], 2, function(y) sapply(nameEnum, function(x) all(grepl(x, y)))) else {
sapply(nameEnum, function(x) all(grepl(x, dat)))}
if(debug) {message(fxNa,"rEN0 .."); rEN0 <- list(dat=dat,out=out,nameEnum=nameEnum,newSep=newSep,sepEnum=sepEnum,chEnum=chEnum,newSep=newSep)}
if(any(chEnum)) { nameEnumInd <- which(chEnum, arr.ind=length(dim(dat)) ==2) # each hit in new line
if(debug) {message(fxNa,"rEN1 .."); rEN1 <- list(dat=dat,out=out,nameEnum=nameEnum,nameEnumInd=nameEnumInd,chEnum=chEnum,newSep=newSep)}
if(length(dim(dat)) >0) { ## input is matrix
usePat <- rownames(nameEnumInd)[1] # the (1st) pattern matching all input
nameEnumInd <- nameEnumInd[1,]
enu2 <- sub(usePat,"", dat[,useCol[nameEnumInd[2]]]) # wo nameEnumerator
curSep <- sepEnum[1+ ((nameEnumInd[1] -1) %/% length(sepEnum))]
if(debug) {message(fxNa,"rEN2"); rEN2 <- list()}
out[,useCol[nameEnumInd[2]]] <- if(length(grep("^rmEnum", incl)) >0) enu2 else {
paste0(enu2, if(length(newSep)==1) newSep else curSep, substr(dat[,useCol[nameEnumInd[2]]], nchar(enu2) +nchar(usePat) -12, max(nchar(dat[,useCol[nameEnumInd[2]]]))))}
if(debug && any(grepl(".L$", incl))) message("Matched matrix via column '",useCol[nameEnumInd[2]],"'")
if(any(grepl(".L$", incl))) out <- list(dat=out, column=useCol[nameEnumInd[2]], pattern=substr(usePat, 1, nchar(usePat) -13)) # optinal return as list including info which col was modified
} else { ## input is vector
if(length(nameEnumInd) >1) nameEnumInd <- nameEnumInd[1]
usePat <- names(nameEnumInd)
maxNch <- max(nchar(dat), na.rm=TRUE)
enu2 <- sub(names(nameEnumInd),"", dat) # wo nameEnumerator
curSep <- sepEnum[1+ ((nameEnumInd -1) %/% length(sepEnum))]
if(length(grep("^rmEnum", incl)) >0) out <- enu2 else {
out <- if(length(grep("^rmEnum", incl)) >0) enu2 else {
paste0(enu2, if(length(newSep)==1) newSep else curSep, substr(dat, nchar(enu2) +nchar(usePat) -12, maxNch)) }
}
if("all" %in% incl) out <- cbind(ini=dat, new=out)
if(debug && any(grepl(".L$", incl))) message(fxNa,"Matched vector")
if(any(grepl(".L$", incl))) out <- list(dat=out, pattern=substr(usePat, 1, nchar(usePat) -13)) # optinal return as list including info which col was modified
}
} else if(!silent) message(fxNa,"No conistent enumerator+digit combination found; nothing to do ..")
} else if(!silent) message(fxNa,"Invalid or empty input; nothing to do ..")
out }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.