miscfun: Misc Functions for daily convenience

Documented in caption cls getmatches lf libs list2matrix list2table rc read showxls strcompare

#' caption
#'
#' display string as caption (maybe underlined)
#'
#' @param txt
#'   Text to display as caption.
#'
#' @param color
#'   ANSI color code for caption.
#'
#' @param ulchar
#'   Character for underlining caption.
#'   Should be exactly 1 character or "" or NULL or NA for no underline.
#'
#' @param ulcolor
#'   ANSI color code for underline.
#'
#' @export
caption <- function(txt, color="1;37", ulchar="▔", ulcolor="31") {
    txt <- gsub("(^\\s+)|(\\s+$)", "", txt)
    cat("\033[",color,"m",txt,"\033[22;39m\n",sep="")
    if (!(is.null(ulchar) || is.na(ulchar) || (nchar(ulchar) == 0)))
        cat("\033[",ulcolor,"m",rep(ulchar,nchar(txt)),"\033[39m\n",sep="")
}

#' cls
#'
#' clear screen (including scrollback buffer (as opposed to Ctrl-L))
#'
#' @param hard
#'   if TRUE, a full reset is performed. Usefull if something has messed up your terminal.
#'
#' @export
cls <- function(hard=F) {
    system(if (hard) "reset" else "clear")
}

#' libs
#'
#' load multiple packages without startup messages but with colorfull progress
#'
#' @param pkg
#'   character vector containing package-names to load
#'
#' @export
libs <- function(pkg) {
    # this may linebreak. It should not. But we still want all the names.
    strtstr <- "\033[97mLoading Packages:\033[0m"
    contstr <- "\033[97m                |\033[0m"
    strtwidth <- 18     # nchar(strtstr)+1 but without ANSI-escapes
    liblines <- strsplit(strwrap(paste(pkg, collapse=" "), getOption("width")-strtwidth), " ")
    
    for (ll in 1:length(liblines)) {
        cat(ifelse(ll == 1, strtstr, contstr), liblines[[ll]],"\n")
    }
    cat(sprintf("\033[%dA\033[%dG", length(liblines), strtwidth))
    for (ll in 1:length(liblines)) {
        for (p in liblines[[ll]]) {
            tryCatch({
                suppressPackageStartupMessages(library(p, char=T, warn=F, quiet=T))
                cat("\033[0;36m", p)
            }, error=function(e) {
                cat("\033[0;31m", p)
            })
            flush.console()
        }
        cat(sprintf("\n\033[%dG", strtwidth))
    }
    cat("\033[0m\r")
}

#' lf
#'
#' List content of current global environment
#'
#' @param withsize
#'   If TRUE, approximate size of each object in memory is displayed.
#'   Be aware that this may take some time and create overhead in RAM if there
#'   are large objects in the global environment.
#'
#' @param concise
#'   If TRUE, headings will be omited; output will be about 6 lines less.
#'
#' @param datafirst
#'   If TRUE, objects are listed first, then functions.
#'
#' @export
lf <- function(withsize=FALSE, concise=FALSE, datafirst=FALSE, maxwidth=getOption("width")) {
    if (withsize) {
        # This may be very slow and create lots of overhead in RAM
        dat <- gdata::ll(".GlobalEnv",digits=1, dim=T, sort=T)
    } else {
        obj <- ls(envir=globalenv())
        if (length(obj) > 0) {
            dat <- data.frame(
                row.names=obj,
                Class=sapply(obj, function(x){ class(get(x))[1] }),
                KB=NA,
                Dim=sapply(obj, function(x){
                    d <- dim(get(x))
                    if (is.null(d)) d <- length(get(x))
                    paste(d, collapse=" x ")
                })
            )
        } else {
            dat <- data.frame()
        }
    }
    dat_fn <- dat[dat$Class=="function",]
    dat_dt <- dat[dat$Class!="function" & rownames(dat)!="ppbarenv",]

    print_func <- function(pren=F) {
        # TODO: Implement maxwidth
        if (nrow(dat_fn) > 0) {
            if (pren) cat("\n")
            if (!concise) caption("Functions", ulcolor=32)
            for (item in rownames(dat_fn)) {
                if (is.function(get(item))) {
                    para <- gsub("^function \\(|\\)  $","",capture.output(str(get(item), give.attr=F)))
                    #    neu = function(z, y=list(),a="b , f", c=17, d=matrix(), æ){}
                    # testcase fails and it's a bit inconsistent but it's better than nothing.
                    para <- gsub("([^ ,]+)(,| = ([^ ]+))? ", "\033[97m\\1\033[33m\\2 ", para)
                    cat("\033[92m",item,"\033[39m(\033[33m",para,"\033[39m)\n", sep="")
                }
            }
            invisible(TRUE)
        } else {
            invisible(FALSE)
        }
    }
    print_data <- function(pren=F) {
        # TODO: Implement maxwidth
        if (nrow(dat_dt) > 0) {
            if (pren) cat("\n")
            if (!concise) caption("Data",ulcolor=32)
            dcl <- capture.output(print(dat_dt[, if (withsize) c(1,3,2) else c(1,3)]))
            if (concise) dcl <- dcl[-1]
            # highlight!
            dcl <- sub("^([^ ]+) ", "\033[97m\\1\033[39m ", dcl) # all names
            dcl <- sub(" ([0-9 .x]+)$",  " \033[38;5;247m\\1\033[39m ",dcl) # dimensions or size
            dcl[1] <- paste("\033[97m",dcl[1],"\033[39m", sep="") # heading
            dcl <- sub(" (data.(frame|table)|matrix|array) ",   " \033[36m\\1\033[39m ",dcl)
            dcl <- sub(" (list) ",                              " \033[35m\\1\033[39m ",dcl)
            dcl <- sub(" (environment) ",                       " \033[31m\\1\033[39m ",dcl)
            dcl <- sub(" (numeric|logical|integer) ",           " \033[93m\\1\033[39m ",dcl)
            dcl <- sub(" (factor|character) ",                  " \033[33m\\1\033[39m ",dcl)
            cat(dcl,sep="\n")
            invisible(TRUE)
        } else {
            invisible(FALSE)
        }
    }
    if (datafirst) {
        hasdat <- print_data()
        print_func(hasdat)
    } else {
        hasfun <- print_func()
        print_data(hasfun)
    }
}

#' rc
#'
#' read from clipboard into vector (separated by line)
#' @export
rc <- function() {
    cbf = file("clipboard")
    targ <- readLines(cbf)
    close(cbf)
    return(targ)
}

.checkSysBin <- function(bin) {
    res <- attr(suppressWarnings(system(paste("which",bin), int=T, ignore.stdo=T, ignore.stde=T)),"status")
    return(length(res) == 0 || res[1] == 0)
}

#' read
#'
#' automagically read tables to data.frame or data.table (default).
#' Can read textfiles (csv) and compressed textfiles (bz2, xz, gzip) (using data.table::fread)
#' Can read fst-files (package fst), qs-files (package qs) and Rds.
#' Can also read xlsx-workbooks (using openxlsx::read.xlsx).
#' Will read directly from http(s), ftp.
#' Download and decompression is done via pipes, not temporary files (except for xlsx-workbooks).
#'
#' @param input
#'   Filename to read. May start with 'http:/', 'https://' or 'ftp://'.
#'   For local files, wildcard-expansion (globbing) is done (See Sys.glob)
#'   If the file is a (compressed) textfile it will be downloaded, decompressed and read on-the-fly without using tempfiles.
#'   If it's an xlsx-workbook, it will be downloaded to a tempfile, then read.
#'
#' @param data.table
#'   set to TRUE to return a data.table instead of data.frame
#'
#' @export
read <- function(input, data.table=T, ...) {
    suppressPackageStartupMessages(require(data.table))
    suppressPackageStartupMessages(require(fst))
    stopifnot(is(input, "character"))
    stopifnot(length(input) == 1)
    
    # list of known decompressors, must accept -d and output to stream.
    # fread is able to read gz and bz2 directly, but it decompresses to a tempfile.
    # using streams is way better.
    DECOMP <- c(gz="gzip", bz2="bzip2", xz="xz", zstd="zstd", zst="zstd")
    # check for faster versions
    if (.checkSysBin("pigz")) DECOMP["gz"] <- "pigz"
    if (.checkSysBin("pixz")) DECOMP["xz"] <- "pixz"
    if (.checkSysBin("pbzip2")) DECOMP["bz2"] <- "pbzip2"

    iscmd <- FALSE
    remote <- FALSE
    if (substring(input, 1, 6) %in% c("http:/","https:","ftp://")) {
        input <- sprintf('wget -q --show-progress -O- "%s"', input)
        remote <- TRUE
        iscmd <- TRUE
    } else {    # Auto-globbing
        if (!file.exists(input) && grepl("[*?[]", input)) {
            globinput <- Sys.glob(input)
            if (length(globinput) == 1) input <- globinput
            if (length(globinput) > 1) stop(paste(c("ambiguous glob:", globinput), collapse="\n"))
        }
    }

    extension <- last(unlist(strsplit(input, ".", fix=T)))
    if (extension %in% names(DECOMP)) {
        if (remote)
            input <- sprintf("%s | %s -d", input, DECOMP[extension])
        else
            input <- sprintf('%s -d <"%s"', DECOMP[extension], input)
        iscmd <- TRUE
    }

    if (toupper(extension) == "FST") {
        return(read_fst(input, as.data.table=data.table))
    }

    if (toupper(extension) == "QS") {
        return(qs::qread(input))
    }

    if (toupper(extension) == "RDS") {
        return(readRDS(input))
    }

    if (toupper(extension) %in% c("XLS", "XLSX")) {
        if (remote) {
            tmp <- tempfile()
            system(sprintf("%s > \"%s\"", input, tmp))
            input <- tmp
        }
        if (toupper(extension) == "XLSX") {
            # well yes, gdata::read.xls can read xlsx as well, but it is SLOW.
            suppressPackageStartupMessages(require(openxlsx))
            res <- read.xlsx(input, ...)
        } else {
            suppressPackageStartupMessages(require(gdata))
            res <- read.xls(input, ...)
        }
        if (data.table) {
            return(as.data.table(res))
        } else {
            return(res)
        }
    }
    if (iscmd) {
        # TODO: fread writes a tempfile even for cmd.
        # we do want to pipe it directly. But fread needs to map the complete input first.
        # so this won't work. Piping data into R is a completely different task!
        return(fread(cmd=input, data.table=data.table, ...))
    } else {
        return(fread(input, data.table=data.table, ...))
    }
}

#' read.fasta
#'
#' read FASTA-File
#'
#' @param file
#'   file to read
#'
#' @param to.upper
#'   convert everthing in input to upper case
#'
#' @export
read.fasta <- function (file, to.upper=FALSE) {
    raw.fa <- scan(file, what=character(0), sep="\n", quiet=TRUE)
    ind <- grep(">", raw.fa)
    if (length(ind) == 0)
        stop("read.fasta: no '>' id lines found, check file format")

    if (to.upper) raw.fa[-ind] <- toupper(raw.fa[-ind])

    ind.s <- ind + 1
    ind.e <- c((ind - 1)[-1], length(raw.fa))

    seq.format <- function(x) {
        paste(raw.fa[(x[1]:x[2])], collapse="")
    }
    store.fa <- apply(cbind(ind.s, ind.e), 1, seq.format)
    names(store.fa) <- gsub("^>", "", raw.fa[ind])

    return(store.fa)
}

#' write.fasta
#'
#' write FASTA-File. Beware! It is not sequential, so may be slow or bloaty for
#' larger Seqcence-vectors
#'
#' @param sq
#'   Sequences to be exported. They need names.
#'
#' @param file
#'   Write to this file.
#'
#' @param wrap
#'   Wrap sequence (not names) after this many chars
#'
#' @param to.upper
#'   If TRUE, sequences will be converted to uppercase
#'
#' @export
write.fasta <- function (sq, file, wrap=80, to.upper=FALSE) {
    stopifnot(!is.null(names(sq)))
    if (to.upper) sq <- toupper(sq)
    splot <- gsub(sprintf("(.{%d})",wrap), "\\1\n", sq)
    cat(sprintf(">%s\n%s\n", names(sq), splot), sep="\n", file=file)
}

#' showxls
#'
#' export (and show) something as excel-file.
#' @param mat
#'   This may be:
#'    - a single object (anything that may be coerced to a data frame). It will be exportet as a single-sheet-XLSX.
#'    - an environment or list. Every object in the list which can be coerced to a data.frame will be exportet as a single sheet in the resulting XLSX.
#'
#' @param filename
#'   Write to this file. If 'NA', it will be a tempfile() and directly opend in localc.
#'
#' @export
showxls <- function(mat, filename=NA) {
    fina <- ifelse(is.character(filename), filename, paste(tempfile(),".xlsx",sep=""))
    require(openxlsx)
    framename <- make.unique(substr(gsub("[^A-Za-z0-9_-]","_", match.call()[2]),1,30))
    if (is.data.frame(mat)) {
        # this fine
    } else if (is.matrix(mat)) {
        mat <- as.data.frame(mat)
    } else if (is.environment(mat) || is.list(mat)) {
        mat <- as.list(mat)
        framename <- names(mat)
        framename[nchar(framename) == 0] <- "Sheet"
        framename <- make.unique(framename)
        names(mat) <- framename
        framename <- NULL
        # filter functions
        mat <- mat[!sapply(mat, function(x) any(c("refObject", "function") %in% is(x)))]
        mat <- lapply(mat, function(x) if(is.null(dim(x))) as.data.frame(x) else x)
    } else if (is.vector(mat)) {
        mat <- data.frame(mat)
        colnames(mat) <- framename
    } else {
        mat <- data.frame(capture.output(print(mat)))
        colnames(mat) <- framename
    }
    write.xlsx(
        mat,
        fina,
        rowNames=TRUE,
        firstRow=FALSE,
        colNames=TRUE,
        firstCol=FALSE,
        #colWidths=ifelse(is.list(mat),NA,"auto"),
        sheetName=framename,
        headerStyle=createStyle(textDecoration="bold"),
        sep="|"
    )
    # show directly if no filename given
    if (is.na(filename)) system(paste ("localc" ,fina), ignore.stdout=T, ignore.stderr=T, wait=F)
}

#' list2table
#'
#' create a table from a list of vectors (even with different lengths)
#' each list in one column, simply side by side, preserving vector-ordering
#' (so row-association is meaningless)
#' @export
list2table <- function(x) {
    mali <- max(sapply(x,length))
    targ <- data.frame(row.names=1:mali)
    if (is.null(names(x))) names(x) <- paste0("L",1:length(x))
    for (l in 1:length(x)) {
        vec <- x[[l]]
        if (length(vec) < mali) vec <- c(vec, rep(NA, mali-length(vec)))
        targ[,names(x)[l]] <- vec
    }
    return(targ)
}

#' list2matrix
#'
#' create a matrix from a list of vectors (even with different lengths)
#' each list in one column but items will be alligned by content to create
#' a feature-matrix. Sym can be NA (items will be used as they are)
#' or a string (such es "X" or "🗸")
#'
#' @export
list2matrix <- function(lst, sym=NA) {
    # schnappt sich aus jedem vektor der liste die einträge und wurschtelt das in ne Matrix um
    # wenn sym NA ist, wird der ursprüngliche Bezeichner verwendet, ansonsten sym.
    tags <- na.omit(unique(unlist(lst)))
    res <- matrix(nrow=length(tags), ncol=length(lst))
    if (is.null(names(lst))) names(lst) <- sprintf("L_%d",1:length(lst))
    rownames(res) <- tags
    colnames(res) <- names(lst)
    for (j in names(lst)) {
        itm <- na.omit(lst[[j]])
        res[itm,j] <- if (is.na(sym)) itm else sym
    }
    return(res)
}


#' strcompare
#'
#' calculate the similarity of 2 strings
#' @export
strcompare <- function(x, y, motif=F) {
    x <- unlist(strsplit(x, "")) # compare this
    y <- unlist(strsplit(y, "")) # ...to that
#~     if (length(y) < length(x)) { # output will be length x
#~         tmp <- x; x <- y; y <- tmp
#~     }
    # pad second sequence
    y <- c(rep(" ", length(x)), y, rep(" ", length(x)))
    # calc all matches
    mtch <- sapply(1:(length(y)-length(x)), function(z) {
        sum(x==y[z:(length(x)-1+z)])
    })
    if (motif) {
        z <- which.max(mtch)
        m <- x==y[z:(length(x)-1+z)]
        x[!m] <- "."

        return (list(
            motif=paste(x, collapse=""),
            match_sequence=paste(y[z:(z+length(x)-1)], collapse=""),
            position=c(z-length(x)-1, z-1),
            matched=mtch[z] / length(x)
        ))
    } else {
        return(max(mtch) / length(x))
    }
}

#' getmatches
#'
#' get all regex-matches from a list of strings.
#' e.g.: unlist(getmatches("rs[0-9{3,16}", book)) will return all rs-ids from a
#  text
#'
#' @export
getmatches <- function(pattern, txt) {
    m <- gregexpr(pattern, txt)
    return(lapply(1:length(m), function(i) {
        if (m[[i]][1] > -1) {
            substr(txt[i], m[[i]], m[[i]] + attr(m[[i]], "match.length") - 1)
        }
    }))

}
inkeso/miscfun documentation built on Feb. 3, 2025, 5:33 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
inkeso/miscfun
Misc Functions for daily convenience

R/misc.r
In inkeso/miscfun: Misc Functions for daily convenience

Defines functions getmatches strcompare list2matrix list2table showxls read .checkSysBin rc lf libs cls caption

Documented in caption cls getmatches lf libs list2matrix list2table rc read showxls strcompare

R Package Documentation

Browse R Packages

We want your feedback!

inkeso/miscfun Misc Functions for daily convenience

R/misc.r In inkeso/miscfun: Misc Functions for daily convenience

Defines functions getmatches strcompare list2matrix list2table showxls read .checkSysBin rc lf libs cls caption

Documented in caption cls getmatches lf libs list2matrix list2table rc read showxls strcompare

R Package Documentation

Browse R Packages

We want your feedback!

inkeso/miscfun
Misc Functions for daily convenience

R/misc.r
In inkeso/miscfun: Misc Functions for daily convenience