R/package.R

Defines functions error error error error summ_glm glm.nb2 off_diag get_accuracy getcache reset.job idj viewtask output_cols diginjob dig_job dirfind parse_slurm_time refactor levelsinuse mrocdat mrocpred mroclab classystat reassign globasn make_chunks stack.dt dir2 table3 table2 max.col.narm min.col.narm is.empty f2int error error error error error fun.aggregate rand.string dedup process_tbl fix.cols lapply_dt interaction2 undebug.s4 debug.s4 symdiff numeq rm_mparen runion rintersect normpath rownames_to_column column_to_rownames `%K%` match3 qmat copys4 copyr6 peepr6 copy2 overwriteR6 file.not.exists file.exists2 rep_len2 rep_len rep_each seq_along2 `%=%` aggregate_roc make_ttsplit set_rngseed make_xfold rm_mparen `colnames2<-` `rownames2<-` `names2<-` colnames2 rownames2 names2 enframe clobber et nonacol colexists duped do.assign parasn do.cols rmcol .gc log10p unI mkst flag2int ne.na qtrim qq complete.cases2 seevar fillby nott printerr split_by metanames w.quantile DIM2 DIM NCOL2 nodim uncut isNA enframe_list sampler upset2 matrify2 jitter2 somejit make_dummy rows.any rows.all row.sort frac eNROW un softmax fitzscore save.r readin kpng ksvg kcpdf kpdf read.header OR AND copydt bool one_vs_other lg2f lst.emptyreplace lst.zerochar2empty lst.emptychar2na lst.emptychar2null lst.null2na lst.empty2null lst.empty2na lst.empty2zero lst.empty ws2und empty2na na2empty nan2zero na2zero na2true na2false nonzero2na false2na ix_sdiff intercalate_lst matrify intercalate setNames2 setAllNames setcols setRownames setColnames ret_na_err ret_err iderr check_lst selfname undup find_dups match2 match_s

Documented in aggregate_roc AND bool check_lst classystat clobber colexists colnames2 column_to_rownames complete.cases2 copy2 copydt copyr6 copys4 debug.s4 dedup diginjob dig_job DIM DIM2 dir2 dirfind do.assign do.cols duped empty2na enframe enframe_list eNROW et f2int false2na file.exists2 file.not.exists fillby find_dups fitzscore fix.cols flag2int frac .gc get_accuracy getcache glm.nb2 globasn iderr idj interaction2 intercalate intercalate_lst is.empty isNA ix_sdiff jitter2 kcpdf kpdf kpng ksvg lapply_dt levelsinuse lg2f log10p lst.empty lst.empty2na lst.empty2null lst.empty2zero lst.emptychar2na lst.emptychar2null lst.emptyreplace lst.null2na lst.zerochar2empty make_chunks make_dummy make_ttsplit make_xfold match2 match3 match_s matrify matrify2 max.col.narm metanames min.col.narm mkst mrocdat mroclab mrocpred na2empty na2false na2true na2zero names2 nan2zero NCOL2 ne.na nodim nonacol nonzero2na normpath nott numeq off_diag one_vs_other OR output_cols overwriteR6 parasn parse_slurm_time peepr6 printerr process_tbl qmat qq qtrim rand.string read.header readin reassign refactor rep_each rep_len rep_len2 reset.job ret_err ret_na_err rintersect rmcol rm_mparen rownames2 rownames_to_column rows.all rows.any row.sort runion sampler save.r seevar selfname seq_along2 setAllNames setColnames setcols setNames2 set_rngseed setRownames softmax somejit split_by stack.dt summ_glm symdiff table2 table3 un uncut undebug.s4 undup unI upset2 viewtask w.quantile ws2und

## #' @importMethodsFrom S4Vectors with
## #' @importMethodsFrom gUtils %&%
## #' @importMethodsFrom S4Vectors as.data.frame
## #' @importMethodsFrom S4Vectors split
## #' @importMethodsFrom S4Vectors mcols

## #' @import tools

#' @importFrom S4Vectors with as.data.frame split mcols `mcols<-`
#' @importFrom gUtils `%&%`
#' @importFrom methods setClass setGeneric setMethod setRefClass
#' @importFrom GenomeInfoDb `seqlevels<-` `seqlengths<-` `seqnames<-`
#' @importMethodsFrom GenomeInfoDb `seqlevels<-` `seqlengths<-` `seqnames<-`
#' @importFrom data.table key `:=` rbindlist dcast.data.table
#' @importFrom dplyr `%>%`
#' @importFrom ggplot2 ggplot aes scale_fill_manual scale_colour_manual scale_color_manual geom_histogram geom_hex geom_point geom_path geom_bar geom_errorbar geom_smooth facet_wrap facet_grid xlab ylab scale_y_continuous scale_x_continuous theme theme_bw element_blank element_line element_text rel position_dodge

#' @export 
mysep = "__ss__"

#' ordered match
#'
#' match x indices in terms of y
#' 
#' @name match_s
#' @param x A vector
#' @param y A vector
#' @return a vector of indices of \code{x} ordered by \code{y}
#' @examples
#' match_s(c(1,3,5,7,9), c(9, 5, 3))
#' match_s(c(1,3,5,7,9), c(3, 5, 9))
#' @export
match_s = function(x, y) {
    ## x_tmp = factor(as.character(x), levels = as.character(y))
    ## y_tmp = factor(as.character(y), levels = as.character(x))
    ## y_tmp[which(y_tmp %in% x_tmp)]
    x_tmp = setNames(as.character(x), as.character(x))
    x_ind = setNames(1:length(x), as.character(x))
    y_tmp = setNames(as.character(y), as.character(y))
    y_ind = setNames(1:length(y), as.character(y))
    ## return(x_ind[names(y_tmp)[which(y_tmp %in% x_tmp)]])
    these_idx = which(y_tmp %in% x_tmp)
    find_in_x = names(y_tmp)[these_idx]
    names(find_in_x) = y_ind[these_idx]
    return(setNames(x_ind[find_in_x], names(find_in_x)))
}

#' matches x in terms of y
#'
#' returns vector of indices of matches in x with length of vector = length(y)
#' non matches are NA
#'
#' @name match2
#' @export
match2 = function(x, y) {
    ## x_tmp = factor(as.character(x), levels = as.character(y))
    ## y_tmp = factor(as.character(y), levels = as.character(x))
    ## y_tmp[which(y_tmp %in% x_tmp)]
    x_tmp = setNames(as.character(x), as.character(x))
    x_ind = setNames(1:length(x), as.character(x))
    y_tmp = setNames(as.character(y), as.character(y))
    y_ind = setNames(1:length(y), as.character(y))
    ## return(x_ind[names(y_tmp)[which(y_tmp %in% x_tmp)]])
    these_idx = which(y_tmp %in% x_tmp)
    find_in_x = names(y_tmp)[these_idx]
    names(find_in_x) = y_ind[these_idx]
    new_index = rep(NA, length(y_tmp))
    new_index[y_ind[these_idx]] = x_ind[find_in_x]
    return(new_index)
}

#' find all duplicates in a vector
#' 
#' find all elements that have duplicates in a vector 
#'
#' @name find_dups
#' @param vec A vector
#' @return a logical vector with all positions marked TRUE being duplicates
#' @examples
#' find_dups(c(1,1,1,3,5))
#' find_dups(c(1,3,1,3,1))
#' find_dups(c(3,1,5,4,4))
#' @export
find_dups = function(..., re_sort = FALSE, sep = " ", as.logical = FALSE) {
  lst = as.list(match.call())[-1]
  ix = setdiff(seq_along(lst), which(names(lst) %in% c("re_sort", "sep", "as.logical")))
  ## cl = sapply(lst[ix], class)
  if (length(ix) > 1)
    vec = do.call(function(...) paste(..., sep = sep), list(...))
  else
      vec = unlist(list(...))
  duplg = duplicated(vec)
  if (as.logical) return(duplg)
  dupix = which(duplg); rm(duplg)
  if (!re_sort) {
    return(which(vec %in% vec[dupix]))
  } else {
    return(seq_along(vec)[order(vec[dupix])])
    ## matching_idx = match2(sort(vec[dupix]), vec)
    ## return(which(!is.na(matching_idx))[order(na.omit(matching_idx))])
  }
}

#' an alternative to base::unique() that preserves names
#'
#' unique(), but keep names
#' 
#' @name undup
#' @param obj an R vector
#' @return unique values of obj with names preserved
#' @export
undup = function(obj, fromLast = FALSE, nmax = NA, na.rm = FALSE) {
  dupid = which(duplicated(obj, fromLast = fromLast, nmax = NA))
  if (isTRUE(na.rm)) {
    naid = which(is.na(obj))
    dupid = union(naid, dupid)
  }
  if (NROW(dupid))
    return(obj[-dupid])
  else
    return(obj)
}



#' name a character vector to itself
#'
#' self explanatory
#' 
#' @name selfname
#' @param char A character vector
#' @return A named character vector
#' @export
selfname = function(char) {setNames2(char, char)}

#' @name check_lst
#'
#' @title checking list for elements that are errors
#'
#' checking a list for any elements that are try-errors
#' usually from an lapply(..., function(x) try({})) call
#'
#' @param lst A list
#' @return a logical vector marking which elements are try-errors"
#' @export
check_lst = function(
    lst,
    class_condition = c("simpleError", "try-error", "error", "errored", "err")
)
{
    ## unlist(lapply(lst, function(x) class(x)[1])) %in% class_condition
    return(vapply(lst, function(x) class(x)[1], "") %in% class_condition)
}

#' returns ids of list elements that are errors
#'
#' checking a list for any elements that are try-errors
#' usually from an lapply(..., function(x) try({})) call
#'
#' @name iderr
#' @param lst A list
#' @return a logical vector marking which elements are try-errors"
#' @export
iderr = function(
    lst,
    class_condition =
      c("simpleError", "try-error", "error", "errored", "err")
) {
  which(check_lst(lst))
}


#' same as iderr
#' 
#' checking a list for any elements that are try-errors
#' usually from an lapply(..., function(x) try({})) call
#' 
#' @name whicherr
#' @param lst A list
#' @return a logical vector marking which elements are try-errors"
#' @export
whicherr = iderr


#' @name ret_no_err
#' 
#' @title a wrapper around check_lst 
#'
#' @param lst A list (usually the output of lapply(... , function(x) try({}))
#' @return only returns the non-errors in the list
#' @export
ret_no_err = (
    function(
             lst,
             class_condition = (
                 c("simpleError", "try-error", "error", "errored", "err")
             )
             )
    {
        return(lst[!check_lst(lst, class_condition = class_condition)])
    }
)

#' @name ret_err
#' @title a wrapper around check_lst
#'
#'
#' @param lst A list (usually the output of lapply(... , function(x) try({}))
#' @return only returns the errors in the list
#' @export
ret_err = function(lst, class_condition = c("simpleError", "try-error", "error", "errored", "err"))
{
    return(lst[check_lst(lst, class_condition = class_condition)])
}

#' using check_lst to return
#'
#' @param lst A list (usually the output of lapply(... , function(x) try({}))
#' @return returns full length list with errored elements changed to NA
#' @export
ret_na_err = function(lst, class_condition = c("try-error", "error", "errored", "err"))
{
    lst[check_lst(lst, class_condition = class_condition)] = NA
    return(lst)
}


#' @name setColnames
#' @title convenience function to set column names
#'
#' @param object tabled object
#' @param nm names of the new columns
#' @return colnamed object
#' @export
setColnames = function(object = nm, nm = NULL, pattern = NULL, replacement = "") {
    if (!is.null(nm)) {
        if (is.null(names(nm)))
            colnames2(object)  = nm
        else {
            ix = match3(names(nm), colnames(object))
            colnames2(object)[ix] = nm
        }
    } else if (!is.null(pattern)) {
        colnames2(object) = gsub(pattern, replacement, colnames2(object))
    }
    return(object)
}

#' @name setcolnames
#' @title convenience function to set column names
#'
#' alias of setColnames
#'
#' @param object tabled object
#' @param nm names of the new columns
#' @return colnamed object
#' @export
setcolnames = setColnames


#' @name setRownames
#' @title convenience function to set row names
#'
#' sets rownames of an object
#'
#' @param object tabled object
#' @param nm names of the new columns
#' @return rownamed object
#' @export
setRownames = function(object = nm, nm) {
    rownames2(object) = nm
    object
}


#' @name setrownaes
#' @title convenience function to set row names
#'
#' sets rownames of an object
#'
#' @param object tabled object
#' @param nm names of the new columns
#' @return rownamed object
#' @export
setrownames = setRownames


#' @name setcols
#' @title convenience function to set columns
#'
#' sets columns of an object
#'
#' @param dt data frame/table or matrix
#' @param old integer or character or logical vector corresponding to current colnames in dt
#' @param new character vector for new column names
#' @return colnamed object
#' @export
setcols = function(dt, old, new) {
  if (inherits(dt, c("GRanges", "GRangesList"))) {
    mcols(dt) = setcols(mcols(dt), old, new)
    return(dt)
  }
  cnames = colnames2(dt)
  if (missing(new) || missing(old)) {
    if (missing(old)) {
      old = new
    }
    if (is.character(old) && length(old) == length(cnames)) {
      colnames(dt) = old
      return(dt)
    } else {
      stop("names provided must be same length as ncol(dt)")
    }
  }
  if (is.character(old)) {
    out = merge(
      data.frame(cnames, seq_along(cnames)),
      data.frame(cnames = old, new = new),
      allow.cartesian = T
    )
    cnames[out[[2]]] = out[[3]]
    colnames(dt) = cnames
    return(dt)
  }
  if (is.logical(old)) {
    if (! length(old) == length(cnames)) stop("logical vector must be same length as ncol(dt)")
    old = which(old)
  }
  cnames[old] = new
  colnames(dt) = cnames
  return(dt)
}


#' @name setAllNames
#' @title setAllNames
#'
#' convenience function to set all names of a vector
#'
#' @param vec vector
#' @param nm character
#' @return named vector
#' @export
setAllNames = function(vec, nm) {
    if (is.null(nm)) {
        return(setNames(vec, NULL))
    } else {
        if (length(nm) < length(vec)) {
            nm = rep_len2(nm, vec)
        }
    }
    return(setNames(vec, nm))
}

#' @name setNames2
#' @title setNames2
#'
#' convenience function to set all names of a vector
#'
#' @param vec vector
#' @param nm character
#' @return named vector
#' @export
setNames2 = function(vec, nm, useempty = FALSE) {
    names2(vec, useempty = useempty) = nm
    return(vec)
}


#' @name intercalate
#' @title collate two vectors together
#'
#' interleave vectors together
#'
#' @param ... A set of vectors to collate
#' @return a vector with values of inputs collated together
#' @examples
#' intercalate(c("a","d","f"), c("b", "e", "g", "z"))
#' @export
intercalate = function(..., fillin = FALSE) {
    isNested <- function(x) {
        if (class(x) != "list") {
            stop("Expecting 'x' to be a list")
        }
        out <- any(sapply(x, is.list))
        return(out)
    }
    args = list(...)
    if (isNested(args)) {
        args = unlist(args, recursive = F)
    }
    if (fillin) {
      mx = max(lengths(args))
      args = lapply(args, function(x) x[rep_len(seq_along(x), mx)])
    }
    s_along = lapply(args, seq_along)
    ord = order(do.call(c, s_along))
    conc = do.call(c, args)
    return(conc[ord])
}


#' @name matrify
#' @title take a data.table/frame, shave first column into rownames, make a matrix
#'
#' @description
#' convenience function to convert to matrix
#' and optionally filter out the first column
#' which may be rownames that are not relevant to further data analysis
#'
#' @param obj a data.frame or matrix
#' @param rm_col1 a logical vector specifying if the 1st column should be removed
#' @return a matrix
#' @export
matrify = function(obj, rm_col1 = TRUE, use.c1.rownames = TRUE) {
    if (rm_col1) {
        if (use.c1.rownames) {
            rn = as.matrix(obj[,1])[,1, drop = TRUE]
        } else {
            rn = NULL
        }
        setRownames(as.matrix(obj[,-1]), rn)
    } else {
        as.matrix(obj)
    }
}


#' @name intercalate_lst
#' @title collate lists together
#'
#' @description
#' interleave multiple vectors
#'
#' @param ... A set of lists to collate
#' @return a lists with elements collated together
#' @examples
#' intercalate(list(paste0(1:5, "_A")), list(paste0(1:3, "_B")), list(paste0(1:6, "_C")))
#' @export
intercalate_lst = function(...) {
    args = list(...)
    s_along = lapply(args, seq_along)
    ord = order(do.call(c, s_along))
    conc = do.call(c, args)
    return(conc[ord])
}


#' @name ix_sdiff
#' @title subset out indices
#'
#' @description
#' A function that subsets out indices and is robust to
#' if filt_out indices are integer(0)
#'
#' @return obj with indices indicated in filt_out taken out
#' @export
ix_sdiff = function(obj, filt_out) {
    if (is.null(nrow(obj))) {
        ix = 1:length(obj)
    } else {
        ix = 1:nrow(obj)
    }
    obj[! ix %in% filt_out]
}

#' @name false2na
#' @title replace FALSE with NA
#'
#' @description
#' A convenience function to set a logical vector with NAs to false
#'
#' @return A logical vector with NAs set to FALSE
#' @export
false2na = function(x) {
    if (is.logical(x))
        x[x %in% FALSE] = NA
    else
        stop("x is not logical")
    x
}

#' @name nonzero2na
#' @title replace 0 to with NA
#'
#' @description
#' A convenience function to set a logical vector with NAs to false
#'
#' @return A logical vector with NAs set to FALSE
#' @export
nonzero2na = function(x) {
    if (is.integer(x))
        naval = NA_integer_
    else if (is.double(x))
        naval = NA_real_
    else
        stop("x is not double or integer")
    x[x > 0] = naval
    x
}

#' @name na2false
#' @title replace logical vector with NA to FALSE
#'
#' @description
#' A convenience function to set a logical vector with NAs to false
#'
#' @return A logical vector with NAs set to FALSE
#' @export
na2false = function(v)
{
    ## v = ifelse(is.na(v), v, FALSE)
    ## v[is.na(v)] = FALSE
    v[isNA(v)] = FALSE
    ## mode(v) = "logical"
    v
}


#' @name na2true
#' @title replace logical vector with NA to TRUE
#'
#' @description
#' A convenience function to set a logical vector with NAs to TRUE
#'
#' @return A logical vector with NAs set to TRUE
#' @export
na2true = function(v)
{
    ## v = ifelse(is.na(v), v, FALSE)
    ## v[is.na(v)] = TRUE
    v[isNA(v)] = TRUE
    ## as.logical(v)
    ## mode(v) = "logical"
    v
}

#' @name na2zero
#' @title na2zero
#'
#' A convenience function to set a numeric vector with NAs to zero
#'
#' @return A numeric vector with NAs set to zero
#' @export
na2zero = function(v) {
    ## v = ifelse(is.na(v), v, FALSE)
    ## v[is.na(v)] = 0
    v[isNA(v)] = 0
    return(v)
}

#' @name nan2zero
#' @title nan2zero
#'
#' A convenience function to set a numeric vector with NaNs to zero
#'
#' @return A numeric vector with NaNs set to zero
#' @export
nan2zero = function(v) {
    v[is.nan(v)] = 0
    return(v)
}


#' @name na2empty
#' @title na2empty
#'
#' A convenience function to set a character vector with NAs to an
#' empty character
#'
#' @return A character vector
#' @export
na2empty = function(v) {
    ## v = ifelse(is.na(v), v, FALSE)
    ## v[is.na(v)] = ""
    v[isNA(v)] = ""
    as.character(v)
}



#' Set vector to NA
#' 
#' A convenience function to set a character vector with NAs to an
#' empty character
#'
#' @name empty2na
#' @return A character vector
#' @export
empty2na = function(v) {
    ## v = ifelse(is.na(v), v, FALSE)
    v[nchar(v) == 0] = as.character(NA)
    v
}


#' Clean up whitespace from columns of data.frame-like object
#'
#' make data.frame or data.table column name whitespaces into
#' underscores and remove end whitespaces
#'
#' @return A character vector
#' @name ws2und
#' @export
ws2und = function(df)
{
  data.table::setnames(
    df,
    gsub(
      "^_|_$", "",
      gsub(
        "_{2,}", "_",
        gsub(
          "(\\/)|(\\.)|( )|\\(|\\)|\\#", "_", trimws(colnames(df))
        )
      )
    )
  )
    return(df)
}


#' @name lst.empty
#' 
#' @title lst.empty
#'
#' A logical vector to select which list elements are empty
#'
#' @return A list
#' @export lst.empty
lst.empty = function(x) {
    lengths(x) == 0
    ## S4Vectors::elementNROWS(x) == 0
}

#' Convert empty elements to zero.
#'
#' set empty list elements to zero
#'
#' @return A logical vector of length(x)
#' @name lst.empty2zero
#' @export lst.empty2zero
lst.empty2zero = function(x) {
    x[lengths(x) == 0] = 0
    ## x[S4Vectors::elementNROWS(x) == 0] = 0
    x
}


#' empty list elements set to NA
#'
#' set empty list elements to NA
#'
#' @return A list
#' @name lst.empty2na
#' @export lst.empty2na
lst.empty2na = function(x) {
    x[lengths(x) == 0] = NA
    ## x[S4Vectors::elementNROWS(x) == 0] = NA
    ## x[x == "character(0)"] = NA
    ## x[x == "numeric(0)"] = NA
    ## x[x == "logical(0)"] = NA
    ## x[x == "integer(0)"] = NA
    x
}


#' empty list elements set to NULL
#'
#' set empty list elements to NULL
#'
#' @return A list
#' @name lst.empty2null
#' @export lst.empty2null
lst.empty2null = function(x) {
    x[lengths(x) == 0] = NULL
    x
}


#' NULL elements of list to NA
#'
#' set NULL list elements to NA
#'
#' @name lst.null2na
#' @return A list
#' @export lst.null2na
lst.null2na = function(x) {
    x[x == "NULL"] = NA
    x
}

#' empty character elements of list to NULL
#'
#' set empty character to null
#'
#' @name lst.emptychar2null
#' @return A list
#' @export lst.emptychar2null
lst.emptychar2null = function(x) {
    x[!nzchar(x)] = NULL
    x
}


#' empty character elements of list to NA
#'
#' set empty character to NA
#'
#' @name lst.emptychar2na
#' @return A list
#' @export lst.emptychar2na
lst.emptychar2na = function(x) {
    x[!nzchar(x)] = NA_character_
    x
}


#' @name lst.zerochar2empty
#' @title zero length character elements to empty char
#'
#' set 0 length chracter to empty
#'
#' @return A list
#' @export lst.zerochar2empty
lst.zerochar2empty = function(x) {
    x[x == "character(0)"] = list("")
    x
}

#' @name lst.empty2replace
#' @title length 0 elements of list to replace with a value
#'
#' set empty elements to a replacement value
#'
#' @return A list
#' @export lst.emptyreplace
lst.emptyreplace = function(x, replace = NA) {
    x[lengths(x) == 0] = replace
    x
}


################################################## general R utilities
##################################################
##################################################

#' @name lg2f
#' @title logical to factor
#'
#' @description
#' logical to factor
#'
#' @export
lg2f = function(x, labels = NULL) {
  if (is.logical(x))
    if (is.null(labels))
      return(factor(x, c(FALSE, TRUE)))
    else
      return(factor(x, c(FALSE, TRUE), labels))
}


#' one level vs other
#'
#' create factor levels
#'
#' @name one_vs_other
#' @export
one_vs_other = function(fac, ref_level = 1) {
    lvs = levels(fac)
    ref = lvs[ref_level]
    others = lvs[-ref_level]
    if (NROW(lvs) == 1) {
        stop("only 1 level, no comparisons to be made")
    }
    ix = seq_len(NROW(lvs))
    mat = t(combn(ix, 2))
    mat = cbind(mat[,2], mat[,1])
    comparisons = matrix(lvs[mat], nrow = nrow(mat), ncol = ncol(mat))
    lst = lapply(
        setNames(
            seq_len(NROW(comparisons)),
            paste(comparisons[,1], comparisons[,2], sep = "__vs__")
        ), function(x) {
            ref.lv = comparisons[x,2]
            test.lv = comparisons[x,1]
            ref = fac == ref.lv
            test = fac == test.lv
            eligible = ref | test
            list(
              test = test,
              eligible = eligible,
              comparison = paste(test.lv, ref.lv, sep = "__vs__")
            )
        })
    return(lst)
}


#' Clean Up Boolean Logic
#'
#' wrapping around boolean statements to ignore NULL or length(0) vectors
#' in a series of boolean statements
#'
#' @name bool
#' @export
bool = function(x, nullignore = TRUE, na2false = TRUE) {
    x1.2394823987394875 = bools.234987987293487329487 = substitute(x)
    bools.234987987293487329487 = as.list(bools.234987987293487329487)
    arg1.143487587 = logical(0)
    arg2.143487587 = logical(0)
    ## lst = list()
    boollist.239487239857928752938473987 = list()
    counter.12934872394872394897 = 1
    if (!toString(bools.234987987293487329487[[1]]) %in% c("&", "|")) return(eval(x1.2394823987394875))
    while(length(bools.234987987293487329487) > 1 && toString(bools.234987987293487329487[[1]]) %in% c("&", "|")) {
        boollist.239487239857928752938473987[[counter.12934872394872394897]] = eval(bools.234987987293487329487[[1]])
        ## lst[[1]] = eval(bools.234987987293487329487[[length(bools.234987987293487329487)]])
        if (counter.12934872394872394897 > 1) {
            arg2.143487587 = eval(bools.234987987293487329487[[length(bools.234987987293487329487)]])
            if (anyNA(arg2.143487587) && na2false) {
                arg2.143487587 = na2false(arg2.143487587)
            }
            if (length(arg2.143487587) == 0 && length(arg1.143487587) && nullignore) {
                arg2.143487587 = arg1.143487587
            }
        } else {
            arg1.143487587 = eval(bools.234987987293487329487[[length(bools.234987987293487329487)]])
            if (anyNA(arg2.143487587) && na2false) {
                arg1.143487587 = na2false(arg1.143487587)
            }
            if (length(arg1.143487587) == 0 & nullignore) {
                arg1.143487587 = logical(0)
            }
        }

        bools.234987987293487329487 = bools.234987987293487329487[-length(bools.234987987293487329487)]
        tmp.2394872958349587987 = as.list(bools.234987987293487329487[[length(bools.234987987293487329487)]])
        if (length(tmp.2394872958349587987) > 1 && toString(tmp.2394872958349587987[[1]]) %in% c("&", "|"))
            bools.234987987293487329487 = tmp.2394872958349587987
        if (counter.12934872394872394897 > 1 && length(arg1.143487587) && length(arg2.143487587)) {
            arg1.143487587 = boollist.239487239857928752938473987[[counter.12934872394872394897 - 1]](arg1.143487587, arg2.143487587)
        } else if (counter.12934872394872394897 > 1 && length(arg1.143487587) == 0) {
            arg1.143487587 = arg2.143487587
        }
        counter.12934872394872394897 = counter.12934872394872394897 + 1
    }
    return(arg1.143487587)
}


#' copy data frame/table columns to a new data table with forced column structure
#'
#' Ensure that all columns in out data table possess the specified columns
#' in which default values for missing columns will be NA valuess
#'
#' @name copydt
#' @export
copydt = function(dt, columns, as.data.table = TRUE) {
    out = data.frame()[seq_len(max(NROW(dt), 1)),]
    ix = seq_len(NROW(columns))
    outname = names(columns)
    badnames = !nzchar(outname) | is.na(outname)
    if (is.null(outname)) outname = columns
    if (any(badnames)) outname[badnames] = columns[badnames]
    for (i in ix) {
        cn = columns[i]
        nm = outname[i]
        if (is.null(dt[[cn]]))
            out[[nm]] = NA
        else
            out[[nm]] = rep_len(dt[[cn]], NROW(out))
    }
    if (NROW(dt) == 0) {
        out = out[0,,drop=F]
    }
    if (as.data.table) {
        setDT(out)
    }
    return(out)
}


#' test boolean AND across multiple vectors
#'
#' testing boolean across multiple vectors for simplifying interactive coding
#'
#' @name AND
#' @export
AND = function(FUN = identity, ...) {
    lst = lapply(list(...), FUN)
    Reduce(function(x,y) {x & y}, lst)
}

#' test boolean OR across multiple vectors
#'
#' @name OR
#' @export
OR = function(FUN = identity, ...) {
    lst = lapply(list(...), FUN)
    Reduce(function(x,y) {x | y}, lst)
}

#' mclapply on a table split by a column
#'
#' Do function on splits of a table in parallel
#'
#' @name dtapply
#' @export
dtapply = function (
    tbl,
    split_col = "system_id",
    FUN,
    mc.cores = 1,
    mc.strict = TRUE,
    split_col_sort = FALSE,
    mclapply = parallel::mclapply,
    ...
) {
    ## spl = tbl[[split_col]]
    ## dups = logical(NROW(tbl))
    dups = 0
    for (x in split_col) {
        dups = dups + anyDuplicated(tbl[[x]])
    }
    if (dups > 0) {
        if (isTRUE(mc.strict)) errfun = stop else errfun = warning
        errfun("split column contains duplicates - some entries will have multiple paths")
    }
    ## if (!isTRUE(split_col_sort)) {
    ##     spl = factor(spl, unique(spl))
    ## }
    ## lst = split(tbl, spl)
    lst = split_by(tbl, split_col, split_col_sort = split_col_sort)
    parallel::mclapply(lst, mc.cores = mc.cores, FUN, ...)
}


#' read header of file
#'
#' Get header of file (apply to vcf, sam, or bam formats)
#' 
#' @name read.header
#' @export read.header
read.header = function(path, header.char = "#") {
    n = 0
    f = file(path, open = "r")
    on.exit(close(f))
    out = character(0)
    rl = readLines(f, n = 1)
    while(grepl(paste0("^", header.char), rl)) {
        n = n + 1
        out = c(out, rl)
        rl = readLines(f, n = 1)
    }
    return(list(output = out, nlines = n))
}


#' open pdf device with defaults
#' 
#' output to pdf device in default directory and keep open
#'
#' @name kpdf
#' @export
kpdf = function(
    filename = "plot.pdf",
    height = 10, width = 10, 
    h = height, w = width, cex = 1,
    title = NULL, byrow = TRUE, 
    dim = NULL, cex.title = 1,
    oma.scale = 0, oma.val = c(1, 1, 1, 1),
    useDingbats = FALSE, res = 0, pars = list(), 
    ...
) {
    this.env = environment()
    if (length(cex) == 1) 
        cex = rep(cex, 2)
    height = h
    width = w
    height = cex[1] * height
    width = cex[2] * width
    DEFAULT.OUTDIR = Sys.getenv("PPDF.DIR")
    if (nchar(DEFAULT.OUTDIR) == 0) 
        DEFAULT.OUTDIR = normalizePath("~/public_html/")
    if (!grepl("^[~/]", filename)) 
        filename = paste(DEFAULT.OUTDIR, filename, sep = "/")
    if (!file.exists(file.dir(filename))) 
        system(paste("mkdir -p", file.dir(filename)))
    cat("rendering to", filename, "\n")
    dev = pdf(file = filename, height = height, width = width, useDingbats = useDingbats, ...)
    if (!is.null(dim)) {
            if (length(dim) == 1) 
                dim = rep(dim, 2)
            dim = dim[1:2]
            graphics::layout(matrix(1:prod(dim), nrow = dim[1], 
                ncol = dim[2], byrow = byrow))
    }
    if (!is.null(title)) 
        title(title, cex.main = cex.title * max(cex))
    return(dev)
}


#' open Cairo pdf device with defaults
#' 
#' output to Cairo pdf device in default directory and keep open
#'
#' @name kcpdf
#' @export
kcpdf = function(
    filename = "plot.pdf",
    height = 10, width = 10, 
    h = height, w = width, cex = 1,
    title = NULL, byrow = TRUE, 
    dim = NULL, cex.title = 1, oma.scale = 0,
    oma.val = c(1, 1, 1, 1),
    useDingbats = FALSE, res = 0,
    pars = list(), 
    ...
) {
    this.env = environment()
    if (length(cex) == 1) 
        cex = rep(cex, 2)
    height = h
    width = w
    height = cex[1] * height
    width = cex[2] * width
    DEFAULT.OUTDIR = Sys.getenv("PPDF.DIR")
    if (nchar(DEFAULT.OUTDIR) == 0) 
        DEFAULT.OUTDIR = normalizePath("~/public_html/")
    if (!grepl("^[~/]", filename)) 
        filename = paste(DEFAULT.OUTDIR, filename, sep = "/")
    if (!file.exists(file.dir(filename))) 
        system(paste("mkdir -p", file.dir(filename)))
    cat("rendering to", filename, "\n")
    dev = Cairo::Cairo(
      file = filename, height = height,
      width = width,
      useDingbats = useDingbats,
      type = "pdf", units = "in",
      ...
    )
    if (!is.null(dim)) {
            if (length(dim) == 1) 
                dim = rep(dim, 2)
            dim = dim[1:2]
            graphics::layout(matrix(1:prod(dim), nrow = dim[1], 
                ncol = dim[2], byrow = byrow))
    }
    if (!is.null(title)) 
        title(title, cex.main = cex.title * max(cex))
    return(dev)
}

#' open svg device with defaults
#' 
#' output to svg device in default directory and keep open
#'
#' @name ksvg
#' @export
ksvg = function(filename = "plot.svg", height = 10, width = 10,
                h = height, w = width, cex = 1, title = NULL, byrow = TRUE, 
                dim = NULL, cex.title = 1, oma.scale = 0, units = 'in', oma.val = c(1, 1, 1, 1),
                useDingbats = FALSE, res = 300, pars = list(),
                ...) {
    this.env = environment()
    if (length(cex) == 1) 
        cex = rep(cex, 2)
    height = h
    width = w
    height = cex[1] * height
    width = cex[2] * width
    DEFAULT.OUTDIR = Sys.getenv("PPDF.DIR")
    if (nchar(DEFAULT.OUTDIR) == 0) 
        DEFAULT.OUTDIR = normalizePath("~/public_html/")
    if (!grepl("^[~/]", filename)) 
        filename = paste(DEFAULT.OUTDIR, filename, sep = "/")
    if (!file.exists(file.dir(filename))) 
        system(paste("mkdir -p", file.dir(filename)))
    cat("rendering to", filename, "\n")
    svg(file = filename, height = height, width = width, ...)
    if (!is.null(dim)) {
        if (length(dim) == 1) 
            dim = rep(dim, 2)
        dim = dim[1:2]
        graphics::layout(matrix(1:prod(dim), nrow = dim[1], 
                                ncol = dim[2], byrow = byrow))
    }
    if (!is.null(title)) 
        title(title, cex.main = cex.title * max(cex))
}
    
    


#' open png device with defaults
#' 
#' output to png device in default directory and keep open
#'
#' @name kpng
#' @export
kpng = function(filename = "plot.png", height = 10, width = 10,
                h = height, w = width, cex = 1, title = NULL, byrow = TRUE, 
                dim = NULL, cex.title = 1, oma.scale = 0, units = 'in', oma.val = c(1, 1, 1, 1),
                useDingbats = FALSE, res = 300, pars = list(),
                ...) {
    this.env = environment()
    if (length(cex) == 1) 
        cex = rep(cex, 2)
    height = h
    width = w
    height = cex[1] * height
    width = cex[2] * width
    DEFAULT.OUTDIR = Sys.getenv("PPDF.DIR")
    if (nchar(DEFAULT.OUTDIR) == 0) 
        DEFAULT.OUTDIR = normalizePath("~/public_html/")
    if (!grepl("^[~/]", filename)) 
        filename = paste(DEFAULT.OUTDIR, filename, sep = "/")
    if (!file.exists(file.dir(filename))) 
        system(paste("mkdir -p", file.dir(filename)))
    cat("rendering to", filename, "\n")
    dev = png(file = filename, height = height, width = width, units = units, res = res, ...)
    if (!is.null(dim)) {
        if (length(dim) == 1) 
            dim = rep(dim, 2)
        dim = dim[1:2]
        graphics::layout(matrix(1:prod(dim), nrow = dim[1], 
                                ncol = dim[2], byrow = byrow))
    }
    if (!is.null(title)) 
        title(title, cex.main = cex.title * max(cex))
    return(dev)
}



#' flexible file opening
#'
#' open text, vcf, or rds files flexibly
#'
#' @name readin
#' @export
readin <- function(x, txt.fun = data.table::fread,
                   vcf.fun = skidb::read_vcf, other.txt = NULL,
                   alt.fun = NULL, alt.ext = NULL,
                   other.compress = NULL) {
    compression_ext = c("gz", "bz2", "xz")
    if (!is.null(other.compress) && is.character(other.compress)) {
        compression_ext = c(compression_ext, na.omit(other.compress))
    }
    compression.ptrn = paste0(paste0("(.", sub("^\\.", "", compression_ext), ")"), collapse = "|")
    txt_ext = c("txt", "csv", "tsv")
    if (!is.null(other.txt) && is.character(other.txt)) {
        txt_ext = c(txt_ext, na.omit(other.txt))
    }
    if (!is.null(alt.fun) && is.function(alt.fun) &&
        !is.null(alt.ext) && is.character(alt.ext)) {
        .NotYetImplemented()
    }
    vcf_ext = c("vcf")
    rds_ext = c("rds")
    txt.ptrn = paste0(paste0("(.", sub("^\\.", "", c(txt_ext, other.txt)), ")"), collapse = "|")
    txt.ptrn = paste0('(', txt.ptrn, ")(", compression.ptrn, "){0,}$")
    rds.ptrn = paste0(paste0("(.", sub("^\\.", "", rds_ext), ")"), collapse = "")
    rds.ptrn = paste0('(', rds.ptrn, ")(", compression.ptrn, "){0,}$")
    vcf.ptrn = paste0(paste0("(.", sub("^\\.", "", vcf_ext), ")"), collapse = "")
    vcf.ptrn = paste0('(', txt.ptrn, ")(", compression.ptrn, "){0,}$")
    ## is.txt = grepl("((.txt)|(.csv)|(.tsv))((.gz)|(.bz2)|(.xz)){0,}$", x, T)
    is.txt = grepl(txt.ptrn, x, T)
    is.rds = grepl(rds.ptrn, x, T)
    ## is.vcf = grepl("((.vcf))((.gz)|(.bz2)|(.xz)){0,}$", x, T)
    is.vcf = grepl(vcf.ptrn, x, T)
    if (isTRUE(is.txt))
        return(txt.fun(x))
    else if (isTRUE(is.rds))
        return(readRDS(x))
    else if (isTRUE(is.vcf))
        return(vcf.fun(x))
    else if (!file.exists(x) && !identical(x, "/dev/null")) {
        warning("File does not exist")
        return(structure(list(), msg = "File does not exist"))
    } else
        stop("file extension not recognized")
}


#' save R data session
#'
#' save the current 
#'
#' @name save.r
#' @export save.r
save.r <- function(file, note = NULL, verbose = FALSE, compress = FALSE, ...) {
    stamped.file = gsub(".RData$", paste(".", timestamp(), ".RData", 
                                         sep = ""), file, ignore.case = TRUE)
    if ( compress ) {
        message("Compression of the .RData object set to TRUE... Saving will take a while...")
    } else {
        message("Compression of the .RData object set to FALSE... Saving will be faster than with compression.")
        message("Keep an eye on disk space usage!")
    }
    save.image(stamped.file, compress = compress, ...)
    if (file.exists(file)) {
        if (verbose) 
            message("Removing existing ", file)
        system(paste("rm", file))
    }
    if (verbose) 
        message("Symlinking ", file, " to ", stamped.file)
    system(paste("ln -sfn", normalizePath(stamped.file), file))
    if (!is.null(note)) {
        writeLines(note, paste0(stamped.file, ".readme"))
    }
}

#' @name trans
#' @title transpose a list
#'
#' @description
#'
#' @export
trans <- function (lst, ffun = list) 
{
    do.call(Map, c(f = ffun, lst))
}

#' transpose a list
#'
#' transpose all list elements
#'
#' @name transp
#' @export
transp <- trans

#' calculate z score
#'
#' busywork function to calculate z score with mean and stddev
#' stores mean and stddev as attributes
#'
#' @name fitzscore
#' @export
fitzscore <- function(x, mean, stddev) {
    structure((x - mean) / stddev, mean = mean, stddev = stddev)
}

#' calculate softmax
#'
#' self explanatory
#'
#' @name softmax
#' @export
softmax <- function(x, neg = FALSE) {
    if (neg) {
        numer = exp(-x)
    } else {
        numer = exp(x)
    }
    denom = sum(numer)
    return(numer / denom)
}


#' shortcut to check unique entries
#'
#' get ids of unique entries
#'
#' @name un
#' @export un
un <- function(..., i = 1, sep = " ") {
  lst = as.list(match.call())[-1]
  ix = setdiff(seq_along(lst), which(names(lst) %in% c("ix")))
  if (length(ix) > 1) 
    vec = do.call(function(...) paste(..., sep = sep), list(...))
  else vec = unlist(list(...))
  unique_ix = which(!duplicated(vec))
  which(vec %in% vec[unique_ix][i])
}


#' does vapply NROW
#'
#' get either length or nrow (if dim(x)[1] > 1)
#' of each list element
#'
#' @name eNROW
#' @export eNROW
eNROW <- function(x) {
    return(vapply(x, NROW, integer(1)))
}


#' calculate fraction
#' 
#' fraction from a vector of numeric values
#'
#' @name frac
#' @export frac
frac = function(x) {
    x / sum(x)
}


#' sort rows of integer matrix
#'
#' Came from stackoverflow
#' https://stackoverflow.com/questions/9506442/fastest-way-to-sort-each-row-of-a-large-matrix-in-r
#' Rfast::rowsort is faster than the base function but it is still very fast
#'
#' @name row.sort
#' @export row.sort
row.sort <- function(a, use_rfast = TRUE) {
    out = tryCatch(Rfast::rowSort(a),
                   error = function(e) matrix(a[order(row(a), a)], ncol = ncol(a), byrow = TRUE))
    return(out)
}



#' test whether all row entries are TRUE
#'
#' perform all() rowwise
#'
#' @name rows.all
#' @export rows.all
rows.all <- function(mat) {
    vec = logical(NROW(mat))
    for (i in seq_len(NROW(mat))) {
        vec[i] = all(mat[i,])
    }
    return(vec)
}

#' test whether any row entries are TRUE
#'
#' perform any() rowwise
#'
#' @export rows.any
rows.any <- function(mat) {
    vec = logical(NROW(mat))
    for (i in seq_len(NROW(mat))) {
        vec[i] = any(mat[i,])
    }
    return(vec)
}

#' make table of dummy encodings
#'
#' not sure what I used this for
#'
#' @name make_dummy
#' @export
make_dummy = function(x, field = ".", sep = ".", levelsOnly = FALSE, fullRank = FALSE, return.data.table = T) {
  paste(field, collapse = ", ")
  this.str = paste0(" ~ ", field)
  mod = caret::dummyVars(this.str, data = x, sep = sep, levelsOnly = levelsOnly,
    fullRank = fullRank)
  out = caret:::predict.dummyVars(mod, newdata = x)
  if (return.data.table)
    return(as.data.table(out))
  else
    return(out)
}


#' add tiny jitter
#'
#' jitter with seed
#'
#' @name somejit
#' @export
somejit <- function(x, factor = 1e-6) {
    set.seed(10); jitter(x, factor = factor)
}

#' jitter with consistent seed
#'
#' jitter with consistent random seed
#'
#' @name jitter2
#' @export
jitter2 = function(x, factor = 1, amount = NULL, seed = 10) {
    set_rngseed(seed)
    jitter(x, factor = factor, amount = amount)
}


#' create matrix from table like obj with dplyr syntax
#'
#' apply dplyr::select syntax to a data frame and then create matrix
#'
#' @name matrify2
#' @export
matrify2 = function(df, ..., rownames = 1, rnsep = " ") {
    vec = c()
    arglst = match.call(expand.dots = F)$`...`
    if (NROW(arglst)) {
        for (thisexpr in arglst) {
            vec = c(vec, deparse(substitute(thisexpr)))
        }
        expr = paste(vec, collapse = ", ")
    } else {
        expr = "-1"
    }
    if ( !inherits(df, "data.frame") && (!is.null(dim(df)) | is.list(df)) ) {
        df = as.data.frame(df)
    }
    rnexpr = deparse(substitute(rownames))
    rncols = eval(parse(text = sprintf("dplyr::select(df, %s)", rnexpr)))
    rn = dodo.call2(paste, c(as.list(rncols), sep = rnsep))
    mat = as.matrix(eval(parse(text = sprintf("dplyr::select(df, %s)", expr))))
    rownames(mat) = rn
    return(mat)
}



#' wrapper around UpSetR::upset()
#'
#' convenience function for better UpSetR defaults
#'
#' @name upset2
#' @export
upset2 = function(data, text.scale = 1.5, mb.ratio = c(0.7, 0.3), empty.intersections = "on", ...) {
    if (class(data)[1] != "data.frame") data = as.data.frame(data)
    coercethese = which(sapply(data, nott(inherits), c("integer")))
    for (i in coercethese) {
        data[[i]] = sign(data[[i]])
    }
    UpSetR::upset(data, order.by = "freq", text.scale = text.scale, keep.order = TRUE, sets = rev(colnames(data)), mb.ratio = c(0.6, 0.4), nsets = 1e6, empty.intersections = empty.intersections, ...)
}


#' @name sampler
#' @title sample elements of vector or rows of table
#'
#' @description
#'
#' @export
sampler = function(x, n = NROW(x), seed = 10, rngkind = "L'Ecuyer-CMRG", verbose = FALSE, replace = FALSE, prob = NULL) {
    if (NROW(seed) & !isNA(seed)) {
        current.rng = .Random.seed
        txt = parse(text = sprintf(".Random.seed = as.integer(%s)", mkst(current.rng)))
        set_rngseed(seed = seed, rngkind = rngkind, verbose = verbose)
        on.exit(eval(txt, envir = parent.frame()))
    }

    x_nrow = NROW(x)
    ix = seq_len(x_nrow)
    
    if (isFALSE(replace) && n > x_nrow) {
        warning("replace = FALSE, but n supplied is greater than dimension of input")
        warning("requested n=", n, " sampled points; ", "sampling n=", n, " instead")
        n = x_nrow
    }
    
    randid = sample(ix, n, replace = replace, prob = prob)

    if (!is.null(dim(x)))
        return(x[randid,,drop=F])
    else
        return(x[randid])
}


#' data table-ize list elements and add name column
#'
#' make every element of list a data frame
#'
#' @name enframe_list
#' @export
enframe_list = function(lst, name = "name", value = "value", as.data.table = TRUE, rbind = TRUE, mc.cores = 1) {
    nms = names(lst)
    expr = parse(text = sprintf("cbind(%s = nm, df)", name, value))
    out = parallel::mcmapply(function(el, nm) {
        if (NROW(el)) {
            if (is.null(dim(el)))
                df = setColnames(as.data.frame(el), value)
            else
                df = as.data.frame(el)
            if (as.data.table)
                setDT(df)
            eval(expr)
        }
    }, lst, nms, SIMPLIFY = FALSE, mc.cores = mc.cores)
    if (rbind) {
        if (as.data.table)
            return(data.table::rbindlist(out))
        else
            return(do.call(rbind, out))
    }
}



#' is.na but also tests for "NA" character
#'
#' deals with NA characters
#'
#' @export
isNA = function(x, na.char = c("NA", "NULL", "na", "null")) {
    if (is.character(x)) {
        return(is.na(x) | x %in% na.char)
    } else {
        return(is.na(x))
    }
}


#' get upper and lower bounds of cut labels
#'
#' grab the upper and lower bounds from the default factor output
#' of cut()
#'
#' @export
uncut = function(v) {
    lst = lapply(tstrsplit(levels(v), ", "), function(x) as.numeric(gsub("[][)]+", "", x)))
    names(lst) = c("lb", "ub")
    return(lst)
}


#' testing if object is empty
#' 
#' @export
nodim = function(x) {
    any(DIM(x) == 0)
}

#' extending NCOL
#'
#' NCOL = 1 for NULL, or any vector with length == 0
#' seems counterintuitive so this is the fix
#' 
#' @export
NCOL2 <- function(x) {
  d = dim(x)
  ln = length(d)
  lx = length(x)
  if (ln > 1L) {
    d[2L]
  } else if (lx == 0L) {
    0L
  } else {
    1L
  }
}

#' NROW and NCOL convenience function
#'
#' extending NROW and NCOL
#' 
#' @export
DIM = function(x) {
    return(c(NROW(x), NCOL(x)))
}

#' extending NROW and NCOL2
#'
#' convenience wrapper 
#'
#' @export
DIM2 <- function(x) {
    return(c(NROW(x), NCOL2(x)))
}



#' weighted quantile
#'
#' weighted quantile
#' 
#' @name w.quantile
#' @export w.quantile
w.quantile = function(x, w = 1, qs) {
    ix = order(x)
    if (length(x) != length(x)) 
        w = rep_len(w, NROW(x))
    tot = sum(w)
    x[ix]
    cs = cumsum(w[ix])
    selectix = sapply(qs, function(thisx) which.max(cumsum(cs <= (tot * thisx))))
    return(x[ix][selectix])
}



#' metanames
#'
#' get colnames or metadata colnames (if S4 object)
#' 
#' @name metanames
#' @export
metanames = function(x) {
    if (inherits(x, c("GRanges", "GRangesList")) || "elementMetadata" %in% slotNames(x)) {
        x = mcols(x)
    }
    nm = colnames(x)
    if (is.null(nm)) {
        return(rep_len("", NCOL(x)))
    } else {
        return(nm)
    }
}


#' split data frame-like object based on columns
#'
#' split data frame for looping operations over splits 
#'
#' @name split_by
#' @export
split_by = function(dt, fields, do.unname = FALSE, split_col_sort = FALSE, sep = " ") {
    this.sep = sep
    force(this.sep)
    in_type_is_granges = inherits(dt, c("GRanges", "IRanges", "GRangesList", "IRangesList"))
    if (in_type_is_granges) {
        out = dt
        dt = mcols(dt)
        si = seqinfo(out)
        if (!NROW(out))
            return(gr.fix(GRangesList(), si))
    }
    if (!NROW(dt)) return(list())
    colix = match3(fields, names(dt))
    ## colix = which(names(dt) %in% fields)
    expr = parse(text = sprintf("dt[,%s,drop=FALSE]", mkst(colix)))
    cols = eval(expr)
    lstarg = as.list(cols)
    lstarg = c(lstarg, list(sep = sep))
    if (!isTRUE(split_col_sort)) 
        uf = dodo.call2(FUN = function(...) uniqf(...), lstarg)
    else
        uf = dodo.call2(FUN = function(...) paste(...), lstarg)
    ## rles = dodo.call2(FUN = rleseq, as.list(cols))
    if (in_type_is_granges) {
        out = split(out, uf)
        mcols(out) = dt[!duplicated(uf), fields]
        if (do.unname) out = unname(out)
        return(out)
    }
    out = split(dt, uf)
    if (do.unname) out = unname(out)
    return(out)
}


#' print error message from tryCatch
#'
#' useful for tryCatch(..., error = function(e) printerr(<some_custom_msg>))
#'
#' @export
printerr = function(msg = "", e) {
    if (missing(e)) {
        e = dg(e)
    }
    cm = as.character(conditionMessage(e))
    cc = as.character(conditionCall(e))
    eval(quote(print(structure(paste("error: ", msg, cm, cc), class = "err"))))
}

#' negate a function
#'
#' negate a function that returns boolean 
#'
#' @export
nott = function(f) {
    if (missing(f) || is.null(f) || !is.function(f)) f = identity
    return(Negate(f))
}


#' fill in variables of data table by combos
#'
#' especially useful for expanding factors excluded from a query
#'
#' @name fillby
#' @export
fillby = function(x, by, fillcol, fill = 0L, use_factor_levels = TRUE) {
    strby = paste0(by, "=", "unique(", by, ")")
    fc = which(sapply(x[, by, with = FALSE], inherits, "factor"))
    if (use_factor_levels && any(fc)) strby[fc] = paste0(by[fc], "=", "levels(", by[fc],")")
    strby = paste(strby, collapse = ",")
    cj = et(sprintf("x[, CJ(%s)]", strby))
    out = suppressWarnings(data.table::setkeyv(copy3(x), by)[cj])
    fill = rep_len(fill, length(fillcol))
    for (i in seq_along(fillcol)) {
        data.table::set(out, j = fillcol[i], value = replace_na(out[[fillcol[i]]], fill))
    }
    return(out)
}



#' see variables in environment
#' 
#' get variable names in the environment
#'
#' @export
seevar = function(calling_env = parent.frame()) {
    setdiff(ls(envir = calling_env), lsf.str(envir = calling_env))
}



#' complete.cases wrapper
#'
#' can use this within data.table
#'
#' @name complete.cases2
#' @export complete.cases2
complete.cases2 = function(...) {
    complete.cases(as.data.frame(list(...)))
}


#' Unique factor
#'
#' Make a unique factor based on one or more vectors
#' in parallel.
#'
#' @name uniqf
#' @export uniqf
uniqf = function (..., sep = " ")
{
    set.seed(10)
    lst = as.list(match.call()[-1])
    force(sep)
    nm = names(lst)
    if (is.null(nm)) {
        nm = rep_len("", length(lst))
    }
    ix = which(nm != "sep")
    tmpix = paste(..., sep = sep)
    ## tmpix = do.call(paste, c(lst[ix], alist(sep = sep)))
    tmpix = factor(tmpix, levels = unique(tmpix))
    tmpix
}



#' get actual quantile values of vector
#'
#' Use the empirical distribution of numeric values
#' to get the quantiles assigned to each value of a vector
#'
#' @name qq
#' @export qq
qq = function(x) {
    ecdf(x)(x)
}


#' cut off vector by quantiles
#'
#' Use the empirical distribution of numeric values
#' to get the quantiles assigned to each value of a vector.
#' then remove values that are above the quantile cutoff.
#'
#' @name qtrim
#' @export qtrim
qtrim = function(x, maxq = 0.99) {
    x[qq(x) < maxq]
}


#' modification of tailf from skitools
#'
#' overcomes the line limitations in the skitools version
#'
#' @export tailf
tailf = function (x, n = NULL, grep = NULL) {
    oldscipen = options()$scipen
    tmp = tempfile()
    on.exit({options(scipen = oldscipen); unlink(tmp)})
    options(scipen = 999)
    if (is.null(grep)) {
        if (is.null(n)) {
            x = paste("tail -F", paste(x, collapse = " "))
        }
        else {
            x = paste("tail -n", n, "-F", paste(x, collapse = " "))
        }
    }
    else {
        x = paste("grep -H", grep, paste(x, collapse = " "),
            " | more")
    }
    writeLines(x, tmp)
    system2("sh", tmp)
}

#' @name ne.na
#' @title mark x with NA if it does not exist
#'
#'
#' @export
ne.na = function(x, no = NA_character_) {
    ifelse(file.not.exists(x), no, x)
}


#' @name flag2int
#' @title convert bam flag to integer
#'
#'
#' @export
flag2int = function(flags) {
    apply(flags, 1, function(x) sum(2^((1:12) - 1) * x))
}


#' @name mkst
#' @title MaKe STring
#'
#' making string out of vector for eval(parse(text = ...))
#'
#'
#' @export
mkst = function(v, f = "c", po = "(", pc = ")", collapse = ",", asnull = FALSE) {
    if (identical(NROW(v), 0L)) {
        if (isTRUE(asnull)) return(NULL) else return("")
    }
    out = paste0(f, po, paste0(v, collapse = collapse), pc)
    return(out)
}


#' @name unI
#' @title remove "AsIs" from class, i.e. undo I(obj)
#'
#' undo I(obj)
#'
#'
#' @export
unI = function(x) {
  if (inherits(x, "AsIs")) class(x) = setdiff(class(x), "AsIs")
  return(x)
}


#' @name log10p
#' @title log10(x + 1)
#'
#'
#' @export
log10p = function(x) {
    log10(x + 1)
}


#' @name .gc
#' @title .gc
#'
#'
#'
#' @param df data frame
#' @param ptrn pattern
#' @author Kevin Hadi
#' @export
.gc = function(df, ptrn, invert = F, ignore.case = FALSE, exact = FALSE) {
    if (inherits(df, c("GRanges", "GRangesList")))
        cnames = colnames2(df@elementMetadata)
    else
        cnames = colnames2(df)
    if (is.character(ptrn)) {
        if (!exact) {
            ## ptrn = paste0("^(", ptrn, ")$")
            ix = loop_grep(ptrn, cnames, ignore.case = ignore.case)
        } else {
            ix = which(cnames %in% ptrn)
        }
    } else {
        ix = ptrn
    }
    if (NROW(ix)) {
        if (isTRUE(invert)) inv = "-" else inv = ""
        return(et(sprintf("df[,%s%s,drop=F]", inv,
                          paste0("c(", paste(ix, collapse = ","), ")"))))
    } else {
        if (isTRUE(invert))
            return(df)
        else
            return(df[,0,drop=F])
    }
}

#' @name rmcol
#' @title rmcol
#'
#'
#'
#' @param df object with !is.null(df) == TRUE
#' @author Kevin Hadi
#' @export rmcol
rmcol = function(df, rmcol = 1, usekey = T, invert = T, exact = FALSE) {
    if (is.data.table(df) && !is.null(key(df)) && usekey) {
        rmcol = key(df)
        exact = TRUE
        invert = TRUE
    }
    return(.gc(df, rmcol, invert = invert, exact = exact))
}

#' @name do.cols
#' @title do.cols
#'
#'
#'
#' @param x object with ncol >= 1
#' @author Kevin Hadi
#' @export do.cols
do.cols = function(x, rmcol = 1, FUN = rowSums, by.FUN = NULL, exact = F, invert = T) {
    if (NROW(by.FUN)) {
        if (names(as.list(args(by.FUN))[1]) == "...") {
            return(as.data.table(.gc(x, rmcol, invert = invert, exact = exact))[, I := .I][, do.call(by.FUN, .SD), by = I]$V1)
        } else {
            return(as.data.table(.gc(x, rmcol, invert = invert, exact = exact))[, I := .I][, by.FUN(.SD), by = I]$V1)
        }
    } else if (NROW(FUN)) {
        FUN(.gc(x, rmcol, invert = invert, exact = exact))
    } else {
        return(x)
    }
}


#' @name parasn
#' @title assign parallel columns
#'
#'
#'
#' @param x data.table
#' @param y data.table
#' @author Kevin Hadi
#' @export
parasn = function(x, y, cols, sans_key = TRUE, use.data.table = T) {
    if (missing(cols)) {
        if (sans_key)
            cols = setdiff(colnames(y), key(x))
        else
            cols = colnames(y)
    }
    dimx = DIM(x)
    dimy = DIM(y)
    if (dimx[1] != dimy[1]) {
        nr.x = dimx[1]
        id.x = seq_len(nr.x)
        nr.y = dimy[1]
        id.y = seq_len(nr.y)
        id.x = rep_len(nr.x, id.x)
        id.y = rep_len(nr.y, id.y)
        x = x[id.x]
        y = y[id.y]
    }
    if (use.data.table) {
        for (col in cols) {
            set(x, j = col, value = y[[col]])
        }
    } else {
        for (col in cols) {
            x[[col]] = y[[col]]
        }
    }
    return(x)
}


#' @name do.assign
#' @title assign columns or list elements
#'
#'
#'
#' @author Kevin Hadi
#' @export do.assign
do.assign = function(x, ..., pf = parent.frame()) {
  mc_2340873450987 = match.call(expand.dots = FALSE)
  ddd = as.list(mc_2340873450987)$`...`
  if (is.null(names(ddd))) names(ddd) = paste0(rep_len("V", length(ddd)), seq_along(ddd))
  for (i in seq_along(ddd)) {
    d = ddd[[i]]
    nml = names(ddd[i])
    if (is.call(d) || is.name(d)) {
      ev = BiocGenerics::eval(d, envir = parent.frame())
      nm = names(ev)
      .DIM = DIM2(ev)
      .dim = dim(ev)
      nr = .DIM[1L]
      nc = .DIM[2L]
      if (inherits(ev, c("list")) && nc == 1L) {
        if (is.null(nm)) nm = rep_len(nml, nr)
        for (ii in seq_len(nr)) {
          x[[nm[ii]]] = ev[[ii]]
        }
      } else if (length(.dim) > 0L) {
        if (is.null(nm)) nm = rep_len(nml, nc)
        for (ii in seq_len(nc)) {
          x[[nm[ii]]] = ev[[ii]]
        }
      } else {
        x[[nml]] = ev
      }
    }
  }
  return(x)
}


#' @name duped
#' @title duped
#'
#'
#'
#' @param ... vectors to paste by
#' @author Kevin Hadi
#' @export
duped = function(..., binder = "data.table") {
    duplicated(tryCatch(et(sprintf("%s(...)", binder)),
                        ## error = function(e) do.call(cbind, list(...))))
                        error = function(e) paste(...)))
}


#' @name colexists
#' @title find out if column is in data.table
#'
#'
#'
#' @param nm name of column
#' @param df data.frame or data.table
#' @author Kevin Hadi
#' @export colexists
colexists = function(nm, df) {
    cnames = colnames2(df)
    return(nm %in% cnames)
}

#' dodo.call+
#'
#' FUN can be an anonymous function call
#' dodo.call2({function(...) paste(..., collapse = " ")}, list(bstring1, bstring2))
#' can also omit the brackets
#'
#' @name dodo.call2
#' @param FUN function
#' @author Marcin Imielinski
#' @export dodo.call2
dodo.call2 = function (FUN, args, use.names = T)
{
    if (!is.character(FUN))
      FUN = substitute(FUN)
    if (isTRUE(use.names) && !is.null(names(args)))
      argn = paste0("\"", names(args), "\"", "=")
    else
        argn = NULL
    if (!is.matrix(args)) {
        cmd = paste(
            paste0("{", as.character(as.expression(FUN)), "}"),
            "(",
            paste0(argn, "args[[", 1:length(args), "]]", collapse = ","),
            ")",
            sep = "")
    } else {
        cmd = paste(
            paste0("{", as.character(as.expression(FUN)), "}"),
            "(",
            paste0(argn, "args[,", 1:ncol(args), "]", collapse = ","),
            ")",
            sep = "")
    }
    return(et(cmd))
}

#' @name nonacol
#' @title no columns named "NA"
#'
#'
#' @param x data frame
#' @author Kevin Hadi
#' @export nonacol
nonacol = function(x, napattern = "^NA") {
  good = grep(napattern, colnames(x), invert = T)
  return(et(sprintf("x[, %s,drop=F]", mkst(good))))
}



#' @name et
#' @title shortcut for eval(parse(text = <string>))
#'
#' @param txt string to evaluate
#' @author Kevin Hadi
#' @export
et = function(txt, eval = TRUE, envir = parent.frame(), enclos = parent.frame(2)) {
    out = parse(text = txt)
    ## enclos = stackenv2()
    if (eval) {
        return(eval(out, envir = envir, enclos = enclos))
    } else {
        return(out)
    }
}



#' same as dplyr::coalesce
#'
#' clobber NA, or some value between multiple vectors
#' bads can be a function that returns a logical
#'
#' @param ... vectors to merge together
#' @param bads a set of values to clobber, or a function that returns a logical
#' @param r2l merge from left to right per pair of vectors
#' @param fromLast if TRUE, merge from last vector to first
#' @param comparefun A 2 argument function (i.e. function(x,y) x < y), if r2l = FALSE, then the greater value will be chosen as y is on the right, for function(x,y) x < y. if r2l = TRUE, then the lesser value will be chosen
#' @export
clobber = function(..., bads = NA, bads.x = NULL, bads.y = NULL, r2l = FALSE, fromLast = FALSE, opposite = TRUE, comparefun = NULL, remove.empty = TRUE) {
    lst = list(...)
    lens = eNROW(lst)
    maxlen = max(lens)
    if (length(unique(lens)) > 1)
        lst = lapply(lst, function(x) rep(x, length.out = maxlen))
    if (remove.empty)
        lst = lst[eNROW(lst) > 0]
    if ( !length(bads) && !length(bads.x) && !length(bads.y))
        stop("You gotta set one of bads, bads.x, or bads.y")
    if ({ length(bads.x) && length(bads.y) }) {
        message("bads.x and bads.y both set explicitly")
        message("setting opposite to FALSE")
        opposite = FALSE
    }
    anytrue = function(vec) rep(TRUE, length.out = length(vec))
    if (isTRUE(bads) || !length(bads)) {
        message("bads set to NULL or TRUE")
        message("setting opposite to FALSE")
        bads = anytrue
        opposite = FALSE
    }
    if (opposite) {
        yfun = get("!", mode = "function")
    } else {
        yfun = get("identity", mode = "function")
    }
    if (!length(bads.x)) bads.x = bads
    if (!length(bads.y)) bads.y = bads
    dofun = function(x,y) {
        if (is.function(bads.x))
            badsx = which(bads.x(x))
        else
            badsx = which(x %in% bads.x)
        if (is.function(bads.y))
            nbadsy = which(yfun(bads.y(y)))
        else
            nbadsy = which(yfun(y %in% bads.y))
        ix = intersect(badsx, nbadsy)
        return(replace(x, ix, rep(y[ix], length.out = length(ix))))
    }
    if (is.null(comparefun)) {
        if (!r2l)
            return(Reduce(function(x,y) dofun(x,y), lst, right = fromLast))
        else
            return(Reduce(function(x,y) dofun(y,x), lst, right = fromLast))
    } else {
        yfun = get("identity", mode = "function")
        if (!r2l) {
            return(Reduce(function(x,y) {
                if (is.function(bads.x))
                    badsx = which(bads.x(x))
                else
                    badsx = which(x %in% bads.x)
                if (is.function(bads.y))
                    nbadsy = which(yfun(bads.y(y)))
                else
                    nbadsy = which(yfun(y %in% bads.y))
                lg = which(comparefun(x,y))
                lg = setdiff(lg, nbadsy)
                out = x
                out[badsx] = y[badsx]
                out[lg] = y[lg]
                out
            }, lst, right = fromLast))
        } else {
            return(Reduce(function(x,y) {
                if (is.function(bads.x))
                    badsx = which(bads.x(x))
                else
                    badsx = which(x %in% bads.x)
                if (is.function(bads.y))
                    nbadsy = which(yfun(bads.y(y)))
                else
                    nbadsy = which(yfun(y %in% bads.y))
                lg = which(comparefun(x,y))
                lg = setdiff(lg, nbadsy)
                out = y
                out[nbadsy] = x[nbadsy]
                out[lg] = x[lg]
                out
            }, lst, right = fromLast))
        }
    }
}

## clobber = function(..., bads = NA, bads.x = NULL, bads.y = NULL, r2l = FALSE, fromLast = FALSE, opposite = TRUE) {
##     lst = list(...)
##     lens = lengths(lst)
##     maxlen = max(lens)
##     if (length(unique(lens)) > 1)
##         lst = lapply(lst, function(x) rep(x, length.out = maxlen))
##     if ( !length(bads) && !length(bads.x) && !length(bads.y))
##         stop("You gotta set one of bads, bads.x, or bads.y")
##     if ({ length(bads.x) && length(bads.y) }) {
##         message("bads.x and bads.y both set explicitly")
##         message("setting opposite to FALSE")
##         opposite = FALSE
##     }
##     anytrue = function(vec) rep(TRUE, length.out = length(vec))
##     if (isTRUE(bads) || !length(bads)) {
##         message("bads set to NULL or TRUE")
##         message("setting opposite to FALSE")
##         bads = anytrue
##         opposite = FALSE
##     }
##     if (opposite) {
##         yfun = get("!", mode = "function")
##     } else {
##         yfun = get("identity", mode = "function")
##     }
##     if (!length(bads.x)) bads.x = bads
##     if (!length(bads.y)) bads.y = bads
##     dofun = function(x,y) {
##         if (is.function(bads.x))
##             badsx = which(bads.x(x))
##         else
##             badsx = which(x %in% bads.x)
##         if (is.function(bads.y))
##             nbadsy = which(yfun(bads.y(y)))
##         else
##             nbadsy = which(yfun(y %in% bads.y))
##         ix = intersect(badsx, nbadsy)
##         return(replace(x, ix, rep(y[ix], length.out = length(ix))))
##     }
##     if (!r2l)
##         return(Reduce(function(x,y) dofun(x,y), lst, right = fromLast))
##     else
##         return(Reduce(function(x,y) dofun(y,x), lst, right = fromLast))
## }

## clobber = function (..., bads = NA, r2l = FALSE, fromLast = FALSE, opposite = TRUE)
## {
##     lst = list(...)
##     lens = lengths(lst)
##     maxlen = max(lens)
##     if (length(unique(lens)) > 1)
##         lst = lapply(lst, function(x) rep_len(x, maxlen))
##     if (opposite) {
##         yfun = get("!", mode = "function")
##     }
##     else {
##         yfun = get("identity", mode = "function")
##     }
##     dofun = function(x, y) {
##         if (is.function(bads)) {
##             badsx = which(bads(x))
##             nbadsy = which(yfun(bads(y)))
##         }
##         else {
##             badsx = which(x %in% bads)
##             nbadsy = which(yfun(y %in% bads))
##         }
##         ix = intersect(badsx, nbadsy)
##         return(replace(x, ix, rep_len(y[ix], length(ix))))
##     }
##     if (!r2l)
##         return(Reduce(function(x, y) dofun(x, y), lst, right = fromLast))
##     else return(Reduce(function(x, y) dofun(y, x), lst, right = fromLast))
## }

#' same as dplyr::coalesce, khtools:coalesce is an alias for khtools::clobber
#'
#' clobber NA, or some value between multiple vectors
#' bads can be a function that returns a logical
#'
#' @export
coalesce = clobber

#' same as tibble::enframe
#'
#' kevin's implementation of tibble::enframe()
#'
#'
#' @param x named vector
#' @param name name of column containing names(x)
#' @param value name of column containing x values
#' @export
enframe = function(x, name = "name", value = "value", as.data.table = TRUE) {
    if (is.null(dim(x))) {
        nm = names2(x)
    } else {
        nm = rownames2(x)
    }
    out = cbind(data.frame(nm), data.frame(x))
    out = setColnames(out, c(name, value))
    if (as.data.table) {
        setDT(out)
        return(out)
    } else {
        return(out)
    }
}


#' kevin's modification of ppng
#'
#' height and width are specified in inches by default
#' resolution is specified as 300 by default
#'
#' @export
ppng = function (expr, filename = "plot.png", height = 10, width = 10,
                 dim = NULL, cex = 1, title = NULL,
                 h = height, w = width,
                 cex.title = 1, oma.scale = 0, units = "in", res = 300, oma.val = c(1,1,1,1), pars = list(), ...) {
    suppressWarnings({
        this.env = environment()
        if (length(cex) == 1) {
            cex = rep(cex, 2)
        }
        height = h
        width = w
        height = cex[1] * height
        width = cex[2] * width
        DEFAULT.OUTDIR = Sys.getenv("PPNG.DIR")
        if (nchar(DEFAULT.OUTDIR) == 0)
            DEFAULT.OUTDIR = normalizePath("~/public_html/")
        if (!grepl("^[~/]", filename))
            filename = paste(DEFAULT.OUTDIR, filename, sep = "/")
        if (!file.exists(file.dir(filename)))
            system(paste("mkdir -p", file.dir(filename)))

        cat("rendering to", filename, "\n")
        old_oma = par(no.readonly=T)$oma
        lst.par = par(no.readonly=T)
        goodnm = intersect(names(pars), names(lst.par))
        lst.old.par = lst.par = lst.par[goodnm]
        oldpars = allpars = paste(unlist(lapply(seq_along(lst.par), function(i) {
            paste0(names(lst.par)[i], "=c(",
                   paste0(ifelse(is.na(lst.par[[i]]) | !is.character(lst.par[[i]]),
                                 lst.par[[i]], paste0("'", lst.par[[i]], "'")), collapse = ","), ")",
                   collapse = ",")
        })), collapse = ",")
        oldstr = paste0("par(", allpars, ")")
        ## on.exit({
        ##     for (i in seq_along(lst.old.par)) {
        ##         arg = paste0(names(lst.old.par)[i], "=c(",
        ##                      paste0(ifelse(is.na(lst.old.par[[i]]) | !is.character(lst.old.par[[i]]),
        ##                                    lst.old.par[[i]], paste0("'", lst.old.par[[i]], "'")), collapse = ","), ")",
        ##                      collapse = ",")
        ##         eval(parse(text = paste0("par(", arg, ")")), envir = this.env)
        ##     }

        ## })
        pf2 = parent.frame(2)
        on.exit({eval(parse(text = oldstr), envir = parent.frame(), enclos = pf2); reset.dev()})
        if (length(pars) > 0) {
            goodnm = intersect(names(pars), names(lst.par))
            lst.par[goodnm] = pars[goodnm]
            ## for (i in seq_along(lst.par)) {
            ##     arg = paste0(names(lst.par)[i], "=c(",
            ##                  paste0(ifelse(is.na(lst.par[[i]]) | !is.character(lst.par[[i]]),
            ##                                lst.par[[i]], paste0("'", lst.par[[i]], "'")), collapse = ","), ")",
            ##                  collapse = ",")
            ##     eval(parse(text = paste0("par(", arg, ")")), envir = parent.frame())
            ## }
            newpars = allpars = paste(unlist(lapply(seq_along(lst.par), function(i) {
                paste0(names(lst.par)[i], "=c(",
                       paste0(ifelse(is.na(lst.par[[i]]) | !is.character(lst.par[[i]]),
                                     lst.par[[i]], paste0("'", lst.par[[i]], "'")), collapse = ","), ")",
                       collapse = ",")
            })), collapse = ",")
            newstr = paste0("par(", allpars, ")")
            eval(parse(text = newstr), envir = parent.frame(), enclos = pf2)
        } else {
            newstr = ""
            newpars = ""
        }
        png(filename, height = height, width = width, units = units, res = res, ...) ## pointsize default is 12... maybe the default previously was 24?

        if (oma.scale > 0) {
            ## par(oma = oma.val * oma.scale)
            newpars = paste0(newpars, "oma=c(", paste0(oma.val * oma.scale, collapse = ","), ")", collapse = ",")
        }
        if (!is.null(dim)) {
            if (length(dim) == 1)
                dim = rep(dim, 2)
            dim = dim[1:2]
            layout(matrix(1:prod(dim), nrow = dim[1], ncol = dim[2],
                          byrow = TRUE))
        }
        ## eval(parse(text = paste0("{ par(", newpars, ");", as.character(as.expression(substitute(expr))), "}")),
        ##      envir = parent.frame())
        ## eval(parse(text = paste0("{ par(", newpars, ");", as.character(as.expression(substitute(expr))), "}")),
        ##      envir = parent.frame(), enclos = stackenv(parent.frame(2)))
        eval(parse(text = paste0("{ par(", newpars, ");", as.character(as.expression(substitute(expr))), "}")),
             envir = parent.frame(), enclos = pf2)
        ## eval(expr, envir = this.env)
        if (!is.null(title))
            title(title, cex.main = cex.title * max(cex))
        silent({dev.off()})
    })
}


file.dir <- function (paths) {
    return(gsub("(^|(.*\\/))?([^\\/]*)$", "\\2", paths))
}

#' kevin's modification of ppdf
#'
#' height and width are specified in inches by default
#' resolution is specified as 300 but is not used
#'
#' @name ppdf
#' @export
ppdf = function (expr, filename = "plot.pdf", height = 10, width = 10,
                 h = height, w = width,
                 cex = 1, title = NULL, byrow = TRUE, dim = NULL, cex.title = 1,
                 oma.scale = 0, oma.val = c(1,1,1,1), useDingbats = FALSE, res = 0, pars = list(), ...) {
    suppressWarnings({
        this.env = environment()
        if (length(cex) == 1)
            cex = rep(cex, 2)
        height = h
        width = w
        height = cex[1] * height
        width = cex[2] * width
        DEFAULT.OUTDIR = Sys.getenv("PPDF.DIR")
        if (nchar(DEFAULT.OUTDIR) == 0)
            DEFAULT.OUTDIR = normalizePath("~/public_html/")
        if (!grepl("^[~/]", filename))
            filename = paste(DEFAULT.OUTDIR, filename, sep = "/")
        if (!file.exists(file.dir(filename)))
            system(paste("mkdir -p", file.dir(filename)))
        cat("rendering to", filename, "\n")

        old_oma = par(no.readonly=T)$oma
        lst.par = par(no.readonly=T)
        goodnm = intersect(names(pars), names(lst.par))
        lst.old.par = lst.par = lst.par[goodnm]
        oldpars = allpars = paste(unlist(lapply(seq_along(lst.par), function(i) {
            paste0(names(lst.par)[i], "=c(",
                   paste0(ifelse(is.na(lst.par[[i]]) | !is.character(lst.par[[i]]),
                                 lst.par[[i]], paste0("'", lst.par[[i]], "'")), collapse = ","), ")",
                   collapse = ",")
        })), collapse = ",")
        oldstr = paste0("par(", allpars, ")")
        ## on.exit({
        ##     for (i in seq_along(lst.old.par)) {
        ##         arg = paste0(names(lst.old.par)[i], "=c(",
        ##                      paste0(ifelse(is.na(lst.old.par[[i]]) | !is.character(lst.old.par[[i]]),
        ##                                    lst.old.par[[i]], paste0("'", lst.old.par[[i]], "'")), collapse = ","), ")",
        ##                      collapse = ",")
        ##         eval(parse(text = paste0("par(", arg, ")")), envir = this.env)
        ##     }

        ## })
        pf2 = parent.frame(2)
        on.exit({eval(parse(text = oldstr), envir = parent.frame(), enclos = pf2); reset.dev()})
        if (length(pars) > 0) {
            goodnm = intersect(names(pars), names(lst.par))
            lst.par[goodnm] = pars[goodnm]
            ## for (i in seq_along(lst.par)) {
            ##     arg = paste0(names(lst.par)[i], "=c(",
            ##                  paste0(ifelse(is.na(lst.par[[i]]) | !is.character(lst.par[[i]]),
            ##                                lst.par[[i]], paste0("'", lst.par[[i]], "'")), collapse = ","), ")",
            ##                  collapse = ",")
            ##     eval(parse(text = paste0("par(", arg, ")")), envir = parent.frame())
            ## }
            newpars = allpars = paste(unlist(lapply(seq_along(lst.par), function(i) {
                paste0(names(lst.par)[i], "=c(",
                       paste0(ifelse(is.na(lst.par[[i]]) | !is.character(lst.par[[i]]),
                                     lst.par[[i]], paste0("'", lst.par[[i]], "'")), collapse = ","), ")",
                       collapse = ",")
            })), collapse = ",")
            newstr = paste0("par(", allpars, ")")
            eval(parse(text = newstr), envir = parent.frame(), enclos = pf2)
        } else {
            newstr = ""
            newpars = ""
        }
        pdf(filename, height = height, width = width,
            useDingbats = useDingbats, ...) ## pointsize default is 12

        if (oma.scale > 0) {
            newpars = paste0(newpars, "oma=c(", paste0(oma.val * oma.scale, collapse = ","), ")", collapse = ",")
        }
        if (!is.null(dim)) {
            if (length(dim) == 1)
                dim = rep(dim, 2)
            dim = dim[1:2]
            graphics::layout(matrix(1:prod(dim), nrow = dim[1], ncol = dim[2],
                                    byrow = byrow))
        }
        ## eval(expr)
        ## eval(parse(text = paste0("{", newstr, ";", as.character(as.expression(substitute(expr))), "}")),
        ## envir = parent.frame())
        ## eval(parse(text = paste0("{ par(", newpars, ");", as.character(as.expression(substitute(expr))), "}")),
        ##      envir = parent.frame(), enclos = stackenv(parent.frame(2)))
        eval(parse(text = paste0("{ par(", newpars, ");", as.character(as.expression(substitute(expr))), "}")),
             envir = parent.frame(), enclos = pf2)
        if (!is.null(title))
            title(title, cex.main = cex.title * max(cex))
        silent({dev.off()})
    })
}


#' robust name()
#'
#' gives back character vector same length of input regardless whether named or not
#'
#' @param str a path string
#' @return a string with multiple parentheses replaced with a single parenthesis
#' @export
names2 = function(x) {
    nm = names(x)
    if (is.null(nm))
        return(rep("", length.out = length(x)))
    else
        return(nm)
}


#' robust rownames
#'
#' gives back character vector same number of rows of input regardless whether named or not
#'
#' @param str a path string
#' @return a string with multiple parentheses replaced with a single parenthesis
#' @export
rownames2 = function(x) {
    if (!is.null(dim(x)))
        nm = rownames(x)
    else
        nm = names(x)
    if (is.null(nm))
        return(rep("", length.out = len(x)))
    else
        return(rep(nm, length.out = len(x)))
}


#' robust colnames
#'
#' gives back character vector same number of columns of input regardless whether named or not
#'
#' @param str a path string
#' @return a string with multiple parentheses replaced with a single parenthesis
#' @export
colnames2 = function(x) {
    nm = colnames(x)
    if (is.null(nm))
        return(rep("", length.out = NCOL(x)))
    else
        return(nm)
}


#' robust name() assignment
#'
#' similar to rlang::names2
#'
#' @param x vector
#' @return x a vector with all names
#' @export
`names2<-` = function(x, value, useempty = FALSE) {
    names(x) = if (!is.null(value))
                   rep(value, length.out = length(x))
               else {
                   if (useempty)
                       rep("", length.out = length(x))
               }
    return(x)
}


#' robust rownames() assignment
#'
#' @param x vector or matrix
#' @return x a vector fully named or rownamed
#' @export
`rownames2<-` = function(x, value, useempty = FALSE) {
    if (!is.null(dim(x))) {
        rownames(x) = if (!is.null(value))
                       rep(value, length.out = nrow(x))
                   else {
                       if (useempty)
                           rep("", length.out = nrow(x))
                   }
    } else {
        names(x) = if (!is.null(value))
                       rep(value, length.out = length(x))
                   else {
                       if (useempty)
                           rep("", length.out = length(x))
                   }
    }
    return(x)
}


#' robust colnames() assignment
#'
#' @param x data with dimensions
#' @return x data fully colnamed
#' @export
`colnames2<-` = function(x, value, useempty = FALSE) {
    colnames(x) = if (!is.null(value))
                      rep(value, length.out = ncol(x))
                  else {
                      if (useempty)
                          rep("", length.out = ncol(x))
                  }
    return(x)
}


#' utility function for removing multiple parantheses
#'
#' probably not necessary
#'
#' @param str a path string
#' @return a string with multiple parentheses replaced with a single parenthesis
#' @export
rm_mparen  = function(str) {
    return(gsub('\\/{2,}', "/", str))
}


#' make training/test splits
#'
#' @author Kevin Hadi
#' @param dat data.table or data.frame of one row per observation
#' @param k number of groups
#' @return a list of row ids corresponding to each fold and training and test split
#' @export
make_xfold = function(dat, k = 10, nested = FALSE, times = 1, transpose = TRUE, seed = 10) {
  obs.id = seq_along2(dat)
  set.seed(seed)
  if (!nested) {
    train = caret::createFolds(obs.id, k = k, returnTrain = T)
  } else
    train = caret::createMultiFolds(obs.id, k = k, times = times)
  test = lapply(train, function(x) {
    setdiff(obs.id, x)
  })
  out = list(train = train, test = test)
  if (transpose)
    return(purrr::transpose(out))
  else
    return(out)
}



#' set random number generator AND seed
#'
#' @author Kevin Hadi
#' @param rngkind string specifying number generator
#' @param seed integer specifying seed
#' @param use.old.sample.kind logical specifying discrete uniform generation method from prior to R361
#' @return a list of row ids corresponding to each fold and training and test split
#' @export
set_rngseed = function(seed = 10, rngkind = "L'Ecuyer-CMRG", normal.kind = "Inversion",
                       sample.kind = "Rejection", use.old.sample.kind = FALSE,
                       verbose = TRUE)
{
    if ("sample.kind" %in% names(formals(RNGkind))) {
        if (use.old.sample.kind) {
            sample.kind = "Rounding"
            if (verbose) message("Using default sample.kind from <R-3.6.0 (Rounding)")
        }
        RNGkind(rngkind, normal.kind, sample.kind)
        vb = RNGkind()
        if (verbose)  {
            message("RNG: ", vb[1])
            message("normal.kind: ", vb[2])
            message("sample.kind: ", vb[3])
        }
    } else {
        RNGkind(rngkind, normal.kind)
        vb = RNGkind()
        if (verbose)  {
            message("RNG: ", vb[1])
            message("normal.kind: ", vb[2])
        }
    }
    set.seed(seed)
    if (verbose)
        message("seed: ", seed)
    return(invisible(NULL))
}

#' make training/test splits
#'
#' training and testing
#' 
#' @name make_ttsplit
#' @author Kevin Hadi
#' @param dat data.table or data.frame of one row per observation. assumes each observation is a row
#' @param k number of groups
#' @return a list of row ids corresponding to each fold and training and test split
#' @export
make_ttsplit = function(dat,
                        field = NULL, use.index = TRUE, k = 10,
                        nested = FALSE, times = 1, transpose = TRUE,
                        seed = 10, partition_prob = 0.632,
                        split_type = c("crossfold", "resample", "partition"),
                        as.data.table = FALSE, rngkind = "L'Ecuyer-CMRG") {
    
    current.rng = .Random.seed
    txt = parse(text = sprintf(".Random.seed = as.integer(%s)", mkst(current.rng)))
    set_rngseed(seed = seed, rngkind = rngkind)
    on.exit(eval(txt, envir = parent.frame()))
    
    o_split_type = c("crossfold", "resample", "partition")
    obs.id = seq_along2(dat)
    
    set.seed(seed)

    wtf = setdiff(split_type, o_split_type)
    if (length(wtf)) {
        warning("split_type should be one of ", paste(o_split_type, collapse = " "))
        split_type = split_type[split_type %in% o_split_type]
    }

    if (!nested & times != 1) times = 1

    if (NROW(split_type) > 1) {
        split_type = split_type[1]
        message("more than one split type designated, using ", split_type)
    }

    if (split_type == "crossfold") {
        if (!nested) {
            train = caret::createFolds(obs.id, k = k, returnTrain = T)
        } else
            train = caret::createMultiFolds(obs.id, k = k, times = times)
    } else if (split_type == "resample") {
        train = caret::createResample(obs.id, times * k)
    } else if (split_type == "partition") {
        train = caret::createDataPartition(obs.id, times * k, p = partition_prob)
    }
    
    test = lapply(train, function(x) {
        setdiff(obs.id, x)
    })

    trainl = list()
    testl = list()
    if (use.index == FALSE) {
        if (!is.null(ncol(dat))) {
            if (!is.null(field)) {
                trainl = lapply(train, function(x) dat[x,])
                testl = lapply(test, function(x) dat[x,])
            } else {
                if (anyDuplicated(dat[[field]])) warning("some indices are duplicated")
                trainl = lapply(train, function(x) setNames(dat[x,][[field]], x))
                testl = lapply(test, function(x) setNames(dat[x,][[field]], x))
            }
        } else {
            trainl = lapply(train, function(x) setNames(dat[x], x))
            testl = lapply(test, function(x) setNames(dat[x], x))
        }
    }

    if (as.data.table) {
        dt.train = stack.dt(train)[, set := "train"]
        dt.train$id = rep_len2(stack.dt(trainl)$values, dt.train)
        dt.test = stack.dt(test)[, set := "test"]
        dt.test$id = rep_len2(stack.dt(testl)$values, dt.test)
        return(
            setcols(
                rbind(dt.train, dt.test),
                c("ix", "partition", "set", "id"))
        )
    }

    if (NROW(trainl) && NROW(testl))
        out = list(train = trainl, test = testl)
    else
        out = list(train = train, test = test)
    if (transpose)
        return(purrr::transpose(out))
    else
        return(out)
}


#' make aggregated roc curve
#'
#' @author Kevin Hadi
#' @param dat data.table or data.frame of one row per observation. assumes each observation is a row
#' @param k number of groups
#' @return a list of row ids corresponding to each fold and training and test split
#' @export
aggregate_roc = function(dat, subgroup.field = "Method", score = "BRCA1", lab = "fmut_brca1", mc.cores = 1, only_unique = F, include_group = FALSE, return_raw = FALSE) {
    nms = unique(dat[[subgroup.field]])
    out = rbindlist(mclapply(nms, function(nm) {
        x = copy(dat[Method == nm])
        for (i in seq_len(NROW(score))) {
            cut = santoku::chop_evenly(x[[score[i]]], intervals = 50)
            rescore = normv(f2int(cut))
            x[[score[i]]] = rescore
        }
        if (isTRUE(return_raw))
            return(x)
        outt = make_roc(x, lab = lab, score = score, include_group = include_group)[, Method := nm]
        if (only_unique) {
            outt = rbind(outt[!duped(prd)], outt[1][, c("Specificity", "Sensitivity") := list(Specificity = 0, Sensitivity = 1)])
        }
        return(outt)
    }, mc.cores = mc.cores))
}



#' test if two vectors are equal (uses conversion to character)
#'
#' Robust to NA
#'
#' @author Kevin Hadi
#' @param x a vector
#' @param y a vector
#' @return logical vector
#' @export
`%=%` = function(x,y) {
    paste(x) == paste(y)
}



#' seq along either row of table or length of vector
#'
#' @author Kevin Hadi
#' @param x data
#' @return vector
#' @export
seq_along2 = function(x)  {
  seq_len(NROW(x))
}



#' recycle vector - shortcut for rep(x, each = each)
#'
#' @author Kevin Hadi
#' @param x data
#' @param each length to extend vector by
#' @return vector
#' @export
rep_each = function(x, each) {
    return(rep(x, each = each))
}


#' recycle vector - overload base::rep_len
#'
#' problem with base::rep_len is that it doesn't work with other objects
#'
#' @name rep_len
#' @author Kevin Hadi
#' @param x data
#' @param length.out length to extend vector by
#' @return vector
#' @export
rep_len = function(x, length.out) {
    return(rep(x, length.out = length.out))
}

#' recycle vector along length OR nrow of object
#'
#' repeat vector along the length or nrow of object
#'
#' @name rep_len2
#' @author Kevin Hadi
#' @param x data
#' @param objalong any object to recycle x along if uselen = TRUE, or an actual integer value if uselen = FALSE
#' @return vector
#' @export
rep_len2 = function(x, objalong, uselen = TRUE) {
    if (uselen)
        rep(x, length.out = NROW(objalong))
    else
        rep(x, length.out = objalong)
}

#' slightly more robust test for whether file exists
#'
#' test whether file exists. "/dev/null", NA, "NA", "NULL", values excluded
#' by default.
#'
#' @name file.exists2
#' @author Kevin Hadi
#' @param x a character vector
#' @return logical
#' @export file.exists2
file.exists2 = function(x, nullfile = "/dev/null", bad = c(NA, "NA", "NULL", "")) {
    return(!file.not.exists(x = x, nullfile = nullfile, bad = bad))
}


#' slightly more robust test for whether file does not exist
#'
#' test whether a file is NA, NULL, or /dev/null OR if
#' the file exists
#'
#' @name file.not.exists
#' @author Kevin Hadi
#' @param x a character vector
#' @return logical
#' @export file.not.exists
file.not.exists = function(x, nullfile = "/dev/null", bad = c(NA, "NA", "NULL", "")) {
    isnul = (is.null(x))
    isbadfile = (x %in% bad | x == nullfile)
    isgoodfile = which(!isbadfile)
    isbadfile[isgoodfile] = !file.exists(as.character(x[isgoodfile]))
    isnolength = len(x) == 0
    return(isnul | isnolength | isbadfile)
}




#' run expression without any printed output
#'
#' execute expression without any output to console.
#' silent({var = function_that_has_explicit_print(...)})
#'
#' @author Kevin Hadi
#' @param ... an expression
#' @return NULL
#' @export
silent <- function (this_expr, this_env = parent.frame(), enclos = parent.frame(2)) {
        eval(expr = {
            suppressWarnings(capture.output(capture.output(... = this_expr, file = "/dev/null", 
                                          type = c("output")), file = "/dev/null", type = "message"))
        }, envir = .GlobalEnv, enclos = .GlobalEnv)
        invisible()
}


#' overwrite a method in R6 class generator
#'
#' useful for dev purposes.
#'
#' @export overwriteR6
overwriteR6 = function(newfun, oldfun, r6gen, meth = "public_methods", package = NULL, envir = globalenv()) {
    meth = ifelse(grepl("^pub", meth), "public_methods",
           ifelse(grepl("^pri", meth), "private_methods",
           ifelse(grepl("^act", meth), "active",
                  NA_character_)))
    if (is.na(meth))
        stop("method must refer to public, private, or active method")
    if (!is.null(package)) {
        if (is.character(package))
            envpkg = asNamespace(package)
        else if (isNamespace(package))
            envpkg = package
        nmpkg = environmentName(envpkg)
    }
    r6 = get(r6gen)
    tmpfun = r6[[meth]][[oldfun]]
    .newfun = get(newfun)
    environment(.newfun) = environment(tmpfun)
    attributes(.newfun) = attributes(tmpfun)
    r6[[meth]][[oldfun]] = .newfun
    NULL
}

#' make deep copy
#'
#' useful for dev
#' makes deep copy of R6 object, S4 object, or anything else really
#'
#' @export copy2
copy2 = function(x) {
    if (inherits(x, "R6")) {
        x2 = x$clone(deep = T)
        for (name in intersect(names(x2$.__enclos_env__), c("private", "public")))
            for (nname in names(x2$.__enclos_env__[[name]]))
                tryCatch({
                    x2$.__enclos_env__[[name]][[nname]] = rlang::duplicate(x2$.__enclos_env__[[name]][[nname]])
                }, error = function(e) NULL)
        return(x2)
    } else if (isS4(x)) {
        x2 = rlang::duplicate(x)
        slns = slotNames(x2)
        for (sln in slns) {
            tryCatch({slot(x2, sln) = rlang::duplicate(slot(x2, sln))},
                     error = function(e) NULL)
        }
        return(x2)
    } else {
        x2 = rlang::duplicate(x)
        return(x2)
    }
}

#' make deep copy, recursively
#'
#' useful for dev
#' makes deep copy of R6 object, S4 object, or anything else really
#'
#' @name copy3
#' @export copy3
copy3 = function (x, recurse_list = TRUE) {
    if (inherits(x, "R6")) {
        x2 = rlang::duplicate(x$clone(deep = T))
        for (name in intersect(names(x2$.__enclos_env__), c("private", 
            "public"))) for (nname in names(x2$.__enclos_env__[[name]])) tryCatch({
            x2$.__enclos_env__[[name]][[nname]] = copy3(x2$.__enclos_env__[[name]][[nname]])
        }, error = function(e) NULL)
        return(x2)
    } else if (isS4(x)) {
        x2 = rlang::duplicate(x)
        slns = slotNames(x2)
        for (sln in slns) {
            tryCatch({
                slot(x2, sln) = copy3(slot(x2, sln))
            }, error = function(e) NULL)
        }
        return(x2)
    } else if (inherits(x, c("list"))) {
        x2 = rlang::duplicate(x)
        x2 = rapply(x2, copy3, how = "replace")
        return(x2)
    } else {
        x2 = rlang::duplicate(x)
        return(x2)
    }
}

#' make deep copy, recursively
#'
#' useful for dev
#' makes deep copy of R6 object, S4 object, or anything else really
#'
#' @name copy
#' @export copy
copy = copy3

#' peepr6
#'
#' useful for dev
#'
#' @name peepr6
#' @export peepr6
peepr6 = function(x) {
    if (inherits(x, "R6")) {
        return(x$.__enclos_env__)
    } else {
        message("object is not R6...")
        return(x)
    }
}

#' make deep copy of all non-function public and private fields in R6
#'
#' useful for dev
#'
#' @name copyr6
#' @export copyr6
copyr6 = function(x) {
    if (inherits(x, "R6")) {
        x2 = x$clone(deep = T)
        for (name in intersect(names(x2$.__enclos_env__), c("private", "public")))
            for (nname in names(x2$.__enclos_env__[[name]]))
                tryCatch({
                    x2$.__enclos_env__[[name]][[nname]] = rlang::duplicate(x2$.__enclos_env__[[name]][[nname]])
                }, error = function(e) NULL)
        return(x2)
    } else {
        message("object is not R6...")
        return(x)
    }
}

#' make deep copy of all private slots in s4 object
#'
#' useful for dev
#'
#' @name copys4
#' @export copys4
copys4 = function(x) {
    if (isS4(x)) {
        x2 = rlang::duplicate(x)
        slns = slotNames(x2)
        for (sln in slns) {
            tryCatch({slot(x2, sln) = rlang::duplicate(slot(x2, sln))},
                     error = function(e) NULL)
        }
        return(x2)
    } else {
        message("object is not s4...")
        return(x)
    }
}


#' overwrite a function in its namespace
#'
#' useful for dev purposes.
#'
#' @name overwritefun
#' @export overwritefun
overwritefun = function (newfun, oldfun, package, envir = globalenv())
{
    if (is.character(newfun) && is.character(oldfun) && missing(package))
        stop("must specify package for oldfun")
    if (!missing(package)) {
        if (is.character(package))
            envpkg = asNamespace(package)
        else if (isNamespace(package))
            envpkg = package
    } else {
        if (missing(package)) {
            envpkg = asNamespace(environment(oldfun))
        }
    }
    if (!is.character(oldfun)) {
        oldfun = deparse(tail(as.list(substitute(oldfun)), 1)[[1]])
    }
    if (!is.character(newfun)) {
        newfunenv = asNamespace(environment(newfun))
        newfun = deparse(tail(as.list(substitute(newfun)), 1)[[1]])
    } else {
        newfunenv = parent.frame()
    }
    nmpkg = environmentName(envpkg)
    tmpfun = get(oldfun, envir = envpkg)
    .newfun = get(newfun, envir = newfunenv)
    environment(.newfun) = environment(tmpfun)
    attributes(.newfun) = attributes(tmpfun)
    evalq(asn2(oldfun, .newfun, ns = nmpkg), environment(), parent.frame())
    globasn(.newfun, oldfun, vareval = T)
}



#' writing a comma separated table with quotes
#'
#' comma-separated table with quotes around strings
#'
#' @export write.ctab
write.ctab = function (x, ..., sep = ",", quote = T, row.names = F)
{
    if (!is.data.frame(x))
        x = as.data.frame(x)
    write.table(x, ..., sep = sep, quote = quote, row.names = row.names)
}





#' query a matrix with nonmatching entries as NA
#'
#' @export
qmat = function(mat, rid = NULL, cid = NULL) {
    rown_provided = FALSE
    coln_provided = FALSE
    ## if (is.null(rid)) {
    ##     rid = seq_len(nrow(mat))
    ## } else if (is.character(rid)) {
    ##     rid = setNames(match3(rid, rownames(mat)), rid)
    ##     rown_provided = TRUE
    ## }
    if (is.character(rid)) {
        rid = setNames(match3(rid, rownames(mat)), rid)
        rown_provided = TRUE
    }
    ## rst = mkst(rid)
    ## if (is.null(cid))
    ##     cid = seq_len(ncol(mat))
    ## else if (is.character(cid)) {
    ##     coln_provided = TRUE
    ##     cid = setNames(match3(cid, colnames(mat)), cid)
    ## }
    if (is.character(cid)) {
        coln_provided = TRUE
        cid = setNames(match3(cid, colnames(mat)), cid)
    }
    ## if (!inherits(rid, "integer")) rid = as.integer(rid)
    ## if (!inherits(cid, "integer")) cid = as.integer(cid)
    ## can work with data table
    if (!inherits(rid, "character")) {
        rid = structure(as.character(rid), names = names(rid))
        rid[is.na(rid)] = "NA_integer_"
    }
    if (!inherits(cid, "character")) {
        cid = structure(as.character(cid), names = names(cid))
        cid[is.na(cid)] = "NA_integer_"
    }
    out = et(sprintf("mat[%s,%s,drop = FALSE]", mkst(rid), mkst(cid)))
    ## out = mat[rid,cid,drop = FALSE]
    if (rown_provided) rownames(out) = names(rid)
    if (coln_provided) colnames(out) = names(cid)
    return(out)
}



#' similar to setkey except a general use utility
#'
#' very slow version of keying a la data.table
#' but for general/interactive use
#'
#' @export
match3 = function(x, table, nomatch = NA_integer_, old = FALSE, use.data.table = TRUE, return_match = TRUE) {
  out = if (use.data.table) {
    tryCatch({
      dx = data.table(x = x)[, id.x := seq_len(.N)]
      dtb = data.table(table = table)[, id.tb := seq_len(.N)]
      ## setkey(dtb, table)[list(dx$x)]$id.tb
      mtbl = setkey(dtb, table)[dx]
      if (identical(return_match, TRUE))
          return(mtbl$id.tb)
      return(mtbl)
    }, error = function(e) structure("err", class = "err"))
  }
  if (!is.null(out) && !inherits(out, "err")) return(out)
  if (!identical(use.data.table, TRUE) || identical(old, TRUE) || inherits(out, "err")) {
    dx = within(data.frame(x = x), {id.x = seq_along(x)})
    dtb = within(data.frame(table = table), {id.tb = seq_along(table)})
    mtbl = merge(dx, dtb, by.x = "x", by.y = "table", all.x = TRUE,
                allow.cartesian = TRUE)
    mtbl = mtbl[order(mtbl$id.x),]
    if (identical(return_match, TRUE))
        return(mtbl$id.tb)
    return(mtbl)
    ## return(res$id.tb[order(res$id.x)])
  }
}

#' similar to setkey except a general use utility
#'
#' very slow version of keying a la data.table
#' but for general/interactive use
#'
#' @export
match = match3


# match3 = function(x, table, nomatch = NA_integer_, old = TRUE, use.data.table = TRUE) {
#   out = if (use.data.table) {
#     tryCatch({
#       dx = data.table(x = x)[, id.x := seq_len(.N)]
#       dtb = data.table(table = table)[, id.tb := seq_len(.N)]
#       ## setkey(dx, x)[list(dtb$table)]$id.x
#       setkey(dtb, table)[list(dx$x)]$id.tb
#     }, error = function(e) structure("err", class = "err"))
#   }
#   if (!is.null(out) && !inherits(out, "err")) return(out)
#   if (old) {
#     dx = within(data.frame(x = x), {id.x = seq_along(x)})
#     dtb = within(data.frame(table = table), {id.tb = seq_along(table)})
#     res = merge(dx, dtb, by.x = "x", by.y = "table", all.x = TRUE,
#       allow.cartesian = TRUE)
#     return(res$id.tb[order(res$id.x)])
#   } else  {
#     m = match(table,x)
#     mat = cbind(m, seq_along(m))
#     mat = mat[!is.na(mat[,1]),,drop=FALSE]
#     mat = mat[order(mat[,1], na.last = FALSE),,drop = FALSE]
#     mat = cbind(mat, seq_len(dim(mat)[1]))
#     m2 = match(x,table)
#     ix = which(!duplicated(m2) & !is.na(m2))
#     mat_rix = unlist(rep(split(mat[,3], mat[,1]), base::tabulate(m2)[m2][ix]))
#     ## mat_rix = unlist(rep(split(mat[,3], mat[,1]), base::tabulate(m2)[m2][ix]))
#     ix = rep(1, length.out = length(m2))
#     ## original line
#     ## ix[!is.na(m2)] = base::tabulate(m)[!is.na(m2)]
#     ix[!is.na(m2)] = base::tabulate(m)[m][m2][!is.na(m2)]
#     out = rep(m2, ix)
#     out[!is.na(out)] = mat[mat_rix,,drop=F][,2]
#     return(out)
#     ## m = match(table, x)
#     ## mat = cbind(m, seq_along(m))
#     ## mat = mat[!is.na(mat[, 1]), , drop = FALSE]
#     ## mat = mat[order(mat[, 1]), , drop = FALSE]
#     ## mat = cbind(mat, seq_len(dim(mat)[1]))
#     ## m2 = match(x, table)
#     ## ix = which(!duplicated(m2))
#     ## mat_rix = unlist(rep(split(mat[, 3], mat[, 1]), base::tabulate(m2)[m2][ix]))
#     ## mat[mat_rix, , drop = F][, 2]
#   }
# }


#' similar to setkey except a general use utility
#'
#' slower version of setkey, but for interactive use
#'
#' @export
`%K%` = function(thisx,thisy, old = TRUE) {
    ## m = match(x,y)
    ## mat = cbind(m, seq_along(m))
    ## mat = mat[!is.na(mat[,1]),,drop=FALSE]
    ## mat = mat[order(mat[,1]),,drop = FALSE]
    ## mat = cbind(mat, seq_len(dim(mat)[1]))
    ## ## rleseq(x[which(x %in% y)], clump = T)
    ## m2 = match(y,x)
    ## ## lst = rleseq(m2, clump = T)
    ## ix = which(!duplicated(m2))
    ## base::tabulate(m2)[m2][ix]
    ## mat_rix = unlist(rep(split(mat[,3], mat[,1]), base::tabulate(m2)[m2][ix]))
    ## mat[mat_rix,,drop=F][,2]
    if (old)
        return(match3(table = thisx, x = thisy, old = TRUE))
    else
        return(match3(table = thisx, x = thisy, old = FALSE))
}


#' making column into rownames
#'
#' internal version that doesn't require library(tibble)
#'
#' @param .data a data frame/table
#' @return a data frame/table with rownames from a column
#' @export
column_to_rownames = function(.data, var = "rowname", force = T, sep = " ") {
  ## if (inherits(.data, c("data.frame", "DFrame"))) {
  if (!is.null(dim(.data))) {
    tmpfun = function(...) paste(..., sep = sep)
    if (!is.null(rownames(.data)) || force) {
      ## rn = .data[[var]]
      if (is.numeric(var)) {
        eva = eval(parse(text = paste(".data[,", paste("c(", paste0(var, collapse = ", "), ")"), ",drop=FALSE]")))
        if (ncol(eva) > 1) eva = dodo.call2(dg(tmpfun), eva)
        rn = unname(unlist(eva))
        colix = setdiff(seq_len(ncol(.data)), var)
      } else if (is.character(var)) {
        eva = eval(parse(text = paste(".data[,", paste("c(", paste0(paste0("\"", var, "\""), collapse = ", "), ")"), ",drop=FALSE]")))
        if (ncol(eva) > 1) eva = dodo.call2(dg(tmpfun), eva)
        rn = unname(unlist(eva))
        colix = setdiff(seq_len(ncol(.data)), match3(var,colnames(.data)))
      }
      eval(parse(text = paste(".data = .data[,", paste("c(", paste0(colix, collapse = ", "), ")"), ", drop = FALSE]")))
      ## .data = .data[, colix,drop = FALSE]
      if (inherits(.data, "tbl"))
        .data = as.data.frame(.data)
      if (inherits(.data, "data.frame")) {
        rownames(.data) = make.unique(replace(as.character(rn), is.na(rn), "NA"))
      } else {
        rownames(.data) = replace(as.character(rn), is.na(rn), "NA")
      }
      return(.data)
    } else
      return(.data)
  } else
    stop("must be a data frame-like object")
}


#' alias for column_to_rownames
#'
#' internal version that doesn't require library(tibble)
#'
#' @param .data a data frame/table
#' @return a data frame/table with rownames from a column
#' @export
col2rn = column_to_rownames



#' making column out of rownames
#'
#' internal version that doesn't require library(tibble)
#'
#' @param .data a data frame/table
#' @return a data frame/table with the rownames as an additional column
#' @export
rownames_to_column = function(.data, var = "rowname", keep.rownames = FALSE,
                              asdf = as.data.frame, as.data.frame = FALSE) {
    ## if (inherits(.data, c("data.frame", "DFrame"))) {
    as.data.frame = asdf
    if (!is.null(dim(.data))) {
        if (!is.null(rownames(.data))) {
            rn = rownames(.data)
            if (as.data.frame)
                .data = cbind(u.var5912349879872349876 = rn, as.data.frame(.data, row.names = make.unique(rn)))
            else
                .data = cbind(u.var5912349879872349876 = rn, .data)
            colnames(.data)[1] = var
            if (keep.rownames)
                rownames(.data) = rn
            return(.data)
        } else
            return(.data)
    } else
        stop("must be a data frame-like object")
}


#' alias for rownames_to_column
#'
#' internal version that doesn't require library(tibble)
#'
#' @param .data a data frame/table
#' @return a data frame/table with rownames from a column
#' @export
rn2col = rownames_to_column



#' normalize directory, but not basepath
#'
#' get the absolute file directory without following the
#' base path link
#'
#' @param str a path string
#' @return a normalized path
#' @export
normpath = function(p) {
    fe = file.exists2(p)
    bn = basename(p)
    d = normalizePath(dirname(p))
    return(ifelse(fe, paste0(d, "/", bn), as.character(p)))
    ## return(paste0(d, "/", bn))
}



#' Reduce intersect
#'
#' intersect more than 2 vectors
#'
#' @export
rintersect = function(...) {
    Reduce(intersect, list(...))
}



#' Reduce union
#'
#' get union of more than 2 vectors
#'
#' @export
runion = function(...) {
    Reduce(union, list(...))
}



#' @name rm_mparen
#' @title remove multiple parentheses from path
#'
#' utility function for removing multiple parantheses
#' probably not necessary
#'
#' @param str a path string
#' @return a string with multiple parentheses replaced with a single parenthesis
rm_mparen  = function(str) {
    return(gsub('\\/{2,}', "/", str))
}

#' @name numeq
#' @title test equality between numeric values with some tolerance
#'
#' @description
#' two numerical values may be slightly off in their decimal precision.
#' These may be considered equivalent values but the `==` operator will
#' return FALSE. This tests for equivalence of two values with some lower
#' tolerance limit
#'
#' @return logical vector
#' @export
numeq = function(x, y, tol = .Machine$double.eps^0.5) {
    abs(x - y) < tol
}


#' @name symdiff
#' @title data.table of all setdiff items in X and in Y
#'
#' gives back data table of setdiff elements
#' noting whether the element is in vector x or vector y
#' This gives back all elements, including non-unique
#'
#' @return data.table
#'
#' @export
symdiff = function(x, y, ignore.na = FALSE) {
    xy = setdiff(x,y)
    yx = setdiff(y,x)
    if (ignore.na) {
        xy = na.omit(xy)
        yx = na.omit(yx)
    }
    elx = x[which(x %in% xy)]
    ely = y[which(y %in% yx)]
    if (length(xy)) {
        xy = data.table(elements = elx,
                        ix.x = which(x %in% xy),
                        ix.y = NA_integer_,
                        inx = TRUE, iny = FALSE)
        lst = rleseq(xy$elements, clump = T)
        xy = cbind(xy, as.data.table(lst))
    } else
        xy = data.table()
    if (length(yx)) {
        yx = data.table(elements = yx,
                        ix.x = NA_integer_,
                        ix.y = which(y %in% yx),
                        inx = FALSE, iny = TRUE)
        lst = rleseq(yx$elements, clump = T)
        yx = cbind(yx, as.data.table(lst))
    } else
        yx = data.table()
    tb = rbind(xy,
               yx, fill = T)
    return(tb)
}



#' debug an S4 function
#'
#' wrapper around trace
#'
#' @export debug.s4
debug.s4 = function(what, signature, where) {
  trace(what = what, tracer = browser, at = 1, signature = signature, where = where)
}

#' undebug an S4 function
#'
#' wrapper around untrace
#'
#' @export undebug.s4
undebug.s4 = function(what, signature, where) {
  untrace(what = what, signature = signature, where = where)
}



#' interaction but orders levels based on input vectors
#'
#' Same as base::interaction but orders levels based on the appearance of elements
#' in input vector(s)
#'
#' @export
interaction2 = function(..., drop = FALSE, sep = ".", lex.order = FALSE)
{
  args <- list(...)
  narg <- length(args)
  if (narg < 1L)
    stop("No factors specified")
  if (narg == 1L && is.list(args[[1L]])) {
    args <- args[[1L]]
    narg <- length(args)
  }
  for (i in narg:1L) {
    unix = which(!duplicated(args[[i]]))
    f <- factor(args[[i]], levels = args[[i]][unix])[, drop = drop]
    l <- levels(f)
    if1 <- as.integer(f) - 1L
    if (i == narg) {
      ans <- if1
      lvs <- l
    }
    else {
      if (lex.order) {
        ll <- length(lvs)
        ans <- ans + ll * if1
        lvs <- paste(rep(l, each = ll), rep(lvs, length(l)),
          sep = sep)
      }
      else {
        ans <- ans * length(l) + if1
        lvs <- paste(rep(l, length(lvs)), rep(lvs, each = length(l)),
          sep = sep)
      }
      if (anyDuplicated(lvs)) {
        ulvs <- unique(lvs)
        while ((i <- anyDuplicated(flv <- match(lvs,
          ulvs)))) {
            lvs <- lvs[-i]
            ans[ans + 1L == i] <- match(flv[i], flv[1:(i -
                                                         1)]) - 1L
            ans[ans + 1L > i] <- ans[ans + 1L > i] - 1L
          }
        lvs <- ulvs
      }
      if (drop) {
        olvs <- lvs
        lvs <- lvs[sort(unique(ans + 1L))]
        ans <- match(olvs[ans + 1L], lvs) - 1L
      }
    }
  }
  structure(as.integer(ans + 1L), levels = lvs, class = "factor")
}

#' Flexibly apply function to columns of data.table/frame
#' 
#' Convenience function to apply a function to columns of a data.table/frame.
#' The syntax for the columns is flexible
#'
#' lapply_dt(c(newcolname = colname), dt, dosomething)
#' will give a data.table or list with the column name
#' renamed to newcolname
#' lapply_dt(.(newcolname = colname)...) also works as well.
#' Note that no quotations are needed.
#'
#' @name lapply_dt
#' @param x fields to apply function to
#' @param dt data.table/frame
#' @param LFUN either a character name of a function, or a function
#' @param natype the type of NA which is to be specified as one of NA, NA_character_, NA_real_, or NA_integer_
#' @param evalcall logical flag to evaluate x as a call or not
#' @param as.data.table a logical indicating whether to coerce the output to a data.table
#' @return data.table/frame if as.data.table is TRUE, otherwise a list
#' @export lapply_dt
lapply_dt = function(x, dt, LFUN = "identity", natype = NA, as.data.table = T, evalcall = FALSE) {
    if (!is.function(LFUN))
        LFUN = base::mget(x = 'identity', mode = "function", inherits = T)[[1]]
    expr = substitute(x)
    if (is.name(expr))
        x = x
    else if (evalcall && is.call(expr))
        expr = eval(expr)
    else {
        x = trimws(gsub(',', "", unlist(strsplit(toString(expr), " "))[-1]))
        if (!is.null(names(expr)))
            names(x) = names(expr)[-1]
    }
    if (!is.null(names(x)))
        nm = names(x)
    else
        nm = x
    out = setNames(lst.emptyreplace(lapply(x, function(x, dt) {
        dt[[x]]
    }, dt = dt), natype), nm)
    out = lapply(out, LFUN)
    if (as.data.table)
        return(as.data.table(out))
    else
        return(out)
}


#' Fix messed up data frame/table column names
#'
#' If there are any malformed columns
#' (e.g. those with numbers at the beginning
#' or a dash) these column names are fixed
#'
#'
#' @name fix.cols
#' @param dt data.table/frame
#' @param sep separator field
#' @return data.table/frame
#' @export fix.cols
fix.cols = function(dt, sep = "_") {
    this_sep = sep
    cl = colnames(dt)
    probs.num = grep("^[0-9]", cl)
    probs.dash = grep("-", cl)
    if (length(probs.num) | length(probs.dash)) {
        cl[probs.num] = paste0("X", this_sep, cl[probs.num])
        cl[probs.dash] = gsub("-", this_sep,  cl[probs.dash])
    }
    colnames(dt) = cl
    return(dt)
}


#' flexibly read in a field and append an id to the output
#'
#' perform an embarrassingly parallel operation on files
#' i.e. read in and do some pre processing
#'
#' @name process_tbl
#' @param tbl table with fields to read in
#' @param field field to read in
#' @param id.field field with id to append to output
#' @param read.fun function to read in, will try to guess based on extension, but may need to provide
#' @param remove_ext extension strings to remove from file
#' @param mc.cores number of cores
#' @return a list of idx and seq
#'
#' @export
process_tbl = function(tbl, field = "jabba_rds", id.field = "pair", read.fun, remove_ext = c(".gz", ".zip"), mc.cores = 1) {
    forceall()
    ## invisible(eapply(environment(), force, all.names = TRUE))
    tbl = tbl[file.exists(get(field))]
    lst = with(tbl, {
        mclapply(mc.cores = mc.cores, subset2(dg(field,F), file.exists(x)), function(x, field = field, id.field = id.field, read.fun = read.fun, ...) {
            id.field = dg(id.field)
            field = dg(field)
            remove_ext = dg(remove_ext)
            if (missing(read.fun)) {
                remove_expr = paste(paste0(remove_ext, "$"), collapse = "|")
                fext = file_ext(gsub(remove_expr, "", x))
                read.fun = switch(fext, "vcf" = read_vcf, "rds" = readRDS, "txt" = fread,
                                  "csv" = fread, "tab" = fread)
                ##if expression isn't missing, eval expression
            }
            out = read.fun(x, ...)
            if (inherits(out, c("data.frame", "GRanges", "list")))
                out$pair = g2()[get(field) == x]$pair
            return(out)
        })})
    names(lst) = tbl[[id.field]]
    return(lst)
}

#' dedup
#'
#' stolen from skitools
#'
#' @name dedup
#' @param x vector to dedup
#' @param suffix character separator
#' @return a vector
#' @author Marcin Imielinski
dedup = function(x, suffix = ".") {
    dup = duplicated(x)
    udup = setdiff(unique(x[dup]), NA)
    udup.ix = lapply(udup, function(y) which(x == y))
    udup.suffices = lapply(udup.ix, function(y) c("", paste(suffix,
        2:length(y), sep = "")))
    out = x
    out[unlist(udup.ix)] = paste(out[unlist(udup.ix)], unlist(udup.suffices),
        sep = "")
    return(out)
}



#' make a random string
#'
#' @name rand.string
#' @return random string
#' @author Someone from Stackoverflow
#' @export rand.string
rand.string = function(n=1, length=12)
{
    randomString <- c(1:n)                  # initialize vector
    for (i in 1:n)
    {
        randomString[i] <- paste(sample(c(0:9, letters, LETTERS),
                                        length, replace=TRUE),
                                 collapse="")
    }
    return(randomString)
}


#' numbers up within repeating elements of a vector
#'
#' returns unique id within each unique element of a vector or set of provided vectors
#' and also a running id within each unique element
#'
#' @name rleseq
#' @param ... Vector(s) to identify with unique id and a running id within each unique id
#' @param clump a logical specifying if duplicates are to be counted together
#' @param recurs a logical that is meant to only be set by the function when using clump = TRUE
#' @return a list of idx and seq
#' @author Kevin Hadi
#' @export
rleseq = function (..., clump = TRUE, recurs = FALSE, na.clump = TRUE, 
                   na.ignore = FALSE, sep = paste0(" ", rand.string(length = 6), 
                     " "), use.data.table = FALSE) 
{
    rand.string <- function(n = 1, length = 12) {
        randomString <- c(1:n)
        for (i in 1:n) {
            randomString[i] <- paste(sample(c(0:9, letters, LETTERS), 
                                            length, replace = TRUE), collapse = "")
        }
        return(randomString)
    }
    force(sep)
    out = if (use.data.table) {
              tryCatch(
              {
                  dt = data.table::data.table(...)
                  data.table::setnames(dt, base::make.names(rep("", ncol(dt)), unique = T))
                  ## make.unique
                  cmd = sprintf(
                      "dt[, I := .I][, .(idx = .GRP, seq = seq_len(.N), lns = .N, I), by = %s]",
                      mkst(colnames(dt), "list")
                  )
                  dt = eval(parse(text = cmd))
                  data.table::setkey(dt, I)[, .(idx, seq, lns)]
              }, error = function(e) structure("data table didn't work...", class = "err")
              )
          }
    if (!(is.null(out) || class(out)[1] == "err"))
        return(as.list(out))


    if (na.clump) {
        paste = function(..., sep) base::paste(..., sep = sep)
    } else {
        paste = function(..., sep) {
            tryCatch({
                stringr::str_c(..., sep = sep)
            }, error = function(e) {
                comp = complete.cases(list(...))
                out = base::paste(..., sep = sep)
                out[!comp] = NA_character_
                return(out)
            })
        }
    }
    ## vec = setNames(do.call(paste, ...), seq_len(fulllens))
    ddd = match.call(expand.dots = FALSE)$`...`
    doeval = length(ddd) == 1 && (is.call(ddd[[1]]) || is.symbol(ddd[[1]]))
    if (doeval) ddd = eval(ddd[[1]], parent.frame())
    dodocall = inherits(ddd, c("data.frame", "DataFrame", "list", "List"))
    if (dodocall) {
        ddd = as.list(ddd)
        lns = base::lengths(ddd)
        if (!all(lns == lns[1])) 
            warning("not all vectors provided have same length")
        fulllens = max(lns, na.rm = T)        
        vec = setNames(do.call(function(...) paste(..., sep = sep), ddd), seq_len(fulllens))
    } else {
        lns = base::lengths(list(...))
        if (!all(lns == lns[1])) 
            warning("not all vectors provided have same length")
        fulllens = max(lns, na.rm = T)        
        vec = setNames(paste(..., sep = sep), seq_len(fulllens))
    }
    if (length(vec) == 0) {
        out = list(idx = integer(0), seq = integer(0), lns = integer(0))
        return(out)
    }
    if (na.ignore) {
        if (!(doeval && dodocall))
            isnotna = which(rowSums(is.na(as.data.frame(list(...)))) == 0)
        else
            isnotna = which(rowSums(is.na(as.data.frame(ddd))) == 0)
        ## isnotna = which(rowSums(as.data.frame(lapply(list(...), 
        ##                                              is.na))) == 0)
        out = list(idx = rep(NA, fulllens), seq = rep(NA, fulllens), 
                   lns = rep(NA, fulllens))
        if (length(isnotna)) 
            vec = vec[isnotna]
        tmpout = do.call(
            rleseq,
            c(
                alist(... = vec),
                alist(clump = clump, 
                      recurs = recurs, na.clump = na.clump,
                      na.ignore = FALSE, use.data.table = FALSE)
            )
        )
        for (i in seq_along(out)) out[[i]][isnotna] = tmpout[[i]]
        return(out)
    }
    if (!clump) {
        rlev = rle(vec)
        if (recurs) {
            ## return(unlist(unname(lapply(rlev$lengths, seq_len))))
            return(sequence(rlev$lengths))
        }
        else {
            out = list(idx = rep(seq_along(rlev$lengths), times = rlev$lengths), 
                       ## seq = unlist(unname(lapply(rlev$lengths, seq_len))))
                       seq = sequence(rlev$lengths))
            out$lns = ave(out[[1]], out[[1]], FUN = length)
            return(out)
        }
    }
    else {
        if (!na.clump) {
            vec[which(isNA(vec))] = base::paste(make.unique(vec[which(isNA(vec))]))
        }
        vec = setNames(vec, seq_along(vec))
        lst = split(vec, factor(vec, levels = unique(vec)))
        ord = as.integer(names(unlist(unname(lst))))
        idx = rep(seq_along(lst), times = base::lengths(lst))
        out = list(
            idx = idx[order(ord)],
            seq = rleseq(
                idx, clump = FALSE, 
                recurs = TRUE,
                use.data.table = FALSE
            )[order(ord)]
        )
        ## out$lns = ave(out[[1]], out[[1]], FUN = length)
        out$lns = unname(rep(base::lengths(lst), times = base::lengths(lst)))
        return(out)
    }
}

## rleseq = function(..., clump = TRUE, recurs = FALSE, na.clump = TRUE, na.ignore = FALSE,
##                   sep = paste0(" ", rand.string(length = 6), " ")) {
##     force(sep)
##     rand.string <- function(n=1, length=12)
##     {
##         randomString <- c(1:n)                  # initialize vector
##         for (i in 1:n)
##         {
##             randomString[i] <- paste(sample(c(0:9, letters, LETTERS),
##                                             length, replace=TRUE),
##                                      collapse="")
##         }
##         return(randomString)
##     }
##     if (isTRUE(na.clump))
##         paste = function(...,
##                          sep) base::paste(..., sep = sep)
##     else
##         paste = function(...,
##                          sep) base::paste(stringr::str_c(..., sep = sep))
##     lns = lengths(list(...))
##     if (!all(lns == lns[1]))
##         warning("not all vectors provided have same length")
##     fulllens = max(lns, na.rm = T)
##     vec = setNames(paste(..., sep = sep), seq_len(fulllens))
##     if (length(vec) == 0) {
##         out = list(idx = integer(0), seq = integer(0), lns = integer(0))
##         return(out)
##     }
##     ## rlev = rle(paste(as.character(vec)))
##     if (na.ignore) {
##         isnotna = which(rowSums(as.data.frame(lapply(list(...), is.na))) == 0)
##         out = list(idx = rep(NA, fulllens), seq = rep(NA, fulllens), lns = rep(NA, fulllens))
##         if (length(isnotna))
##             vec = vec[isnotna]
##         tmpout = do.call(rleseq, c(alist(... = vec),
##                                    alist(clump = clump, recurs = recurs, na.clump = na.clump, na.ignore = FALSE)))
##         ## tmpout = rleseq(..., clump = clump, recurs = recurs, na.clump = FALSE, na.ignore = FALSE)
##         for (i in seq_along(out))
##             out[[i]][isnotna] = tmpout[[i]]
##         return(out)
##     }
##     if (!isTRUE(clump)) {
##         rlev = rle(vec)
##         if (isTRUE(recurs)) {
##             return(unlist(unname(lapply(rlev$lengths, seq_len))))
##         } else {
##             out = list(
##                 idx = rep(seq_along(rlev$lengths), times = rlev$lengths),
##                 seq = unlist(unname(lapply(rlev$lengths, seq_len))))
##             out$lns = ave(out[[1]], out[[1]], FUN = length)
##             ## if (na.ignore)
##             ##     complete.cases(as.data.frame(lapply(list(...), is.na)))
##             return(out)
##         }
##     } else {
##         if (!isTRUE(na.clump)) {
##             vec = replace2(vec, which(x == "NA"), dedup(dg(x)[dg(x) == "NA"]))
##         }
##         ## vec = setNames(paste(as.character(vec)), seq_along(vec))
##         vec = setNames(vec, seq_along(vec))
##         lst = split(vec, factor(vec, levels = unique(vec)))
##         ord = as.integer(names(unlist(unname(lst))))
##         idx = rep(seq_along(lst), times = lengths(lst))
##         out = list(
##             idx = idx[order(ord)],
##             seq = rleseq(idx, clump = FALSE, recurs = TRUE)[order(ord)])
##         out$lns = ave(out[[1]], out[[1]], FUN = length)
##         return(out)
##     }
    
## }




#' similar to lengths except gets nrows for those items that have dimensions
#'
#' figure out length or nrows of a list
#' if there are dimensions in the list element,
#' find out the number of rows
#'
#' @name lens
#' @param x A list
#' @return A numeric vector of lengths of each list
#' @export
lens = function(x, use.names = TRUE) {
    out = vapply(x, NROW, FUN.VALUE  = 1L, USE.NAMES = use.names)
    return(out)
}


#' similar to length except gets nrows for those items that have dimensions
#'
#' figure out length or nrow of an object
#' lol... this is base::NROW
#' also see base::NCOL for the alternative
#'
#' @param x an object
#' @return length or nrow of an object
#' @export
len = NROW



#' extracting substring match using regexpr
#'
#' extract the first portion of matched substring
#'
#' @return Character vector of the regex matched portions of the input string vector
#' @export
rg_sub = function(pattern, text, ...) {
    rg = regexpr(pattern, text, ...)
    out = substr(text, rg, rg + attributes(rg)$match.length - 1)
    return(replace2(out, !nzchar(x), NA_character_) %>% trimws)
}

#' extract portions of matched substring
#'
#' extract all portions of matched substring
#' and collapse
#'
#' @name grg_sub
#' @export
grg_sub = function(pattern, text, colsep = " ", ...) {
    grg = gregexpr(pattern, text, ...)
    rg = unlist(grg)
    m.len = unlist(lapply(grg, attr, "match.length"))
    lens = lengths(grg)
    dt = data.table(text = rep(text, times = lens),
                    ix = rep(seq_along(text), times = lens),
                    iix = unlist(lapply(lens, seq_len)),
                    dummy = "out_str")
    dt[, out_str := substr(text, rg, rg + m.len - 1)]
    out = dcast.wrap(dt, lh = "ix", rh = "dummy", value.var = "out_str", fun.aggregate = function(x) paste(x, collapse = colsep))[[2]]
    return(replace2(out, !nzchar(x), NA_character_) %>% trimws)
}


#' modification of base::dynGet()
#'
#' slight modification of base::dynGet()
#' minframe set to 0 to also look in global environment
#' and it's robust to using within functions
#' also takes the variable name without quotes as default
#' but can supply a character, and set px to FALSE
#'
#' @export
dynget = function(x, px = TRUE,
                  ifnotfound = stop(gettextf("%s not found", sQuote(x)),
                                    domain = NA),
                  minframe = 0L,
                  inherits = FALSE) ## modification of base::dynGet()
{
    tmp_x = as.list(match.call())$x
    if (is.name(tmp_x)) {
        if (isTRUE(px))
            x = as.character(tmp_x)
        else
            x = eval(tmp_x, parent.frame())
    }
    if (!is.character(x))
        stop("x must be a character or a name of a variable")
    n <- sys.nframe()
    myObj <- structure(list(.b = as.raw(7)), foo = 47L)
    while (n > minframe) {
        n <- n - 1L
        env <- sys.frame(n)
        r <- tryCatch(
            {
                get0(x, envir = env, inherits = inherits, ifnotfound = myObj)
            }, error = function(e) return(myObj)
        )
        if (!identical(r, myObj))
            return(r)
    }
    ifnotfound
}


#' alias of dynget
#'
#' convenience wrapper around dynget
#'
#' @export
dg = dynget


#' %inn%
#'
#' Same as %in% but keeps NA values as NA
#'
#' @return a logical vector
#' @export
`%inn%` = function(x, table) {
    vec = match(x, table, nomatch = 0L) > 0L
    vec[is.na(x)] = NA
    vec
}



#' dcast.count
#'
#' Counting up occurrences in a table while taking factor levels into account
#'
#' @return A data frame or data.table
#' @export dcast.count
dcast.count = function(tbl, lh, rh = NULL, countcol = "count", ...) {
    this.env = environment()
    if (is.null(rh))
        rh = "dummy"
    dcast.wrap(within(tbl, {dummy = this.env$countcol}), lh = lh, rh = rh, value.var = "dummy", fun.aggregate = length, fill = 0, ...)
}


#' Counts up occurrences from a melted table
#'
#' Counting up occurrences in a table while taking factor levels into account
#' Also allows for weighting the counts using flexible argument parsing
#' Can either provide a weight as a name of a column,
#' as values themselves, or don't provide at all, and the function looks for a
#' column named "wt" for its values
#'
#' @return A data frame or data.table
#' @export dcast.count2
dcast.count2 = function(tbl, lh, rh = NULL, countcol = "count", wt = 1, fun.aggregate = "sum", value.var = "dummy", ...) {
    suppressWarnings({tbl$dummy = NULL})
    lst.call = as.list(match.call())
    if (is.name(lst.call$fun.aggregate))
        fun.aggregate = get(as.character(lst.call$fun.aggregate))
    else if (is.call(lst.call$fun.aggregate))
        fun.aggregate
    else if (is.character(fun.aggregate))
        fun.aggregate = get(fun.aggregate)
    if ("wt" %in% names(lst.call))
        if (is.character(wt) && wt %in% colnames(tbl)) {
            expr = expression(within(tbl, {dummy = 1 * dg(wt, FALSE)}))
        } else if (is.numeric(wt)) {
            expr = expression(within(tbl, {wt = NULL; dummy = 1 * dg(wt)}))
        } else {
            stop("wt argument must be either a numeric vector, a name of a column, or a column that exists in the table")
        }
    else if (is.null(wt) || isFALSE(wt) || is.na(wt) || length(wt) == 0)
        expr = expression(within(tbl, {dummy = 1}))
    else if (!"wt" %in% names(lst.call)) {
        if ("wt" %in% colnames(tbl)) {
            message("column named \"wt\" found, will weight counts using values in this field")
        }
        expr = expression(within(tbl, {dummy = 1 * dg(wt)}))
    }
    this.env = environment()
    if (is.null(rh))
        rh = "dummy"
    out = dcast.wrap(eval(expr), lh = lh, rh = rh, value.var = value.var, fun.aggregate = fun.aggregate, fill = 0, ...)
    if ("1" %in% colnames(out))
        setnames(out, "1", countcol)
    return(out)
}






#' wrapper around dcast or dcast2
#'
#' A convenience wrapper around dcast to make formula generation more
#' programmatic
#'
#' @return A data frame or data.table
#' @export dcast.wrap
dcast.wrap = function (x, lh, rh, dcast.fun, ...) {
    if (missing(dcast.fun)) {
        if (inherits(x, "data.table")) 
            dcast.fun = dcast.data.table
        else dcast.fun = dcast
    }
    if (!isTRUE(is.function(dcast.fun))) 
        stop("provided dcast argument is not a function")
    dcast_form = formula(paste(paste(lh, collapse = "+"), paste(rh, 
        collapse = "+"), sep = "~"))
    return(dcast.fun(x, formula = dcast_form, ...))
}



#' normalize a vector
#'
#' i.e. rescale to have values between 0 and 1
#'
#' @return vector
#' @export
normv = function(x) {
    (x - min(x, na.rm = T)) / (max(x, na.rm = T) - min(x, na.rm = T))
}


#' normalize a vector, treating positives and negatives separately
#'
#' i.e. rescale negatives to be between 0-0.5
#' rescale positives to be between 0.5-1
#'
#' @return vector
#' @export
normv_sep = function(x) {
    if (any(x < 0, na.rm = T))
        x[which(x < 0)] = -normv(-(x[which(x < 0)])) - 0.05
    if (any(x >= 0, na.rm = T))
        x[which(x >= 0)] = normv((x[which(x >= 0)])) + 0.05
    return(normv(x))
}



#' zscore a numeric vector
#'
#' @return vector
#' @export
zscore <- function(x, na.rm = F) {
    ## (x - mean(x, na.rm = na.rm)) / sd(x, na.rm = na.rm)
    mn = mean(x, na.rm = na.rm)
    stddev = sd(x, na.rm = na.rm)
    out = (x - mn) / stddev
    structure(
        out,
        mean = mn,
        stddev = stddev
    )
}


#' select.matrix
#'
#' wrapper to pick out rows and columns without erroring out
#'
#' @return matrix
#' @export select.matrix
select.matrix = function(x, rows = NULL, cols = NULL, int.rows = TRUE, int.cols = TRUE) {
    errcol = ""
    errrow = ""
    if (!is.null(rows)) {
        if (inherits(rows, "character")) {
            if (int.rows) {
                sel.row = intersect(rows, rownames(x))
            } else {
                sel.row = rows
            }
        } else if (!inherits(col, c("numeric", "integer"))) {
            errrow = "incorrect column specification"
        }
    } else {
        sel.row = seq_len(dim(x)[1])
    }
    if (!is.null(cols)) {
        if (inherits(cols, "character")) {
            if (int.cols) {
                sel.col = intersect(cols, colnames(x))
            } else {
                sel.row = rows
            }
        } else if (!inherits(col, c("numeric", "integer"))) {
            errcol = "incorrect column specification"
        }
    } else  {
        sel.col = seq_len(dim(x)[2])
    }
    x[sel.row, sel.col, drop = FALSE]
}



#' "no error"
#'
#' "no error"
#'
#' @return NULL, if error
#' @export
ne = function(...) {
    return(tryCatch(..., error = function(e) NULL))
}



#' Does File exists and is file size greater than threshold
#'
#' Queries a set of file paths for whether the file exists AND
#' if the file is greater than a size threshold
#'
#' @param x character vector of file paths
#' @param size.thresh threshold of minimum file size
#' @return logical
#' @export good.file
good.file = function(x, size.thresh = 0) {
    (file.exists2(x) & na2false(file.size(x) > size.thresh))
}



#' wraps grep in for loop
#'
#' A wrapper around grep to identify string matches across multiple patterns
#'
#' @return integer vector of indices
#' @export
loop_grep = function(pattern, x, ignore.case = FALSE) {
    ## for (i in unique(pattern)) {
    ## matches = integer(0)
    ## for (i in seq_along(pattern)) {
    ##     this_id = grep(pattern = pattern[i], x)
    ##     matches = unique(c(matches, this_id))
    ## }
    pattern = unique(pattern)
    ind = unlist(lapply(pattern, function(this_pattern) {
        grep(pattern = this_pattern, x, ignore.case = ignore.case)
    }))
    return(ind)
}


#' wrapper around loop_grep
#'
#' Generating a logical vector from loop_grep
#'
#' @return logical vector of all matches
#' @export
loop_grepl = function(patterns, vec_char, ignore.case = FALSE) {
    lg = logical(length(vec_char))
    ind = loop_grep(patterns, vec_char, ignore.case = ignore.case)
    lg[ind] = TRUE
    lg
}




#' Recursively repeat a function call
#'
#' Recursively repeat a function
#' Found on stackoverflow
#'
#' @author StackOverflow
#' @return Same as .x
#' @export
rrrepeated <- function(.x, .reps = 1, .f, ...) {
                                        # A single, finite, non-negative number of repetitions
    assertthat::assert_that(
        length(.reps) == 1,
        !is.na(.reps),
        .reps >= 0,
        is.finite(.reps))

                                        # accept purrr-style formula functions
    .f <- rlang::as_function(.f, ...)

    recursively_repeat <- function(.x, .reps, .f, ...) {
        if (.reps == 0) {
            .x
        } else {
            ## recursively_repeat(.f(.x, ...), .reps - 1, .f, ...)
            Recall(.f(.x, ...), .reps - 1, .f, ...)
                                        # (It would be more correct to use `Recall()` so that renaming the function
                                        # doesn't break this line... -- how's that for an R deep cut?)
        }
    }

    recursively_repeat(.x, .reps, .f, ...)
}


#' don't use this. use base::dput()
#'
#' Don't use this... use base::dput() An easy way to print out a c() vector into a blog file
#'
#' @export
ez_string = function(string_vec, c = T, list = !c, quotes = T, ws = "\n") {
    ws = paste0(",", ws)
    op_string_c = "c("
    op_string_l = "list("
    if (quotes) {
        q = "\""
    }
    else
        q = NULL
    c_cmd = expression(cat(paste0(op_string_c, paste0(q, string_vec, q, collapse = ws), ")\n")))
    list_cmd = expression(cat(paste0(op_string_l, paste0(q, string_vec, q, collapse = ws), ")\n")))
    lst_args = as.list(match.call())
    c_arg = eval(lst_args$c)
    l_arg = eval(lst_args$list)
    c_arg_cond = tryCatch(! is.null(c_arg) & c_arg, error = function(e) FALSE)
    l_arg_cond = tryCatch(! is.null(l_arg) & l_arg, error = function(e) FALSE)
    if (all(is.null(c(c_arg, l_arg))))
        eval(c_cmd)
    else if(c_arg_cond & l_arg_cond) {
        eval(c_cmd)
        eval(list_cmd)
    }
    else if (l_arg_cond)
        eval(list_cmd)
    else if (is.null(c_arg) & ! l_arg_cond)
        eval(c_cmd)
    else if (! c_arg_cond)
        eval(list_cmd)
    else
        eval(c_cmd)
}



#' wrapper around tryCatch - robust to parallel:: functions
#'
#' A slightly more robust version of try that works within the parallel:: set of functions
#' that pre-deploy a cluster.
#'
#' @export
try2 = function(expr, ..., finally) {
    tryCatch(expr,
             error = function(e) {
                 msg = structure(paste(conditionMessage(e), conditionCall(e), sep = "\n"), class = "err")
                 cat("Error: ", msg, "\n\n")
                 return(msg)
             },
             finally = finally,
             ... = ...)
}



#' applies dedup to colnames
#'
#' dedup the column names of a data.frame/data.table
#'
#' @return A data.table or data.frame
#' @export dedup.cols
dedup.cols = function(tbl, remove = FALSE) {
    if (remove) {
        if (!inherits(tbl, "data.table"))
            return(tbl[, match(unique(colnames(tbl)), colnames(tbl))])
        else
            return(tbl[, match(unique(colnames(tbl)), colnames(tbl)), with = FALSE])
    } else {
            colnames(tbl) = dedup(colnames(tbl))
            return(tbl)
    }
}




#' pinch a vector
#'
#' A convenience function to transform proportions.
#' Useful for beta regression (library(betareg))
#'
#' @return A vector
#' @export
pinch.frac = function(x, fmin = 0.01, fmax = 0.99) {
    pmax(pmin(x, fmax), fmin)
}


#' pinch a vector
#'
#' A convenience function to transform proportions.
#' Useful for beta regression (library(betareg))
#' Can be used on any numeric values to squeeze between
#' an interval.
#'
#' @return A vector
#' @export
pinch = function(x, fmin = 0.01, fmax = 0.99) {
    pmax(pmin(x, fmax), fmin)
}



#' Get confidence intervals around fractions
#'
#' A convenience function to get confidence intervals around
#' proportions. To be used with gbar.error
#'
#' @return A vector
#' @export binom.conf
binom.conf = function(n, tot, alpha = 0.025, tol = 1e-8) {
    suppressWarnings({
        conf.low = qbinom(p = (1 - (alpha)), size = tot, prob = n / tot, lower.tail = FALSE) / tot
        conf.high= qbinom(p = (1 - (alpha)), size = tot, prob = n / tot, lower.tail = TRUE) / tot
    })
    dt = data.table(frac = n / (tot + tol),
                    conf.low = replace2(conf.low, is.na(x), 0),
                    conf.high = replace2(conf.high, is.na(x), 0))
    return(dt)
}





#' a method to get the "data" argument from a with/within expression
#'
#' to be used within "with()" within the expression
#'
#' @return data.frame/data.table
#' @export
getdat = function(n = 0L) { ## to be used within "with()" expr
    tmpfun = function() {
        current.n = sys.nframe()
        myObj <- structure(list(.b = as.raw(7)), foo = 47L)
        while (current.n >= 0) {
            out = tryCatch(mget("envir", sys.frame(current.n), mode = "list", ifnotfound = myObj), error = function(e) myObj)
            if (inherits(out[[1]], "data.table"))
                return(out[[1]])
            current.n = current.n - 1
        }
    }
    pf = parent.frame(3 + n)
    if (identical(environmentName(pf), "R_GlobalEnv"))
        return(invisible(NULL))
    if ("data" %in% names(pf))
        data = get("data", pf)
    else if ("envir" %in% names(parent.frame(2)) &&
             inherits(tmpfun(), "data.table")) ## recent addition
        data = tmpfun()
    else
        data = get("envir", pf)
    if (is.environment(data))
        data = get("data", data)
    ## data = data$data
    data
    ## with(, {
    ##     data = get("data", parent.frame(2))
    ## })
}


#' alias for getdat
#'
#' alias for getdat function
#'
#' @export
gd = getdat


#' getdat2
#'
#' another function to use inside the expression argument of "with/within" family
#' to grab the enclosing data environment
#'
#' @export
getdat2 = function(nm = "data") { ## to be used within "with()" expr
    this.environment = environment()
    return(dg(nm, F))
}



#' concatenate environments
#'
#' to be able to call a function within a function and access variables
#' use:
#' ev = "bla"
#' this.fun = function() anon()
#' datatable = data.table()
#' anon = function() {
#'     with(cenv(datatable), {print(ev)})
#' }
#' this.fun()
#'
#' @param env can be a data.frame/table, or list, or environment
#' @export
cenv = function(env = environment()) {
    expr_193659793_155174963 = as.expression(substitute(expr_6000525395_6907698684, env = env))
    if (is.environment(env))
        rm(expr_6000525395_6907698684, envir = env)
    thisenv = c(list(data = env), as.list(env))
    fms = sys.frames()
    for (i in rev(seq_along(fms))) {
        suppressWarnings(rm(expr_6000525395_6907698684, envir = fms[[i]]))
        thisenv = c(thisenv, tryCatch(as.list(fms[[i]]), error = function(e) NULL))
    }
    thisenv = c(list(expr_193659793_155174963 = expr_193659793_155174963), thisenv, as.list(globalenv()))
    return(thisenv)
}

## cenv = function(env = parent.frame()) {
##     thisenv = c(list(data = env), as.list(env))
##     fms = sys.frames()
##     for (i in rev(seq_along(fms))) {
##         thisenv = c(thisenv, tryCatch(as.list(fms[[i]]), error = function(e) NULL))
##     }
##     return(thisenv)
## }



#' wrapper around cenv
#'
#' to be able to main
#'
#' @param expr expression to evaluate
#' @param env environment
#' @param return logical
#' @export
main = function(expr_6000525395_6907698684, return = F) {
    env = environment()
    env.lst = cenv(env = env)
    out = with(env.lst, {
        tryCatch(expr_6000525395_6907698684, error = function(e) NULL);
        eval(expr_193659793_155174963, env.lst)
    })
    if (return)
        return(out)
    else
        return(NULL)
}


#' turn on verbose error tracing through call stack
#'
#' @export
errr = function(x = 2) {
    er = options()$error
    if (is.null(er) || !missing(x)) {
        message("error traceback on, traceback level set to ", x)
        options(error = function() { traceback(x); print("ERROR"); })
    } else {
        message("error traceback off")
        options(error = NULL)
    }
}



#' alias for getdat2
#'
#' alias for getdat2 function
#'
#' @export
g2 = getdat2


#' alias for getdat2
#'
#' alias for getdat2 function
#'
#' @export
g = getdat2


#' alias for getdat2(nm = "x")
#'
#' alias for getdat2(nm = "x")
#'
#' @export
gx = function(nm = "x") eval.parent(getdat2(nm = nm))


#' withx
#'
#' to be used for quick interactive programming
#' withx(toolongtotypemeagain, x * sum(x))
#'
#' @export
withx <- function(x, expr) {
    env = environment()
    senv = parent.frame()
    suppressWarnings(eval(substitute(expr), env, enclos = senv))
}


#'  withv
#'
#' to be used for quick interactive programming
#' withv(toolongtotypemeagain, x * sum(x))
#'
#' @export
withv = function(x, expr) {
    env = environment()
    senv = stackenv2(parent.frame())
    suppressWarnings(eval(substitute(expr), env, enclos = senv))
}


#' stackenv
#'
#' @export
stackenv = function(env = environment(), onlyanc = TRUE, asenv = TRUE) {
    fms = sys.frames()
    thisenv = as.list(env)
    ## these = rev(seq_along(fms))
    these = rev(seq_len(sys.nframe()))
    if (onlyanc) these = these[-1]
    for (i in these) {
        thisenv = c(thisenv, tryCatch(as.list(fms[[i]]), error = function(e) NULL))
    }
    thisenv = c(thisenv, as.list(globalenv()))
    if (asenv)
        return(as.environment(thisenv))
    else
        return(thisenv)
}



#' stackenv2 
#'
#' @export
stackenv2 = function(overwrite = FALSE, onlyanc = TRUE, verbose = FALSE) {
    fms = sys.frames()
    thisenv = new.env()
    parent.env(thisenv) = parent.frame()
    ## these = rev(seq_along(fms))
    these = rev(seq_len(sys.nframe()))
    if (onlyanc) these = these[-1]
    for (i in these) {
        thisenv = suppressWarnings(appendEnv(thisenv, tryCatch(fms[[i]], error = function(e) NULL), overwrite = overwrite))
        if (verbose) {
            message("frame i: ", i)
            print(ls(thisenv))
        }
    }
    thisenv = suppressWarnings(appendEnv(thisenv, globalenv(), overwrite = overwrite))
    return(thisenv)
}


#' appendEnv
#' 
#' @author qedqed from Stackoverflow
#' @export
appendEnv = function(e1, e2 = NULL, overwrite = FALSE) {
    if (is.null(e2))
        return(e1)
    e1name = deparse(substitute(e1))
    e2name = deparse(substitute(e2))
    listE1 = ls(e1, sorted = FALSE)
    listE2 = ls(e2, sorted = FALSE)
    rstring = rand.string()
    for(v in listE2) {
        if (v %in% listE1) {
            msg = sprintf("Variable %s is in e1, too!", v)
            if (!isTRUE(overwrite)) {
                paste0(msg, " ... skipping ...")
                next
            }
            warning(msg)
        }
        this = tryCatch(get0(v, envir = e2, inherits = FALSE,
                             ifnotfound = structure("missing", class = rstring)),
                        error = function(e) structure("missing", class = rstring))
        if (!class(this)[1] == rstring)
            e1[[v]] = e2[[v]]
    }
    return(e1)
}


#' alias for withv
#'
#' to be used for quick interactive programming
#' withv(toolongtotypemeagain, x * sum(x))
#'
#' @export
wv = withv


with2 = function(data, expr, ...) {
    data = data
    eval(substitute(expr), data)
}


#' file.info2
#'
#' A more robust file.info2 that removes any paths that do not exist
#'
#' @return data.frame/data.table
#' @export
file.info2 = function(fn, col = NULL, include.all = FALSE) {
    lst.call = as.list(match.call())
    if (!"col" %in% names(lst.call) & grepl("[/$]", base::toString(substitute(fn))))
        col = "path"
    if (is.null(col)) col = as.character(substitute(fn))
    fif = file.info(unique(subset2(fn, file.exists2(x)))) %>% rownames_to_column(col) %>% as.data.table
    if (include.all) {
        fif = merge(setnames(data.table(fn), col)[, tmp.ord := seq_along(fn)],
                    fif,
                    by = col, all = TRUE)[order(tmp.ord)][, tmp.ord := NULL]
    }
    fif
}



#' function to subset on a variable by using "x" as surrogate variable in expression
#'
#' convenience function to subset without having to type excessively
#' if the variable is arrived at through nested functions or long
#' variable names
#'
#' @author Kevin Hadi
#' @export
subset2 = function(x, sub.expr, ...) {
    if (!missing(sub.expr)) {
        this.sub = eval(as.list(match.call())$sub.expr)
        if (is.numeric(this.sub)) {
            if (any(this.sub %% 1))
                stop("subset must be integer")
            if (!is.null(dim(x))) {
                if (!all(this.sub %in% seq_len(nrow(x))))
                    stop("subset must be indexed within rows of x")
                else
                    this.sub = replace(logical(nrow(x)), this.sub, TRUE)
            } else {
                if (!all(this.sub %in% seq_along(x)))
                    stop("subset must be indexed within x")
                else
                    this.sub = replace(logical(length(x)), this.sub, TRUE)
            }
        }
    } else if (missing(sub.expr)) {
        if (!is.null(dim(x)))
            ## this.sub = seq_len(nrow(x))
            this.sub = logical(nrow(x)) | TRUE
        else
            this.sub = logical(length(x)) | TRUE
            ## this.sub = seq_along(x)
    }
    subset(x, this.sub, ...)
}


#' same as subset2
#'
#' convenience function to subset without having to type excessively
#' if the variable is arrived at through nested functions or long
#' variable names
#'
#' @author Kevin Hadi
#' @export
ss = subset2



#' function to replace elements of vector, can use "x" as surrogate variable in expression
#'
#' convenience function to replace without having to type excessively
#' if the variable is arrived at through nested functions or long
#' variable names
#'
#' @export
replace2 = function(x, repl.expr, values) {
    lst.call = as.list(match.call())
    if ("list"  == as.character(lst.call$repl.expr)[1]) {
        exprs = as.list(lst.call$repl.expr)[-1]
        length(values)
        if (!length(exprs) == length(values) && !length(values) == 1)
            stop("list of expressions must be the same length as values")
        for (i in seq_along(exprs)) {
            if (length(values) > 1)
                x[eval(exprs[[i]])] = values[[i]]
            else
                x[eval(exprs[[i]])] = values
        }
        return(x)
    } else {
        this.repl = eval(lst.call$repl.expr)
        if (inherits(this.repl, "list")) {
            if (!length(this.repl) == length(values) && !length(values) == 1)
                stop("list provided must be the same length as values")
            else {
                for (i in seq_along(this.repl)) {
                    if (length(values) > 1)
                        x[eval(this.repl[[i]])] = values[[i]]
                    else
                        x[eval(this.repl[[i]])] = values
                }
                return(x)
            }
        } else {
            this.repl = eval(lst.call$repl.expr)
            return(replace(x, this.repl, values = values))
        }
    }
}


#' replace NAs
#'
#' replace_na
#'
#' @export
replace_na = function(data, replace) {
    return(replace(data, is.na(data), replace))
}

#' @export
ave2 = function(x, ..., FUN = mean) {
    if (missing(...))
        x[] <- FUN(x)
    else {
        g <- interaction(...)
        x = lapply(split(x, g), FUN)
    }
    x
}


#' modification of ave
#'
#' slight update of ave
#'
#' @export
ave3 = function (x, ..., FUN = mean) 
{
    if (missing(...)) {
        x[] = FUN(x)
    }
    else {
        rl = rleseq(..., clump = TRUE)
        ## g = interaction2(...)
        lidx = .Internal(split(seq_along(x), factor(rl$idx)))
        spl = split(x, rl$idx)
        ## spl = split(x, g)
        lst = lapply(spl, FUN)
        ## return(rep(unlist(lst), lengths(lidx))[unlist(lidx)])
        return(unlist(rep(lst, lengths(lidx) - lengths(lst) + 1))[order(unlist(lidx))])
    }
    x
}



#' modification of ave
#'
#' slight update of ave
#'
#' @export
aved = function(..., FUN = length, drop = TRUE) {
  ## browser()
  ## ddd = as.list(match.call(expand.dots = FALSE)[["..."]])
  ## if (length(ddd) > 1) {
  ##   do.call(function(...) interaction2(..., drop = drop), ddd, envir = parent.frame())
  ## }
  g = interaction2(..., drop = drop)
  lidx = .Internal(split(seq_along(g), g))
  spl = split(g, g)
  lst = lapply(spl, FUN)
  return(rep(unlist(lst), lengths(lidx))[unlist(lidx)])
}






#' rematch
#'
#' reconstruct the original vector from a vmatch
#'
#' @export
rematch = function (vmatch_out)  {
    this = vmatch_out$matches
    this[is.na(this)] = na.omit(vmatch_out$unmatch)
    this
}


#' make list of matches and nonmatches
#'
#' get match indices and non matched indices
#'
#' @export
vmatch = function(x, y, ...) {
    m = match(x, y, ...)
    unm = rep(NA, length(x))
    unm[is.na(m)] = x[is.na(m)]
    list(matches = y[m], unmatch = unm)
}


#' file.mat.exists
#'
#' run file.exists2 on columns of a table
#'
#' @export file.mat.exists
file.mat.exists = function(x, rm_col1 = FALSE) {
    matrify(x, rm_col1 = rm_col1) %>% {setRownames(apply(., 2, file.exists2), rownames(.))}
}



#' not %in%
#'
#' Not match
#'
#' @export
`%nin%` = function (x, table) {
    match(x, table, nomatch = 0L) == 0L
}


#' factor to integer
#'
#' robustly convert a factor to integer
#'
#' @return factor
#' @export
f2int = function(this_factor) {
    if (inherits(this_factor, "factor")) {
        lvl = levels(this_factor)
        if (inherits(lvl, c("numeric", "integer"))) {
            as.integer(levels(this_factor))[this_factor]
        } else if (inherits(lvl, "character")) {
            match(as.character(this_factor), lvl)
        }
    } else {
        warning("Did not supply a factor, returning object as is")
        this_factor
    }
}


#' modified system2
#'
#' A modification of system2 to be able to return either or the stderr
#' or stdout to console or flexibly return to a path.
#'
#' @export
system3 = function (command, args = character(), stdout = "", stderr = "",
    stdin = "", input = NULL, env = character(), wait = TRUE,
    minimized = FALSE, invisible = TRUE, timeout = 0)
{
    isTRUE = function(x) identical(x, TRUE)
    isFALSE = function(x) identical(x, FALSE)
    if (!missing(minimized) || !missing(invisible))
        message("arguments 'minimized' and 'invisible' are for Windows only")
    if (!is.logical(wait) || is.na(wait))
        stop("'wait' must be TRUE or FALSE")
    intern <- FALSE
    command <- paste(c(env, shQuote(command), args), collapse = " ")
    if (is.null(stdout))
        stdout <- FALSE
    if (is.null(stderr))
        stderr <- FALSE
    if (isTRUE(stdout) || isTRUE(stderr))
        intern <- TRUE
    if (as.integer((isTRUE(stdout) | isFALSE(stdout)) + (isTRUE(stderr) | isFALSE(stderr))) == 2) {
        if (isTRUE(stderr) | isTRUE(stdout)) intern = TRUE
        if (isTRUE(stderr) & isFALSE(stdout)) {
            command = paste(command, "2>&1", ">/dev/null")
            ## command <- paste(command, "2>/dev/null")
        } else if (isTRUE(stderr) & isTRUE(stdout)) {
            command = paste(command, "2>&1")
        } else if (isFALSE(stderr) & isTRUE(stdout)) {
            command = paste(command, "2>/dev/null")
        }
    } else if (isTRUE(stderr) & is.character(stdout)) {
        if (length(stdout) != 1L) {
            stop("'stdout' must be of length 1")
        }
        if (nzchar(stdout)) {
            command = paste(command, "2>&1", ">", shQuote(stdout))
        } else {
            command = paste(command, "2>&1", ">/dev/null")
        }
    } else if (is.character(stderr) & isTRUE(stdout)) {
        if (length(stderr) != 1L) {
            stop("'stderr' must be of length 1")
        }
        if (nzchar(stderr)) {
            command = paste(command, "2>", shQuote(stderr))
        }
    } else if (is.character(stderr) & is.character(stdout)) {
        if (length(stdout) != 1L)
            stop("'stdout' must be of length 1")
        if (nzchar(stdout)) {
            command <- if (identical(stdout, stderr))
                           paste(command, ">", shQuote(stdout), "2>&1")
                       else paste(command, ">", shQuote(stdout))
        }
        if (length(stderr) != 1L)
            stop("'stderr' must be of length 1")
        if (nzchar(stderr) && !identical(stdout, stderr))
            command <- paste(command, "2>", shQuote(stderr))
    }
    if (!is.null(input)) {
        if (!is.character(input))
            stop("'input' must be a character vector or 'NULL'")
        f <- tempfile()
        on.exit(unlink(f))
        writeLines(input, f)
        command <- paste(command, "<", shQuote(f))
    }
    else if (nzchar(stdin))
        command <- paste(command, "<", stdin)
    if (!wait && !intern)
        command <- paste(command, "&")
    ## .Internal(system(command, intern, timeout))
    tmp = tempfile()
    on.exit(system2("rm", tmp), add = TRUE)
    writeLines(command, tmp)
    command2 = paste("sh", tmp)
    .Internal(system(command2, intern, timeout))
}


#' test if object is empty
#'
#' @author Kevin Hadi
#' @export
is.empty = function(x) {
    if (!is.null(dim(x))) {
        dim(x)[1] == 0
    } else {
        length(x) == 0 || is.null(x)
    }
}


#' minimum column per row (removing NA)
#'
#' Return the index of the minimum column per row while removing NA
#'
#' @author stackoverflow
#' @export min.col.narm
min.col.narm = function(mat, ties.method = "first") {
    ok = max.col(-replace(mat, is.na(mat), Inf), ties.method=ties.method) * NA ^ !rowSums(!is.na(mat))
    return(ok)
}


#' maximum column per row (removing NA)
#'
#' Return the index of the maximum column per row while removing NA
#'
#' @author Stackoverflow
#' @export max.col.narm
max.col.narm = function(mat, ties.method = "first") {
    ok = max.col(replace(mat, is.na(mat), -Inf), ties.method=ties.method) * NA ^ !rowSums(!is.na(mat))
    return(ok)
}



#' wrapper around table(), show NA counts if there are any
#'
#' Convenience wrapper around table to show NA if there are any
#'
#' @export
table2 = function(...) {
    return(table(..., useNA = "ifany"))
}


#' wrapper around table, always show NA counts
#'
#' Convenience function to always show NA counts
#'
#' @export
table3 = function(...) {
    return(table(..., useNA = "always"))
}

#' dir with full grep
#'
#' @author Kevin Hadi
#' @export
dir2 = function(path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE,
    recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE,
    no.. = FALSE, ...) {
    paths = dir(path = path, all.files = all.files,
                full.names = full.names, recursive = recursive,
                ignore.case = ignore.case, include.dirs = include.dirs,
        no.. = no..)
    if (!is.null(pattern))
        paths = grep(pattern = pattern, x = paths, value = TRUE, ...)
    return(paths)
}


#' dig into a file path's directory
#'
#' Convenience wrapper around dir() to pull out files from the same
#' directory of a given file.
#'
#' @author Kevin Hadi
#' @export
dig_dir = function (x, pattern = NULL, full.names = TRUE, mc.cores = 1,
    unlist = TRUE, do_dirname = TRUE, ...) {
    if (is.null(pattern)) {
        pattern = list(NULL)
    }
    if (isTRUE(do_dirname))
        input = dirname(x)
    else
        input = x
    lst = lst.empty2na(
        parallel::mcMap(
            function(m.x, m.pattern, ...) {
                dir(
                    path = m.x,
                    pattern = m.pattern,
                    full.names = full.names,
                    ...
                )
            },
            input,
            pattern,
            mc.cores = mc.cores,
            MoreArgs = list(...)
        )
    )
    if (unlist == TRUE) {
        if (!is.null(names(lst))) {
            en = eNROW(lst)
            nm = rep(names(lst), en)
        }
        ul = unlist(lst)
        names(ul) = nm
        return(ul)
    }
    return(lst)
}

## dig_dir = function (x, pattern = NULL, full.names = TRUE, mc.cores = 1,
##     unlist = TRUE, ...)
## {
##     if (is.null(pattern)) {
##         pattern = list(NULL)
##     }
##     if (unlist == TRUE) {
##         unlist(lst.empty2na(mcMap(function(m.x, m.pattern, ...) {
##             dir(path = m.x, pattern = m.pattern, full.names = full.names, ...)
##         }, dirname(x), pattern, mc.cores = mc.cores, MoreArgs = list(...))))
##     }
##     else {
##         lst.empty2na(mcMap(function(m.x, m.pattern, ...) {
##             dir(path = m.x, pattern = m.pattern, full.names = full.names, ...)
##         }, dirname(x), pattern, mc.cores = mc.cores, MoreArgs = list(...)))
##     }
## }



#' dig into a file path's directory
#'
#' Convenience wrapper around dir2() to pull out files from the same
#' directory of a given file.
#'
#' @author Kevin Hadi
#' @export
dig_dir2 = function (x, pattern = NULL, full.names = TRUE, mc.cores = 1,
    unlist = TRUE,  do_dirname = TRUE, ...) {
    if (is.null(pattern)) {
        pattern = list(NULL)
    }
    if (isTRUE(do_dirname))
        input = dirname(x)
    else
        input = x
    lst = lst.empty2na(
        parallel::mcMap(
            function(m.x, m.pattern, ...) {
                dir2(
                    path = m.x,
                    pattern = m.pattern,
                    full.names = full.names,
                    ...
                )
            },
            input,
            pattern,
            mc.cores = mc.cores,
            MoreArgs = list(...)
        )
    )
    if (unlist == TRUE) {
        if (!is.null(names(lst))) {
            en = eNROW(lst)
            nm = rep(names(lst), en)
        }
        ul = unlist(lst)
        names(ul) = nm
        return(ul)
    }
    return(lst)
}

## dig_dir2 = function (x, pattern = NULL, full.names = TRUE, mc.cores = 1,
##     unlist = TRUE, ...)
## {
##     if (is.null(pattern)) {
##         pattern = list(NULL)
##     }
##     if (unlist == TRUE) {
##         unlist(lst.empty2na(mcMap(function(m.x, m.pattern, ...) {
##             dir2(path = m.x, pattern = m.pattern, full.names = full.names, ...)
##         }, dirname(x), pattern, mc.cores = mc.cores, MoreArgs = list(...))))
##     }
##     else {
##         lst.empty2na(mcMap(function(m.x, m.pattern, ...) {
##             dir2(path = m.x, pattern = m.pattern, full.names = full.names, ...)
##         }, dirname(x), pattern, mc.cores = mc.cores, MoreArgs = list(...)))
##     }
## }




#' collapse a named list of vectors into a data.table
#'
#' Collapse a named list with vectors as each element into a data.table
#'
#' @export stack.dt
stack.dt = function(lst, ind = "ind", values = "values", ind.as.character = TRUE) {
    if (!length(lst) == 0) {
        dt = setDT(stack(lst))
        if (ind.as.character) {
            dt[, ind := as.character(ind)]
        }
    } else {
        dt = data.table(ind = character(0), values = numeric(0))
    }
    data.table::setnames(dt, c("ind", "values"), c(ind, values))
}


#' make_chunks
#'
#' Create chunks from a vector with a certain number of elements per chunk
#' OR create a certain number of chunks from a vector
#'
#' @return A list
#' @export
make_chunks = function(vec, n = 100, max_per_chunk = TRUE, num_chunk = !max_per_chunk, seed = 10) {
    set.seed(seed)
    lst.call = as.list(match.call())
    if (!is.null(lst.call$num_chunk) && is.null(lst.call$max_per_chunk)) {
        max_per_chunk = !eval(lst.call$num_chunk)
    }
    if ((isTRUE(max_per_chunk) & isTRUE(num_chunk)) ||
        (isFALSE(max_per_chunk) & isFALSE(num_chunk)) ||
        (!is.logical(max_per_chunk) & !is.logical(num_chunk)))
        stop("select either max_per_chunk OR num_chunk to be TRUE")
    if (max_per_chunk)
        splitter = ceiling(length(vec)) / n
    if (num_chunk)
        splitter = n
    ## ind = parallel::splitIndices(length(vec), ceiling(length(vec) / max_per_chunk))
    ind = parallel::splitIndices(length(vec), splitter)
    split(vec, rep(seq_along(ind), times = base::lengths(ind)))
}




#' assign an object to global environment
#'
#' ONLY USE IF YOU KNOW WHAT YOU ARE DOING
#' This function forces assignment of a variable/function
#' to the global environment, or an environment of your choosing
#'
#' @param obj The object to assign to the global environment
#' @param var Optional name of variable, specified as string
#' @return either NULL or the object being assigned
#' @export
globasn = function(obj, var = NULL, return_obj = TRUE, envir = .GlobalEnv, verbose = TRUE, vareval = F)
{
    var = as.list(match.call())$var
    if (is.null(var)) {
        globx = as.character(substitute(obj))
    } else {
        if (is.name(var)) {
            if (isFALSE(vareval))
                var = as.character(var)
            else
                var = eval(var, parent.frame())
        } else if (!is.character(var)) {
            stop("var must be coercible to a character")
        }
        if (inherits(var, "character")) {
            ## if (var != as.character(substitute(var))) {
            ##     message("variable being assigned to ", var)
            ## }
            globx = var
        } else {
            globx = as.character(substitute(var))
            ## message("variable being assigned to ", globx)
        }
    }
    if (verbose)
        message("variable being assigned to ", globx)
    assign(globx, value = obj, envir = envir)
    if (return_obj) {
        invisible(obj)
    } else {
        NULL
    }
}



#' reassign elements of named list into environment
#'
#' ONLY USE IF YOU KNOW WHAT YOU ARE DOING
#' this function takes a list of named objects and assigns them to the calling environment
#'
#' @param variables_lst A named list of variables
#' @return the input list
#' @export
reassign = function(variables_lst, calling_env = parent.frame()) {
    for (i in 1:length(variables_lst)) {
        message("variable assigned to: ", names(variables_lst[i]))
        assign(names(variables_lst[i]), variables_lst[[i]], envir = calling_env)
    }
    invisible(variables_lst)
}



##############################
##############################
############################## multiROC helpers
##############################
##############################


#' calculates various scores from actual classes and predicted classes
#'
#' Scores calculated from a classification task with known true labels:
#' precision, recall
#' F1, mean f1, weighted mean f1, mean precision,
#' mean recall, weighted mean recall, accuracy
#'
#' @param real true class labels
#' @param pred predicted class labels
#' @return a list of various statistics for clasification task
#' @export
classystat= function(real, pred) {
  if (!inherits(real, "factor"))
    real = factor(real)
  if (!inherits(pred, "factor"))
    pred = factor(pred)
  if (length(setdiff(levels(real), levels(pred))) > 0)
    stop("real and pred must have matching levels!")
  if (! all(levels(real) == levels(pred)))
    pred = factor(pred, levels = levels(real))
  rp = (table2(real = real, pred = pred) %>% melt %>% asdt)
  acc = with(rp, {
    sum(value[real == pred]) / sum(value)
  })
  prec = rp[, sum(value[real == pred]) / sum(value), by = pred]
  reca = rp[, sum(value[real == pred]) / sum(value), by = real]
  tots = rp[, sum(value), by = real]$V1
  grandtot = rp[, sum(value)]
  prec[, wV1 := tots * V1 / grandtot]
  reca[, wV1 := tots * V1 / grandtot]
  aggprec = mean(prec$V1)
  aggreca = mean(reca$V1)
  aggwprec = sum(prec$wV1)
  aggwreca = sum(reca$wV1)
  f1 = setNames(2 * (prec$V1 * reca$V1) / (prec$V1 + reca$V1), prec[[1]])
  aggf1 = mean(f1)
  wf1 = setNames(tots * f1 / grandtot, prec[[1]])
  ## weighted aggregate F1
  waggf1 = sum(wf1)
  list(
    total_true = rp[, sum(value), by =real][, setNames(V1, real)],
    precision = with(prec, setNames(V1, pred)),
    recall = with(reca, setNames(V1, real)),
    f1 = f1,
    mean_precision = aggprec,
    mean_recall = aggreca,
    weighted_mean_precision = aggwprec,
    weighted_mean_recall = aggwreca,
    mean_f1 = aggf1,
    weighted_mean_f1 = waggf1,
    accuracy = acc
  )
}


#' create one hot table of labels for multiROC
#'
#' @param y factor
#' @return the input list
#' @export
mroclab = function(y) {
    if (!inherits(y, "factor"))
        stop("y must be a factor")
    lbl = mltools::one_hot(data.table(y))
    colnm = gsub("y_", "", colnames(lbl))
    lbl = setColnames(lbl, paste0(colnm, " _true"))
    lbl
}


#' format predicted scores for multiROC
#'
#' from predict(...)
#'
#' @param prd0 a matrix of prediction scores
#' @return A prediction
#' @export
mrocpred = function(prd0, nm = "agg") {
    prd = asdf(prd0)
    prd = setColnames(prd, paste0(colnames(prd), " _pred_", nm))
    prd
}


#' create a data frame for ggplotting multiroc
#'
#' @param lbl output from mroclab
#' @param prd output from mrocpred
#' @return A data.frame that can be fed into ggplot
#' @export
mrocdat = function(lbl, prd) {
  prd00 = rbind(0, asm(mrocpred(prd)))
  mg = setcols(with((melt(prd00)), g2()[order(Var2, value),]), c("Var2", "value"), c("Group", "prd"))[, c("Group", "prd"), drop = F]
  cb = cbind(lbl, prd)
  roc_res = multiROC::multi_roc(cb, force_diag = T)
  plot_roc_df <- multiROC::plot_roc_data(roc_res)
  gdat = asdt(plot_roc_df)[Group %nin% c("Macro", "Micro")][, prd := mg$prd]
  gdat[, Group := trimws(Group)]
  withAutoprint(gdat, echo = F)$value
}



#' ggplot for multiROC
#'
#' helper function for outputting ggplot
#'
#' @param lbl output from mroclab
#' @param prd output from mrocpred
#' @return A data.frame that can be fed into ggplot
#' @export
ggmroc <- function (gdat, palette = "Moonrise2", color.field = "Group", 
    linetype.field = "Method", roc.size = 1) 
{
    gdat$linetype.field = gdat[[linetype.field]]
    gdat$color.field = gdat[[color.field]]
    g = with(gdat, {
        ggplot(g2(), aes(x = 1 - Specificity, y = Sensitivity)) + 
            geom_path(aes(color = color.field, linetype = linetype.field), 
                size = roc.size) + geom_segment(aes(x = 0, y = 0, 
            xend = 1, yend = 1), colour = "grey", linetype = "dotdash") + 
            scale_colour_manual(values = skitools::brewer.master(length(unique(color.field)), 
                wes = T, palette = palette)) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), 
            legend.justification = c(1, 0), legend.position = c(0.95, 
                0.05), legend.title = element_blank(), legend.background = element_rect(fill = NULL, 
                size = 0.5, linetype = "solid", colour = "black"))
    })
    return(g)
}



##############################
##############################
############################## end multiROC helpers
##############################
##############################



##############################
############################## factor helpers / forcats wrappers
##############################

#' get the levels in use in a factor
#'
#' @return the levels of a factor that are represented in the factor
#' @export
levelsinuse = function(fct) {
    levels(fct)[tabulate(fct, nbins = length(levels(fct))) != 0]
}


#' refactor
#'
#' Keep one level of a factor and set all others to a specified level
#'
#' @return A factor
#' @export
refactor = function(fac, keep, ref_level = "OTHER") {
    if (!inherits(fac, "factor")) {
        fac = factor(fac)
    }
    new_fac = forcats::fct_explicit_na(factor(fac, levels = intersect(levels(fac), keep), ordered = FALSE), na_level = ref_level) %>%
        stats::relevel(ref_level)
    new_fac
}



################################################## Flow utilities
##################################################
##################################################
##################################################
##################################################
#################################################



#' Kevin's implementation of sstat
#'
#' slow, but has complete names 
#'
#' @return character
#' @export
sstat <- function (full = FALSE, numslots = TRUE, resources = T) 
{
    asp = "username,groupname,state,name,jobid,associd"
    if (resources) {
        asp = paste0(asp, ",", "timelimit,timeused,submittime,starttime,endtime,eligibletime,minmemory,numcpus,numnodes,priority,prioritylong,nice,reason,reboot,partition,nodelist")
    }
    cmd = paste(
        "squeue -O",
        paste(
            paste0(unlist(strsplit(asp, ",")),
                   ":2000"),
            collapse = ","),
        "|",
        "sed 's/[[:space:]]\\{2,\\}/\\t/g'"
    )
    p = pipe(cmd)
    res = readLines(p)
    close(p)
    header = res[1]
    res = res[-1]
    nms = tolower(strsplit(header, "\t")[[1]])
    out = data.table::setnames(
        do.call(data.table::data.table, data.table::tstrsplit(res, "\t")),
        nms
    )
    out$state = factor(out$state, unique(c(out$state, "RUNNING"))) %>% 
        relevel("RUNNING")
    if (!full) {
        if (numslots) 
            out = dcast.data.table(out[, sum(as.numeric(cpus)), 
                by = .(user, state)], user ~ state, fill = 0, 
                value.var = "V1")[rev(order(RUNNING)), ]
        else out = dcast.data.table(out[, .N, by = .(user, state)], 
            user ~ state, fill = 0, value.var = "N")[rev(order(RUNNING)), 
            ]
        return(out)
    }

    out$memUnits = out$min_memory %>% gsub("[0-9]+\\.?([A-Z]+)?", "\\1", .)
    out$mem = out$min_memory %>% gsub("([0-9]+\\.?)([A-Z]+)?", "\\1", .) %>% as.integer()
    out$cpus = as.numeric(out$cpus)

    invisible(out[, memBytes := mem * dplyr::case_when(memUnits == "G" ~ (1024^3), memUnits == "M" ~ (1024^2), memUnits == "K" ~ (1024), memUnits == "" ~ 1)])
    invisible(out[, memGb := memBytes / (1024 ^ 3)])

    out$submitTimePosix = as.POSIXct(out$submit_time, format="%Y-%m-%dT%H:%M:%S")
    out$startTimePosix = as.POSIXct(out$start_time, format="%Y-%m-%dT%H:%M:%S")
    names(out) = base::make.unique(names(out)) # priority.1 is priorityLong integer
    out$timeLimitSecs = parse_slurm_time(out$time_limit)
    return(out)
}

#' Parse the slurm times into an R format
#'
#' Used with khtools sstat above
#'
#' @return character
#' @export
parse_slurm_time <- function(time_str) {
  # Regex pattern to capture days, hours, minutes, and seconds
  pattern <- "^(?:(\\d+)-)?(\\d+):(\\d+):(\\d+)$"
  m = regexec(pattern, time_str, perl = TRUE)
  lstmatches = regmatches(time_str, m)
  lstmatches[lengths(lstmatches) == 0] = rep_len("", 5)
  matches = do.call(rbind, lstmatches)
  matches = matches[,2:5]
  matches[matches == ""] = "0"
  mode(matches) = "numeric"
  
  days = matches[,1]
  hours = matches[,2]
  minutes = matches[,3]
  seconds = matches[,4]
  total_seconds <- days * 86400 + hours * 3600 + minutes * 60 + seconds
    
  # Return total time in seconds or as a difftime object
  # parsed_times = as.difftime(total_seconds, units = "secs")
  parsed_times = total_seconds
  
  return(parsed_times)
}




#' Dig into outdir of flow job
#'
#' look at output directory of flow job
#'
#' @return character
#' @export
dirfind <- function(job, pattern, full.names = TRUE, recursive = TRUE) {
  dir2(outdir(job), pattern, full.names = full.names, recursive = recursive)
}



#' Dig into Flow Job that generated an output
#'
#' takes a path of an output of a flow job and looks for
#' Job.rds and reads in Job object
#'
#' @return Job
#' @export
dig_job <- function(x, readin = TRUE, get_inputs = FALSE) {
  d = dig_dir(x, "Job.rds")
  if (readin) {
    d = readRDS(d)
    if (get_inputs)
      d = inputs(d)
    return(d)
  } else {
    return(d)
  }
}



#' Dig into Flow Job that generated an output
#'
#' takes a path of an output of a flow job and looks for
#' Job.rds and reads in Job object
#'
#' @return Job
#' @export
digjob <- dig_job


#' Dig into inputs Flow Job that generated an output
#'
#' takes a path of an output of a flow job and looks for
#' Job.rds and reads in input
#'
#' @return data.table 
#' @export
diginjob <- function(x) {
  dig_job(x, readin = TRUE, get_inputs = TRUE)
}


#' output the union columns from a Flow output
#'
#' @return character vector of all output columns to be expected
#' @export
output_cols = function(x, mc.cores = 1) {
    lst = mclapply(dig_dir(x, "Job.rds"), function(x) {
        op = outputs(readRDS(x))
        cn = setdiff(colnames(op), key(op))
        cn
    }, mc.cores = mc.cores)
    Reduce(f = union, lst)
}


#' convert job task to table
#'
#' Parse task from Flow Job object, character task file,
#' or Flow Task object
#'
#' @return A Flow job object
#' @export
viewtask = function(jb, arglst = c("name", "arg", "default")) {
  ifun = function(x, arglst = arglst) {
    unlist(lst.emptychar2na(lst.zerochar2empty(lapply(arglst, function(y)
      tryCatch((slot(x, y)), error = function(e) NA_character_)))))
  }
  if (inherits(jb, "Job"))
    obj = jb@task
  else if (inherits(jb, "Task"))
    obj = jb
  else if (inherits(jb, "character"))
    obj = Task(jb)
  as.data.table(data.table::transpose(lapply(obj@args, ifun, arglst = arglst)))
}



#' idj
#'
#' Match up ids to a job
#'
#' @return A Flow job object
#' @export
idj = function(x, these.ids) {
    x[match(these.ids, ids(x))]
}


#' reset.job
#'
#' Reset a job with different params
#'
#' @return A Flow job object
#' @export reset.job
reset.job = function(x, ..., i = NULL, rootdir = x@rootdir, jb.mem = x@runinfo$mem, jb.cores = x@runinfo$cores, jb.time = x@runinfo$time, update_cores = 1, task = NULL) {
    if (!inherits(x, "Job")) stop ("x must be a Flow Job object")
    if (is.null(task))
        usetask = x@task
    else if (is.character(task) || inherits(task, "Task"))
        usetask = task
    args = list(...)
    new.ent = copy(entities(x))
    if (!is.null(i)) {
        jb.mem = replace(x@runinfo$mem, i, jb.mem)
        jb.cores = replace(x@runinfo$cores, i, jb.cores)
    }
    tsk = viewtask(usetask)
    ## if (!all(names(args) %in% colnames(new.ent)))
    if (!all(names(args) %in% colnames(new.ent)) && !names(args) %in% viewtask(usetask)$V2)
        stop("adding additional column to entities... this function is just for resetting with new arguments")
    for (j in seq_along(args))
    {
        data.table::set(new.ent, i = i, j = names(args)[j], value = args[[j]])
    }
    these.forms = formals(body(findMethods("initialize")$Job@.Data)[[2]][[3]])
    if ("time" %in% names(these.forms)) {
        if ("update_cores" %in% names(these.forms))
            jb = Job(usetask, new.ent, rootdir = rootdir, mem = jb.mem, time = jb.time, cores = jb.cores, update_cores = update_cores)
        else
            jb = Job(usetask, new.ent, rootdir = rootdir, mem = jb.mem, time = jb.time, cores = jb.cores)
    } else {
        if ("update_cores" %in% names(these.forms))
            jb = Job(usetask, new.ent, rootdir = rootdir, mem = jb.mem, cores = jb.cores, update_cores = update_cores)
        else
            jb = Job(usetask, new.ent, rootdir = rootdir, mem = jb.mem, cores = jb.cores)
    }
    return(jb)
}


#' getcache
#'
#' Get the path of a Flow job object's cache.
#'
#' @return A character
#' @export
getcache = function(object) {
      path = paste(object@rootdir, "/", task(object)@name,
                   ".rds", sep = "")
      return(path)
}


############################## GLM Utilities
##############################
##############################
##############################


#' calculate accuracy based on contingency table
#'
#' @return numeric
#' @export
get_accuracy <- function(confus_mat) {
    correct = diag(confus_mat)
    sum(correct) / sum(off_diag(confus_mat), correct)
}


#' get off diagonal values
#'
#' used for get_accuracy()
#'
#' @return
#' @export
off_diag <- function(x) {
    diag(x) = NA
    as.vector(x) %>% na.omit
}


#' glm.nb2
#'
#' Run a negative binomial regression.
#' If it fails, run a poisson regression
#'
#' @return A GLM model
#' @export glm.nb2
glm.nb2 = function(...) {
    mod = tryCatch(MASS::glm.nb(...), error = function(e) {
        warning("glm.nb broke... using poisson")
        return(stats::glm(..., family = "poisson"))
    })
    ## mod = tryCatch(glm.nb(...), error = function(e) as.character(e))
    ## if (is.character(mod) &&
    ##     mod %in% c("Error in while ((it <- it + 1) < limit && abs(del) > eps) {: missing value where TRUE/FALSE needed\n",
    ##                "Error in glm.fitter(x = X, y = Y, w = w, etastart = eta, offset = offset, : NA/NaN/Inf in 'x'\n",
    ##                "Error: no valid set of coefficients has been found: please supply starting values\n")){
    ##     warning("theta parameter approaching infinity, resorting to poisson")
    ##     mod = glm(..., family = "poisson")
    ## }
    return(mod)
}



#' tabular summary of glm model coefficients
#'
#' Parse a tabular summary of a glm model
#'
#' @return A data.frame/data.table
#' @export
summ_glm = function(glm_mod, as.data.table = TRUE, ...) {
    this_summ = summary(glm_mod, ...)
    parse_glm_sum = function(x) {
        df = as.data.frame(x) %>%
            tibble::rownames_to_column(var = "name") %>%
            ## select(one_of(c("name", "estimate", "SE", "t.value", "p.value", "z value", "Estimate", "Std. Error", "t value", "Pr(>|t|)", "Pr(>|z|)")))
            select(matches("name"), matches("estimate"), matches("std.*error"), matches("z|t(\\.| )?value"), matches("pr\\(>\\|"))
        df = df[,c("name", intersect(colnames(x), colnames(df)))]
        if (!is.null(df[["t value"]])) {
            data.table::setnames(df, c("name", "estimate", "SE", "t.value", "p"))
        } else if (!is.null(df[["z value"]])) {
            data.table::setnames(df, c("name", "estimate", "SE", "z.value", "p"))
        }
        df = df %>% mutate_if(~inherits(., "character"), ~trimws(.))
        df
    }
    if (inherits(this_summ, c("summary.vglm", "vglm"))) {
        out = this_summ@coef3 %>% parse_glm_sum()
    } else if (inherits(this_summ$coefficients, "list")) {
        out = this_summ$coefficients %>% `[[`(1) %>% parse_glm_sum()
    } else {
        out = this_summ$coefficients %>% parse_glm_sum()
    }
    f.obj = family(glm_mod)
    fam = f.obj$family
    lin = f.obj$link
    ## if (!class(glm_mod)[1] == "lm") {
    ##     summ.ul = unlist(this_summ)
    ##     fam1 = unlist(summ.ul[names(summ.ul) == "family"])
    ##     fam2 = unlist(summ.ul[names(summ.ul) == "family.family"])
    ##     lin1 = unlist(summ.ul[names(summ.ul) == "link"])
    ##     lin2 = unlist(summ.ul[names(summ.ul) == "family.link"])
    ##     fam = trimws(paste(fam1, fam2))
    ##     lin = trimws(paste(lin1, lin2))
    ## } else {
    ##     fam = "gaussian"
    ##     lin = "identity"
    ## }
    out = mutate(out,
                 ci.lower = estimate - (1.96 * SE),
                 ci.upper = estimate + (1.96 * SE),
                 family = fam,
                 link = lin)
    if (as.data.table) {
        setDT(out)
    }
    return(out)
}


##################################################
##################################################
##################################################
##### gTrack stuff!

#' convenience function to label the nodes and edges of gGraph
#'
#' for gTrack visualization
#'
#' @export lbl.gg
lbl.gg = function(gg, do = T) {
    if (do) {
        gg2 = copy3(gg)
        gg2$nodes$mark(labels = gg2$nodes$dt$snode.id)
        gg2$edges[type == "ALT"]$mark(labels = gg2$edges[type == "ALT"]$dt$edge.id)
        return(gg2)
    } else {
        return(gg)
    }
}


#' convenience function to plot each element separately
#'
#' useful for grangeslists and gwalks to plot each one on a separate track
#' good for visualization
#'
#' @return gTrack
#' @export gt.each
gt.each = function(gr) {
    ix = seq_along(gr)
    gtrackfun = function(x, ...) {
        if (inherits(x, c("gWalk", "gGraph")))
            return(x$gtrack(...))
        else if (inherits(x, c("GRanges", "GRangesList")))
            return(gTrack(x, ...))
    }
    fx = function(i) {
        this = gr[i]
        gtrackfun(this, name = i)
    }
    lapply(ix, fx) %>% dodo.call2(FUN = c)
}

#' convenience function to pull out coverage data from table
#'
#'
#' @return GRanges of coverage data
#' @export
grabcov = function(pairs, id = NULL, field = "decomposed_cov", y.field = NULL, rel2abs = T) {
  if (is.null(y.field)) {
    if (identical(field, "decomposed_cov"))
      y.field = "foreground"
    else if (identical(field, "cbs_cov_rds"))
      y.field = "ratio"
  }
  if (is.null(id) && nrow(pairs) == 1) {
    covpath = pairs[[field]]
    if (isTRUE(rel2abs))
      jabpath = pairs$jabba_rds
  } else if (!is.null(id) && length(id) == 1) {
    covpath = pairs[id][[field]]
    if (isTRUE(rel2abs))
      jabpath = pairs$jabba_rds
  }
  cov = readRDS(covpath)
  if (isTRUE(rel2abs)) {
    lst.pp = with(readRDS(jabpath), list(purity = purity, ploidy = ploidy))
    mcols(cov)[[paste0(y.field, "_old")]] = mcols(cov)[[y.field]]
    mcols(cov)[[y.field]] = skitools::rel2abs(cov,
                                   field = y.field,
                                   purity = lst.pp$purity,
                                   ploidy = lst.pp$ploidy)
  }
  coln = colnames(mcols(cov))
  y.id = match3(y.field, coln)
  nony.id = seq_along(coln)[-y.id]
  return(et(sprintf("cov[,%s,drop=FALSE]", mkst(c(y.id, nony.id)))))
}


#' convenience function to pull out jabba model + coverage
#'
#' @return
#' @export
grabggtrack = function(pairs, pair = NULL, jab_field = "complex", cov_field = "decomposed_cov", cov_y_field = NULL) {
  if (!is.null(pair)) {
    if (length(pair) > 1) stop("provide only one id or index")
    pairs = pairs[pair, nomatch = 0]
  }
  if (nrow(pairs) != 1) stop("something messed up")
  this.env = environment()
  gg = gG(jabba = pairs[[jab_field]])
  gg = copy2(gg)
  gg$edges$mark(col = NULL)
  gg$nodes$mark(col = NULL)
  cov = grabcov(pairs, field = cov_field, y.field = cov_y_field)
  gt.cov = gTrack(cov, colnames(mcols(cov))[1], circles = T, lwd.border = 0.001)
  gt = c(gt.cov, gg$gtrack())
  return(gt)
}



## setMethod("with", signature(data = "gTrack"), NULL)
## setMethod("with", signature(data = "gTrack"), function(data, expr) {
##     df = as.data.frame(formatting(data))
##     eval(substitute(expr, parent.frame()), df, parent.frame())
## })

## setMethod("within", signature(data = "gTrack"), NULL)
## setMethod("within", signature(data = "gTrack"), function(data, expr) {
##     e = list2env(as.list(formatting(data)))
##     eval(substitute(expr, parent.frame()), e, parent.frame())
##     formatting(data) = as.data.frame(as.list(e))[, c(colnames(formatting(data))),drop = FALSE]
##     return(data)
## })


#' convenience function to pull out jabba model with allelic cn
#'
#' Convenience function uses a pairs table entry
#' and hunts for jabba. If allelic cn fields aren't present,
#' will look for hetpileups data and run JaBbA:::jabba.alleles
#'
#' @return
#' @export
grabhjab = function(pairs, id = NULL) {

    if (!is.null(id)) {
        pairs = pairs[id]
    } else if (len(pairs) != 1) {
        stop("pairs must be length 1")
    }

    jab = readRDS(pairs$jabba_rds)
    if (is.null(jab$agtrack)) {
        het.sites = with(pairs, coalesce(het_pileups_wgs, maf_approx, hmf_germline_txt, bads = Negate(file.exists2)))
        if (grepl(".rds$", het.sites)) {
            het.sites = readRDS(het.sites) %>% within({alt = alt.count.t; ref = ref.count.t})
        } else if (grepl(".txt(.gz){0,}", het.sites)) {
            het.sites = df2gr(fread(het.sites), 1, 2, 3)
            df = mcols(het.sites)
            mcols(het.sites) = setcols(df, c("REF_AD", "ALT_AD"), c("ref", "alt"))
        } else if (!file.exists2(het.sites)) {
            warning("no hets")
        }
        jaba = JaBbA:::jabba.alleles(jab, het.sites, uncoupled = TRUE)
        ## agt = jaba$agtrack
    } else {
        jaba = jab
    }
    
    if (is.null(jaba$agtrack)) stop("no allelic cn available!")

    return(jaba)
}



#' fix gtrack metadata elements for plotting
#'
#' Parse a tabular summary of a glm model
#'
#' @return A data.frame/data.table
#' @export gt.fix
gt.fix = function(gt, lwd.scale = 1, lwd.border.scale = 1, ywid.scale = 1) {
    len = function(ob) if (length(ob) == 0) NULL else ob
    for(i in seq_along(gt@edges) ) {
        if (inherits(gt@edges[[i]], "data.frame") && dim(gt@edges[[i]])[1] > 0) {
            gt@edges[[i]][["lwd"]] = len(gt@edges[[i]][["lwd"]] * lwd.scale)
        }
    }
    for(i in seq_along(gt@data) ) {
        if (inherits(gt@data[[i]], "GRanges") && length(gt@data[[i]]) > 0) {
            mcols(gt@data[[i]])[["ywid"]] = len(mcols(gt@data[[i]])[["ywid"]] * ywid.scale)
            mcols(gt@data[[i]])[["lwd.border"]] = len(mcols(gt@data[[i]])[["lwd.border"]] * lwd.border.scale)
        }
    }
    gt
}



##############################
############################## ggplot2 stuff
##############################

#' helper function to grab relevant ggplot_build outputs
#'
#' takes an input function and grabs the processed data frame
#' that ggplot employs to draw its plots
#'
#' Note: reordering plot data to fit the ggplot wrangled data is
#' (if there is a 1 to 1 mapping)
#' plot_data[order(<facet.x>, <facet.y>, <grouping_variable>)]
#'
#' OR
#'
#' plot_data[order(PANEL, <grouping_variable>)]
#'
#' useful for custom construction of layers
#'
#' @param ggplot_obj A ggplot object
#' @export
extract_ggplot = function(ggplot_obj) {
    gb = copy3(ggplot2::ggplot_build(ggplot_obj))
    gg.x = all.vars(gb$plot$mapping$x)
    if (NROW(gg.x)) gg.x = rlang::quo_name(gb$plot$mapping$x)
    gg.y = all.vars(gb$plot$mapping$y)
    if (NROW(gg.y)) gg.y = rlang::quo_name(gb$plot$mapping$y)
    gb.layout = data.table$as.data.table(gb$layout$layout)
    lst.d = lapply(gb$data, function(layer) {
        if (!inherits(layer$PANEL, "factor")) layer$PANEL = factor(layer$PANEL)
        return(merge.data.table(as.data.table(layer), gb.layout, by = c("PANEL")))
    })
    facet_nm = lapply(gb$plot$facet$params$facets, quo_name)
    facet.x = facet_nm[1][[1]]
    facet.y = facet_nm[2][[1]]
    plot_layers = gb$plot$layers
    lst.players = lapply(plot_layers, function(plot_layer) {
        x.field = all.vars(plot_layer$mapping$x)
        if (NROW(x.field)) x.field = rlang::quo_name(plot_layer$mapping$x)
        y.field = all.vars(plot_layer$mapping$y)
        if (NROW(y.field)) y.field = rlang::quo_name(plot_layer$mapping$y)
        list(x.field = x.field, y.field = y.field)
    })

    lst.gplayers = lapply(plot_layers, function(plot_layer) {

        x.field = all.vars(plot_layer$mapping$x)
        if (NROW(x.field)) x.field = rlang::quo_name(plot_layer$mapping$x)
        y.field = all.vars(plot_layer$mapping$y)
        if (NROW(y.field)) y.field = rlang::quo_name(plot_layer$mapping$y)
        list(x.field = x.field, y.field = y.field)
    })
    gb.plot.data = gb$plot$data
    gb.layers = gb$plot$layers
    if (class(gb.plot.data)[1] == "waiver") gb.plot.data = structure(gb.plot.data, class = NULL)
    gb.plot.data = as.data.table(gb.plot.data)
    lst.plotdata = mapply(function(data_, exprlst)
    {
        if (NROW(data_)) {
            if (NROW(exprlst$x.field))
                data_[["x"]] = eval(parse(text = exprlst$x.field), data_)
            if (NROW(exprlst$y.field))
                data_[["y"]] = eval(parse(text = exprlst$y.field), data_)
            data_ = tryCatch(merge.repl(data_, gb.layout), error = function(e) data_)
            ## data_ = merge.repl(data_, gb.layout)
            ## data_[["facet.x"]] = if (NROW(facet.x)) eval(parse(text = facet.x), data_) else ""
            ## data_[["facet.y"]] = if (NROW(facet.y)) eval(parse(text = facet.y), data_) else ""
            ## if (all(colexists(c("facet.x", "facet.y"), data_)))
            ##     data_[order(facet.x, facet.y), PANEL := .GRP, by = .(facet.x, facet.y)]
        }
        return(data_)
    }, list(gb.plot.data), lst.players, SIMPLIFY = FALSE)
    lst.layerdata = mapply(function(data_, exprlst)
    {
        data_ = data_$data
        if (class(data_)[1] == "waiver") data_ = structure(data_, class = NULL)
        if (NROW(data_)) {
            data_ = as.data.table(data_)
            if (NROW(exprlst$x.field))
                data_[["x"]] = eval(parse(text = exprlst$x.field), data_)
            if (NROW(exprlst$y.field))
                data_[["y"]] = eval(parse(text = exprlst$y.field), data_)
            data_ = tryCatch(merge.repl(data_, gb.layout), error = function(e) data_)
            ## data_ = merge.repl(data_, gb.layout)
            ## data_[["facet.x"]] = if (NROW(facet.x)) eval(parse(text = facet.x), data_) else ""
            ## data_[["facet.y"]] = if (NROW(facet.y)) eval(parse(text = facet.y), data_) else ""
            ## if (all(colexists(c("facet.x", "facet.y"), data_)))
            ##     data_[order(facet.x, facet.y), PANEL := .GRP, by = .(facet.x, facet.y)]
        }
        return(data_)
    }, gb.layers, lst.players, SIMPLIFY = FALSE)
    list(lst.d = lst.d, lst.plotdata = lst.plotdata, lst.players = lst.players, lst.layerdata = lst.layerdata, facet.x = facet.x, facet.y = facet.y, gg.x = gg.x, gg.y = gg.y, layout = gb.layout)
}


#' make integer breaks on axes
#'
#' a la scales::breaks_pretty
#'
#' @export
integer_breaks <- function(n = 5, ...) {
    fxn <- function(x) {
        breaks <- floor(pretty(x, n, ...))
        names(breaks) <- attr(breaks, "labels")
        breaks
    }
    return(fxn)
}



#' custom theme for ggplot
#'
#' A custom theme for ggplots
#'
#' @return A ggplot object
#' @export
gg_mytheme = function(gg,
                      base_size = 16,
                      legend.position = "none",
                      flip_x = TRUE,
                      x_axis_cex = 1,
                      y_axis_cex = 1,
                      ylab_cex = 1,
                      xlab_cex = 1,
                      title_cex = 1,
                      x_angle = 90,
                      x_axis_hjust = 1,
                      x_axis_vjust = 0.5,
                      y_axis_hjust = 1,
                      print = FALSE) {
    ## gg = gg + theme_bw(base_size = base_size) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.background = element_blank(), axis.line = element_line(colour = "black"), axis.text.x  = element_text(angle = 90, vjust = .5), legend.position = legend.position)
    gg = gg +
        ggplot2::theme_bw(base_size = base_size) +
        ggplot2::theme(panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              plot.background = element_blank(),
              axis.line = element_line(colour = "black"),
              axis.text.x  = element_text(angle = x_angle, vjust = x_axis_vjust, hjust = x_axis_hjust, size = rel(x_axis_cex), colour = "black"),
              legend.position = legend.position,
              axis.text.y = element_text(size = rel(y_axis_cex), hjust = y_axis_hjust, colour = "black"),
              plot.title = element_text(size = rel(title_cex)),
              axis.title.x = element_text(size = rel(xlab_cex)),
              axis.title.y = element_text(size = rel(ylab_cex)))
    if (isTRUE(print)) return(print(gg)) else return(gg)
}


#' print gg_mytheme output
#'
#' convenience wrapper around gg_mytheme
#' to automatically print the output
#'
#' @return A ggplot object
#' @export
pg_mytheme = function(..., print = TRUE) gg_mytheme(..., print = TRUE)



#' wrapper around geom_point and geom_smooth
#'
#' a wrapper around geom_smooth to fit dot plot with lm line
#'
#' @param x numeric values
#' @param y numeric values
#' @param group vector parallel to x and y that specifies grouping
#' @param colour vector that specifies colouring of points
#' @return A ggplot object
#' @export gg.sline
gg.sline = function(x, y, group = "x", colour = NULL, smethod = "lm", dens_type = c("point", "hex"), facet1 = NULL, facet2 = NULL, transpose = FALSE, facet_scales = "fixed", line = TRUE, formula = y ~ x, print = FALSE, hex_par = list(bins = 50), wes = NULL, cex.scatter = 0.1) {
    if (is.null(facet1)) {
        facet1 = facet2
        facet2 = NULL
    }
    if (!is.null(facet1))
        if (!is.factor(facet1))
            facet1 = factor(facet1, unique(facet1))
    if (!is.null(facet2))
        if (!is.factor(facet2))
            facet2 = factor(facet2, unique(facet2))
    dat = data.table::data.table(x, y, group, facet1 = facet1, facet2 = facet2)
    gg = ggplot2::ggplot(dat, mapping = aes(x = x, y = y, group = group))
    if (identical(dens_type, c("point", "hex"))) {
        message("selecting geom_point() as default")
        dens_type = "point"
    }
    if (isTRUE(line)) {
        gg = gg +
            ggplot2::geom_smooth(method = smethod, size = 1, formula = formula)
    }
    if (identical(dens_type, "hex"))
        gg = gg + ggplot2::geom_hex(bins = hex_par$bin)
    else if (identical(dens_type, "point"))
        if (is.null(colour))
            gg = gg + ggplot2::geom_point(size = cex.scatter)
        else
            gg = gg + ggplot2::geom_point(mapping = aes(colour = colour), size = cex.scatter)
    if (!is.null(dat$facet1)) {
        if (!is.null(dat$facet2)) {
            if (transpose)
                gg = gg + ggplot2::facet_grid(facet2 ~ facet1, scales = facet_scales)
            else gg = gg + ggplot2::facet_grid(facet1 ~ facet2, scales = facet_scales)
        }
        else {
            if (transpose)
                gg = gg + ggplot2::facet_grid(. ~ facet1, scales = facet_scales)
            else gg = gg + ggplot2::facet_grid(facet1 ~ ., scales = facet_scales)
        }
    }
    if (!is.null(wes) && is.character(wes))
        gg = gg + ggplot2::scale_colour_manual(values = skitools::brewer.master(length(unique(colour)), wes = TRUE, palette = wes))
    if (print)
        print(gg)
    else
        gg
}



#'  barplot with errorbars
#'
#' Barplot with confidence intervals.
#' To plot with confidence intervals around an event that has a binary
#' outcome with a simple example:
#'
#' heads = 30, tails = 20,
#' y = heads / (heads + tails)
#' dt = binom.conf(y, heads + tails)
#' conf.low = dt$conf.low
#' conf.high = dt$conf.high
#'
#' gbar.error(y, conf.low, conf.high)
#'
#' @param y any numeric vector, can be a fraction
#' @param conf.low the lower bound of the confidence interval around y
#' @param conf.high the upper bound of the confidence interval around y
#'
#' @return A ggplot object
#' @export gbar.error
gbar.error = function(y, conf.low, conf.high, group, wes = "Royal1", other.palette = NULL, print = TRUE, fill = NULL, stat = "identity", facet1 = NULL, facet2 = NULL, bar.width = 0.9, position = position_dodge(width = 0.9), transpose = FALSE, facet.scales = "fixed") {
    dat = data.table(y = y, conf.low = conf.low, conf.high = conf.high, group = group)
    if (is.null(facet1)) {
        facet1 = facet2
        facet2 = NULL
    }
    if (!is.null(facet1))
        if (!is.factor(facet1))
            facet1 = factor(facet1, unique(facet1))
    if (!is.null(facet2))
        if (!is.factor(facet2))
            facet2 = factor(facet2, unique(facet2))
    suppressWarnings(dat[, `:=`(facet1, facet1)])
    suppressWarnings(dat[, `:=`(facet2, facet2)])
    if (is.null(fill)) fill.arg = group else fill.arg = fill
    dat[, fill.arg := fill.arg]
    if (is.character(stat) && stat == "count")
        gg = ggplot2::ggplot(dat, ggplot2::aes(fill = fill.arg, x = y))
    else
        gg = ggplot2::ggplot(dat, ggplot2::aes(x = group, fill = fill.arg, y = y))
    gg = gg + ggplot2::geom_bar(stat = stat, position = position, width = bar.width)
    if (any(!is.na(conf.low)) & any(!is.na(conf.high)))
        gg = gg + ggplot2::geom_errorbar(aes(ymin = conf.low, ymax = conf.high), size = 0.1, width = 0.3, position = position)
    if (!is.null(wes))
        gg = gg + ggplot2::scale_fill_manual(values = skitools::brewer.master(n = length(unique(fill.arg)), wes = TRUE, palette = wes))
        ## gg = gg + scale_fill_manual(values = wesanderson::wes_palette(wes))
    if (!is.null(other.palette))
        gg = gg + ggplot2::scale_fill_manual(values = other.palette)
    if (!is.null(dat$facet1)) {
        if (!is.null(dat$facet2)) {
            if (transpose)
                gg = gg + ggplot2::facet_grid(facet2 ~ facet1, scales = facet.scales)
            else gg = gg + ggplot2::facet_grid(facet1 ~ facet2, scales = facet.scales)
        }
        else {
            if (transpose)
                gg = gg + ggplot2::facet_grid(. ~ facet1, scales = facet.scales)
            else gg = gg + ggplot2::facet_grid(facet1 ~ ., scales = facet.scales)
        }
    }
    if (print) print(gg) else gg
}




#' generate ggplot histogram
#'
#' generate ggplot histogram
#'
#' @return A ggplot object
#' @export gg.hist
gg.hist = function(dat.x, as.frac = FALSE, bins = 50, center = NULL, boundary = NULL, trans = "identity", print = TRUE, xlim = NULL, ylim = NULL, xlab = "", x_breaks = 20, y_breaks = 10, expand = waiver(), ...) {
    gg = ggplot(mapping = aes(x = dat.x))
    if (isTRUE(as.frac))
        gg = gg + geom_histogram(aes(y = ..count.. / sum(..count..)), bins = bins, ...)
    else
        ## gg = gg + geom_histogram(stat = stat_bin(bins = bins), ...)
        gg = gg + geom_histogram(bins = bins, ...)
    gg = gg + scale_x_continuous(trans = trans, limits = xlim, breaks = scales::pretty_breaks(n = x_breaks)) +
        scale_y_continuous(breaks = scales::pretty_breaks(n = y_breaks), limits = ylim, expand = expand)
    if (!is.null(xlab) && any(!is.na(xlab)) && nzchar(xlab))
        gg = gg + xlab(xlab)
    if (print)
        print(gg)
    else
        gg
}



##############################
############################## htslib / skidb stuff
##############################


#' take vcf read in chunks and write clean vcf
#'
#' take vcf read in chunks and write clean vcf without sample info
#' i.e. fixing up svaba indel crap
#'
#' @return character
#' @export vcf_remove_sample
vcf_remove_sample = function(x = '/gpfs/commons/groups/imielinski_lab/data/PCAWG/mutations/f393baf9-2710-9203-e040-11ac0d484504,vcf',
                             out.vcf = "~/outtest.vcf",
                             ref = "~/DB/GATK/hg19_gatk_decoy.fasta", chunk = 10000,
                             verbose = TRUE) {
    ## fa = readinfasta(ref)
    f = file(x, "r")
    r = readLines(f, n = 1)
    ro = grep("^#", r, invert = F, value = T)
    header = c()
    while (NROW(ro)) {
        header = c(header, ro)
        r = readLines(f, n = 1)
        ro = grep("^#", r, invert = F, value = T)
    }
    contents = c()
    system2('rm', out.vcf)
    system2('touch', out.vcf)
    header.l = paste0(strsplit(header[length(header)], "\t")[[1]][-c(10:10000)], collapse = "\t")
    c(header[-length(header)], header.l)
    writeLines(c(header[-length(header)], header.l), out.vcf)
    while (NROW(r)) {
        tb = read.table(text = r, fill = T, sep = "\t", colClasses = "character")
        ## tb = fread(text = r, fill = T, sep = "\t", colClasses = "character")
        tb = tb[,-c(10:NCOL2(tb)),]
        ## tb$V9 = "."
        ## ins = which(tb$V4 == ".") ## Insertion, grab REF base at coordinate, add REF nucleotides to left of ALT, REF should be 1 base
        ## del = which(tb$V5 == ".") ## Deletion, subtract coordinate by 1, grab REF base at coordinate, add REF nucleotide to left of REF, ALT should be 1 base which is the REF
        ## if (NROW(del) > 0) {
        ##     tb[del,]$V2 = tb[del,]$V2 - 1
        ##     gr = with(tb[del,], GRanges(V1, IRanges(V2, V2)))
        ##     REF = as.character(fa[gr])
        ##     tb[del,]$V5 = REF
        ##     tb[del,]$V4 = paste0(REF, tb[del,]$V4)
        ## }
        ## if (NROW(ins) > 0) {
        ##     gr = with(tb[ins,], GRanges(V1, IRanges(V2, V2)))
        ##     REF = as.character(fa[gr])
        ##     tb[ins,]$V5 = paste0(REF, tb[ins,]$V5)
        ##     tb[ins,]$V4 = REF
        ## }
        tb$V9 = "."
        ## tb$V10 = "."
        fwrite(tb, out.vcf, append = T, sep = "\t")
        if (length(r) > 0 && verbose) message('processed: ', length(r), " variants")
        r = readLines(f, n = chunk)
    }
    out.vcf

    ## out.vcf = paste0(tools::file_path_sans_ext(path), "_fixed.vcf")
    ## sl = enframe(hg_seqlengths())
    ## cl = paste0("##contig=<ID=", sl[[1]], ",length=",sl[[2]], ">")
    ## system2("rm", c("-f", out.vcf)); system2("touch", out.vcf)
    ## writeLines(c("##fileformat=VCFv4.2", cl, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample"), out.vcf)
    ## fwrite(out, out.vcf, append = T, sep = "\t", col.names = FALSE)
    ## data.table(pair = p, mut_consensus_vcf = out.vcf)

}



#' write bed or bedpe into canonical formatted table
#'
#' Write a table into  a bed oe bedpe formatted file
#' Ensures the header is commented out
#'
#' @return A data.table
#' @export write_bed
write_bed = function(bed, outpath) {
    cn = colnames(bed)
    cn[1] = paste0("#", cn[1])
    bedhead = paste0(cn, collapse = "\t")
    writeLines(bedhead, outpath)
    tryCatch(fwrite(bed, outpath, sep = "\t", col.names = FALSE, row.names = FALSE, append = TRUE),
             error = function(e) {
                 write.table(bed, outpath, sep = "\t", col.names = FALSE, row.names = FALSE, append = TRUE, quote = FALSE)
             })
}


#' read bed or bedpe as a table
#'
#' Read in a bed oe bedpe formatted file into tabular format
#'
#' @return A data.table
#' @export read_bed
read_bed = function(bedpath) {
    f = file(bedpath, open = "r")
    thisline = readLines(f, 1)
    headers = character(0)
    while (length(grep("^((#)|(chrom)|(chr))", thisline, ignore.case = T))) {
        headers = c(headers, thisline)
        thisline = readLines(f, 1)
    }    
    lastheader = tail(headers, 1)
    ## ln = sum(length(headers), length(thisline))
    ## while (length(thisline) > 0) {
    ##     ## thisline = readBin(f, "raw", n = 50000)
    ##     ## sum(thisline == as.raw(10L))
    ##     thisline = readLines(f, n = 50000)
    ##     ln = length(thisline) + ln
    ## }
    ## fread(bedpath, skip = length(headers))
    ## bed = tryCatch(fread(bedpath, skip = NROW(headers), header = F),
    ##                error = function(e) {
    ##                    read.table(bedpath, comment.char = "", skip = NROW(headers), header = F)
    ##                })
    bedhead = gsub("^#", "", unlist(strsplit(lastheader, "\t")))
    bed = tryCatch(fread(bedpath, skip = NROW(headers), header = F), 
                   error = function(e) NULL)
    if (is.null(bed)) 
        bed = tryCatch(read.table(bedpath, comment.char = "", skip = NROW(headers), 
                                  header = F), error = function(e) NULL)
    
    if (is.null(bed)) {
        bed = matrix(integer(0), ncol = length(bedhead))
        bed = as.data.table(bed)
    }

    if (identical(NROW(bedhead), ncol(bed))) {
        colnames(bed) = bedhead
    }
    return(bed)
}


#' read.bam.header
#'
#' Read in a bam header into tabular format
#'
#' @return A data.table
#' @export read.bam.header
read.bam.header = function(bam, trim = FALSE) {
    cmd = sprintf("samtools view -H %s", bam)
    ## tb = fread(text = system(cmd, intern = TRUE), fill = TRUE, sep = "\t", header = F)
    tb = setDT(read.table(text = system(cmd, intern = TRUE), fill = TRUE, sep = "\t", header = F))
    if (!trim) {
        return(as.data.table(tb))
    } else {
        tb = tb[grepl("^SN", V2)][, V2 := gsub("SN:", "", V2)]
        identity(tb)
        return(withAutoprint(tb, echo = FALSE)$value)
    }
}





#' bcfindex
#'
#' index a bcf/vcf file
#'
#' @return vcf path
#' @export
bcfindex = function(vcf, force = TRUE) {
    ## if (!force) {
    if (!grepl(".[bv]cf(.gz)?$", vcf)) {
        stop("check if you have a valid bcf/vcf file")
    }
    if (!file.exists(paste0(vcf, ".tbi")) & !file.exists(paste0(vcf, ".csi")) || isTRUE(force)) {
        system(sprintf("bcftools index --tbi %s", vcf))
    }
    vcf
}




#' read_vcf2
#'
#' read in a vcf file to granges with additional processing with bcftools
#'
#' @return GRanges
#' @export
read_vcf2 = function(fn, gr = NULL, type = c("snps", "indels", "all"), hg = 'hg19', geno = NULL, swap.header = NULL, verbose = FALSE, add.path = FALSE, tmp.dir = tempdir(), ...) {
    if (any(!type %in% c("snps", "indels", "all"))) {
        stop("type must be one of \"snps\", \"indels\", \"all\"")
    }
    if (!missing(type)) {
        if ("all" %in% type) {
            v_query = ""
            message("grabbing all variants")
        } else if (all(c("snps", "indels") %in% type)) {
            v_query = "-v snps,indels"
            message("grabbing snps and indels")
        } else {
            v_query = sprintf("-v %s", type)
            message(sprintf("grabbing %s", type))
        }
    } else {
        v_query = ""
        message("Assuming all variants should be grabbed")
    }
    tmp.vcf = tempfile("tmp", fileext = ".vcf")
    if (!is.null(gr))
        grs = paste(gr.string(gr.stripstrand(gr)), collapse = " ")
    else
        grs = ""
    cmd = sprintf("(bcftools view %s -i 'FILTER==\"PASS\"' %s %s | bcftools norm -Ov -m-any) > %s", v_query, fn, grs, tmp.vcf)
    system(cmd)
    vars = gr.nochr(unname(read_vcf(tmp.vcf, gr = gr, hg = hg, geno = geno, swap.header = swap.header, verbose = verbose, add.path = add.path, tmp.dir = tmp.dir, ...)))
    vars$REF = as.character(vars$REF)
    vars$ALT = as.character(unstrsplit(vars$ALT))
    vars$type = with(mcols(vars), ifelse(nchar(REF) < nchar(ALT), "INS",
                                  ifelse(nchar(REF) > nchar(ALT), "DEL",
                                  ifelse(nchar(REF) == 1 & nchar(ALT) == 1, "SNV", NA_character_))))
    vars = sort(sortSeqlevels(vars), ignore.strand = FALSE)
    return(vars)
}




##############################
##############################
############################## data.table and general data.frame utilities
##############################
##############################



#' transpose data.table or data.frame
#'
#' transpose a data frame
#' by default, the first column is stripped
#' and used as colnames of the output table
#'
#' @export trans.df
trans.df = function(df, rn = 1) {
    if (inherits(df, "data.table")) {
        df = asdf(df)
    }
    if (!(is.null(rn) || is.na(rn))) {
        rn.cols = dodo.call2(paste, .gc(df, rn))
        df = .gc(df, rn, invert = T)
    }
    out = data.table::transpose(df)
    if (!(is.null(rn) || is.na(rn))) {
        (data.table::setnames(out, rn.cols))
    }
    return(out)
}


#' unlist into a data.table
#'
#' unlisting a list into a data table with
#' ids corresponding to each list element
#'
#' @export dunlist2
dunlist2 = function (x, simple = FALSE)
{
    if (is.null(names(x)))
        names(x) = 1:length(x)
    tmp = x
    ## for (i in seq_along(tmp))
    ##     tmp[[i]] = as.data.table(tmp[[i]])
    if (!simple) {
        tmp = lapply(x, as.data.table)
        out = cbind(data.table(listid = rep(names(x), lens(x))),
                               rbindlist(tmp, fill = TRUE))
    } else {
        out = cbind(data.table(listid = rep(names(x), lens(x))),
                               V1 = unlist(tmp))
    }
    nm = unlist(lapply(x, names2))
    out$names = nm
    setkey(out, listid)
    return(out)
}


#' trace into an S4 function
#'
#' Debugging an S4 function can't be done with debug().
#' This function is a convenience wrapper around trace()
#' to step into S4 methods
#'
#' @export debug.s4
debug.s4 = function(what, signature, where) {
    if (is.character(where)) {
        where = asNamespace(sub("package:", "", where))
    }
    if (is.character(what)) {
        what = get0(what, mode = "function", envir = where)
    }
    trace(what = what, tracer = browser,
          at = 1,
          signature = signature, where = where)
}

#' reverse debug.s4
#'
#' undebugging an S4 function when debug.s4/trace were
#' called on an S4 method.
#'
#' @export undebug.s4
undebug.s4 = function(what, signature, where) {
    if (is.character(where)) {
        where = asNamespace(sub("package:", "", where))
    }
    if (is.character(what)) {
        what = get0(what, mode = "function", envir = where)
    }
    untrace(what = what, signature = signature, where = where)
}

#' make matrix out of stringsplitted character vector
#'
#' split a vector by a delimiter,
#' fill in to make same length
#' bind into matrix
#' not as efficient as data.table::tstrsplit
#' but this is entirely base R
#'
#' @return a matrix
#' @export
mstrsplit = function(x, ...) {
    lst = strsplit(x = x, ...)
    mlen = max(lengths(lst))
    unid = seq_len(mlen)
    for (i in seq_along(lst)) {
        length(lst[[i]]) = mlen
    }
    return(do.call(rbind, lst))
}



#' grep_order
#'
#' order text based on the supplied order of multiple patterns
#'
#' @return A character vector
#' @export
grep_order = function(patterns, text, return_na = FALSE, first_only = FALSE, perl = TRUE, fixed = FALSE) {
    text_ix = 1:length(text)
    match_lst = lapply(1:length(patterns), function(i) {
        these_matches = regexpr(patterns[i], text, perl = perl, fixed = fixed)
        position = which(these_matches != -1)
        if (first_only) {
            position = position[1]
        }
        if (length(position) > 0) {
            return(text_ix[position])
        } else if (return_na) {
            return(NA)
        }
    })
    return(unlist(match_lst))
}

#' grep_col_sort
#'
#' order columns of a data.frame/data.table based on the supplied
#' order of multiple character patterns.
#'
#' @return A data.frame/data.table
#' @export
grep_col_sort = function(patterns, df, all_cols = TRUE, match_first = TRUE, perl = TRUE, fixed = FALSE) {
    is.data.table = FALSE
    if (inherits(df, "data.table")) {
        df = as.data.frame(df)
        is.data.table = TRUE
    }
    new_col_order = grep_order(patterns = patterns, text = colnames(df), return_na = FALSE, first_only = FALSE, perl = perl, fixed = fixed)
    if (all_cols) {
        other_cols = setdiff(1:ncol(df), new_col_order)
        if (!match_first) {
            col_ix = c(other_cols, new_col_order)
        } else {
            col_ix = c(new_col_order, other_cols)
        }
    } else {
        col_ix = new_col_order
    }
    df = df[, col_ix]
    if (is.data.table) {
        return(as.data.table(df))
    } else {
        return(df)
    }
}


#' merging data tables with collapsing columns with the same name
#'
#' Merge two data tables with various replacing strategies
#' for columns common between x and y that are not used to merge
#' (i.e. not specified in the "by" argument)
#'
#' @param replace_NA logical, only use values in dt.y, any dt.x not in dt.y is clobbered (NA)
#' @param force_y logical, should x and y common columns be merged?
#' @param overwrite_x logical, if force_y = TRUE, should NA values in y replace x?
#' @return A data.table
#' @export merge.repl
merge.repl = function(dt.x,
                      dt.y,
                      sep = "_",
                      replace_NA = TRUE,
                      force_y = TRUE,
                      overwrite_x = FALSE,
                      keep_order = FALSE,
                      keep_colorder = TRUE,
                      keep_factor = TRUE,
                      ...) {
    arg_lst = as.list(match.call())
    by.y = eval(arg_lst[['by.y']], parent.frame())
    by.x = eval(arg_lst[['by.x']], parent.frame())
    by = eval(arg_lst[['by']], parent.frame())
    all.x = eval(arg_lst[['all.x']], parent.frame())
    all.y = eval(arg_lst[['all.y']], parent.frame())
    all = eval(arg_lst[['all']], parent.frame())
    allow.cartesian = eval(arg_lst[['allow.cartesian']])
    key_x = key(dt.x)
    if (is.null(all.x)) {
        all.x = TRUE
    }
    if (is.null(all.y)) {
        all.y = FALSE
    }
    if (!is.null(all) && all) {
        all.y = TRUE
        all.x = TRUE
    }
    if (is.null(allow.cartesian)) {
        allow.cartesian = FALSE
    }
    if (!inherits(dt.x, "data.table")) {
        dt.x = as.data.table(dt.x)
    }
    if (!inherits(dt.y, "data.table")) {
        dt.y = as.data.table(dt.y)
    }
    if (keep_order == TRUE) {
        dt.x[['tmp.2345098712340987']] = seq_len(nrow(dt.x))
    }

    dt.x[['in.x.2345098712340987']] = rep(TRUE, length.out = nrow(dt.x))
    dt.y[['in.y.2345098712340987']] = rep(TRUE, length.out = nrow(dt.y))

    new_ddd_args = list(
      by = by, by.x = by.x, by.y = by.y,
      all.x = all.x, all.y = all.y,
      allow.cartesian = allow.cartesian
    )

    if (is.null(by.y) & is.null(by.x) & is.null(by)) {

        if (length(attributes(dt.x)[['sorted']]) > 0 &&
            length(attributes(dt.y)[['sorted']]) > 0) {
            k.x = data.table::key(dt.x)
            k.y = data.table::key(dt.y)
        } else {
            k.y = k.x = intersect(names2(dt.x), names2(dt.y))
            if (length(k.x) == 0)
                stop("no common columns to merge by!")
            message("intersecting by: ", paste(k.x, collapse = ", "))
            new_ddd_args[['by']] = k.x
        }
        if (is.null(k.x) | is.null(k.y) || (k.x != k.y)) {
          stop(
            "neither by.x/by.y nor by are supplied, ",
            "keys of dt.x and dt.y ",
            "must be identical and non NULL"
          )
        }
        x.cols = setdiff(names(dt.x), k.x)
        y.cols = setdiff(names(dt.y), k.y)

    } else if (!is.null(by.x) & !is.null(by.y)) {

        x.cols = setdiff(names(dt.x), by.x)
        y.cols = setdiff(names(dt.y), by.y)
        new_ddd_args = new_ddd_args[setdiff(names(new_ddd_args), c("by"))]

    } else if (!is.null(by)) {

      x.cols = setdiff(names(dt.x), by)
      y.cols = setdiff(names(dt.y), by)
      if (! all(by %in% colnames(dt.x)) | ! all(by %in% colnames(dt.y))) {
        stop(
          "column ",
          by,
          " does not exist in one of the tables supplied",
          "\nCheck the column names"
        )
      }
      new_ddd_args = new_ddd_args[setdiff(names(new_ddd_args), c("by.y", "by.x"))]

    }
    these_cols = intersect(x.cols, y.cols)
    ## if (replace_in_x) {
    if (!replace_NA) {
      dt.x.tmp = copy(dt.x)
      for (this_col in these_cols) {
        data.table::set(dt.x.tmp, i = NULL, j = this_col, value = NULL)
      }
      dt.repl = suppressWarnings(
        do.call(
          "merge",
          args = c(list(x = dt.x.tmp, y = dt.y), new_ddd_args)
        )
      )
        ## dt_na2false(dt.repl, c("in.x.2345098712340987", "in.y.2345098712340987"))
    } else {
      dt.repl = suppressWarnings(
        do.call(
          "merge",
          args = c(list(x = dt.x, y = dt.y), new_ddd_args)
        )
      )
      dt_na2false(dt.repl, c("in.x.2345098712340987", "in.y.2345098712340987"))
      in.x = which(dt.repl[["in.x.2345098712340987"]])
      in.y = which(dt.repl[["in.y.2345098712340987"]])
      this_env = environment()
      for (this_col in these_cols) {
        x_cname = paste0(this_col, ".x")
        y_cname = paste0(this_col, ".y")
        x_col = dt.repl[[x_cname]]
        y_col = dt.repl[[y_cname]]
        xf = inherits(x_col, "factor")
        yf = inherits(y_col, "factor")
        if ( (xf || yf) && keep_factor) {
          if (!xf) { x_col = factor(x_col); xf = TRUE }
          if (!yf) { y_col = factor(y_col); yf = TRUE }
        }
        if (xf && !keep_factor) { x_col = as.character(x_col); xf = FALSE } 
        if (yf && !keep_factor) { y_col = as.character(y_col); yf = FALSE }
        if (force_y) {
          if (!overwrite_x) {
            ## if (inherits(x_col, "factor") & inherits(y_col, "factor")) {
            ##     new_col = factor(y_col, forcats::lvls_union(list(y_col, x_col)))
            ##     new_col[is.na(new_col)] = x_col[is.na(new_col)]
            ## } else {
            ##     new_col = ifelse(!is.na(y_col), y_col, x_col)
            ## }                    
            if (xf || yf) {
              new_col = factor(y_col, forcats::lvls_union(list(y_col, x_col)))
              new_col[is.na(new_col)] = x_col[is.na(new_col)]
            } else {
              new_col = ifelse(!is.na(y_col), y_col, x_col)
            }
          } else {
            ## if (inherits(x_col, "factor") & inherits(y_col, "factor")) {
            ##     new_col = factor(x_col, forcats::lvls_union(list(y_col, x_col)))
            ## } else {
            ##     new_col = x_col
            ## }
            ## new_col[dt.repl[['in.y.2345098712340987']]] = y_col[dt.repl[['in.y.2345098712340987']]]
            if (xf || yf) {
              new_col = factor(x_col, forcats::lvls_union(list(y_col, x_col)))
            } else {
              new_col = x_col
            }
            new_col[in.y] = y_col[in.y]
          }
        } else {
          ## if (inherits(x_col, "factor") & inherits(y_col, "factor")) {
          ##     new_col = factor(x_col, forcats::lvls_union(list(x_col, y_col)))
          ##     new_col[is.na(new_col) & !is.na(y_col)] = y_col[is.na(new_col) & !is.na(y_col)]
          ## } else {
          ##     new_col = ifelse(is.na(x_col) & !is.na(y_col), y_col, x_col)
          ## }
          if (xf | yf) {
            new_col = factor(x_col, forcats::lvls_union(list(x_col, y_col)))
            new_col[is.na(new_col) & !is.na(y_col)] = y_col[is.na(new_col) & !is.na(y_col)]
                } else {
                    new_col = ifelse(is.na(x_col) & !is.na(y_col), y_col, x_col)
                }
            }
        data.table::set(
          dt.repl,
          j = c(x_cname, y_cname, this_col),
          value = list(NULL, NULL, this_env[["new_col"]])
        )
      }
    }
    ## } else if (!replace_in_x & !is.null(suffix)) {
    ##     y.suff.cols = paste0(y.cols, sep, suffix)
    ##     ## dt.y.tmp = copy(dt.y)[, eval(dc(y.suff.cols)) := eval(dl(y.cols))][, eval(dc(y.cols)) := NULL]
    ##     dt.y.tmp = copy(dt.y)
    ##     data.table::set(dt.y, j = y.suff.cols, value = dt.y[, y.cols, with = FALSE])
    ##     data.table::set(dt.y, j = y.cols, value = NULL)
    ##     ## dt.repl = merge(dt.x, dt.y.tmp, all.x = TRUE, ...)
    ##     dt.repl = do.call("merge", args = c(list(x = dt.x, y = dt.y.tmp), new_ddd_args))
    ## }
    if (keep_order == TRUE) {
        data.table::setorderv(dt.repl, "tmp.2345098712340987")
        dt.repl[['tmp.2345098712340987']] = NULL
    }
    data.table::set(
      dt.repl,
      j = c("in.y.2345098712340987", "in.x.2345098712340987"),
      value = list(NULL, NULL)
    )
    if (keep_colorder) {
        x_cols = colnames(dt.x)
        ## get the order of columns in dt.repl in order of X with
        ## additional columns tacked on end
        data.table::setcolorder(
          dt.repl,
          intersect(
            union(
              colnames(dt.x),
              colnames(dt.repl)),
            colnames(dt.repl)
          )
        )
    }
    return(dt.repl)
}



#' logical to integer in data tables
#'
#' coerce logical columns to integers in data table
#'
#' @param dt data.table
#' @return A data.table
#' @export
dt_lg2int = function(dt) {
    these_cols = which(sapply(dt, class) == "logical")
    for (this_col in these_cols) {
        this_val = as.data.frame(dt[, this_col, with = FALSE])[,1]
        data.table::set(dt, j = this_col, value = as.integer(this_val))
    }
    return(dt)
}


#' convert columns with NA to false
#'
#' coerce NA in columns of class "logical" to FALSE
#'
#' @param dt data.table
#' @param these_cols NULL by default, will select columns of class logical, otherwise will be specified
#' @return A data.table
#' @export
dt_na2false = function(dt, these_cols = NULL) {
    na2false = function(v)
    {
        ## v = ifelse(is.na(v), v, FALSE)
        v[is.na(v)] = FALSE
        as.logical(v)
    }
    if (is.null(these_cols)) {
        these_cols = which(sapply(dt, class) == "logical")
    }
    for (this_col in these_cols) {
        ## this_val = as.data.frame(dt[, this_col, with = FALSE])[,1]
        this_val = dt[[this_col]]
        data.table::set(dt, j = this_col, value = na2false(this_val))
    }
    return(dt)
}



#' convert columns with NA to true
#'
#' coerce NA in columns of class "logical" to TRUE
#'
#' @param dt data.table
#' @param these_cols NULL by default, will select columns of class logical, otherwise will be specified
#' @return A data.table
#' @export
dt_na2true = function(dt, these_cols = NULL) {
    if (is.null(these_cols)) {
        these_cols = which(sapply(dt, class) == "logical")
    }
    for (this_col in these_cols) {
        ## this_val = as.data.frame(dt[, this_col, with = FALSE])[,1]
        this_val = dt[[this_col]]
        data.table::set(dt, j = this_col, value = na2true(this_val))
    }
    return(dt)
}


#' convert columns with NA to zeros
#'
#' coerce NA in columns of class "numeric"/"integer" to FALSE
#'
#' @param dt data.table
#' @param these_cols NULL by default, will select columns of numeric class, otherwise will be specified
#' @return A data.table
#' @export
dt_na2zero = function(dt, these_cols = NULL) {
    if (is.null(these_cols)) {
        these_cols = which(sapply(dt, class) %in% c("numeric", "integer"))
    }
    if (!inherits(dt, "data.table")) {
        setDT(dt)
    }
    for (this_col in these_cols) {
        this_val = dt[[this_col]]
        ## this_val = as.data.frame(dt)[, this_col]
        this_val[is.na(this_val)] = 0
        data.table::set(dt, j = this_col, value = this_val)
        ## dt[, this_col] = this_val
    }
    return(dt)
}

#' convert columns with NA to empty character
#'
#' coerce NA in columns of class "character" to ""
#'
#' @param dt data.table
#' @param these_cols NULL by default, will select columns of character class, otherwise will be specified
#' @return A data.table
#' @export
dt_na2empty = function(dt) {
    these_cols = which(sapply(dt, class) == "character")
    for (this_col in these_cols) {
        ## this_val = as.data.frame(dt[, this_col, with = FALSE])[,1]
        this_val = dt[, this_col, with = FALSE][[1]]
        data.table::set(dt, j = this_col, value = na2empty(this_val))
    }
    return(dt)
}


#' convert columns with empty character to NA
#'
#' coerce "" in columns of class "character" to NA_character_
#'
#' @param dt data.table
#' @param these_cols NULL by default, will select columns of character class, otherwise will be specified
#' @return A data.table
#' @export
dt_empty2na = function(dt) {
    these_cols = which(sapply(dt, class) == "character")
    for (this_col in these_cols) {
        ## this_val = as.data.frame(dt[, this_col, with = FALSE])[,1]
        this_val = dt[, this_col, with = FALSE][[1]]
        ## browser()
        data.table::set(dt, j = this_col, value = empty2na(this_val))
    }
    return(dt)
}


#' set columnn to NULL in data.table
#'
#' remove column from data table by setting to NULL
#'
#' @param dt data.table
#' @param cols character vector of column names
#' @return A data.table
#' @export
dt_setnull = function(dt, cols) {
    for (this_col in cols) {
        data.table::set(dt, j = this_col, value = NULL)
    }
    return(dt)
}

#' convert column to integer
#'
#' coerce column of type "numeric" by default to "integer"
#'
#' @param dt data.table
#' @param cols character vector of column names
#' @return A data.table
#' @export
dt_setint = function(dt, cols = NULL) {
    if (is.null(cols)) {
        cols = names(dt)[which(sapply(dt, class) %in% c("numeric"))]
    }
    for (this_col in cols) {
        data.table::set(dt, j = this_col, value = as.integer(dt[[this_col]]))
    }
    return(dt)
}


#' convert all data.table entries to NA
#'
#' Set all columns to NA...
#' can't remember why one would want this...
#'
#' @param dt data.table
#' @param cols character vector of column names
#' @return A data.table
#' @export
dt_setallna = function(dt, cols = NULL, na_type = NA_integer_) {
    if (is.null(cols)) {
        cols = colnames(dt)
    }
    for (this_col in cols) {
        data.table::set(dt, j = this_col, value = NULL)
        data.table::set(dt, j = this_col, value = na_type)
    }
    return(dt)
}


#' convert column to character
#'
#' set columns to character
#' default is nonsensical?
#'
#' @param dt data.table
#' @param cols character vector of column names
#' @return A data.table
#' @export
dt_setchar = function(dt, cols = NULL) {
    if (is.null(cols)) {
        cols = names(dt)[which(!sapply(dt, class) == "character")]
    }
    for (this_col in cols) {
        data.table::set(dt, j = this_col, value = as.character(dt[[this_col]]))
    }
    return(dt)
}

#' convert all columns to logical
#'
#' set columns to logical
#'
#' @param dt data.table
#' @param cols character vector of column names
#' @return A data.table
#' @export
dt_any2lg = function(dt, cols = NULL) {
    if (is.null(cols)) {
        ## cols = names(dt)[which(!unlist(lapply(dt, class)) == "character")]
        cols = colnames(dt)
    }
    for (this_col in cols) {
        data.table::set(dt, j = this_col, value = as.logical(dt[[this_col]]))
    }
    return(dt)
}


#' convert factor columns to character
#'
#' coerce factor columns to character
#'
#' @param dt data.table
#' @param cols character vector of column names
#' @return A data.table
#' @export
dt_f2char = function(dt, cols = NULL) {
    if (is.null(cols)) {
        ## cols = names(dt)[which(!unlist(lapply(dt, class)) == "character")]
        cols = colnames(dt)[which(unlist(lapply(dt, class)) == "factor")]
    } else {
        cols = cols[which(unlist(lapply(as.list(dt)[cols], class)) == "factor")]
    }
    for (this_col in cols) {
        data.table::set(dt, j = this_col, value = as.character(dt[[this_col]]))
    }
    return(dt)
}


##############################
##############################
############################## Genomics / mskilab stuff
############################## gUtils stuff
##############################


make_granges <- function(x, seqnames.field = "seqnames", start.field = "start", end.field = "end", strand.field = "strand") {
    seqnames.ix = na.omit(match3(seqnames.field, colnames(x)))
    start.ix = na.omit(match3(start.field, colnames(x)))
    end.ix = na.omit(match3(end.field, colnames(x)))
    strand.ix = na.omit(match3(strand.field, colnames(x)))
    qseqnames.ix = na.omit(seqnames.ix[1])
    qstart.ix = na.omit(start.ix[1])
    qend.ix = na.omit(end.ix[1])
    qstrand.ix = na.omit(strand.ix[1])
    tmp = qmat(x,,c(qseqnames.ix, qstart.ix, qend.ix, qstrand.ix)) ## taking first match
    metas = qmat(x,,c(-seqnames.ix, -start.ix, -end.ix, -strand.ix))
    if (ncol(tmp) == 4)
        colnames(tmp) = c("seqnames", "start", "end", "strand")
    else
        colnames(tmp) = c("seqnames", "start", "end")
    gr = GRanges(
      tmp[["seqnames"]],
      IRanges(tmp[["start"]], tmp[["end"]]),
      strand = tmp[["strand"]]
    )
    mcols(gr) = metas
    return(gr)
}


#' simulate 10x Barcoded reads off of collection of walks
#' 
#' Take a collection of walks and simulate barcoded reads at the break sites
#'
#' @author Kevin Hadi
#' @export sim.bx
sim.bx <- function(wk, only_simulate_breaks = TRUE, numr = 24, bxwid = 30e3, physcov = 150, readlength = 150, fraglength = 1200) {
  ## NUMBX = numbx
  NUMR = numr
  
  if (inherits(wk, "gWalk")) {
    gw = wk
  } else if (inherits(wk, "GRangesList")) {
    gw = gW(grl = wk)
  }

  wk_seqnames = paste0(rep_len("W_", NROW(gw)), as.character(seq_along(gw)))

  ## spc = gChain::spChain(setNames2(gw$grl, wk_seqnames))
  
  grlby = gr_construct_by(grl.unlist(gw$grl), "grl.ix")
  grlby = split(grlby, mcols(grlby)$grl.ix)
  spc = gChain::spChain(setNames2(grlby, wk_seqnames))
  
  gw$set(alt.edge = gw$eval(edge = paste(which(type == "ALT"), collapse = ", ")))
  
  alt_ids = which(nzchar(gw$dt$alt.edge))
  any_alts = length(alt_ids) > 0

  get_ends = lapply(gGnome::lengths(gw), function(x) 1:(x - 1))

  ## alt_segs = grlby[IntegerList(strsplit(gw$dt$alt.edge, ", "))]
  ## breakends = gr.resize(alt_segs, width = 1, pad = FALSE, fix = "end")

  wkgr = spc@.galy
  wkgrl = split(wkgr, seqnames(wkgr))

  alt_segs = split(
    wkgr,
    seqnames(wkgr)
  )[IntegerList(strsplit(gw$dt$alt.edge, ", "))]
  breakends = gr.resize(
    alt_segs,
    width = 1,
    pad = FALSE,
    fix = "end"
  )
  breakpoints_somatic = unlist(
    gr.resize(
      breakends,
      width = 2,
      pad = FALSE,
      fix = "start"
    )
  )


  ## if (any_alts) {
  ##     alt_ends = as.integer(unlist(strsplit(gw$dt$alt.edge, ", ")))
  ##     breakpoints_somatic = GRanges(wk_seqnames, IRanges(end(wkgr[alt_ends]), start(wkgr[alt_ends + 1])))
  ## }
  
  ## wkgr = dt2gr(gr2dt(wk[[1]])[, `:=`(end = cumsum(width))][, start := end - width + 1][, seqnames := "A"])

  ## totwid = sum(width(wk))
  totwids = width(wk)

  ## NUMBX = round(totwid * physcov / bxwid)
  NUMBX = round(totwids * physcov / bxwid)

  set_rngseed(10)

  ## bc.start = sample(max(end(wkgr)), NUMBX)

  bc.starts = mapply(function(wkend, numbx) {
    return(sample(wkend, numbx))
  }, max(end(wkgrl)), NUMBX, SIMPLIFY = F)

  gr.w = data.table(seqnames = rep(wk_seqnames, elementNROWS(bc.starts)),
    start = unlist(bc.starts))
  gr.w[, end := (start + as.integer(jitter(rep(bxwid, length(start)), factor = 10))) - 1]
  gr.w[, BX := 1:.N]
  gr.w[, grl.ix := .GRP, by = seqnames]

  if (inherits(only_simulate_breaks, "logical")) {
    if (only_simulate_breaks) {
      ## interstitial_breaks = gr.end(wkgr[get_ends[[1]]])
      interstitial_breaks = gr.end(wkgrl[get_ends])
      query_footprint = GenomicRanges::reduce(interstitial_breaks + 1e5)
      query_footprint = unlist(query_footprint)
      
      ## gr.w = data.table(seqnames = "A", start = bc.start)
      ## gr.w[, end := (bc.start + as.integer(jitter(rep(bxwid, length(bc.start)), factor = 10))) - 1]
      ## gr.w[, BX := 1:.N]


      ## only simulate from those BX that are within query footprint
      ## let's not simulate the entire derivative chromosome
      bx.to.sim = (make_granges(gr.w) %&% query_footprint)$BX
      gr.w = gr.w[BX %in% bx.to.sim]

      bxwids = split(gr.w[, end + 1 - start], rleseq(gr.w$seqnames)$idx)

      ## NUMBX = length(bx.to.sim)
      NUMBX = gr.w[, length(unique(BX)), by = seqnames][, setNames2(V1, seqnames)]
    } else {
      bx.to.sim = 1:NROW(gr.w)
    }
  } else if (inherits(only_simulate_breaks, "GRanges")) {
    bx.to.sim = (make_granges(gr.w) %&% only_simulate_breaks)$BX
  }

  ## gr.w = GRanges("A", IRanges(bc.start, width = as.integer(jitter(rep(bxwid, length(bc.start)), factor = 10))))
  ## gr.w$BX = 1:NROW(gr.w)

  FRAGLENGTH=fraglength

  f.starts = mapply(function(bxwid, numbx) {
    sample(bxwid - FRAGLENGTH, numbx * NUMR, replace = T)
  }, bxwids, NUMBX, SIMPLIFY = F)
  
  ## f.starts = sample(bxwid - FRAGLENGTH, NUMBX * NUMR, replace = T)
  ## flst = split(f.starts, rep(1:NUMBX, each = NUMR))

  
  ## tmp = gr.sum(gr.w)
  ## sum(tmp$score * width(tmp)) / sum(width(tmp))



  ## bx.frag = GRanges("A", IRanges(rep(start(gr.w), each = NUMR) + f.starts, width = FRAGLENGTH))
  ## bx.frag$BX = rep(1:NUMBX, each = NUMR)
  ## bx.frag$qname = 1:NROW(bx.frag)

  READLENGTH=readlength

  ## bx.frag = data.table(seqnames = wk_seqnames, start = rep(gr.w$start, each = NUMR) + f.starts)
  ## bx.frag[, end := start + (FRAGLENGTH - 1)]
  ## bx.frag[, plus.end := start + (READLENGTH - 1)]
  ## bx.frag[, minus.start := end - (READLENGTH - 1)]
  ## bx.frag[, qname := 1:.N]
  ## bx.frag[, BX := rep(bx.to.sim, each= NUMR)]

  bx.frag = data.table(seqnames = rep(wk_seqnames, NUMBX * NUMR),
    start = rep(gr.w$start, each = NUMR) + unlist(f.starts))
  bx.frag[, end := start + (FRAGLENGTH - 1)]
  bx.frag[, plus.end := start + (READLENGTH - 1)]
  bx.frag[, minus.start := end - (READLENGTH - 1)]
  bx.frag[, qname := 1:.N]
  bx.frag[, BX := rep(bx.to.sim, each= NUMR)]
  bx.frag[, grl.ix := .GRP, by = seqnames]
  

  plusreads = bx.frag[, .(seqnames, start, end = plus.end, strand= "+", BX, qname, grl.ix, readid = 1)]
  minusreads = bx.frag[, .(seqnames, start = minus.start, end = end, strand= "-", BX, qname, grl.ix, readid = 2)]

  ## r1 = gr.resize(bx.frag, 150, pad = FALSE, fix = "start")
  ## r2 = gr.resize(bx.frag, 150, pad = FALSE, fix = "end")

  ## grr = gr.spreduce(grbind(r1, r2), BX)
  grr = rbind(plusreads, minusreads)
  ## grr.range = gr_deconstruct_by(range(gr_construct_by(grr, "BX")), "BX")
  ## grr.frag = gr.sprange(grr, "BX")

  gr.grr = make_granges(grr)

  if (any_alts) {
    grr$split_read_support = gr.grr %^% (breakpoints_somatic)
    ## grr$split_read_support = gr2dt(grr)[, .(.I, split_supporting = any(split_read_support)), by = BX]$split_supporting
    bx.frag$discordant_read_support = make_granges(bx.frag) %^% breakpoints_somatic
    grr$discordant_read_support = grr$qname %in% bx.frag$qname[bx.frag$discordant_read_support]
    gr.w$bx_support = make_granges(gr.w) %^% breakpoints_somatic
    grr$bx_support = grr$BX %in% gr.w[gr.w$bx_support == TRUE]$BX
  } else {
    grr$split_read_support = FALSE
    grr$discordant_read_support = FALSE
    grr$bx_support = FALSE
  }

  mcols(gr.grr) = cbind(mcols(gr.grr), grr[,-c(1:8)])
  grr2 = gChain::lift(t(spc), gr.grr)

  grr2 = gr_deconstruct_by(grr2, "grl.ix")
  return(grr2)
}

#' convert table of paired string coordinates to GRangesList 
#' 
#' @author Kevin Hadi
#' @export
bp2grl = function(df, bp1.field = "bp1", bp2.field = "bp2", sort = TRUE) {
    grl = gUtils::grl.pivot(with(df, GRangesList(parse.gr(bp1), parse.gr(bp2))))
    mcols(grl) = df
    if (sort) {
        seqlevels(grl) = GenomeInfoDb::sortSeqlevels(seqlevels(grl))
        grl = sort.GRangesList(grl, ignore.strand = T)
    }
    return(grl)
}



#' GenomicRanges::findOverlaps wrapper
#'
#' @author Kevin Hadi
#' @export
findov = function(query, subject, by = NULL, ...) {
    query = gr_construct_by(query, by = by)
    subject = gr_construct_by(subject, by = by)
    h = tryCatch(GenomicRanges::findOverlaps(query, subject, ...), error = function(e) {
        warning("findOverlaps applied to ranges with non-identical seqlengths")
        query = gUtils::gr.fix(query, subject)
        subject = gUtils::gr.fix(subject, query)
        return(GenomicRanges::findOverlaps(query, subject, ...))
    })
    tmp = setcols(asdt(h), c("query.id", "subject.id"))
    return(tmp)
}


#' works with GRangesLists
#'
#' to be used with gr_construct_by
#'
#' @return A GRanges with the by metadata field attached to the seqnames
#' @author Kevin Hadi
#' @export gr.flipstrand
gr.flipstrand = function(gr) {
    if (!inherits(gr, c("GRanges" ,"GRangesList"))) {
        stop("not a GRanges / GRangesList")
    }
    if (is(gr, "GRangesList")) {
        this.strand = gr@unlistData@strand
        gr@unlistData@strand = S4Vectors::Rle(
          factor(
            c("*" = "*", "+" = "-", "-" = "+")
            [as.character(this.strand)], levels(this.strand)
          )
        )
    } else {
        this.strand = gr@strand
        gr@strand = S4Vectors::Rle(
          factor(
            c("*" = "*", "+" = "-", "-" = "+")
            [as.character(this.strand)], levels(this.strand)
          )
        )
    }
    return(gr)
}


#' Substitute chr in GRanges or GRangesList
#' 
#' substitute characters in seqnames
#'
#' @return GRanges or GRangesList
#' @author Kevin Hadi
#' @export gr.sub2
gr.sub2 = function(x, sub.pt = c("^chr", "MT"), sub.rp = c("", "M")) {
    ans = copy3(x)
    osi = seqinfo(x)
    subs = cbind(sub.pt, sub.rp)
    seqn = seqo = seqnames(x)
    df.si = rn2col(as.data.frame(osi), "seqlevels")
    slsi = df.si$seqlevels
    if (inherits(seqn, "List")) seqn = unlist(seqn)
    slvn = levels(seqn)
    for (i in seq_len(NROW(subs))) {
        slsi = gsub(subs[i,1], subs[i,2], slsi)
        slvn = gsub(subs[i,1], subs[i,2], slvn)
    }
    levels(seqn) = slvn
    df.si$nseqlevels = slsi
    if (anyDuplicated(df.si$nseqlevels)) {
        rleix = rleseq(df.si$nseqlevels, use.data.table = FALSE, clump = TRUE)
        ix = rleix$idx[rleix$lns > 1]
        dupix = which(rleix$lns > 1)
        collapse_ix = unique(rleix$idx)
        unl = unlist(lapply(collapse_ix, function(i) {
            out = max(subset(df.si, df.si$nseqlevels == i)$seqlengths, na.rm = T)
            ifelse(is.infinite(out), NA_integer_, out)
        }))
        ## unl = as.vector(unlist(by(df.si, ix, function(x) {
        ##     out = max(x$seqlengths, na.rm = T)
        ##     ifelse(is.infinite(out), NA_integer_, out)
        ## })))
        df.si = df.si[!duplicated(df.si$nseqlevels),,drop=F]
        df.si[df.si$nseqlevels == collapse_ix,]$seqlengths = unl
    }
    newsi = Seqinfo(
      df.si$nseqlevels,
      seqlengths = df.si$seqlengths,
      isCircular = df.si$isCircular,
      genome = df.si$genome
    )
    if (inherits(x, "GRangesList")) {
        unlans = unlist(ans)
        unlans@seqnames = seqn
        unlans@seqinfo = newsi
        return(relist(unlans, ans))
    } else if (inherits(x, "GRanges")) {
        levels(seqn) = slvn
        ans@seqnames = seqn
        ans@seqinfo = newsi
        return(ans)
    }
}


#' removing by field and random string barcode to seqnames for more efficient by queries
#'
#' to be used with gr_construct_by
#'
#' @return A GRanges with the by metadata field attached to the seqnames
#' @author Kevin Hadi
#' @name gr_deconstruct_by
#' @export gr_deconstruct_by
gr_deconstruct_by <- function (x, by = NULL, meta = FALSE) 
{
    if (is.null(by) || length(x) == 0) 
        return(x)
    this.sep1 = " G89LbS7RCine "
    this.sep2 = " VxTofMAXRbkl "
    ans = x
    f1 = as.character(seqnames(x))
    f2 = sub(paste0(".*", this.sep2), "", f1)
    ## f2 = trimws(gsub(paste0(".*", this.sep2), "", f1))
    ## f2 = trimws(gsub(paste0(".*", this.sep1), "", f2))
    ui = which(!duplicated(f1))
    x_seqinfo <- seqinfo(x)
    seql = rleseq(f2[ui], clump = T)
    lst = lapply(split(seqlengths(x_seqinfo)[f1[ui]], seql$idx), 
        function(x) max(x))
    uii = which(!duplicated(f2[ui]))
    ans_seqlevels = f2[ui][uii]
    ans_seqlengths = setNames(unlist(lst), ans_seqlevels)
    ans_isCircular <- unname(isCircular(x_seqinfo))[ans_seqlevels]
    ans_seqinfo <- Seqinfo(ans_seqlevels, ans_seqlengths, ans_isCircular)
    ans@seqnames <- Rle(factor(f2, ans_seqlevels))
    ans@seqinfo <- ans_seqinfo
    if (isTRUE(meta)) {
        f0 = sub(paste0(this.sep2, ".*"), "", f1)
        f0 = data.table::tstrsplit(f0, this.sep1, fixed = T)
        ## f0 = strsplit(f0, this.sep1)
        ## f0 = trans(f0, c)
        mc = as.data.table(unname(f0))
        if (!(identical(by, "") | identical(by, NA) | identical(by, "NA")) &&
            length(by) == NCOL(mc)) {
            colnames(mc) = by
        }
        ## debugonce(do.assign)
        mcols(ans) = do.assign(mcols(ans), mc)
    }
    return(ans)
}


#' NA out seqlevels
#'
#' @return A GRanges with NA in all seqlevels
#' @author Kevin Hadi
#' @export na.seql
na.seql <- function (x) 
{
    x_seqinfo = seqinfo(x)
    ans = x
    ans_seqlengths = seqlengths(x_seqinfo)
    ans_seqlevels = seqlevels(x_seqinfo)
    ans_isCircular = isCircular(x_seqinfo)
    ans_seqlengths[] = NA_integer_
    ans_seqinfo = Seqinfo(ans_seqlevels, ans_seqlengths, ans_isCircular)
    ans@seqinfo = ans_seqinfo
    return(ans)
}



#' adding on by field to seqnames for more efficient by queries
#' 
#' Uses by field from metadata column to insert into seqnames
#' This is useful for more efficient queries findoverlaps queries between 2 ranges
#' when we want to stratify the query with a "by" field.
#' This feeds into the gr.findoverlaps family of gUtils tools.
#'
#' @return A GRanges with the by metadata field attached to the seqnames
#' @author Kevin Hadi
#' @export gr_construct_by
gr_construct_by = function(x, by = NULL, na.seql = TRUE) {
    if (is.null(by) || length(x) == 0) return(x)
    ## this.sep1 = {set.seed(10); paste0(" ", rand.string(), " ")}
    this.sep1 = " G89LbS7RCine "
    ## this.sep2 = {set.seed(11); paste0(" ", rand.string(), " ")}
    this.sep2 = " VxTofMAXRbkl "
    ## ans = copy2(x)
    ans = x
    thisp = function(...) paste(..., sep = this.sep1)
    f1 = do.call(paste, c(as.list(mcols(x)[, by, drop = FALSE]), sep = this.sep1))
    f2 = as.character(seqnames(x))
    f2i = as.integer(seqnames(x))
    f12 = paste(f1, f2, sep = this.sep2)
    ui = which(!duplicated(f12))
    ans_seqlevels = f12[ui]
    x_seqinfo <- seqinfo(x)
    ans_seqlengths = unname(seqlengths(x_seqinfo)[f2i[ui]])
    if (isTRUE(na.seql))
        ans_seqlengths[] = NA_integer_
    ans_isCircular <- unname(isCircular(x_seqinfo))[f2i[ui]]
    ans_seqinfo <- Seqinfo(ans_seqlevels, ans_seqlengths, ans_isCircular)
    ans@seqnames <- Rle(factor(f12, ans_seqlevels))
    ans@seqinfo <- ans_seqinfo
    return(ans)
}


#' shift_upstream from plyranges package
#'
#' works on genomicranges
#'
#' @return granges
#' @author Stuart Lee
#' @author Michael Lawrence
#' @author Dianne Cook
#' @export
shift_up = function (x, shift = 0L)
{
    strand = function(bla) {as.character(BiocGenerics::strand(bla))}
    neg <- strand(x) == "-"
    pos <- strand(x) %in% c("+", "*")
    if (length(x) == length(shift)) {
        shift_neg <- shift[which(neg)]
        shift_pos <- shift[which(pos)]
        x[neg] <- shift_right(x[neg], shift_neg)
        x[pos] <- shift_left(x[pos], shift_pos)
    }
    else {
        x[neg] <- shift_right(x[neg], shift)
        x[pos] <- shift_left(x[pos], shift)
    }
    return(x)
}


#' shift_downstream from plyranges package
#'
#' works on genomicranges
#'
#' @return granges
#' @author Stuart Lee
#' @author Michael Lawrence
#' @author Dianne Cook
#' @export
shift_down = function (x, shift = 0L)
{
    strand = function(bla) {as.character(BiocGenerics::strand(x))}
    neg <- strand(x) == "-"
    pos <- strand(x) %in% c("+", "*")
    if (length(x) == length(shift)) {
        shift_neg <- shift[which(neg)]
        shift_pos <- shift[which(pos)]
        x[neg] <- shift_left(x[neg], shift_neg)
        x[pos] <- shift_right(x[pos], shift_pos)
    }
    else {
        x[neg] <- shift_left(x[neg], shift)
        x[pos] <- shift_right(x[pos], shift)
    }
    return(x)
}


#' shift_left from plyranges package
#'
#' works on genomicranges
#'
#' @return granges
#' @author Stuart Lee
#' @author Michael Lawrence
#' @author Dianne Cook
#' @export
shift_left = function (x, shift = 0L)
{
    shift_l <- -1L * shift
    return(GenomicRanges::shift(x, shift_l))
}


#' shift_right from plyranges package
#'
#' works on genomicranges
#'
#' @return granges
#' @author Stuart Lee
#' @author Michael Lawrence
#' @author Dianne Cook
#' @export
shift_right = function (x, shift = 0L)
{
    return(GenomicRanges::shift(x, shift))
}

#' wrapper around readDNAStringSet to remove extraneous characters
#'
#' Uses Biostrings::readDNAStringSet to read in fasta
#' and remove all characters that do not correspond to rname
#'
#' @return DNAStringSet object
#' @name readinfasta
#' @author Kevin Hadi
#' @export
readinfasta = function(fa, allow_vertbar = FALSE) {
    if (!allow_vertbar)
        ptrn = " [ 0-9A-Za-z.\\/\\-\\(\\):,\\|_+\\[\\]]+$"
    else
        ptrn = " [ 0-9A-Za-z.\\/\\-\\(\\):,_+\\[\\]]+$"
    fa = Biostrings::readDNAStringSet(fa)
    names(fa) = sub(ptrn, "", khtools::names2(fa), perl = T)
    return(fa)
}


#' subgr
#'
#' @name subgr
#' @return granges
#' @author Kevin Hadi
#' @export
subgr = function(x, y) {
    expr = as.expression(substitute(y))
    pf = parent.frame()
    ## pf2 = parent.frame(2)
    pf2 = stackenv2()
    return(x[S4Vectors:::safeEval(substitute(expr), S4Vectors::as.env(x, pf), pf2)])
    ## x[S4Vectors::with(x, eval(expr, enclos = pf))]
}


#' query
#'
#' @name %Q%
#' @return granges
#' @author Kevin Hadi
#' @export
"%Q%" = function(x,y, ...) {
    UseMethod("%Q%")
}


#' query on GRangesList
#'
#' @name %Q%.GRanges
#' @return GRanges
#' @author Kevin Hadi
#' @export
"%Q%.GRanges" = subgr


#' query on GRangesList
#'
#' @name %Q%.GRangesList
#' @return GRangesList
#' @author Kevin Hadi
#' @export
"%Q%.GRangesList" = subgr

#' query on CompressedGRangesList
#'
#' @name  %Q%.CompressedGRangesList
#' @return GRangesList
#' @author Kevin Hadi
#' @export
"%Q%.CompressedGRangesList" = subgr


#' create GRanges of full genome coordinates
#'
#' Grabs *.chrom.sizes file from
#' environmental variable "DEFAULT_GENOME" or
#' "DEFAULT_BSGENOME"
#' May need to set either of these via
#' Sys.setenv(DEFAULT_BSGENOME = "path_to_ref.chrom.sizes")
#' Sys.setenv(DEFAULT_GENOME = "path_to_ref.chrom.sizes")
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.genome
gr.genome = function(si, onlystandard = TRUE, genome = NULL) {
    if (missing(si)) {
        gr = si2gr(hg_seqlengths(include.junk = !onlystandard, genome = genome))
    } else {
        gr = si2gr(si)
    }
    if (onlystandard) gr = keepStandardChromosomes(gr, pruning.mode = "coarse")
    gr.sort(gr)
}

#' get permissive seqlengths from multiple GRanges-like objects
#'
#' takes seqlengths
#' May need to set either of these via
#' Sys.setenv(DEFAULT_BSGENOME = "path_to_ref.chrom.sizes")
#' Sys.setenv(DEFAULT_GENOME = "path_to_ref.chrom.sizes")
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.fixseq
gr.fixseq = function(...) {
    output = Reduce(function(x, y) {
        if (!inherits(x, "data.frame")) x = within(enframe(x), {ord = seq_along(name)})
        if (!inherits(y, "data.frame")) y = enframe(y)
        df = merge(x, y, by = "name", all = T)
        df = df[order(df$ord),]
        out = data.frame(
          seqname = df$name,
          seqlength = pmax(df[['value.x']], df[['value.y']], na.rm = T)
        )
        out
    }, lapply(list(...), seqlengths))
    with(output, setNames2(seqlength, seqname))
}


#' conform_si
#'
#' force a ranges to conform to a new seqinfo
#' 
#' @return GRanges
#' @author Kevin Hadi
#' @export conform_si
conform_si = function(x, si) {
    ans = copy3(x)
    osi = seqinfo(x)
    osn = as.character(seqnames(x))
    newslev = union(seqlevels(si), seqlevels(osi))
    ## newsle = seqlengths(osi)
    new = rn2col(as.data.frame(si[newslev]), "seqnames")
    old = rn2col(as.data.frame(osi[newslev]), "seqnames")
    newsi = merge.repl(
        new, old, force_y = FALSE, overwrite_x = FALSE,
        by = "seqnames", keep_order = T, all = T
    )
    newsi = as(col2rn(asdf(newsi), "seqnames"), "Seqinfo")
    ans@seqnames = Rle(factor(osn, seqlevels(newsi)))
    ans@seqinfo = newsi
    return(ans)
}


#' same as gr.fix basically
#'
#' need to change implementation...
#' it doesn't add to gr.fix so far
#' except to add default genome seqlengths
#' on top of whatever is already part of the input
#'
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.patch
gr.patch = function(gr, patch, onlystandard = TRUE) {
    if (missing(patch)) {
        patch = gr.genome(onlystandard = onlystandard)
    }
    sl = gr.fixseq(gr, patch)
    gr.fix(gr, sl)
}


#' granges to datatable via dataframe
#'
#' @return GRangesList
#' @author Kevin Hadi
#' @export gr2df
gr2df = function(gr, var = "rowname", as.data.table = FALSE) {
    sf = options()$stringsAsFactors
    on.exit({options(stringsAsFactors = sf)})
    options(stringsAsFactors = FALSE)
    rn = names(gr)
    if (!is.null(rn))
        rn2 = make.unique(rn)
    else
        rn2 = as.character(seq_along(gr))
    if (inherits(gr, "GRanges")) {
        df = GenomicRanges::as.data.frame(gr, row.names = rn2)
        if ("rowname" %in% df) {
            df$rowname = rn2
        } else {
            cmd = sprintf("cbind(%s = rn2, df)", var)
            df = et(cmd)
        }
    } else if (inherits(gr, "GRangesList"))
        df = GenomicRanges::as.data.frame(gr)
    if (identical(as.data.table, TRUE)) {
        setDT(df)
    }
    if (is.factor(df$seqnames)) {
        df$seqnames = as.character(df$seqnames)
    }
    if (is.factor(df$strand)) {
        df$strand = as.character(df$strand)
    }
    ## return(dt_f2char(df,c("seqnames", "strand")))
    return(df)
}



#' grangeslist to data table via dataframe
#'
#' @return data table
#' @author Kevin Hadi
#' @export grl.undf
grl.undf = function(grl) {
    sf = options()$stringsAsFactors
    on.exit({options(stringsAsFactors = sf)})
    options(stringsAsFactors = FALSE)
    gr = gr2df(grl)
    lst = rleseq(gr$group, clump = FALSE)
    mc = setDT(GenomicRanges::as.data.frame(mcols(grl)[lst$idx,,drop = FALSE]))
    names(lst) = c("grl.ix", "grl.iix", "grl.len")
    return(dedup.cols(cbind(gr, mc, as.data.frame(lst))))
}

#' coerce to data table via setDT
#'
#' @return data table
#' @author Kevin Hadi
#' @export
asdt = function(obj) {
    out = setDT(as.data.frame(obj))
    return(out)
}

#' coerce to data frame
#'
#' @return data frame
#' @author Kevin Hadi
#' @export
asdf = function(obj) {
    return(as.data.frame(obj))
}

#' asm
#' 
#' coerce to matrix
#'
#' @return matrix
#' @author Kevin Hadi
#' @export
asm = function(obj) {
    return(as.matrix(obj))
}

#' flip strand of grangeslist
#'
#' @return GRangesList
#' @author Kevin Hadi
#' @export grl.flipstrand
grl.flipstrand = function(grl) {
    tmp_vals = mcols(grl)
    tmp_gr = unlist(grl, use.names = FALSE)
    tmp_gr = gr.flipstrand(tmp_gr)
    new_grl = relist(tmp_gr, grl)
    mcols(new_grl) = tmp_vals
    return(new_grl)
}


#' specify strand of granges or grangeslist
#'
#' @return granges or grangeslist
#' @author Kevin Hadi
#' @export gr.strand
gr.strand = function(gr, str = "*") {
    strand(gr) = str
    return(gr)
}



#' data frame to GRanges
#'
#' data frame to GRanges
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export df2gr
df2gr = function (df, seqnames.field = "seqnames", start.field = "start", 
    end.field = "end", strand.field = "strand", ignore.strand = FALSE, 
    keep.extra.columns = TRUE, starts.in.df.are.0based = FALSE) {
    if (!inherits(df, "data.frame")) {
        df = as.data.frame(df)
    }
    if (inherits(seqnames.field, c("numeric", "integer"))) {
        seqnames.field = colnames(df)[seqnames.field]
    }
    if (inherits(start.field, c("numeric", "integer"))) {
        start.field = colnames(df)[start.field]
    }
    if (inherits(end.field, c("numeric", "integer"))) {
        end.field = colnames(df)[end.field]
    }
    if (inherits(strand.field, c("numeric", "integer"))) {
        strand.field = colnames(df)[strand.field]
    }
    badcols = c("seqnames", "start", "end", "strand", "ranges", 
        "width", "element", "seqlengths", "seqlevels", "isCircular")
    badcols = setdiff(badcols, c(seqnames.field, start.field, 
        end.field, strand.field))
    ix = which(colnames(df) %in% badcols)
    if (length(ix) > 0) 
        df = et(sprintf("df[, -%s]", mkst(ix)))
    cnames = colnames(df)
    names(cnames) = cnames
    relevant_cols = match(
      c(seqnames.field, start.field, end.field, strand.field),
      cnames
    )
    if (is.na(relevant_cols[4])) {
        relevant_cols = relevant_cols[-4]
        ignore.strand = TRUE
    }
    addon = rand.string()
    cnames[relevant_cols] = paste(
      cnames[relevant_cols],
      addon, 
      sep = "_"
    )
    colnames(df)[relevant_cols] = cnames[relevant_cols]
    seqnames.field = paste(seqnames.field, addon, sep = "_")
    start.field = paste(start.field, addon, sep = "_")
    end.field = paste(end.field, addon, sep = "_")
    strand.field = paste(strand.field, addon, sep = "_")
    GenomicRanges::makeGRangesFromDataFrame(
      df,
      seqnames.field = seqnames.field, 
      start.field = start.field,
      end.field = end.field,
      strand.field = strand.field, 
      ignore.strand = ignore.strand,
      keep.extra.columns = keep.extra.columns, 
      starts.in.df.are.0based = starts.in.df.are.0based
    )
}

#' data frame to GRangesList
#'
#' data frame to grl
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export df2grl
df2grl = function(df,
                  seqnames.field = "seqnames",
                  start.field = "start",
                  end.field = "end",
                  strand.field = "strand",
                  split.field = "grl.ix",
                  ignore.strand = FALSE,
                  keep.extra.columns = TRUE,
                  asmcols = NULL,
                  keepgrmeta=FALSE) {
    if (!inherits(df, "data.frame")) {
        df = as.data.frame(df)
    }
    if (inherits(seqnames.field, c("numeric", "integer"))) {
        seqnames.field = colnames(df)[seqnames.field]
    }
    if (inherits(start.field, c("numeric", "integer"))) {
        start.field = colnames(df)[start.field]
    }
    if (inherits(end.field, c("numeric", "integer"))) {
        end.field = colnames(df)[end.field]
    }
    if (inherits(strand.field, c("numeric", "integer"))) {
        strand.field = colnames(df)[strand.field]
    }
    if (inherits(split.field, c("numeric", "integer"))) {
        split.field = copy2(colnames(df)[split.field])
    }
    if (!is.null(asmcols) && inherits(asmcols, c("numeric", "integer"))) {
        asmcols = colnames(df)[asmcols]
    }
    o.split.field = copy2(split.field)
    badcols = c(
      "seqnames",
      "start",
      "end",
      "strand",
      "ranges",
      "width",
      "element",
      "seqlengths",
      "seqlevels",
      "isCircular"
    )
    badcols = setdiff(
      badcols,
      c(seqnames.field, start.field, end.field, strand.field, split.field)
    )
    ix = which(colnames(df) %in% badcols)
    if (length(ix) > 0) df = et(sprintf("df[, -%s]", mkst(ix)))
    cnames = colnames(df) # makeGRangesFromDataFrame makes use of tolower()
    names(cnames) = cnames
    ## cnames = tolower(cnames)
    ## relevant_cols = which(cnames %in% c(seqnames.field, start.field, end.field, strand.field))
    ## relevant_cols[duplicated(cnames[relevant_cols])]
    ## num = rleseq(cnames[relevant_cols], clump = T)$seq
    ## deduped = paste0(names(cnames[relevant_cols]),
    ##                  ifelse(num > 1, paste0(".", as.character(num)), ""))
    ## names(cnames)
    relevant_cols = match(c(seqnames.field, start.field, end.field, strand.field, split.field), cnames)
    if (is.na(relevant_cols[4])) {
        relevant_cols = relevant_cols[-4]
        ignore.strand = TRUE
    }
    if (is.na(relevant_cols[5])) {
        relevant_cols = relevant_cols[-5]
    }
    addon = rand.string()
    cnames[relevant_cols] = paste(cnames[relevant_cols], addon, sep = "_")
    colnames(df)[relevant_cols] = cnames[relevant_cols]
    seqnames.field = paste(seqnames.field, addon, sep = "_")
    start.field = paste(start.field, addon, sep = "_")
    end.field = paste(end.field, addon, sep = "_")
    strand.field = paste(strand.field, addon, sep = "_")
    split.field = paste(split.field, addon, sep = "_")
    spl = df[[split.field]]
    tmpix = which(!duplicated(spl))
    if (!is.null(asmcols)) {
        attach_to_mcols = qmat(df,cid = asmcols)
        ## rows = which(!duped(attach_to_mcols))
        rows = tmpix
        ## ix = do.call(rleseq, list(attach_to_mcols, clump = TRUE))
        if (!keepgrmeta)
            df = qmat(df, ,-which(colnames(df) %in% asmcols))
    }
    out = GenomicRanges::makeGRangesListFromDataFrame(
      df,
      seqnames.field = seqnames.field,
      start.field = start.field,
      end.field = end.field,
      strand.field = strand.field,
      split.field = split.field,
      ignore.strand = ignore.strand,
      keep.extra.columns = keep.extra.columns
    )
    
    mcols(out)[[o.split.field]] = spl[tmpix]
    if (!is.null(asmcols)) {
        mcols(out) = cbind(mcols(out), qmat(attach_to_mcols, rows))
    }
    return(out)
}


#' round window to nearest unit
#'
#' Rounding window to nearest unit.
#' meant to be used for plotting window in gTrack.
#' For that purpose, it's most useful to use reduce = TRUE
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.round
gr.round = function(gr, nearest = 1e4, all = TRUE, reduce = FALSE) {
  if (reduce)
    gr = GenomicRanges::reduce(gr)
  wid = round((width(gr) + (0.5 * nearest)) / nearest) * nearest
  if (all) {
    wid = max(wid)
  }
  out = gr.resize(gr, wid = wid, pad = FALSE, reduce = T)
  return(out)
}


#' sort granges, grangeslist
#'
#' sort granges or grangeslist by seqlevels
#' also reorders seqlevels into 1:22, X, Y format
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.sort
gr.sort = function(gr, ignore.strand = TRUE) {
    return(gr[gr.order(gr, ignore.strand = ignore.strand)])
    ## return(sort(sortSeqlevels(gr), ignore.strand = ignore.strand))
}

#' order granges, grangeslist
#'
#' order granges or grangeslist by seqlevels
#' also reorders seqlevels into 1:22, X, Y format
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.order
gr.order = function(gr, ignore.strand = T) {
    sgr = GenomeInfoDb::sortSeqlevels(gr)
    if (isTRUE(ignore.strand))
        sgr = gUtils::gr.stripstrand(sgr)
    return(GenomicRanges::order(sgr))
}


#' Resize granges without running into negative width error
#'
#' lower size limit of window is 0
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.resize
gr.resize = function (gr, width, pad = TRUE, minwid = 0, each = TRUE, ignore.strand = FALSE,
    fix = "center", reduce = FALSE)
{
    wid = width
    if (pad) {
        if (isTRUE(each)) {
            wid = wid * 2
        }
        width.arg = pmax(width(gr) + wid, minwid)
    }
    else width.arg = pmax(wid, minwid)
    if (reduce) {
        ## gr = GenomicRanges::reduce(gr + width.arg, ignore.strand = ignore.strand) -
        ##     width.arg
      out = gr.resize(
        gr,
        width,
        pad = pad,
        minwid = minwid,
        each = each,
        ignore.strand = ignore.strand,
        fix = fix,
        reduce = FALSE
      )
      out = GenomicRanges::reduce(out, ignore.strand = ignore.strand)
      out = gr.resize(
        out,
        width,
        pad = pad,
        minwid = minwid,
        each = each,
        ignore.strand = ignore.strand,
        fix = fix,
        reduce = FALSE
      )
        return(out)
    }
    return(
      GenomicRanges::resize(
        gr,
        width = width.arg,
        fix = fix,
        ignore.strand = ignore.strand
      )
    )
}




#' a robust parse.gr
#'
#' version of parse.gr that is able to convert ranges with minus signs
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export parse.gr2
parse.gr2 = function(...) {
    grl.unlist(parse.grl2(...))
}


#' a robust parse.grl
#'
#' version of parse.grl that is able to convert ranges with minus signs
#'
#' @param str A string that can be parsed as ranged data "A:1-10+"
#' @param meta A table that will be added as metadata
#' @return GRangesList
#' @author Kevin Hadi
#' @export parse.grl2
parse.grl2 = function (str, meta = NULL, fixna = FALSE) 
{
    tmp = stringi::stri_split_regex(str, pattern = ",|;")
    grl.ix = rep(seq_along(tmp), lengths(tmp))
    tmp = unlist(tmp)
    if (fixna) {
        wasna = is.na(tmp)
        tmp = ifelse(wasna, "1:0--1", tmp)
    }
    tmp = gsub("(\\w+)(:)([[:punct:]]?\\w+)(-)([[:punct:]]?\\w+)([[:punct:]]?)", 
        "\\1 \\2 \\3 \\4 \\5 \\6", tmp)
    mat = stringi::stri_split_fixed(tmp, pattern = " ", simplify = "TRUE")
    gr = GRanges(seqnames = mat[, 1], ranges = IRanges(as.integer(mat[, 
        3]), as.integer(mat[, 5])), strand = ifelse(nchar(mat[, 
        6]) == 0 | !mat[, 6] %in% c("+", "-"), "*", mat[, 6]))
    if (fixna) {
        gr$was_na = wasna
    }
    gr = GenomicRanges::split(gr, grl.ix)
    if (!is.null(meta) && nrow(meta) == length(gr)) 
        S4Vectors::values(gr) = meta
    return(gr)
}




## parse.grl2 = function(str, meta = NULL) {
##     ## library(stringi)
##     tmp = stringi::stri_split_regex(str, pattern = ",|;")
##     grl.ix = rep(seq_along(tmp), lengths(tmp))
##     tmp = unlist(tmp)
##     tmp = gsub("(\\w+)(:)([[:punct:]]?\\w+)(-)([[:punct:]]?\\w+)([[:punct:]]?)", "\\1 \\2 \\3 \\4 \\5 \\6", tmp)
##     mat = stringi::stri_split_fixed(tmp, pattern = " ", simplify = "TRUE")
##     gr = GRanges(seqnames = mat[,1],
##                  ranges = IRanges(as.integer(mat[,3]), as.integer(mat[,5])),
##                  strand = case_when(nchar(mat[,6]) == 0 | !mat[,6] %in% c("+", "-") ~ "*",
##                                     TRUE ~ mat[,6]),
##                  grl.ix = grl.ix)
##     gr = gr.noval(split(gr, gr$grl.ix))
##     if (!is.null(meta) && nrow(meta) == length(gr))
##         values(gr) = meta
##     return(gr)
## }

#' output data structure for ski slope from anchorlifted SNV
#'
#' @return A GRanges
#' @export gr_calc_cov
gr_calc_cov = function(gr, PAD = 50, field = NULL, start.base = -1e6, end.base = -5e3, win = 1e4, FUN = "mean", baseline = NULL, normfun = "*", normfactor = NULL) {
    `%&%` = gUtils::`%&%`
    if (inherits(gr, "data.frame")) {
        gr = dt2gr(gr)
    }
    win = GRanges("Anchor", IRanges(-abs(win), abs(win)))
    ## silent({library(plyranges); forceload()})
    grcov = gUtils::gr.sum(gr + PAD, field = field)
    if (!is.null(field))
        grcov = grcov %>% select(score = !!field)
    ## mcols(grcov)[["score"]] = pmax(0, mcols(grcov)[["score"]], 0)
    ## grcov2 = gr.tile(grcov, 1)
    grcov2 = gr.tile(GRanges("Anchor", IRanges(-1e6, 1e6)) + PAD, 1)
    ## grcov2 = gr.tile(GRanges("Anchor", IRanges(start(head(grcov, 1)), end(tail(grcov, 1)))), 1)
    if (!is.empty(grcov)) {
        ## grcov2$score = gr.eval(grcov2, grcov, score, 0)
        grcov2 = within(plyranges::join_overlap_left(grcov2, grcov), {score = replace_na(score, 0)})
        ## grcov2 = grcov2 %$% grcov
        ## grcov2$score = grcov2$score %>% replace_na(0)
    } else {
        grcov2$score = 0
    }
    if (!is.null(normfactor)) {
        if (!(length(normfactor) == length(grcov2) | length(normfactor == 1)))
            stop("normfactor needs to be same length")
        grcov2$score = get(normfun)(grcov2$score, normfactor)
    }
    if (is.null(baseline)) {
        baseline = with(grcov2, {
            ## this_subset = data.table::between(start, (abs(start.base) + PAD) * sign(start.base), ((abs(end.base) + PAD) * sign(end.base)) - 1)
            this_subset = data.table::between(start, start.base - PAD, end.base + PAD - 1)
            get(FUN)(score[this_subset])
            ## sum(score[this_subset] * width[this_subset]) / sum(width[this_subset])
        })
    }
    ## baseline = gr2dt(grcov2)[data.table::between(start, (abs(start.base) + PAD) * sign(start.base), ((abs(end.base) + PAD) * sign(end.base)) - 1)][, sum(score * width) / sum(width)]
    score = grcov2$score
    if (!(length(baseline) == length(score) | length(baseline == 1)))
        stop("baseline needs to be same length as score or a length 1 vector")
    rel = pmax(score, 0) / (baseline + 1e-12)
    ## grcov2$rel = (grcov2$score) / (baseline + 1e-12)
    grcov2$score = rel
    grcov2$baseline = baseline
    grcov2 %&% win
}


#' @export std.calc.cov
std.calc.cov = function(anci, pad, field = NULL, baseline = NULL, FUN = "median") {
    gr_calc_cov(anci %>% dt2gr, PAD = pad, start.base = -5e3, end.base = 0, FUN = FUN, field = field, win = 5e3, baseline = baseline)
}


#' updated gr.disjoin from gUtils to work with GRangesList
#'
#' @return GRanges or GRangesList
#' @author Kevin Hadi
#' @export gr.disjoin
gr.disjoin = function (x, ..., ignore.strand = TRUE)
{
    if (inherits(x, "GRangesList")) {
        gr = GenomicRanges:::deconstructGRLintoGR(x)
        if (ignore.strand) gr = gr.stripstrand(gr)
        return(GenomicRanges:::reconstructGRLfromGR(gUtils::gr.disjoin(gr, ..., ignore.strand = ignore.strand), x))
    }
    y = disjoin(x, ...)
    ix = gr.match(y, x, ignore.strand = ignore.strand)
    values(y) = values(x)[ix, , drop = FALSE]
    return(y)
}



#' disjoin on grangeslist
#'
#' @return GRangesList
#' @author Kevin Hadi
#' @export grl.disjoin
grl.disjoin = function(x, ..., ignore.strand = T) {
  gr = GenomicRanges:::deconstructGRLintoGR(x)
  if (ignore.strand) gr = gr.stripstrand(gr)
  GenomicRanges:::reconstructGRLfromGR(
    gr.disjoin(
      gr,
      ...,
      ignore.strand = ignore.strand
    ),
    x
  )
}


#' split a gr by field(s) in elementMetadata of GRanges or a given vector
#'
#' split GRanges by field(s)
#' if providing a variable not already within the GRanges,
#' may need to use dynget(variable_name)
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.split
gr.split = function(gr, ..., sep = paste0(" ", rand.string(length = 8), " "), addmcols = TRUE) {
  lst = as.list(match.call())[-1]
  ix = which(!names(lst) %in% c("gr", "sep", "addmcols"))
  tmpix = eval(
    quote(
      do.call(
        paste,
        c(lst[ix], alist(sep = sep))
      )
    ),
    S4Vectors::as.env(mcols(gr), environment()),
    parent.frame()
  )
  ## tmpix = with(as.list(mcols(gr)), do.call(paste, c(lst[ix], alist(sep = sep))))
  uix = which(!duplicated(tmpix))
  tmpix = factor(tmpix, levels = tmpix[uix])
  grl = gr %>% GenomicRanges::split(tmpix)
  these = unlist(strsplit(toString(lst[ix]), ", "))
  if (addmcols) {
    grl@elementMetadata = mcols(gr)[uix, these,drop = F]
  }
  return(grl)
}


#' reduce based on a field(s) to split by in elementMetadata of GRanges, or given vector
#'
#' split and reduce GRanges by field(s)
#' if providing a variable not already within the GRanges,
#' may need to use dynget(variable_name)
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.spreduce
gr.spreduce = function(gr,  ..., ignore.strand = FALSE, pad = 0, return.grl = FALSE, sep = paste0(" ", rand.string(length = 8), " ")) {
  lst = as.list(match.call())[-1]
  ix = which(!names(lst) %in% c("gr", "sep", "pad", "ignore.strand", "return.grl"))
  vars = unlist(sapply(lst[ix], function(x) unlist(sapply(x, toString))))
  if (length(vars) == 1) {
    if (!vars %in% colnames(mcols(gr)))
      vars = tryCatch(unlist(list(...)), error = function(e) vars)
  }
  if (!all(vars %in% colnames(mcols(gr))))
    stop("Must specify valid metadata columns in gr")
  tmpix = do.call(
    function(...) paste(..., sep = sep),
    as.list(mcols(gr)[,vars, drop = F]))
  unix = which(!duplicated(tmpix))
  tmpix = factor(tmpix, levels = tmpix[unix])
  grl = unname(gr.noval(gr) %>% GenomicRanges::split(tmpix))
  grl = GenomicRanges::reduce(grl + pad, ignore.strand = ignore.strand)
  if (return.grl) {
    mcols(grl) = mcols(gr)[unix,vars,drop = F]
    return(grl)
  } else {
    out = unlist(grl)
    mcols(out) = mcols(gr)[rep(unix, times = IRanges::width(grl@partitioning)),
      vars,drop = F]
    return(out)
  }
}

#' get range based on a field(s) to split by in elementMetadata of GRanges, or given vector
#'
#' split and get range of GRanges by field(s)
#' if providing a variable not already within the GRanges,
#' may need to use dynget(variable_name)
#'
#' @return GRanges
#' @author Kevin Hadi
#' @export gr.sprange
gr.sprange = function (gr, ..., ignore.strand = FALSE, pad = 0, return.grl = FALSE, 
    sep = paste0(" ", rand.string(length = 8), " ")) 
{
    lst = as.list(match.call())[-1]
    ix = which(!names(lst) %in% c("gr", "sep", "pad", "ignore.strand", 
        "return.grl"))
    vars = unlist(sapply(lst[ix], function(x) unlist(sapply(x, 
        toString))))
    if (length(vars) == 1) {
        if (!vars %in% colnames(mcols(gr))) 
            vars = tryCatch(unlist(list(...)), error = function(e) vars)
    }
    if (!all(vars %in% colnames(mcols(gr)))) 
        stop("Must specify valid metadata columns in gr")
    tmpix = do.call(function(...) paste(..., sep = sep), as.list(mcols(gr)[, 
        vars, drop = F]))
    unix = which(!duplicated(tmpix))
    tmpix = factor(tmpix, levels = tmpix[unix])
    grl = unname(gr.noval(gr) %>% GenomicRanges::split(tmpix))
    grl = range(grl + pad, ignore.strand = ignore.strand)
    if (return.grl) {
        mcols(grl) = mcols(gr)[unix, vars, drop = F]
        return(grl)
    }
    else {
        out = unlist(grl)
        mcols(out) = mcols(gr)[
          rep(unix, times = IRanges::width(grl@partitioning)),
          vars, drop = F
        ]
        return(out)
    }
}


## gr.spreduce = function(gr,  ..., pad = 0, sep = paste0(" ", rand.string(length = 8), " ")) {
##   lst = as.list(match.call())[-1]
##   ix = which(!names(lst) %in% c("gr", "sep", "pad"))
##   tmpix = with(as.list(mcols(gr)), do.call(paste, c(lst[ix], alist(sep = sep))))
##   tmpix = factor(tmpix, levels = unique(tmpix))
##   grl = gr %>% GenomicRanges::split(tmpix)
##   dt = as.data.table(GenomicRanges::reduce(grl + pad))
##   nmix = which(unlist(lapply(lst[ix], function(x) is.name(x) & !is.call(x))))
##   nm = lapply(lst[ix], toString)
##   rmix = which(unlist(nm) %in% colnames(dt))
##   nm[rmix] = list(character(0))
##   if (length(rmix))
##     nmix = nmix[-rmix]
##   ## nm[lengths(nm) == 0] = list(character(0))
##   ## nm[-nmix] = character(0)
##   nm[-nmix] = list(character(0))
##   ## nmix = which(!nm == "NULL")
##   dt = dt[, cbind(.SD, setnames(as.data.table(data.table::tstrsplit(group_name, split = sep)), nmix, unlist(nm)))][, group_name := NULL]
##   return(dt2gr(dt))
## }


#' get rid of mcols on GRanges/GRangesLists
#'
#' remove all metadata from GRanges or GRangesList
#'
#' @return GRanges or GRangesList
#' @author Kevin Hadi
#' @export gr.noval
gr.noval = function(gr, keep.col = NULL, drop.col = NULL) {
    if (is.null(keep.col) & is.null(drop.col)) {
        select_col = NULL
    } else {
        all_col = colnames(gr@elementMetadata)
        if (inherits(gr, "GRangesList")) {
            all_col = c(all_col, colnames(gr@unlistData@elementMetadata))
        }

        if (!is.null(keep.col) & is.null(drop.col)) {
            select_col = intersect(all_col, keep.col)
        } else if (is.null(keep.col) & !is.null(drop.col)) {
            select_col = setdiff(all_col, drop.col)
        } else if (!is.null(keep.col) && !is.null(drop.col)) {
            if (intersect(keep.col, drop.col) > 0) {
              warning(
                "drop.col and keep.col args have",
                " overlapping elements",
                "\nkeeping the columns that overlap"
              )
              select_col = intersect(
                setdiff(all_col, setdiff(drop.col, keep.col)),
                keep.col
              )
            }
        }
    }
    if (inherits(gr, "GRangesList")) {
        tmp_query = intersect(select_col, colnames(gr@unlistData@elementMetadata))
        gr@unlistData@elementMetadata = (
          gr
          @unlistData
          @elementMetadata
          [,c(tmp_query), drop = FALSE]
        )
    }
    tmp_query = intersect(select_col, colnames(gr@elementMetadata))
    gr@elementMetadata = gr@elementMetadata[,c(tmp_query),drop = FALSE]
    return(gr)
}

#' within on GRanges, S3
#'
#' s3 method for GRanges within
#'
#' @return GRanges
#' @rdname gr.within
#' @author Kevin Hadi
#' @export gr.within
gr.within = function(data, expr) {
    top_prenv1 = function (x, where = parent.frame())
    {
        sym <- substitute(x, where)
        if (!is.name(sym)) {
            stop("'x' did not substitute to a symbol")
        }
        if (!is.environment(where)) {
            stop("'where' must be an environment")
        }
        .Call2("top_prenv", sym, where, PACKAGE = "S4Vectors")
    }
    e <- list2env(as.list(as(data, "DataFrame")))
    orig.names = setdiff(rev(names(e)), "X")
    rm(list = "X", envir = e)
    ## e$X = NULL
    e$data <- granges(data)
    e$seqnames = seqnames(e$data)
    e$start = start(e$data)
    e$end = end(e$data)
    e$strand = as.character(strand(e$data))
    e$width = as.integer(width(e$data))
    S4Vectors:::safeEval(substitute(expr), e, top_prenv1(expr))
    ch.fields = all.vars(substitute(expr))
    reserved <- c("seqnames", "start", "end", "width", "strand", "data")
    if ("seqnames" %in% ch.fields)
      seqnames(e$data) = Rle(
        factor(e$seqnames, levels = levels(seqnames(e$data)))
      )
    if ("width" %in% ch.fields) width(e$data) = e$width
    if ("strand" %in% ch.fields) strand(e$data) = e$strand
    if (any(c("start", "end") %in% ch.fields)) e@ranges = IRanges(e$start, e$end)
    data = e$data
    l <- mget(setdiff(ls(e), reserved), e)
    l <- l[!sapply(l, is.null)]
    nD <- length(del <- setdiff(colnames(mcols(data)), (nl <- names(l))))
    tmp = as(l, "DataFrame")
    newn = unlist(lapply(as.list(substitute(expr))[-1], function(x) {
        x = as.character(x)
        if (x[1] == "=")
            return(x[2])
        else
            return(NULL)
    }))
    neword = union(union(orig.names, newn), colnames(tmp))
    mcols(data) = tmp[,na.omit(match3(neword, colnames(tmp))), drop = FALSE]
    if (nD) {
        for (nm in del)
            mcols(data)[[nm]] = NULL
    }
    ## if (!identical(granges(data), e$data)) {
    ##     granges(data) <- e$data
    ## }
    return(data)
}

gr.within2 = function(data, expr) {
    top_prenv1 = function (x, where = parent.frame())
    {
        sym <- substitute(x, where)
        if (!is.name(sym)) {
            stop("'x' did not substitute to a symbol")
        }
        if (!is.environment(where)) {
            stop("'where' must be an environment")
        }
        .Call2("top_prenv", sym, where, PACKAGE = "S4Vectors")
    }
    e <- list2env(as.list(as(data, "DataFrame")))
    orig.names = setdiff(rev(names(e)), "X")
    rm(list = "X", envir = e)
    ## e$X = NULL
    e$data <- granges(data)
    e$seqnames = as.integer(seqnames(e$data))
    e$start = start(e$data)
    e$end = end(e$data)
    e$strand = as.character(strand(e$data))
    e$width = as.integer(width(e$data))
    S4Vectors:::safeEval(substitute(expr, parent.frame()), e, top_prenv1(expr))
    ch.fields = all.vars(substitute(expr, parent.frame()))
    reserved <- c("seqnames", "start", "end", "width", "strand", "data")
    if ("seqnames" %in% ch.fields)
      seqnames(e$data) = Rle(
        factor(e$seqnames, levels = levels(seqnames(e$data)))
      )
    if ("width" %in% ch.fields) width(e$data) = e$width
    if ("strand" %in% ch.fields) strand(e$data) = e$strand
    if (any(c("start", "end") %in% ch.fields)) e@ranges = IRanges(e$start, e$end)
    data = e$data
    l <- mget(setdiff(ls(e), reserved), e)
    l <- l[!sapply(l, is.null)]
    nD <- length(del <- setdiff(colnames(mcols(data)), (nl <- names(l))))
    tmp = as(l, "DataFrame")
    newn = unlist(lapply(as.list(substitute(expr, parent.frame()))[-1], function(x) {
        x = as.character(x)
        if (x[1] == "=")
            return(x[2])
        else
            return(NULL)
    }))
    neword = union(union(orig.names, newn), colnames(tmp))
    mcols(data) = tmp[,na.omit(match3(neword, colnames(tmp))), drop = FALSE]
    if (nD) {
        for (nm in del)
            mcols(data)[[nm]] = NULL
    }
    ## if (!identical(granges(data), e$data)) {
    ##     granges(data) <- e$data
    ## }
    return(data)
}





#' within on GRanges
#' 
#' within() method on GRanges
#'
#' @return GRanges
#' @rdname gr_within
#' @exportMethod within
#' @aliases within,GRanges-method
#' @author Kevin Hadi
setGeneric('within', base::within)
setMethod("within", signature(data = "GRanges"), NULL)
setMethod("within", signature(data = "GRanges"), gr.within2)


tmpgrlwithin = function(data, expr) {
    top_prenv1 = function (x, where = parent.frame())
    {
        sym <- substitute(x, where)
        if (!is.name(sym)) {
            stop("'x' did not substitute to a symbol")
        }
        if (!is.environment(where)) {
            stop("'where' must be an environment")
        }
        .Call2("top_prenv", sym, where, PACKAGE = "S4Vectors")
    }
    e <- list2env(as.list(as(data, "DataFrame")))
    orig.names = setdiff(rev(names(e)), "X")
    rm(list = "X", envir = e)
    ## e$X = NULL
    e$grangeslist <- gr.noval(data)
    S4Vectors:::safeEval(substitute(expr, parent.frame()), e, top_prenv1(expr))
    ## reserved <- c("ranges", "start", "end", "width", "space")
    reserved <- c("seqnames", "start", "end", "width", "strand", "granges", "grangeslist")
    l <- mget(setdiff(ls(e), reserved), e)
    l <- l[!sapply(l, is.null)]
    nD <- length(del <- setdiff(colnames(mcols(data)), (nl <- names(l))))
    tmp = as(l, "DataFrame")
    newn = unlist(lapply(as.list(substitute(expr, parent.frame()))[-1], function(x) {
        x = as.character(x)
        if (x[1] == "=")
            return(x[2])
        else
            return(NULL)
    }))
    neword = union(union(orig.names, newn), colnames(tmp))
    mcols(data) = tmp[,na.omit(match3(neword, colnames(tmp))), drop = FALSE]
    ## mcols(data) = as(l, "DataFrame")
    if (nD) {
        for (nm in del)
            mcols(data)[[nm]] = NULL
    }
    if (!identical(gr.noval(data), e$grangeslist)) {
        stop("change in the grangeslist detected")
        ## granges(data) <- e$granges
    } ## else {
    ##     if (!identical(start(data), start(e$grangeslist)))
    ##         start(data) <- start(e$grangeslist)
    ##     if (!identical(end(data), end(e$grangeslist)))
    ##         end(data) <- end(e$grangeslist)
    ##     if (!identical(width(data), width(e$grangeslist)))
    ##         width(data) <- width(e$grangeslist)
    ## }
    data
}


setMethod("within", signature(data = "CompressedGRangesList"), NULL)
setMethod("within", signature(data = "GRangesList"), NULL)
setMethod("within", signature(data = "CompressedGRangesList"), tmpgrlwithin)
setMethod("within", signature(data = "GRangesList"), tmpgrlwithin)


setMethod("within", signature(data = "IRanges"), NULL)
setMethod("within", signature(data = "IRanges"), function(data, expr) {
    top_prenv1 = function (x, where = parent.frame())
    {
        sym <- substitute(x, where)
        if (!is.name(sym)) {
            stop("'x' did not substitute to a symbol")
        }
        if (!is.environment(where)) {
            stop("'where' must be an environment")
        }
        .Call2("top_prenv", sym, where, PACKAGE = "S4Vectors")
    }
    e <- list2env(as.list(as(data, "DataFrame")))
    e$X = NULL
    e$data <- ranges(data)
    e$start = start(e$data)
    e$end = end(e$data)
    e$width = as.integer(width(e$data))
    S4Vectors:::safeEval(substitute(expr, parent.frame()), e, top_prenv1(expr))
    reserved <- c("start", "end", "width", "data")
    l <- mget(setdiff(ls(e), reserved), e)
    l <- l[!sapply(l, is.null)]
    nD <- length(del <- setdiff(colnames(mcols(data)), (nl <- names(l))))
    mcols(data) = as(l, "DataFrame")
    if (nD) {
        for (nm in del)
            mcols(data)[[nm]] = NULL
    }
    if (!identical(ranges(data), e$data)) {
        ranges(data) <- e$data
    }
    data
})

tmpgrlgaps = function(x, start = 1L, end = seqlengths(x)) {
  ## if (!is.null(names(start)))
  ##   start <- start[seqlevels]
  ## if (!is.null(names(end)))
  ##   end <- end[seqlevels]
  ## start <- S4Vectors:::recycleVector(start, length(seqlevels))
  ## start <- rep(start, each = 3L)
  ## end <- S4Vectors:::recycleVector(end, length(seqlevels))
  ## end <- rep(end, each = 3L)
  expand.levels = TRUE
  gr = GenomicRanges:::deconstructGRLintoGR(x, expand.levels = expand.levels)
  grlix = formatC(seq_along(x), width = floor(log10(length(x))) + 1, format = "d", flag = "0")
  snid = as.integer(seqnames(x@unlistData))
  if (isTRUE(expand.levels)) seql = seq_along(seqlevels(x)) else seql = unique(snid)
  slix = formatC(seql, width = floor(log10(max(seql))) + 1, format = "d", flag = "0")
  cdt = data.table::CJ(Var1 = grlix, Var2 = slix)[, oix := seq_len(.N)]
  cdt = merge(cdt, data.frame(sl = seqlengths(x)[seql], Var2 = slix), by = "Var2")
  setkey(cdt, oix)
  nseqlevels = cdt[, paste(Var1, Var2, sep = "|")]
  f1 = rep(grlix, lengths(x))
  ## f2 = slix[snid]
  f2 = setkey(data.table(seql, slix), seql)[list(snid)]$slix
  seqn = paste(f1, f2, sep = "|")
  seqlevels(gr) = c(nseqlevels)
  seqlengths(gr) = c(cdt$sl)
  seqnames(gr) = S4Vectors::Rle(factor(seqn, nseqlevels))
  rgl = GenomicRanges:::deconstructGRintoRGL(gr)
  ## rgl2 = gaps(rgl, start = rep(rep(start, cdt[,.N]), each = 3L), end = rep(cdt$sl, each = 3L))
  rgl2 = GenomicRanges::gaps(rgl, start = rep(rep(1, cdt[,.N]), each = 3L), end = rep(cdt$sl, each = 3L))
  GenomicRanges:::reconstructGRLfromGR(GenomicRanges:::reconstructGRfromRGL(rgl2, gr), x)
}


tmpgrlgaps2 = function(x, start = 1L, end = seqlengths(x), expand.levels = TRUE) {
  ## if (!is.null(names(start)))
  ##   start <- start[seqlevels]
  ## if (!is.null(names(end)))
  ##   end <- end[seqlevels]
  ## start <- S4Vectors:::recycleVector(start, length(seqlevels))
  ## start <- rep(start, each = 3L)
  ## end <- S4Vectors:::recycleVector(end, length(seqlevels))
  ## end <- rep(end, each = 3L)
  gr = GenomicRanges:::deconstructGRLintoGR(x, expand.levels = expand.levels)
  grlix = formatC(seq_along(x), width = floor(log10(length(x))) + 1, format = "d", flag = "0")
  snid = as.integer(seqnames(x@unlistData))
  if (isTRUE(expand.levels)) seql = seq_along(seqlevels(x)) else seql = unique(snid)
  slix = formatC(seql, width = floor(log10(max(seql))) + 1, format = "d", flag = "0")
  cdt = data.table::CJ(Var1 = grlix, Var2 = slix)[, oix := seq_len(.N)]
  cdt = merge(cdt, data.frame(sl = seqlengths(x)[seql], Var2 = slix), by = "Var2")
  data.table::setkey(cdt, oix)
  nseqlevels = cdt[, paste(Var1, Var2, sep = "|")]
  f1 = rep(grlix, lengths(x))
  ## f2 = slix[snid]
  f2 = data.table::setkey(data.table(seql, slix), seql)[list(snid)]$slix
  seqn = paste(f1, f2, sep = "|")
  seqlevels(gr) = c(nseqlevels)
  seqlengths(gr) = c(cdt$sl)
  seqnames(gr) = S4Vectors::Rle(factor(seqn, nseqlevels))
  rgl = GenomicRanges:::deconstructGRintoRGL(gr)
  ## rgl2 = gaps(rgl, start = rep(rep(start, cdt[,.N]), each = 3L), end = rep(cdt$sl, each = 3L))
  rgl2 = gaps(rgl, start = rep(rep(1, cdt[,.N]), each = 3L), end = rep(cdt$sl, each = 3L))
  GenomicRanges:::reconstructGRLfromGR(GenomicRanges:::reconstructGRfromRGL(rgl2, gr), x)
}


#' gaps on GRangesList
#' 
#' gap methods on GRangesList
#'
#' @return GRangesList
#' @rdname grl_gaps
#' @exportMethod gaps
#' @aliases gaps,GRangesList-method
#' @author Kevin Hadi
setGeneric('gaps', GenomicRanges::gaps)
setMethod(f = "gaps", signature = signature(x = "CompressedGRangesList"), definition = NULL)
setMethod(f = "gaps", signature = signature(x = "GRangesList"), definition = NULL)
setMethod("gaps", signature(x = "GRangesList"), tmpgrlgaps)
setMethod("gaps", signature(x = "CompressedGRangesList"), tmpgrlgaps)

#' gaps on GRanges, splitting by values in a metadata field
#' 
#' Gaps method that extracts the differences of a GRangesList
#' object from the Seqinfo() reference.
#'
#' @return GRangesList
#' @rdname gr.splgaps
#' @author Kevin Hadi
#' @export gr.splgaps
gr.splgaps <- function(gr, ..., ignore.strand = TRUE, sep = paste0(" ", rand.string(length = 8), " "), start = 1L, end = seqlengths(gr), cleannm = TRUE, expand.levels = TRUE) {
  lst = as.list(match.call())[-1]
  ix = which(!names(lst) %in% c("gr", "sep", "cleannm", "start", "end", "expand.levels"))
  cl = sapply(lst[ix], class)
  vars = unlist(sapply(lst[ix], function(x) unlist(sapply(x, toString))))
  if (length(vars) == 1) {
    if (!vars %in% colnames(mcols(gr)))
      vars = tryCatch(unlist(list(...)), error = function(e) vars)
  }
  if (!all(vars %in% colnames(mcols(gr))))
    stop("Must specify valid metadata columns in gr")
  tmpix = S4Vectors::do.call(function(...) paste(..., sep = sep),
    mcols(gr)[,vars,drop = F])
  unix = which(!duplicated(tmpix))
  tmpix = factor(tmpix, levels = tmpix[unix])
  if (ignore.strand)
      gr = gUtils::gr.stripstrand(gr)
  grl = gr.noval(gr) %>% GenomicRanges::split(tmpix)
  out = tmpgrlgaps2(grl, start = start, end = end, expand.levels = expand.levels)
  ## out = gaps(grl, start = start, end = end)
  mcols(out) = mcols(gr)[unix,vars, drop = F]
  if (cleannm)
    names(out) = gsub(sep, " ", names(out))
  return(out)
}


#' gr.setdiff that works with multiple by columns
#' 
#' setdiff that allows for multiple columns
#'
#'
#' @return GRanges
#' @rdname gr.setdiff2
#' @author Kevin Hadi
#' @export gr.setdiff2
gr.setdiff2 = function (query, subject, ignore.strand = TRUE, by = NULL, new = TRUE, ...)
{
  if (!is.null(by)) {
    if (ignore.strand) {
      query = gUtils::gr.stripstrand(query)
      subject = gUtils::gr.stripstrand(subject)
    }
    sl = GenomeInfoDb::seqlengths(query)
    if (new) {
      ## gp = do.call(gr.splgaps, c(alist(gr = gr.fix(subject, query)), ... = lapply(by, str2lang)))
      cmd = sprintf("gr.splgaps(gr.fix(subject, query), %s, expand.levels = TRUE)", paste(collapse = ",", by))
      gp = eval(parse(text = cmd))
    } else {
      tmp = gUtils::gr2dt(subject)
      tmp$strand = factor(tmp$strand, c("+", "-", "*"))
      gp = dt2gr(tmp[, as.data.frame(GenomicRanges::gaps(GenomicRanges::GRanges(seqnames,
        IRanges::IRanges(start, end), seqlengths = sl, strand = strand))),
      , by = by], seqinfo = seqinfo(query))
    }
    qdt = as.data.table(mcols(gr.noval(query, keep.col = by)))[, query.id := seq_len(.N)]
    sdt = as.data.table(mcols(gr.noval(subject, keep.col = by)))[, subject.id := seq_len(.N)]
    mdt = merge(data.table::setkeyv(unique(qdt[,-c("query.id")][, inx := TRUE]), by),
      data.table::setkeyv(unique(sdt[, -c("subject.id")][, iny := TRUE]), by), all = T)[is.na(iny)]
    rm(sdt)
    gp = gUtils::grl.unlist(gp)
    if (nrow(mdt)) {
      gp = gUtils::grbind(gp, gr.spreduce(gr.noval(query[merge(qdt, mdt, by = by)$query.id],
        keep.col = by), by))
    }
    rm(mdt, qdt)
    if (ignore.strand)
      gp = gUtils::gr.stripstrand(gp[strand(gp) == "*"])
  }
  else {
    if (ignore.strand) {
      gp = GenomicRanges::gaps(gr.stripstrand(subject)) %Q% (strand ==
                                                "*")
    }
    else {
      gp = GenomicRanges::gaps(subject)
    }
  }
  if (new) {
    out = suppressWarnings({gUtils::gr.findoverlaps(gr_construct_by(query, by), gr_construct_by(gp, by),
      qcol = names(values(query)),
      ignore.strand = ignore.strand, ...)})
    out = gUtils::gr.fix(gr_deconstruct_by(out, by = by), query)
  } else {
    out = gUtils::gr.findoverlaps(query, gp,
      qcol = names(values(query)),
      ignore.strand = ignore.strand, by = by, ...)
  }
  return(out)
}




#' @export
en2DF = function(ed, nodes, from_field = "from", to_field = "to", strand_sign = NULL, from_strand_sign = NULL, to_strand_sign = NULL, from_strand = NULL, to_strand = NULL) {
    edf = S4Vectors::DataFrame(from = ed[[from_field]], to = ed[[to_field]], from.gr = unname(nodes[ed[[from_field]]][,c()]), to.gr = unname(nodes[ed[[to_field]]][,c()]))
    if (!is.null(strand_sign)) {
        from_new_strand = ifelse(strand_sign > 0, c("+" = "+", "-" = "-")[as.character(strand(edf$from.gr))], ifelse(strand_sign < 0, c("-" = "+", "+" = "-")[as.character(strand(edf$from.gr))], "*"))
        to_new_strand = ifelse(strand_sign > 0, c("+" = "+", "-" = "-")[as.character(strand(edf$to.gr))], ifelse(strand_sign < 0, c("-" = "+", "+" = "-")[as.character(strand(edf$to.gr))], "*"))
        edf$from.gr = gr.strand(edf$from.gr, from_new_strand)
        edf$to.gr = gr.strand(edf$to.gr, to_new_strand)
    }
    if (!is.null(from_strand_sign)) {
        from_new_strand = ifelse(from_strand_sign > 0, c("+" = "+", "-" = "-")[as.character(strand(edf$from.gr))], ifelse(from_strand_sign < 0, c("-" = "+", "+" = "-")[as.character(strand(edf$from.gr))], "*"))
        edf$from.gr = gr.strand(edf$from.gr, from_new_strand)
    }
    if (!is.null(to_strand_sign)) {
        to_new_strand = ifelse(to_strand_sign > 0, c("+" = "+", "-" = "-")[as.character(strand(edf$to.gr))], ifelse(to_strand_sign < 0, c("-" = "+", "+" = "-")[as.character(strand(edf$to.gr))], "*"))
        edf$to.gr = gr.strand(edf$to.gr, to_new_strand)
    }
    if (!is.null(from_strand)) {
        edf$from.gr = gr.strand(edf$from.gr, from_strand)
    }
    if (!is.null(to_strand)) {
        edf$to.gr = gr.strand(edf$to.gr, to_strand)
    }
    rownames(edf) = as.character(seq_len(nrow(ed)))
    edf$bp.left = gr.end(edf$from.gr, width = 1, ignore.strand = FALSE)[,c()]
    edf$bp.right = gr.start(edf$to.gr, width = 1, ignore.strand = FALSE)[,c()]
    strand(edf$bp.left) = ifelse(as.character(strand(edf$from.gr)) == "-", "+", "-")
    strand(edf$bp.right) = ifelse(as.character(strand(edf$to.gr)) == "-", "-", "+")
    edf$junction = gUtils::grl.pivot(GRangesList(edf$bp.left, edf$bp.right))
    edf$ref = as.logical(ifelse(seqnames(edf$bp.left) == seqnames(edf$bp.right), abs(start(edf$bp.left) - end(edf$bp.right)), Inf) == 1 &
        strand(edf$bp.left) != strand(edf$bp.right))
    return(edf)
}

#' @export
map_fus2unfus = function(ed, nodes, exact = TRUE) {
    if (!inherits.edf(ed)) {
        if (is.null(nodes)) {
            stop("Nodes must be supplied with ed to ascertain coordinates and junctions")
        }
        ed = en2DF(ed = ed, nodes = nodes)
    }
    if (is.null(ed$from.unfus.junc) | is.null(ed$to.unfus.junc)) {
        ed$bp.left.unfus.gr = gr.shift(gr.end(ed$from.gr, ignore.strand = FALSE), 1, ignore.strand = FALSE)
        ed$bp.right.unfus.gr = gr.flipstrand(gr.shift(gr.start(ed$to.gr, ignore.strand = FALSE), -1, ignore.strand = FALSE))
        ed$from.unfus.junc = gUtils::grl.pivot(GRangesList(ed$bp.left, ed$bp.left.unfus.gr))
        ed$to.unfus.junc = gUtils::grl.pivot(GRangesList(ed$bp.right, ed$bp.right.unfus.gr))
    }

    match_mat_ufrom = ra.overlaps2(ed$junction, ed$from.unfus.junc);
    setnames(match_mat_ufrom, c("ra1.ix", "ra2.ix")); match_mat_ufrom = as.matrix(match_mat_ufrom)
    match_mat_ufrom = match_mat_ufrom[gr.poverlaps(ed[match_mat_ufrom[,1],]$from.gr, ed[match_mat_ufrom[,2],]$from.gr),,drop = FALSE]
    match_mat_uto = ra.overlaps2(ed$junction, ed$to.unfus.junc, pad = 2); setnames(match_mat_uto, c("ra1.ix", "ra2.ix")); match_mat_uto = as.matrix(match_mat_uto)
    match_mat_uto = match_mat_uto[gr.poverlaps(ed[match_mat_uto[,1],]$to.gr, ed[match_mat_uto[,2],]$to.gr),,drop = FALSE]

    ed$from.unfus = as.integer(NA)
    ed$to.unfus = as.integer(NA)
    ed[match_mat_uto[,2],]$from.unfus = ed[match_mat_uto[,1],]$from
    ed[is.na(ed$from.unfus),][["from.unfus"]] = ed[is.na(ed$from.unfus),]$from ## loose ends -- just make unfus side the same

    ed[match_mat_ufrom[,2],]$to.unfus = ed[match_mat_ufrom[,1],]$to
    ed[is.na(ed$to.unfus),][["to.unfus"]] = ed[is.na(ed$to.unfus),]$to ## loose ends -- just make the unfus the same

    ed$from.unfus.gr = nodes[ed[["from.unfus"]]]
    ed$to.unfus.gr = nodes[ed[["to.unfus"]]]
    ## if (all(dim(match_mat) == c(1,2)) & all(is.na(match_mat)))  {
    ##     return(match_mat_)
    ## }
    return(ed)
}

gr.poverlaps = function(gr1, gr2, ignore.strand = FALSE, as.logical = TRUE) {
    minoverlaps <<- 1
    minoverlap <<- 0
    out = IRanges::poverlaps(gr.fix(gr1, gr2), gr.fix(gr2, gr1), ignore.strand = ignore.strand)
    if (as.logical) {
        return(as.logical(out))
    } else {
        return(out)
    }
}

inherits.edf = function(DF) {
    if (all(sapply(DF, class) %in% c("integer","GRanges","CompressedGRangesList","logical", "GRangesList"))) {
        if (inherits(DF, "DataFrame")) {
            if (all(c("from","to","from.gr","to.gr","bp.left","bp.right","junction","ref") %in% colnames(DF))) {
                return(TRUE)
            } else {
                return(FALSE)
            }
        } else {
            return(FALSE)
        }
    } else {
        return(FALSE)
    }
}




#' ra.overlaps6
#'
#' One of the many rewrites of ra.overlaps
#'
#' @export ra.overlaps6
ra.overlaps6 <- function(ra1, ra2, pad = 0, ignore.strand = ignore.strand) {
    ra1 = gr.noval(ra1)
    ra2 = gr.noval(ra2)
    bp1 = grl.unlist(ra1) + pad
    bp2 = grl.unlist(ra2) + pad
    ## data.table::foverlaps
    ix2 = findOverlaps(bp1, bp2, ignore.strand = FALSE)
    ix2 = as.data.table(ix2)
    ix2$grl.ix.x = bp1$grl.ix[ix2$queryHits]
    ix2$grl.iix.x = bp1$grl.iix[ix2$queryHits]
    ix2$grl.ix.y = bp2$grl.ix[ix2$subjectHits]
    ix2$grl.iix.y = bp2$grl.iix[ix2$subjectHits]
    ## ix2 = unname(plyranges::find_overlaps_directed(bp1, bp2))
    ## ix2 = gr2dt(ix2)
    mat = ix2[, cbind(grl.ix.x, grl.ix.y)]
    ## letsee = apply(mat, 1, function(x) c(min(x), max(x)))
    ## ix2[, uix := paste(letsee[1,], letsee[2,])]
    ix2[, uix := paste(matrixStats::rowMins(mat), matrixStats::rowMaxs(mat))]
    ## ix2[, ra.match := all(c(1,2) %in% grl.iix.x) & all(c(1,2) %in% grl.iix.y), by = .(grl.ix.x, grl.ix.y)]
    ok1 = unique(ix2[, .(uix, grl.iix.x)])
    ok1[, has1 := any(grl.iix.x == 1), by = uix]
    ok1[, has2 := any(grl.iix.x == 2), by = uix]
    ok2 = unique(ix2[, .(uix, grl.iix.y)])
    ok2[, has1 := any(grl.iix.y == 1), by = uix]
    ok2[, has2 := any(grl.iix.y == 2), by = uix]
    ## ix2[, ra.match := all(c(1,2) %in% grl.iix.x) & all(c(1,2) %in% grl.iix.y), keyby = uix]
    meth1 = intersect(ok1[(has1 & has2)][!duplicated(uix)]$uix, ok2[(has1 & has2)][!duplicated(uix)]$uix)
    ## setdiff(meth1, ix2[ra.match == TRUE]$uix)
    ## setkey(ix2, grl.ix.x, grl.ix.y)
    ## ix2 = ix2[ra.match == TRUE][!duplicated(data.table(grl.ix.x, grl.ix.y))]
    return(setkey(ix2[!duplicated(uix)], uix)[meth1][, cbind(grl.ix.x, grl.ix.y)])
    ## return(ix2[ra.match == TRUE][!duplicated(data.table(grl.ix.x, grl.ix.y))][, cbind(grl.ix.x, grl.ix.y)])
}

#' ra.dedup6
#'
#' One of the many rewrites of ra.overlaps
#'
#' @export
ra.dedup6 <- function (grl, pad = 500, ignore.strand = FALSE) 
{
    if (!is(grl, "GRangesList")) {
        stop("Error: Input must be GRangesList!")
    }
    if (any(elementNROWS(grl) != 2)) {
        stop("Error: Each element must be length 2!")
    }
    if (length(grl) == 0 | length(grl) == 1) {
        return(grl)
    }
    if (length(grl) > 1) {
        ix.pair = as.data.table(ra.overlaps6(grl, grl, pad = pad, 
            ignore.strand = ignore.strand))[grl.ix.x != grl.ix.y]
        if (nrow(ix.pair) == 0) {
            return(grl)
        }
        else {
            dup.ix = unique(rowMax(as.matrix(ix.pair)))
            return(grl[-dup.ix])
        }
    }
}


#' gr2bed
#'
#' converting gr to bed like table
#' also shifts coordinates to half closed 0 based
#'
#' @export
gr2bed <- function(gr) {
    df = as.data.frame(gr)
    colnames(df)[1:3] = c("chrom", "chromStart", "chromEnd")
    df$width = NULL
    out = cbind(df[,1:3,drop=F], name = as.character(seq_len(NROW(df))),
                score = rep_len(0, NROW(df)), df[,-c(1:3),drop=F])
    out$chromStart = out$chromStart - 1
    return(out)
}



#' grl2bedpe
#'
#' converting grl to bedpe-like table
#' also shifts coordinates to half closed 0 based
#'
#' @export
grl2bedpe = function(grl, add_breakend_mcol = FALSE, flip = FALSE, as.data.table = TRUE, zerobased = TRUE) {
    grpiv = gUtils::grl.pivot(grl)
    if (zerobased) {
        grpiv[[1]] = gr.resize(grpiv[[1]], width = 2, pad = FALSE, fix = "end")
        grpiv[[2]] = gr.resize(grpiv[[2]], width = 2, pad = FALSE, fix = "end")
    }


    mcgrl = as.data.frame(mcols(grl))

    df1 = as.data.frame(grpiv[[1]])[, c(1:3, 5), drop=F]
    df2 = as.data.frame(grpiv[[2]])[, c(1:3, 5), drop=F]
    colnames(df1) = c("chrom1", "start1", "end1", "strand1")
    colnames(df2) = c("chrom2", "start2", "end2", "strand2")

    mc1 = data.frame()[seq_len(NROW(df1)),,drop=F]
    mc2 = data.frame()[seq_len(NROW(df2)),,drop=F]

    if (isTRUE(add_breakend_mcol)) {
        mc1 = as.data.frame(grpiv[[1]])[,-c(1:5),drop=F]
        mc2 = as.data.frame(grpiv[[2]])[,-c(1:5),drop=F]

        colnames(mc1) = paste0("first.", colnames(mc1))
        colnames(mc2) = paste0("second.", colnames(mc2))
    }

    out = cbind(df1, df2, name = as.character(seq_len(NROW(df1))), score = rep_len(0, NROW(df1)), mcgrl, mc1, mc2)

    canon_col = c(1, 2, 3, 5, 6, 7, 9, 10, 4, 8)
    ## out[, canon_col, drop = F]
    nix = seq_len(ncol(out))

    ## reorder
    out = out[, c(canon_col, nix[!nix %in% canon_col]), drop = F]

    if (flip) {
        out$strand1 = c("+" = "-", "-" = "+")[out$strand1]
        out$strand2 = c("+" = "-", "-" = "+")[out$strand2]
    }

    if (as.data.table)
        return(data.table::as.data.table(out))
    else
        return(out)
    
}


#' bedpe2grl
#'
#' converting bedpe to grl
#'
#' @export
bedpe2grl = function(bedpe, flip = FALSE, trim = TRUE, genome = NULL, sort = TRUE) {
    if (!NROW(bedpe)) return(GenomicRanges::GRangesList())
    if (!is.character(bedpe$chrom1))
        bedpe$chrom1 = as.character(bedpe$chrom1)
    if (!is.character(bedpe$chrom2))
    bedpe$chrom2 = as.character(bedpe$chrom2)
    st1 = bedpe$strand1
    st2 = bedpe$strand2
    if (isTRUE(flip)) {
        st1 = c("+" = "-", "-" = "+")[st1]
        st2 = c("+" = "-", "-" = "+")[st2]
    }
    gr1 = data.frame(seqnames = bedpe$chrom1, start = bedpe$start1,
                     end = bedpe$end1, strand = st1)
    gr1 = GenomicRanges::makeGRangesFromDataFrame(gr1)
    gr1 = gr.resize(gr1, 1, pad = FALSE, fix = "end")
    gr2 = data.frame(seqnames = bedpe$chrom2, start = bedpe$start2,
                end = bedpe$end2, strand = st2)
    gr2 = GenomicRanges::makeGRangesFromDataFrame(gr2)
    gr2 = gr.resize(gr2, 1, pad = FALSE, fix = "end")
    d1.cols = intersect(c("name", "score"), colnames(bedpe))
    if (length(d1.cols))
      d1 = tryCatch(
        bedpe[, d1.cols, drop = F, with = F],
        error = function(e) bedpe[, d1.cols, drop = F]
      )
    else
      d1 = tryCatch(
        bedpe[, 0, drop = F, with = F],
        error = function(e) bedpe[, 0, drop = F]
      )
    ## d1 = bedpe[, c("name", "score"), drop=F]
    d2 = bedpe[, -c(1:10), drop=F]    
    grl = gUtils::grl.pivot(GenomicRanges::GRangesList(gr1, gr2))
    mcols(grl) = cbind(d1, d2)
    if (sort) grl = gr.sort(grl)
    return(grl)
}

## bedpe2grl = function(bdpe, genome = NULL) {
##     bdpe$chrom1 = as.character(bdpe$chrom1)
##     bdpe$chrom2 = as.character(bdpe$chrom2)
##     dat = tidyr::pivot_longer(bdpe, cols = c("chrom1", "start1", "end1", "strand1", "chrom2", "start2", "end2", "strand2"), names_to = c(".value", "name"), names_pattern = "([A-Za-z]+)([12]$)")
##     dat = dplyr::mutate_at(dat, vars(matches("^start$")), ~(. + 1))
##     dat = dt2gr(dat)
##     return(grl.pivot(gr.fix(split(dat, dat$name), hg_seqlengths(genome = genome))))
## }

gr.shift = function(gr, shift = 1, ignore.strand = FALSE) {
    if (!ignore.strand) {
        return(GenomicRanges::shift(gr, c("+" = 1, "-" = -1)[as.character(strand(gr))] * shift))
    } else {
        return(GenomicRanges::shift(gr, shift))
    }
}

ra.overlaps2 = function(ra1, ra2, pad = 0, ignore.strand = FALSE) {
    if (length(ra1) == 0 | length(ra2) == 0) {
        return(data.table(query.id = as.integer(NA), subject.id = as.integer(NA)))
    }
    ## forcibly removing all metadata before doing the query... the finagling
    ## of metadata could be problematic especially when they are
    ## not standard S3 classes
    ra1@unlistData@elementMetadata = ra1@unlistData@elementMetadata[,c()]
    ra1@elementMetadata = ra1@elementMetadata[,c()]
    ra2@unlistData@elementMetadata = ra2@unlistData@elementMetadata[,c()]
    ra2@elementMetadata = ra2@elementMetadata[,c()]
    bp1 = grl.unlist(ra1)
    bp2 = grl.unlist(ra2)
    bp1 = gr.fix(bp1, bp2)
    sbp1 = seqinfo(bp1)
    bp2 = gr.fix(bp2, bp1)
    sbp2 = seqinfo(bp2)
    bp1 = sort(sortSeqlevels(bp1), ignore.strand = FALSE) + pad
    bp2 = sort(sortSeqlevels(bp2), ignore.strand = FALSE) + pad
    bp1 = gr2dt(bp1)
    bp2 = gr2dt(bp2)
    bp1[, grl.iix := seq_len(.N), by= grl.ix]
    bp2[, grl.iix := seq_len(.N), by= grl.ix]
    data.table::setorderv(bp1, "grl.ix")
    data.table::setorderv(bp2, "grl.ix")
    bp1 = dt2gr(bp1, seqlengths = seqlengths(sbp1), seqinfo = sbp1)
    bp2 = dt2gr(bp2, seqlengths = seqlengths(sbp2), seqinfo = sbp2)
    ## bp1 = gr.fix(bp1, bp2)
    ## bp2 = gr.fix(bp2, bp1)
    ix1 = findOverlaps(bp1 %Q% (grl.iix == 1),
                       bp2 %Q% (grl.iix == 1),
                       ignore.strand = ignore.strand)
    ix2 = findOverlaps(bp1 %Q% (grl.iix == 2),
                       bp2 %Q% (grl.iix == 2),
                       ignore.strand = ignore.strand)
    ix1 = as.data.table(ix1); data.table::setnames(ix1, c("query.id", "subject.id"))
    ix2 = as.data.table(ix2); data.table::setnames(ix2, c("query.id", "subject.id"))
    data.table::setkeyv(ix1, c("query.id", "subject.id"))
    data.table::setkeyv(ix2, c("query.id", "subject.id"))
    if (nrow(ix1) == 0 | nrow(ix2) == 0) {
        return(data.table(query.id = as.integer(NA), subject.id = as.integer(NA)))
    }
    ## mg = merge(ix1, ix2, by = c("query.id", "subject.id"), allow.cartesian = TRUE)
    mg = merge(ix1, ix2, allow.cartesian = TRUE)
    return(mg)
}


#' filter sv by overlaps with another
#'
#' To filter out a grangeslist of sv by another grangeslist of SV
#'
#' @param sv GRangesList with all elements length 2 (specifying breakpoint pairs of a junction)
#' @param filt_sv GRangesList with all elements length 2 (usually a pon)
#' @param pad Exposed argument to skitools::ra.overlaps()
#' @return GRangesList of breakpoint pairs with junctions that overlap removed
#' @export
sv_filter = function(sv, filt_sv, pad = 500)
{
    ## within_filt = ra.overlaps2(sv, filt_sv, pad = pad)
    if (length(sv) == 0) {
        return(sv)
    }
    within_filt = suppressWarnings(ra.overlaps6(sv, filt_sv, pad = pad))
    ## filter_these = unique(within_tcga_germ[,"ra1.ix"])
    ## filter_these = unique(within_filt[,1, with = FALSE][[1]])
    filter_these = unique(within_filt[,1])
    sv = ix_sdiff(sv, filter_these)
    return(sv)
}



#' .filter_sv
#'
#' perform sv filtering on a pairs entry
#'
#' @export
.filter_sv = function(ent, overwrite = FALSE) {
    if (file.exists(ent$svaba_unfiltered_somatic_vcf)) {
        outpath = paste0(file_path_sans_ext(ent$svaba_unfiltered_somatic_vcf), ".pon.filtered.rds")
        if (isTRUE(overwrite) || isFALSE(file.exists(outpath))) {
            sv = JaBbA::read.junctions(ent$svaba_unfiltered_somatic_vcf)
            if (!exists("sv_pon")) {
                sv_pon = gr.noval(readRDS('~/lab/projects/CCLE/db/tcga_and_1kg_sv_pon.rds'))
            }
            sv = sv_filter(sv, sv_pon, pad = 1000)
            outpath = paste0(file_path_sans_ext(ent$svaba_unfiltered_somatic_vcf), ".pon.filtered.rds")
            saveRDS(sv, outpath, compress = FALSE)
            message(ent$pair, " finished")
            message("\n")
            data.table(pair = ent$pair, svaba_unfiltered_somatic_vcf_sv_pon_filtered = outpath)
        } else if (isTRUE(file.exists(outpath))) {
            data.table(pair = ent$pair, svaba_unfiltered_somatic_vcf_sv_pon_filtered = outpath)
        }
    } else {
        data.table(pair = ent$pair, svaba_unfiltered_somatic_vcf_sv_pon_filtered = NA_character_)
    }
}


#' fit battenberg copy number to CN signatures (Nature 2022)
#'
#' extract copy number CN signatures and fit using NNLS
#' 
#' @name fit.cnv.sig
#' @export fit.cnv.sig
fit.cnv.sig = function(gr.seg, sig.cnv = "~/Dropbox/Isabl/HRD/Steele-cnv-signature-definitions.txt",
         id = NULL) {
    
    features = c('0:homdel:0-100kb', '0:homdel:100kb-1Mb', '0:homdel:>1Mb', '1:LOH:0-100kb', 
                 '1:LOH:100kb-1Mb', '1:LOH:1Mb-10Mb', '1:LOH:10Mb-40Mb', '1:LOH:>40Mb', 
                 '2:LOH:0-100kb', '2:LOH:100kb-1Mb', '2:LOH:1Mb-10Mb', '2:LOH:10Mb-40Mb', '2:LOH:>40Mb', 
                 '3-4:LOH:0-100kb', '3-4:LOH:100kb-1Mb', '3-4:LOH:1Mb-10Mb', '3-4:LOH:10Mb-40Mb', '3-4:LOH:>40Mb', 
                 '5-8:LOH:0-100kb', '5-8:LOH:100kb-1Mb', '5-8:LOH:1Mb-10Mb', '5-8:LOH:10Mb-40Mb', '5-8:LOH:>40Mb', 
                 '9+:LOH:0-100kb', '9+:LOH:100kb-1Mb', '9+:LOH:1Mb-10Mb', '9+:LOH:10Mb-40Mb', '9+:LOH:>40Mb', 
                 '2:het:0-100kb', '2:het:100kb-1Mb', '2:het:1Mb-10Mb', '2:het:10Mb-40Mb', '2:het:>40Mb', 
                 '3-4:het:0-100kb', '3-4:het:100kb-1Mb', '3-4:het:1Mb-10Mb', '3-4:het:10Mb-40Mb', '3-4:het:>40Mb', 
                 '5-8:het:0-100kb', '5-8:het:100kb-1Mb', '5-8:het:1Mb-10Mb', '5-8:het:10Mb-40Mb', '5-8:het:>40Mb', 
                 '9+:het:0-100kb', '9+:het:100kb-1Mb', '9+:het:1Mb-10Mb', '9+:het:10Mb-40Mb', '9+:het:>40Mb')


    if (is.character(gr.seg) && file.exists(gr.seg)) {
        gr.seg = readin(gr.seg, other.txt = c("seg"))
    }

    if (inherits(gr.seg, "data.frame")) {
        gr.seg = df2gr(gr.seg, 2, 3, 4)
    } else if (inherits(gr.seg, "GRanges")) {
        NULL
    } else {
        stop("seg must be a path to a CN segmentation file or a GRanges/data frame")
    }
    

    super_class = c('het', 'LOH', "homdel")
    hom_del_class = c('0-100kb', '100kb-1Mb', '>1Mb')
    # x_labels = c('>40Mb', '10Mb-40Mb', '1Mb-10Mb', '100kb-1Mb', '0-100kb')
    x_labels = c("0-100kb","100kb-1Mb","1Mb-10Mb","10Mb-40Mb",">40Mb")
    CN_classes = c("1","2","3-4","5-8","9+") # different total CN states

    gr.seg$hom_seg = cut(width(gr.seg), c(-1, 100e3, 1e6, Inf), labels = hom_del_class)
    gr.seg$x_seg = cut(width(gr.seg), c(-1, 100e3, 1e6, 10e6, 40e6, Inf), labels = x_labels)
    gr.seg$tot_cn = with(gr.seg, nMaj1_A + nMin1_A)
    gr.seg$minor = gr.seg$nMin1_A
    gr.seg$major = gr.seg$nMaj1_A
    gr.seg$cn_state = cut(gr.seg$tot_cn, c(-1, 1, 2, 4, 8, Inf), labels = CN_classes)
    gr.seg$CN_class = with(gr.seg, case_when(tot_cn == 0 ~ "homdel",
                                             minor == 0 & major > 0 ~ "LOH",
                                             TRUE ~ "het"))

    gr.seg$x_lv = with(gr.seg, paste(cn_state, CN_class, x_seg, sep = ":"))
    gr.seg$hom_lv = with(gr.seg, paste(cn_state, CN_class, hom_seg, sep = ":"))
    gr.seg$is_homdel = with(gr.seg, CN_class == "homdel")
    gr.seg$feature = with(gr.seg, factor(ifelse(is_homdel, hom_lv, x_lv), features))

    ## sig.cnv.path = "~/Dropbox/Isabl/HRD/Steele-Nature-2022-supp-table2.xlsx"
    ## esh = readxl::excel_sheets(sig.cnv.path)
    ## "Pancan sig definitions"

    if (is.character(sig.cnv) && file.exists(sig.cnv)) {
        sig.def = read.table(sig.cnv)
    } else if (inherits(sig.def, c("data.frame", "matrix"))) {
        sig.def = sig.cnv
    } else {
        stop("sig.cnv must be a path to signature definition or a matrix/data.frame")
    }

    nbootFit = 100
    methodFit = "KLD"
    threshold_percentFit = 5
    bootstrapSignatureFit = TRUE
    nbootFit = 100
    threshold_p.valueFit = 0.05
    bootstrapHRDetectScores = FALSE
    nparallel = 1
    randomSeed = 10

    cat_cnv = matrify(as.data.frame(gr.seg$feature %>% table))

    bootstrap_fit_cnv <- signature.tools.lib::SignatureFit_withBootstrap(
      cat_cnv, 
      sig.def, nboot = nbootFit, method = methodFit, 
      threshold_percent = threshold_percentFit, threshold_p.value = threshold_p.valueFit, 
      verbose = FALSE, nparallel = nparallel, randomSeed = randomSeed
    )

    exposures_cnv <- bootstrap_fit_cnv$E_median_filtered
    exposures_cnv[is.nan(exposures_cnv)] <- 0

    out = as.data.table(transp(exposures_cnv)[[1]])
    setnames(out, rownames(exposures_cnv))
    if (!is.null(id)) {
        out$id = id
        setcolorder(out, "id")
    }
    return(out)
}

#' isv2grl
#' 
#' Transform format to GRangesList
#' 
#' @export 
isv2grl = function(sv, flipstrand = TRUE) {
    if (is.character(sv) && file.exists(sv)) {
        sv.path = sv
        vcf = readVcf(sv)
        sv = rowRanges(vcf)
        mcols(sv) = cbind(mcols(sv), info(vcf))
    }
    if (NROW(sv)) {
        gstrands = transp(strsplit(sv$STRANDS, ""), c)
        if (flipstrand) {
            gstrands[[1]] = unname(c("+" = "-", "-" = "+")[gstrands[1][[1]]])
            gstrands[[2]] = unname(c("+" = "-", "-" = "+")[gstrands[2][[1]]])
        }
        strand(sv) = gstrands[[1]]
        sv$ALT = unlist(sv$ALT)
        sv$alt = gsub("[\\[N\\]]", "", sv$ALT, perl = T)
        gr.2 = GRanges(sv$alt)
        strand(gr.2) = gstrands[[2]]
        grl = GRangesList(unname(gr.noval(sv)), unname(gr.2))
        grl = grl.pivot(grl)
    } else {
        grl = GRangesList()
    }
    mcols(grl) = mcols(sv)
    return(grl)
}




##############################
##############################

#' parse snpeff output into granges
#'
#' parse snpeff output into granges
#' expand all annotations into separate rows
#'
#' @param vcf path to snpeff vcf
#' @param pad Exposed argument to skitools::ra.overlaps()
#' @return GRangesList of breakpoint pairs with junctions that overlap removed
#' @export
parsesnpeff = function (vcf, id = NULL, filterpass = TRUE, coding_alt_only = TRUE, 
    geno = NULL, gr = NULL, keepfile = FALSE, altpipe = FALSE, 
    debug = FALSE, snpeffpath = "~/modules/SnpEff", filters = "PASS,.") 
{
    if (debug) 
        browser()
    out.name = paste0("tmp_", rand.string(), ".vcf.gz")
    tmp.path = paste0(tempdir(), "/", out.name)
    if (!keepfile) 
        on.exit(unlink(tmp.path))
    try2({
        catcmd = if (grepl("(.gz)$", vcf)) "zcat" else "cat"
        ## onepline = "/gpfs/commons/groups/imielinski_lab/git/mskilab/flows/modules/SnpEff/source/snpEff/scripts/vcfEffOnePerLine.pl"
        onepline = paste0(snpeffpath, "/source/snpEff/scripts/vcfEffOnePerLine.pl")
        if (coding_alt_only) {
            filt = sprintf("java -Xmx20m -Xms20m -XX:ParallelGCThreads=1 -jar %s filter \"( ANN =~ 'chromosome_number_variation|exon_loss_variant|rare_amino_acid|stop_lost|transcript_ablation|coding_sequence|regulatory_region_ablation|TFBS|exon_loss|truncation|start_lost|missense|splice|stop_gained|frame' )\"",
                           paste0(snpeffpath, "/source/snpEff/SnpSift.jar"))
            if (filterpass)
                cmd = sprintf(paste("bcftools view -f %s %s | %s | %s | bgzip -c > %s"), 
                  filters, vcf, onepline, filt, tmp.path)
            else cmd = sprintf("cat %s | %s | %s | bcftools norm -Ov -m-any | bgzip -c > %s", 
                vcf, onepline, filt, tmp.path)
        }
        else {
            filt = ""
            if (filterpass) 
                cmd = sprintf(paste(catcmd, "%s | %s | bcftools view -i 'FILTER==\"PASS\"' | bgzip -c > %s"), 
                  vcf, onepline, tmp.path)
            else cmd = sprintf(paste(catcmd, "%s | %s | bcftools norm -Ov -m-any | bgzip -c > %s"), 
                vcf, onepline, tmp.path)
        }
        system(cmd)
    })
    if (!altpipe) 
        out = grok_vcf(tmp.path, long = TRUE, geno = geno, gr = gr)
    else {
        vcf = VariantAnnotation::readVcf(tmp.path)
        vcf = S4Vectors::expand(vcf)
        rr = within(rowRanges(vcf), {
            REF = as.character(REF)
            ALT = as.character(ALT)
        })
        ann = data.table::as.data.table(data.table::tstrsplit(unlist(info(vcf)$ANN), 
            "\\|"))[, 1:15, with = FALSE, drop = FALSE]
        fn = c("allele", "annotation", "impact", "gene", "gene_id", 
            "feature_type", "feature_id", "transcript_type", 
            "rank", "variant.c", "variant.p", "cdna_pos", "cds_pos", 
            "protein_pos", "distance")
        data.table::setnames(ann, fn)
        if ("AD" %in% names(geno(vcf))) {
            adep = data.table::setnames(data.table::as.data.table(geno(vcf)$AD[, , 1:2]), 
                c("ref", "alt"))
            gt = VariantAnnotation::geno(vcf)$GT
        }
        else if (all(c("AU", "GU", "CU", "TU", "TAR", "TIR") %in% 
            c(names(VariantAnnotation::geno(vcf))))) {
            this.col = dim(VariantAnnotation::geno(vcf)[["AU"]])[2]
            d.a = VariantAnnotation::geno(vcf)[["AU"]][, , 1, drop = F][, this.col, 
                1]
            d.g = VariantAnnotation::geno(vcf)[["GU"]][, , 1, drop = F][, this.col, 
                1]
            d.t = VariantAnnotation::geno(vcf)[["TU"]][, , 1, drop = F][, this.col, 
                1]
            d.c = VariantAnnotation::geno(vcf)[["CU"]][, , 1, drop = F][, this.col, 
                1]
            mat = cbind(A = d.a, G = d.g, T = d.t, C = d.c)
            rm("d.a", "d.g", "d.t", "d.c")
            refid = match(as.character(VariantAnnotation::fixed(vcf)$REF), colnames(mat))
            refid = ifelse(!isSNV(vcf), NA_integer_, refid)
            altid = match(as.character(VariantAnnotation::fixed(vcf)$ALT), colnames(mat))
            altid = ifelse(!isSNV(vcf), NA_integer_, altid)
            refsnv = mat[cbind(seq_len(nrow(mat)), refid)]
            altsnv = mat[cbind(seq_len(nrow(mat)), altid)]
            this.icol = dim(VariantAnnotation::geno(vcf)[["TAR"]])[2]
            refindel = d.tar = VariantAnnotation::geno(vcf)[["TAR"]][, , 1, drop = F][, 
                this.icol, 1]
            altindel = d.tir = VariantAnnotation::geno(vcf)[["TIR"]][, , 1, drop = F][, 
                this.icol, 1]
            adep = data.table(ref = coalesce(refsnv, refindel), 
                alt = coalesce(altsnv, altindel))
            gt = NULL
        }
        else {
            message("ref and alt count columns not recognized")
            adep = NULL
            gt = NULL
        }
        mcols(rr) = BiocGenerics::cbind(mcols(rr), ann, adep, 
            gt = gt[, 1])
        out = rr
    }
    this.env = environment()
    return(this.env$out)
}



## parsesnpeff = function (vcf, id = NULL, filterpass = TRUE, coding_alt_only = TRUE, 
##     geno = NULL, gr = NULL, keepfile = FALSE, altpipe = FALSE, 
##     debug = FALSE, snpeffpath = "~/modules/SnpEff") 
## {
##     if (debug) 
##         browser()
##     out.name = paste0("tmp_", rand.string(), ".vcf.gz")
##     tmp.path = paste0(tempdir(), "/", out.name)
##     if (!keepfile) 
##         on.exit(unlink(tmp.path))
##     try2({
##         catcmd = if (grepl("(.gz)$", vcf)) "zcat" else "cat"
##         ## onepline = "/gpfs/commons/groups/imielinski_lab/git/mskilab/flows/modules/SnpEff/source/snpEff/scripts/vcfEffOnePerLine.pl"
##         onepline = paste0(snpeffpath, "/source/snpEff/scripts/vcfEffOnePerLine.pl")
##         if (coding_alt_only) {
##             filt = sprintf("java -Xmx20m -Xms20m -XX:ParallelGCThreads=1 -jar %s filter \"( ANN =~ 'chromosome_number_variation|exon_loss_variant|rare_amino_acid|stop_lost|transcript_ablation|coding_sequence|regulatory_region_ablation|TFBS|exon_loss|truncation|start_lost|missense|splice|stop_gained|frame' )\"",
##                            paste0(snpeffpath, "/source/snpEff/SnpSift.jar"))
##             if (filterpass)
##                 cmd = sprintf(paste("bcftools view -i 'FILTER==\"PASS\"' %s | %s | %s | bgzip -c > %s"), 
##                   vcf, onepline, filt, tmp.path)
##             else cmd = sprintf("cat %s | %s | %s | bcftools norm -Ov -m-any | bgzip -c > %s", 
##                 vcf, onepline, filt, tmp.path)
##         }
##         else {
##             filt = ""
##             if (filterpass) 
##                 cmd = sprintf(paste(catcmd, "%s | %s | bcftools view -i 'FILTER==\"PASS\"' | bgzip -c > %s"), 
##                   vcf, onepline, tmp.path)
##             else cmd = sprintf(paste(catcmd, "%s | %s | bcftools norm -Ov -m-any | bgzip -c > %s"), 
##                 vcf, onepline, tmp.path)
##         }
##         system(cmd)
##     })
##     if (!altpipe) 
##         out = grok_vcf(tmp.path, long = TRUE, geno = geno, gr = gr)
##     else {
##         vcf = readVcf(tmp.path)
##         vcf = S4Vectors::expand(vcf)
##         rr = within(rowRanges(vcf), {
##             REF = as.character(REF)
##             ALT = as.character(ALT)
##         })
##         ann = as.data.table(tstrsplit(unlist(info(vcf)$ANN), 
##             "\\|"))[, 1:15, with = FALSE, drop = FALSE]
##         fn = c("allele", "annotation", "impact", "gene", "gene_id", 
##             "feature_type", "feature_id", "transcript_type", 
##             "rank", "variant.c", "variant.p", "cdna_pos", "cds_pos", 
##             "protein_pos", "distance")
##         data.table::setnames(ann, fn)
##         if ("AD" %in% names(geno(vcf))) {
##             adep = setnames(as.data.table(geno(vcf)$AD[, , 1:2]), 
##                 c("ref", "alt"))
##             gt = geno(vcf)$GT
##         }
##         else if (all(c("AU", "GU", "CU", "TU", "TAR", "TIR") %in% 
##             c(names(geno(vcf))))) {
##             this.col = dim(geno(vcf)[["AU"]])[2]
##             d.a = geno(vcf)[["AU"]][, , 1, drop = F][, this.col, 
##                 1]
##             d.g = geno(vcf)[["GU"]][, , 1, drop = F][, this.col, 
##                 1]
##             d.t = geno(vcf)[["TU"]][, , 1, drop = F][, this.col, 
##                 1]
##             d.c = geno(vcf)[["CU"]][, , 1, drop = F][, this.col, 
##                 1]
##             mat = cbind(A = d.a, G = d.g, T = d.t, C = d.c)
##             rm("d.a", "d.g", "d.t", "d.c")
##             refid = match(as.character(VariantAnnotation::fixed(vcf)$REF), colnames(mat))
##             refid = ifelse(!isSNV(vcf), NA_integer_, refid)
##             altid = match(as.character(VariantAnnotation::fixed(vcf)$ALT), colnames(mat))
##             altid = ifelse(!isSNV(vcf), NA_integer_, altid)
##             refsnv = mat[cbind(seq_len(nrow(mat)), refid)]
##             altsnv = mat[cbind(seq_len(nrow(mat)), altid)]
##             this.icol = dim(geno(vcf)[["TAR"]])[2]
##             refindel = d.tar = geno(vcf)[["TAR"]][, , 1, drop = F][, 
##                 this.icol, 1]
##             altindel = d.tir = geno(vcf)[["TIR"]][, , 1, drop = F][, 
##                 this.icol, 1]
##             adep = data.table(ref = coalesce(refsnv, refindel), 
##                 alt = coalesce(altsnv, altindel))
##             gt = NULL
##         }
##         else {
##             message("ref and alt count columns not recognized")
##             adep = NULL
##             gt = NULL
##         }
##         mcols(rr) = BiocGenerics::cbind(mcols(rr), ann, adep, 
##             gt = gt[, 1])
##         out = rr
##     }
##     this.env = environment()
##     return(this.env$out)
## }

#' modded grok_vcf
#'
#' @param x path to vcf
#' @return GRanges
#' @author Marcin Imielinski
#' @export
grok_vcf = function(x, label = NA, keep.modifier = TRUE, long = FALSE, oneliner = FALSE, verbose = FALSE, geno = NULL, tmp.dir = tempdir(), gr = NULL)
{
  fn = c('allele', 'annotation', 'impact', 'gene', 'gene_id', 'feature_type', 'feature_id', 'transcript_type', 'rank', 'variant.c', 'variant.p', 'cdna_pos', 'cds_pos', 'protein_pos', 'distance')

  if (is.character(x))
    {
        out = suppressWarnings(skidb::read_vcf(x, tmp.dir = tmp.dir, geno = geno, gr = gr))
        if (length(out) == 0) {
            return(out)
        }
      if (is.na(label))
        label = x
    }
  else
    out = x

  if (is.na(label))
    label = ''

  if (verbose)
    message('Grokking vcf ', label)

  if (!long)
  {
        vcf = out
        if (length(vcf)>0)
        {
          if (!is.null(vcf$ANN))
          {
            vcf$eff = unstrsplit(vcf$ANN)
            vcf$modifier = !grepl('(HIGH)|(LOW)|(MODERATE)', vcf$eff)
            if (!keep.modifier)
              vcf = vcf[!vcf$modifier]
          }
          vcf$ref = as.character(vcf$REF)
          vcf$alt = as.character(unstrsplit(vcf$ALT))
          vcf = vcf[, sapply(values(vcf), class) %in% c('factor', 'numeric', 'integer', 'logical', 'character')]
          vcf$var.id = 1:length(vcf)
          vcf$type = ifelse(nchar(vcf$ref)==nchar(vcf$alt), 'SNV',
                     ifelse(nchar(vcf$ref)<nchar(vcf$alt),
                            'INS', 'DEL'))
          vcf$label = label
        }
        return(vcf)
  }
  else if (length(out)>0)
    {
        out$REF = as.character(out$REF)
        out$ALT = as.character(unstrsplit(out$ALT))
        out$vartype = ifelse(nchar(out$REF) == nchar(out$ALT), 'SNV',
                      ifelse(nchar(out$REF) < nchar(out$ALT), 'INS', 'DEL'))
        tmp = lapply(out$ANN, function(y) do.call(rbind, lapply(strsplit(y, '\\|'), '[', 1:15)))
        tmpix = rep(1:length(out), sapply(tmp, NROW))
        meta = as.data.frame(do.call(rbind, tmp))
        colnames(meta) = fn
        meta$varid = tmpix
        meta$file = x
        out2 = out[tmpix]
        rownames(meta) = NULL
        values(out2) = cbind(values(out2), meta)
        names(out2) = NULL
        out2$ANN = NULL
        if (oneliner)
          out2$oneliner = paste(
            ifelse(!is.na(out2$gene),
                   as.character(out2$gene),
                   as.character(out2$annotation)),
            ifelse(nchar(as.character(out2$variant.p))>0,
                   as.character(out2$variant.p),
                   as.character(out2$variant.c)))
    }
    return(out2)
}


#' estimate snv cn stub
#'
#'
#' @param vcf path to vcf
#' @param jab path to jabba
#' @export
est_snv_cn_stub = function (vcf, jab, tumbam = NULL, germ_subsample = 200000, somatic = FALSE, 
    saveme = FALSE) 
{
    oldsaf = options()$stringsAsFactors
    options(stringsAsFactors = FALSE)
    oldscipen = options()$scipen
    options(scipen = 999)
    on.exit({
        options(scipen = oldscipen)
        options(stringsAsFactors = oldsaf)
        unlink(tmpvcf)
        unlink(tmpvcf2)
    })
    tmpvcf = tempfile(fileext = ".vcf")
    tmpvcf2 = tempfile(fileext = ".vcf")
    if (!somatic) {
        message("starting germline processing")
        system2("bcftools", c("view -i 'FILTER==\"PASS\"'", vcf), 
            stdout = tmpvcf)
        system2("java", sprintf("-jar ~/software/jvarkit/dist/downsamplevcf.jar -N 10 -n %s %s", 
            germ_subsample, tmpvcf), stdout = tmpvcf2, env = "module unload java; module load java/1.8;")
        gvcf = parsesnpeff(tmpvcf, coding_alt_only = TRUE, keepfile = FALSE, 
            altpipe = TRUE)
        gvcf_subsam = parsesnpeff(tmpvcf2, coding_alt_only = FALSE, 
            keepfile = FALSE, altpipe = TRUE)
        gvcf = unique(dt2gr(rbind(gr2dt(gvcf_subsam), gr2dt(gvcf))))
        input = gvcf
        rm("gvcf", "gvcf_subsam")
        fif = file.info(dir("./"))
        fif = arrange(cbind(path = rownames(fif), fif), desc(mtime))
        tmp.t = grep("reg_.*.tsv", fif$path, value = TRUE)[1]
        tmp.b = grep("reg_.*.bed", fif$path, value = TRUE)[1]
        callout = grep("mpileup_", fif$path, value = TRUE)[1]
        if (!file.exists(tmp.t)) 
            tmp.t = tempfile(pattern = "reg_", fileext = ".tsv", 
                tmpdir = ".")
        if (!file.exists(tmp.b)) 
            tmp.b = tempfile(pattern = "reg_", fileext = ".bed", 
                tmpdir = ".")
        if (!file.exists(callout)) 
            callout = tempfile(pattern = "mpileup_", fileext = ".vcf", 
                tmpdir = ".")
        input = within(input, {
            nref = nchar(REF)
            nalt = nchar(ALT)
            vartype = ifelse(nref > 1 & nalt == 1, "DEL", ifelse(nref == 
                1 & nalt > 1, "INS", ifelse(nref == 1 & nalt == 
                1, "SNV", NA_character_)))
            maxchar = pmax(nref, nalt)
        })
        input$nref = NULL
        input$nalt = NULL
        input2 = GenomicRanges::reduce(gr.resize(input, ifelse(input$maxchar > 
            1, 201, input$maxchar), pad = FALSE) %>% gr.sort)
        fwrite(gr2dt(input2[, c()])[, 1:3, with = F][, `:=`(start, 
            pmax(start, 1))][, `:=`(end, pmax(end, 1))], tmp.t, 
            sep = "\t", col.names = FALSE)
        fwrite(gr2dt(input2[, c()])[, 1:3, with = F][, `:=`(start, 
            pmax(start - 1, 0))][, `:=`(end, pmax(end, 1))], 
            tmp.b, sep = "\t", col.names = FALSE)
        if (!file.exists(callout)) {
            message("starting germline mpileup to call variants in tumor")
            clock(system(sprintf("(bcftools mpileup -d 8000 -Q 0 -q 0 -B -R %s -f ~/DB/GATK/human_g1k_v37_decoy.fasta %s | bcftools call -m --prior 0 -v) > %s", 
                tmp.t, tumbam, callout)))
        }
        excls = tempfile(pattern = "excludesam_", fileext = ".txt", 
            tmpdir = tempdir())
        writeLines(system(sprintf("bcftools query -l %s", callout), 
            intern = T), excls)
        cntmp = tempfile(pattern = "cntmp_", fileext = ".vcf.gz", 
            tmpdir = "./")
        system(sprintf("(bcftools view -S ^%s %s | bcftools norm -c f -f /gpfs/commons/home/khadi/DB/GATK/human_g1k_v37_decoy.fasta | bcftools norm -Ov -m-any | bgzip -c) > %s", 
            excls, callout, cntmp))
        cnfin = S4Vectors::expand(readVcf(cntmp))
        dp4mat = do.call(rbind, as.list(info(cnfin)$DP4))
        altv = dp4mat[, 3] + dp4mat[, 4]
        idv = info(cnfin)$IDV
        refv = dp4mat[, 1] + dp4mat[, 2]
        idrefv = (altv + refv) - idv
        gr4est = rowRanges(cnfin)
        gr4est$ref = coalesce(idrefv, refv)
        gr4est$alt = coalesce(idv, altv)
        gr4est$ALT = as.character(gr4est$ALT)
        gr4est$REF = as.character(gr4est$REF)
        gr4est = merge(gr2dt(input), gr2dt(gr4est)[, `:=`(pileupfound, 
            TRUE)], by = c("seqnames", "start", "end", "REF", 
            "ALT"), suffixes = c("_normal", ""), all = TRUE, 
            allow.cartesian = TRUE)
        gr4est = unique(gr4est)
        gr4est[, `:=`(pileupfound, pileupfound %in% TRUE)]
        if (saveme) 
            saveRDS(gr4est, "gr4est_germline.rds")
        hold = gr4est[is.na(ref)]
        hold[, `:=`(pileupnotfound, TRUE)]
        germbin = gr4est[is.na(ref_normal)]
        if (saveme) 
            saveRDS(germbin, "germpileupbin.rds")
        gr4est = gr4est[!is.na(ref)][!is.na(ref_normal)]
    }
    else {
        message("reading in somatic variants")
        gr4est = parsesnpeff(vcf, coding_alt_only = FALSE, keepfile = FALSE, 
            altpipe = TRUE)
        if (saveme) 
            saveRDS(gr4est, "gr4est_somatic.rds")
        hold = NULL
    }
    out = est_snv_cn(gr4est, jab, somatic = somatic)
    out = rbind(out, hold, fill = TRUE)
    if (saveme) 
        if (somatic) 
            saveRDS(out, "est_snv_cn_somatic.rds")
        else saveRDS(out, "est_snv_cn_germline.rds")
    return(out)
}


#' estimate snv cn
#'
#' @param gr GRanges
#' @param jab jabba rds or jabba list object
#' @export
est_snv_cn = function(gr, jabba, somatic = FALSE) {
  if (length(gr) == 0)
    return(NULL)
  if (is.character(jabba))
    jab = readRDS(jabba)
  gg = gG(jabba = jab)
  lpp = with(jab, list(purity = purity, ploidy = ploidy))
  if (inherits(gr, "data.table"))
    gr = dt2gr(gr)
  gr = gr %*% within(gg$nodes$gr[, c("snode.id", "cn")], {segwid = width})
  dt = gr2dt(gr)
  dt = dt[order(seqnames, start, end, -alt)][, rtot := ref + alt]
  dt = cbind(dt, with(dt, as.data.table(rleseq(seqnames, start, REF, ALT, clump = T))))
  dt[, rtot := sum(ref[1], alt[1]), by = idx]
  dt[, seg_rtot := {u = !duplicated(idx); mean(rtot[u]) %>% round}, by = snode.id]
  dt[, vaf := alt / rtot]
  dt[, vaf_segt := pinch(alt / seg_rtot, 0, 1)]
  if (isFALSE(somatic)) {
    message("calculating cn of somatic variants")
    dt[, norm_term := ifelse(grepl("^0[/|]1$", gt), 2, ifelse(grepl("^1[/|]1$", gt), 1, NA_integer_))]
    dt[!is.na(cn),
       c("est_cn", "est_cn_rm", "est_cn_ll", "est_cn_llrm") :=
         {
           cn = cn[1]
           rtot = rtot[1]
           seg_rtot = seg_rtot[1]
           vaf_segt = vaf_segt[1]
           alt = alt[1]
           norm_term = norm_term[1]
           estcn = round((cn * ((norm_term * vaf) - ((1 - lpp$purity)))) / lpp$purity)
           estcnrm = round((cn * ((norm_term * vaf_segt) - ((1 - lpp$purity)))) / lpp$purity)
           centers = pinch((lpp$purity * (0:cn) / cn ) + ((1 - lpp$purity) / 2))
           ifun = function(cnid, rtot, vaf, alt) {
             dbinom(alt, rtot, prob = centers[cnid + 1], log = T)
           }
           estllcn = which.max(
               withv(sapply((0:cn), ifun,
                            rtot = rtot, vaf = vaf, alt = alt), x - min(x))) - 1
           estllrcn = which.max(
               withv(sapply((0:cn), ifun,
                            rtot = seg_rtot, vaf = vaf_segt, alt = alt), x - min(x))) - 1
           list(estcn,
                estcnrm,
                estllcn,
                estllrcn)
         }, by = .(snode.id, idx)]
  } else {
    message("calculating cn of normal variants")
    dt[!is.na(cn),
       c("est_cn", "est_cn_rm", "est_cn_ll", "est_cn_llrm") :=
         {
           cn = cn[1]
           rtot = rtot[1]
           seg_rtot = seg_rtot[1]
           vaf_segt = vaf_segt[1]
           alt = alt[1]
           estcn = round((cn * (2 * vaf)) / lpp$purity)
           estcnrm = round((cn * (2 * vaf_segt)) / lpp$purity)
           centers = pinch((lpp$purity * (0:cn) / cn ))
           ifun = function(cnid, rtot, vaf, alt) {
             out = dbinom(alt, rtot, prob = centers[cnid + 1], log = T)
             out = replace(out, is.infinite(out) & sign(out) < 0,  -2e9)
             out = replace(out, is.infinite(out) & sign(out) > 0,   2e9)
             return(out)
           }
           estllcn =
             which.max(
               withv(sapply((0:cn), ifun,
                            rtot = rtot, vaf = vaf, alt = alt), x - min(x))) - 1
           estllrcn = which.max(
               withv(sapply((0:cn), ifun,
                            rtot = seg_rtot, vaf = vaf_segt, alt = alt), x - min(x))) - 1
           list(estcn,
                estcnrm,
                estllcn,
                estllrcn)
         }, by = .(snode.id, idx)]
  }
  return(dt)
}

#' breakend exact homology
#'
#' pull out homology across exact breakends from gGraph object
#'
#' @name behomology
#' @param gg gGraph (R6) object
#' @param hg character path to fasta or rtracklayer representation of fasta-like file
#' @export
behomology = function (gg, hg, PAD = 2) {
    if (inherits(gg, "gGraph")) {
        gg = khtools::copy2(gg)
        ed = gg$edges[type == "ALT"]
        if (!length(ed)) {
            gg$edges$mark(bh = NA_integer_)
            gg$edges$mark(bh.1 = NA_integer_)
            return(gg)
        }
        ## bp1 = ed$junctions$left %>% gr.flipstrand
        bp1 = ed$junctions$left %>% gr.noval
        bp2 = ed$junctions$right %>% gr.noval
    }
    else if (inherits(gg, "Junction")) {
        if (!length(gg))
            return(gg)
        ## bp1 = gg$left %>% gr.flipstrand
        bp1 = gg$left %>% gr.noval
        bp2 = gg$right %>% gr.noval
    }
    else stop("Input must be either gGraph or Junction object")
    if (is.character(hg))
        hg = khtools::readinfasta(hg)
    bp1 = dt2gr(gr2dt(bp1))
    bp2 = dt2gr(gr2dt(bp2))
    if (length(setdiff(c(seqnames(bp1), seqnames(bp1)), seqlevels(hg))))
        stop("seqnames in breakpoints missing from the provided reference, plesae check and fix the seqlevels of the provided graph / junctions / and/or reference")
    dodo.call = function(FUN, args) {
        if (!is.character(FUN))
            FUN = substitute(FUN)
        cmd = paste(FUN, "(", paste("args[[", 1:length(args),
            "]]", collapse = ","), ")", sep = "")
        return(eval(parse(text = cmd)))
    }
    .getseq = function(hg, gr) {
        res = dodo.call("c", mapply(function(c, s, e) subseq(hg[c],
            start = s, end = e), seqnames(gr) %>% as.character,
            start(gr), end(gr)))
        res = ifelse(strand(gr) == "+", res, reverseComplement(res)) %>%
            DNAStringSet
        return(res)
    }

    collect_seq = function(bp1, bp2, hg, PAD = 50, shift_pos_bp = FALSE) {
        suppressWarnings({
            .getseq = function(hg, gr) {
                res = dodo.call("c", mapply(function(c, s, e) subseq(hg[c],
                                                                     start = s, end = e), seqnames(gr) %>% as.character,
                                            start(gr), end(gr)))
                res = ifelse(strand(gr) == "+", res, reverseComplement(res)) %>%
                    DNAStringSet
                return(res)
            }
            if (isTRUE(shift_pos_bp)) {
                bp1 = shift_right(bp1, ifelse(as.logical(strand(bp1) == "+"), 1, 0))
                bp2 = shift_right(bp2, ifelse(as.logical(strand(bp2) == "+"), 1, 0))
            }
            bpfrag1.l = gr.resize(rep_each(bp1, PAD), 1:PAD, F, fix = "start", ignore.strand = F)
            bpfrag2.l = gr.flipstrand(gr.resize(rep_each(bp2, PAD), 1:PAD, pad = F, fix = "end", ignore.strand = F))
            bpfrag1.r = gr.resize(rep_each(bp1, PAD), 1:PAD, pad = F, fix = "end", ignore.strand = F)
            bpfrag2.r = gr.flipstrand(gr.resize(rep_each(bp2, PAD), 1:PAD, pad = F, fix = "start", ignore.strand = F))
            bpfrag1.fu = gr.resize(rep_each(bp1, PAD), 1:PAD, pad = F, fix = "start", ignore.strand = F)
            bpfrag2.fu = gr.flipstrand(gr.resize(rep_each(bp2, PAD), 1:PAD, pad = F, fix = "start", ignore.strand = F))
            bpfrag1.c = gr.resize(rep_each(bp1, PAD), 1:PAD, pad = F, fix = "center", ignore.strand = F)
            bpfrag2.c = gr.flipstrand(gr.resize(rep_each(bp2, PAD), 1:PAD, pad = F, fix = "center", ignore.strand = F))
            ## exseq1.l = .getseq(hg, bpfrag1.l)
            exseq1.l = tryCatch(hg[bpfrag1.l], error = function(e) .getseq(hg, bpfrag1.l))
            ## exseq2.l = .getseq(hg, bpfrag2.l)
            exseq2.l = tryCatch(hg[bpfrag2.l], error = function(e) .getseq(hg, bpfrag2.l))
            ## exseq1.r = .getseq(hg, bpfrag1.r)
            exseq1.r = tryCatch(hg[bpfrag1.r], error = function(e) .getseq(hg, bpfrag1.r))
            ## exseq2.r = .getseq(hg, bpfrag2.r)
            exseq2.r = tryCatch(hg[bpfrag2.r], error = function(e) .getseq(hg, bpfrag2.r))
            ## exseq1.fu = .getseq(hg, bpfrag1.fu)
            exseq1.fu = tryCatch(hg[bpfrag1.fu], error = function(e) .getseq(hg, bpfrag1.fu))
            ## exseq2.fu = .getseq(hg, bpfrag2.fu)
            exseq2.fu = tryCatch(hg[bpfrag2.fu], error = function(e) .getseq(hg, bpfrag2.fu))
            exseq1.c = tryCatch(hg[bpfrag1.c], error = function(e) .getseq(hg, bpfrag1.c))
            exseq2.c = tryCatch(hg[bpfrag2.c], error = function(e) .getseq(hg, bpfrag2.c))
            dt = data.table(ix = rep(seq_along(bp1), each = PAD),
                       PAD = as.numeric(rep(1:PAD, PAD)),
                       lmatch = exseq1.l == exseq2.l,
                       rmatch = exseq1.r == exseq2.r,
                       fumatch = exseq1.fu == exseq2.fu,
                       cmatch = exseq1.c == exseq2.c)
            dt2 = dt[,
                     .(bh.l = pmax(max(PAD[lmatch]), 0),
                       bh.r = pmax(max(PAD[rmatch]), 0),
                       bh.fu = pmax(max(PAD[fumatch]), 0),
                       bh.c = pmax(max(PAD[cmatch]), 0)), by = ix]
            return(dt2)

        })
    }
    bhom = collect_seq(bp1, bp2, hg, PAD = PAD)
    bhom.1 = collect_seq(bp1, bp2, hg, PAD = PAD, shift_pos_bp = TRUE)

    if (inherits(gg, "gGraph")) {
        et(sprintf("ed$mark(bh%s.fu = bhom$bh.fu)", PAD))
        et(sprintf("ed$mark(bh%s.l = bhom$bh.l)", PAD))
        et(sprintf("ed$mark(bh%s.r = bhom$bh.r)", PAD))
        et(sprintf("ed$mark(bh%s.c = bhom$bh.c)", PAD))
        et(sprintf("ed$mark(bh%s.1fu = bhom.1$bh.fu)", PAD))
        et(sprintf("ed$mark(bh%s.1l = bhom.1$bh.l)", PAD))
        et(sprintf("ed$mark(bh%s.1r = bhom.1$bh.r)", PAD))
        et(sprintf("ed$mark(bh%s.1c = bhom.1$bh.c)", PAD))
    }
    else {
        et(sprintf("gg$mark(bh%s.fu = bhom$bh.fu)", PAD))
        et(sprintf("gg$mark(bh%s.l = bhom$bh.l)", PAD))
        et(sprintf("gg$mark(bh%s.r = bhom$bh.r)", PAD))
        et(sprintf("gg$mark(bh%s.1fu = bhom.1$bh.fu)", PAD))
        et(sprintf("gg$mark(bh%s.c = bhom$bh.c)", PAD))
        et(sprintf("gg$mark(bh%s.1l = bhom.1$bh.l)", PAD))
        et(sprintf("gg$mark(bh%s.1r = bhom.1$bh.r)", PAD))
        et(sprintf("gg$mark(bh%s.1c = bhom.1$bh.c)", PAD))
    }
    return(gg)

}



##############################
##############################


############################## gGnome helpers



#' flip each GRangesList element
#'
#' flip the elements within GRangesList
#' 
#' @export grl.flip
grl.flip = function(x, flipstrand = TRUE) {
    if (!inherits(x, "GRangesList")) stop("x is not a GRangesList")
    ir = IRanges(start = 1, end = lengths(x))
    ## seems to be much faster than applying revElements directly to GRangesList
    irl = S4Vectors::revElements(as(ir, "IntegerList"))
    out = x[irl]
    if (flipstrand)
        out = gr.flipstrand(out)
    return(out)
}


#' grab edges from walk
#'
#' get breakpoints
#' 
#' @export
alt_in_cis = function(gw, full = FALSE) {
    edt = copy(gw$edgesdt)
    gwe = gw$edges[as.character(edt$sedge.id)]
    gw.edge.meta = gwe$dt
    edt$type = gw.edge.meta$type
    edt$class = gw.edge.meta$class
    edt = cbind(edt, asdt(with(edt, rleseq(walk.id, type == "ALT", clump = F, use.data.table = F))))
    gr.bp = grl.unlist(sort(gr.noval(gwe$grl), ignore.strand = T))
    grs = gr.string(gr.bp)
    edt$bp1 = grs[gr.bp$grl.iix == 1]
    edt$bp2 = grs[gr.bp$grl.iix == 2]
    ## parasn(edt, asdt(with(edt, rleseq(walk.id, type == "ALT", clump = F, use.data.table = F))), use.data.table = T)
    edt[, alt_adjacent := any(type == "ALT" & lns > 1), by = walk.id]
    if (full)
        return(withAutoprint(edt, echo = FALSE)$value)
    else
        return(unique(edt[, .(walk.id = walk.id, alt_adjacent = alt_adjacent)]))
}


#' pull edge metadata from gwalk
#'
#' get all edges from a walk (junctions in cis)
#' 
#' @export
gw_edges = alt_in_cis


#' get breakpoints 
#'
#' grab breakpoints from gGraph alt edges and mark with ecluster filters
#' 
#' @export
getbp = function(gg, ignore.small = T, ignore.isolated = T, max.small = 1e4, min.isolated = max.small, sort = FALSE) {
    self = gg
    self$edges$mark(ecluster = as.integer(NA))
    altedges = self$edges[type == "ALT", ]
    o.altedges = copy3(altedges)
    if (length(altedges) == 0) {
        return(NULL)
    }
    if (ignore.small) {
        altedges = altedges[!((class == "DUP-like" | class == "DEL-like") & altedges$span <= max.small)]
    }
    deldup = if (length(altedges) == 0) {
        copy3(altedges)[class %in% c("DUP-like", "DEL-like")]
    }
    if (NROW(deldup) > 0 && ignore.isolated) {
        altes = deldup$shadow
        ## altes$sedge.id = altedges[class %in% c("DUP-like", "DEL-like")]$dt[altes$id]$sedge.id
        bp = gr.noval(grl.unlist(altedges$grl), keep.col = c("grl.ix", "grl.iix", "class", "sedge.id"))
        bp$sedge.id.y = bp$sedge.id; bp$sedge.id = NULL
        addon = deldup$dt[altes$id][, .(sedge.id, class)]
        altes$sedge.id = addon$sedge.id
        altes$class = addon$class
        altes$nbp = altes %N% bp # number of breakpoints of any SV that fall within segment
        numsum = altedges$shadow %>% gr.sum # using the shadows of all of the SVs not just dels and dups
        altes = altes %$% numsum
        iso = ((altes) %Q% (score == 1.0))$id
        ## rm.edges = unique(altes[iso] %Q% (width < thresh))$sedge.id ## old
        rm.edges = unique(altes[iso] %Q% (width < min.isolated))$sedge.id
        rm.dups = S4Vectors::with(altes, sedge.id[class == "DUP-like" & nbp <= 2])
        rm.dups = c(rm.dups, dedup.cols(gr2dt(altes %*% bp))[sedge.id != sedge.id.y][class == "DUP-like"][, .(all(1:2 %in% grl.iix), class.1 = class.1[1]), by = .(sedge.id, sedge.id.y)][, all(V1 == TRUE) & all(class.1 == "DUP-like"), by = sedge.id][V1 == TRUE]$sedge.id) # removing dups that have only other nested dups 
        rm.edges = union(rm.edges, rm.dups)
        keepeid = setdiff(altedges$dt$sedge.id, rm.edges)
        altedges = altedges[as.character(keepeid)]
    } # ignoring isolated dup and del edges that are smaller than threshold
    ## if (length(altedges) == 0) {
    ##         message("No junction in this graph")
    ##     }
    ##     return(NULL)
    ## }
    if (NROW(altedges) > 0)
        o.altedges[as.character(altedges$dt$sedge.id)]$mark(ecluster_filter = "pass")
    else
        return(NULL)
    bp = grl.unlist(o.altedges$grl)[, c("grl.ix", "grl.iix", "edge.id", "ecluster_filter")]
    if (isTRUE(sort)) {
        bp = gr.sort(bp, ignore.strand = T)
    }
    bp$m.ix = seq_along(bp)
    bp.dt = gr2dt(bp)
    return(bp.dt)
}

#' interbp_dist
#'
#' get base pair distances between breakends
#' 
#' @export
interbp_dist = function(gg) {
    
    bp.dt = getbp(gg, ignore.small = T, ignore.isolated = T)
    if (nodim(bp.dt))
        return(list(bp.dt = NULL, dists = NULL))
    bp.dt = bp.dt[(GenomicRanges::order(gr.stripstrand(dt2gr(bp.dt))))]
    bp.dt[, genome_order := seq_len(.N)]

    distmat = gr.dist(dt2gr(bp.dt), ignore.strand = T)

    dists = asdt(melt(distmat, value.name = "distance"))[Var1 != Var2][!is.na(distance)][order(distance)]
    dists[, c("minidx", "maxidx") := {mat = cbind(Var1, Var2); list(rowMins(mat), rowMaxs(mat))}]

    dists[, grl.ix1 := bp.dt[dists$Var1]$grl.ix]
    dists[, grl.ix2 := bp.dt[dists$Var2]$grl.ix]


    dists[, edge.id1 := bp.dt[dists$Var1]$edge.id]
    dists[, edge.id2 := bp.dt[dists$Var2]$edge.id]


    dists[, grl.iix1 := bp.dt[dists$Var1]$grl.iix]
    dists[, grl.iix2 := bp.dt[dists$Var2]$grl.iix]

    dists[, strand1 := bp.dt[dists$Var1]$strand]
    dists[, strand2 := bp.dt[dists$Var2]$strand]

    dists[, filt1 := bp.dt[dists$Var1]$ecluster_filter]
    dists[, filt2 := bp.dt[dists$Var2]$ecluster_filter]


    dists[, filt1 := bp.dt[dists$Var1]$ecluster_filter]
    dists[, filt2 := bp.dt[dists$Var2]$ecluster_filter]

    dists2 = dists[order(distance)][!duplicated(Var1)][!duplicated(Var2)] ## get every breakend to its nearest neighbor, 2nd dedup means that there is no extramarital partner 
    dists2[, numocc := .N, by = .(minidx, maxidx)]

    dists2[numocc == 2][!duped(minidx, maxidx)][grl.ix1 != grl.ix2]

    goods = dists2[numocc == 2][!duped(minidx, maxidx)][filt1 == "pass" & filt2 == "pass"][grl.ix1 != grl.ix2]

    dists = merge.repl(dists, goods[, .(minidx, maxidx, goodpair = "pass")], by = c("minidx", "maxidx"))[order(distance)] # goodpair = closest pairing

    return(list(bp.dt = bp.dt, dists = dists))
}


#' grab hrdetect features from hrdetect results
#' 
#' read in HRDetect results
#' 
#' @export grab.hrdetect.features
grab.hrdetect.features <- function(hrdetect_results, goodpairs, field, id.field = id.field) {
    path = hrdetect_results
    res = readRDS(path)
    id = which(goodpairs[[field]] %in% path)
    x = goodpairs[id]
    pr = unique(x[[id.field]])
    df = as.data.table(res$data_matrix)
    df$pair = pr
    cid = which(colnames(df) %in% "pair")
    indels.class = as.data.table(res$indels_classification)
    indels.class$sample = NULL
    hrd_out = as.data.table(res$hrdetect_output)
    cnames = colnames(hrd_out)
    cnames = paste0("w_", cnames)
    cnames[8] = "HRDetect"
    colnames(hrd_out) = cnames
    df = cbind(qmat(df,,cid), qmat(df,,-cid), indels.class, hrd_out)
    return(df)
}


#' grab hrdetect features from pairs table
#' 
#' wrapper around grab.hrdetect.features
#' 
#' @export pairs.grab.hrdetect.features
pairs.grab.hrdetect.features <- function(pairs, field = "hrd_results", id.field = "pair", mc.cores = 1) {
    paths = pairs[[field]]
    paths = unique(paths[file.exists(paths)])
    goodpairs = pairs[pairs[[field]] %in% paths]
    allpr = unique(goodpairs[[id.field]])
    lst = mclapply(paths, grab.hrdetect.features, mc.cores = mc.cores, goodpairs = goodpairs, field = field, id.field = id.field)
    out = rbindlist(lst)
    out$fpair = factor(out[[id.field]], allpr)
    return(out)
}


grab.ot.features <- function(ent, id.field = "pair") {
    path = ent[["fpaths"]]
    if (file.exists(path)) {
        ot = readRDS(path)$expl_variables
        ot[[id.field]] = ent[["ids"]]
        return(ot)
    }
}



#' grab oneness twoness features from pairs table
#' 
#' wrapper around grab.ot.features
#' 
#' @export pairs.grab.ot.features
pairs.grab.ot.features <- function(pairs, field = "oneness_twoness", id.field = "pair", mc.cores = 1) {
    fpaths = pairs[[field]]
    dt = data.table(
        fpaths,
        ids = pairs[[id.field]],
        fe = file.exists(fpaths)
    )
    dt = unique(dt[dt$fe == TRUE,])
    lst = dt %>% split_by("ids")
    lst.res = mclapply(lst, grab.ot.features, id.field = id.field, mc.cores = mc.cores)
    out = rbindlist(lst.res, fill = T)
    out$`NA` = NULL
    out$`"sample"` = NULL
    out$`.` = NULL
    return(out)
}



#' @export pairs.filter.sv
pairs.filter.sv = function(tbl, id.field, sv.field = "svaba_unfiltered_somatic_vcf", mc.cores = 1, pon.path = '~/lab/projects/CCLE/db/tcga_and_1kg_sv_pon.rds') {
  if (missing(id.field))
    id.field = key(tbl)
  if (is.null(id.field))
    stop("please specify an id field")
  if (!exists("sv_pon")) {
    message("no sv_pon variable found...", "\n",
      "loading ", pon.path)
    sv_pon = gr.noval(readRDS(pon.path))
  }
  thisenv = environment()
  iter.fun = function(pr, tbl) {
    try2({
      ent = tbl[get(id.field) == thisenv$pr]
      return(.filter_sv(ent))
    })
  }
  out = mclapply(mc.cores = mc.cores,
    tbl[[id.field]], iter.fun, tbl = tbl)
  out = tryCatch(rbindlist(out), error = function(e) {
    message("error at rbindlist, returning list"); out
  })
  return(out)
}


#' @export plot.jabba
plot.jabba = function(pairs, win, filename, use.jab.cov = TRUE, field.name = "jabba_rds", cov.field.name = "cbs_cov_rds", cov.y.field = "ratio", title = "", doplot = TRUE, gt, plotfun = "ppng", h = 10, w = 10, rebin = FALSE, binwidth = 1e3, lwd.border = 0.0001, ...) {
    if (is.character(plotfun)) {
        plotfun = get(plotfun)
    } else if (!is.function(plotfun)) {
        stop("plotfun needs to be a function")
    }
    if (missing(gt)) {
        gg = gG(jabba = pairs[[field.name]])
        if (isTRUE(use.jab.cov))
            cov = readRDS(diginjob(pairs[[field.name]])$CovFile)
            ## cov = readRDS(inputs(readRDS(pairs[[field.name]] %>% dig_dir("Job.rds$")))$CovFile)
        else
            cov = readRDS(pairs[[cov.field.name]])
        if (rebin)
            cov = rebin(cov, binwidth = binwidth)
        gcov = gTrack(cov, cov.y.field, circles = TRUE, lwd.border = lwd.border, y0 = 0)
        gt = within(c(gcov, gg$gtrack()), {y0 = 0})
    }
    if (missing(win))
        win = si2gr(hg_seqlengths()) %>% keepStandardChromosomes(pruning.mode = "coarse") %>% gr.sort
    if (isTRUE(doplot)) {
        if (missing(filename))
            plotfun(plot(gt, win = win, ...), res = 200, title = title, h = h, w = w)
        else
            plotfun(plot(gt, win = win, ...), filename = filename, res = 200, title = title, h = h, w = w)
    }
    return(gt)
}


#' @export pairs.plot.jabba
pairs.plot.jabba = function(pairs, dirpath = "~/public_html/jabba_output", jabba.field = "jabba_rds", cov.y.field = "foreground", id.field = "pair", mc.cores = 1) {
    paths = subset2(pairs[[jabba.field]], file.exists(x))
    iter.fun = function(x, tbl) {
        ent = tbl[get(jabba.field) == x]
        ttl = ent[[id.field]]
        plot.jabba(ent, use.jab.cov = TRUE, field.name = jabba.field, filename = paste0(dirpath, "/", ent[[id.field]], ".png"), cov.y.field = cov.y.field, y.quantile = 0.01, title = ttl)
    }
    mclapply(paths, iter.fun, tbl = pairs, mc.cores = mc.cores)
    NULL
}


#' @export pairs.process.events
pairs.process.events = function(pairs, events.field = "complex", id.field = "pair", mc.cores = 1) {
    paths = unique(subset2(pairs[[events.field]], file.exists(x)))
    iter.fun = function(x, tbl) {
        tryCatch({
            ent = pairs[get(events.field) == x]
            gg = readRDS(unique(ent[[events.field]]))
            out = copy(gg$meta$events)
            set(out, j = id.field, value = unique(ent[[id.field]]))
            out
        }, error = function(e) printerr(x))
    }
    lst = mclapply(paths, iter.fun, tbl = pairs, mc.cores = mc.cores)
    evs = rbindlist(lst, fill = TRUE)
    fid = evs[, factor(get(id.field), levels = unique(pairs[get(events.field) %in% paths][[id.field]]))]
    set(evs, j = paste0("f", id.field), value = fid)
}

#' @export pairs.process.rbp
pairs.process.rbp = function(pairs, rbp.field = "complex", id.field = "pair", mc.cores = 1) {
    tryCatch({
        path = pairs[[rbp.field]]
        if (!file.exists(path)) return(NULL)
        gg = readRDS(path)
        out = gg$meta$recip_bp
        dt.tic = gg$meta$tic
        if (!NROW(out)) out = data.table()
        if (NROW(dt.tic)) {
            dt.tic = dt.tic %>% rename_at(vars(-1), ~paste0("tic", "_", .))
            out = merge.repl(out,
                       dt.tic, by = "tic", all = T)
        }
        out$pair = pairs$pair
        return(out)
    }, error = function(e) printerr(pairs$pair))
}

#' @export process_for_rbp
process_for_rbp <- function(ent, field = "complex", id.field = "pair") {
    tryCatch({
        gg = readRDS(ent[[field]])
        out = gg$meta$recip_bp
        dt.tic = gg$meta$tic
        if (!NROW(out)) out = data.table()
        if (NROW(dt.tic)) {
            dt.tic = dt.tic %>% rename_at(vars(-1), ~paste0("tic", "_", .))
            out = merge.repl(out,
                       dt.tic, by = "tic", all = T)
        }
        out[[id.field]] = rep_len2(ent[[id.field]], out)
        return(out)
    }, error = function(e) printerr(ent[[id.field]]))
}

#' @export pairs.process.rbp
pairs.process.rbp <- function(pairs, field = "complex",
                              id.field = "pair", mc.cores = 1) {
  envr = environment()
  lg = which(file.exists(pairs[[field]]))
  ents = pairs[envr$lg,]
  lg.d = which(!duplicated(ents[[id.field]]))
  ents = ents[envr$lg.d,]
  lst = split(ents, ents[[id.field]])
  res = mclapply(lst, process_for_rbp,
           mc.cores = mc.cores,
           field = field, id.field = id.field)
  rbp = rbindlist(res, fill = T)
  fid.field = paste0("f", id.field)
  rbp[[fid.field]] = factor(rbp[[id.field]], levels = ents[[id.field]])
  return(rbp)
}


#' @export pairs.process.homeology
pairs.process.homeology = function(pairs, id.field = "pair", mc.cores = 1) {
    coolp = pairs[file.exists2(homeology) & file.exists2(homeology_stats)]$pair
    subp = pairs[coolp]
    ifun =  function(x, debug = FALSE) {
        try2({
            if (debug) browser()
            addon = c("iw", "jw", "r", "minpx")
            homout = fread(x$homeology)
            homstat = fread(x$homeology_stats)
            if (len(homstat) == 0) return(NULL)
            if (!all(colexists(addon, homstat))) {
                homstat = cbind(homstat, lapply_dt(addon, homstat))
            }
            out = merge.repl(homstat, homout[, .(seq, edge.id)], by = "seq")[, pair := x$pair]
            gri = parse.gr2(with(out, ifelse(nzchar(iw) & !is.na(iw), iw, "0:1-1")))
            grj = parse.gr2(with(out, ifelse(nzchar(jw) & !is.na(jw), jw, "0:1-1")))
            out[, atbp := gri %^% "Left:0-0" & grj %^% "Right:0-0"]
            ## thresh 8 levenshtein dist... 32 / 40 bases must match per window
            ## 80% similarity seems to work?
            ## what does it mean if you have 5 pixels of match?
            ## 5 + 40 bases of at least 80% sequence similarity
            ## using a pad of 20 means that we cannot look for stretches smaller than 40 bp...
            ## which is fine... it seems like that is a fine threshold to start at
            ## since we stopped looking at 40 bp...
            ## let's see what this distribution looks like
            out = out[, .(
                hlen = max(ifelse(na2false(r > 0.9), minpx, 0L)),
                hlen_be = max(ifelse(na2false(r > 0.9) & atbp, minpx, 0L))),
                by = .(pair, seq, edge.id)]
            return(out)
        })
    }
    out = rbindlist(mclapply(split(subp, subp$pair), ifun, mc.cores = mc.cores), fill = T)
    et(sprintf("out[, f%s := %s]", id.field, id.field))
    return(out)
}

#' collect junctions from pairs as breakpoints
#'
#' get junctions from gGraph object column in pairs table
#' 
#' @export pairs.collect.junctions
pairs.collect.junctions <- function(pairs, jn.field = "complex", id.field = "pair", mc.cores = 1, mask = '/gpfs/commons/groups/imielinski_lab/DB/Broad/um75-hs37d5.bed.gz', ev.types = c("bfb", "chromoplexy", "chromothripsis", "del", "dm", "dup", "fbi", "pyrgo", "qrdup", "qrdel", "qrp", "rigma", "simple_inv", "simple_invdup", "simple_tra", "tic", "tyfonas", "cpxdm", "tib", "qrppos", "qrpmix", "qrpmin")) {
    paths = unique(subset2(pairs[[jn.field]], file.exists(x)))
    prs = unique(pairs[[id.field]][pairs[[jn.field]] %in% paths])
    mask = rtracklayer::import(mask)
    iter.fun <- function(x, tbl) {
        ent = tbl[get(jn.field) == x]
        ent = ent[!duplicated(ent[[id.field]]),,drop=F]
        .fun <- function(gg) {
            ## dd.ov = gr.sum(gg$edges[type == "ALT"][class %in% c("DEL-like", "DUP-like")]$shadow) %Q% (score > 1)
            gg.alt.edge = gg$edges[type == "ALT"]
            if (length(gg.alt.edge) > 0) {
                tra_like = gg.alt.edge$dt[,class == "TRA-like"]
                gg.shad = gg.alt.edge$shadow
                gg.shad[tra_like] = gg.shad[tra_like] + 1e6
                dd.ov = gr.sum(gg.shad) %Q% (score > 1)
                gg.shad = gg.shad %>% split(.$id)
                gg$edges[type == "ALT"]$mark(overlapped = gg.shad %^% dd.ov)
            }
            gg
        }
        pr = unique(ent[[id.field]])
        ## pth = unique(ent[[jn.field]])
        message("processing ", pr)
        cx = readRDS(x)
        if (!length(cx)) return(NULL)
        cx = .fun(cx)
        cx$edges$mark(jspan = cx$edges$span)
        cx$edges$mark(shadow = grl.string(cx$edges$shadow %>% split(.$id)))
        cx$edges$mark(sv.in.mask = grl.in(cx$edges$grl, mask, logical = FALSE) > 0)
        ## cx$edges[edge.id %in% these_id]$mark(within_node_cluster = TRUE)
        ## these_id = cx$nodes[!is.na(cluster)]$edges$dt$edge.id
        out = copy(gr2df(grl.unlist(cx$edges[type == "ALT"]$grl)))
        if (!is.null(dim(out)) && !dim(out)[1] == 0) {
            set(out, j = "pair", value = pr)
            tmp = cx$.__enclos_env__$private$pedges
            ## tmp = tmp[order(edge.id)][order(abs(sedge.id))]
            ## snode = copy(cx$.__enclos_env__$private$pnodes)
            snode = cx$.__enclos_env__$private$pnodes
            ## this = as.data.frame(tmp)
            DF = map_fus2unfus(ed = tmp, nodes = snode)
            DF$edge.id = tmp$edge.id
            DF$sedge.id = tmp$sedge.id
            df = as.data.frame(lapply(DF, function(x) {
                if (inherits(x, "GRanges")) {
                    gr.string(x)
                }  else if (inherits(x, "GRangesList")) {
                    grl.string(x)
                } else {
                    x
                }
            }))
            seg_cn = df %>% filter(ref == FALSE) %>% {
                data.table(edge.id = .$edge.id,
                           max_scn = pmax(snode[.$from]$cn, snode[.$to]$cn),
                           min_scn = pmin(snode[.$from]$cn, snode[.$to]$cn))
            } %>% distinct(edge.id, .keep_all = TRUE)
            out = dplyr::left_join(out, seg_cn, by = "edge.id") %>% setDT(key = "sedge.id")
            out = gr2df(gr.val(df2gr(out),
                               plyranges::select(cx$nodes$gr, bp_scn = cn), "bp_scn"))
            out = df2gr(out) %>% mutate(bp.in.mask = (.) %^% mask) %>% gr2df
        }
        badcols = which(sapply(out, function(x) inherits(x, c("AsIs", "List", "list"))))
        if (NROW(badcols)) out = out[, -badcols,with=F]
        return(out)
    }
    lst = mclapply(paths, iter.fun, tbl = pairs, mc.cores = mc.cores)
    cx.edt = rbindlist(lst, fill = TRUE)
    if (NROW(cx.edt) == 0) return(cx.edt)
    set(cx.edt, j = "fpair", value = factor(cx.edt[[id.field]], levels = prs))
    cx.edt = cx.edt[, !colnames(cx.edt) == "rowname", with = FALSE]
    cx.edt = merge.repl(cx.edt, unique(cx.edt[, .(pair, edge.id, simple_type = gsub("([A-Z]+)([0-9]+)", "\\1", simple), simple_num = gsub("([A-Z]+)([0-9]+)", "\\2", simple))]), by = c("pair", "edge.id"))
    cx.edt$simple_type = factor(cx.edt$simple_type, levels = c("INV", "INVDUP", "TRA"))
    cx.edt[, simple_type := fct_explicit_na(simple_type, "NA")]
    mod.dt = mltools::one_hot(cx.edt[, .(simple_type)])
    cx.edt = cbind(dplyr::select(cx.edt, -dplyr::matches("^simple_.*$")),
                   dplyr::rename_all(mod.dt, ~paste0("simple", gsub("simple_type", "", tolower(.)))))
    ## cx.mat = as.matrix(mutate_all(replace_na(cx.edt[, ev.types,with = FALSE], 0), as.numeric))
    cx.mat = as.matrix(dplyr::mutate_all(replace_na(dplyr::select(cx.edt, dplyr::one_of(ev.types)), 0), as.numeric))
    cx.mat = cx.mat > 0
    mode(cx.mat) = "integer"
    cx.edt[, unclassified := rowSums(cx.mat) == 0]
    return(withAutoprint(cx.edt, echo = F)$value)
}
## overwritefun("pairs.collect.junctions", "pairs.collect.junctions", "khtools")


#' @export pairs.jabba.opt.report
pairs.jabba.opt.report = function(pairs, jabba.field = "jabba_rds", id.field = "pair", mc.cores = 1) {
    pairs= copy(pairs)
    pairs$opt.report = dig_dir(pairs[[jabba.field]], "opt.report.rds")
    paths = subset2(pairs$opt.report, file.exists(x))
    iter.fun = function(x, tbl) {
        ent = tbl[opt.report == x]
        id = ent[[id.field]]
        jab.path = ent[[jabba.field]]
        out = readRDS(x)
        out[[id.field]] = id
        out = copy(out)
        gg = gG(jabba = jab.path)
        converged = gg$nodes$dt[, sum(width[epgap < 0.1], na.rm = T) / sum(width, na.rm = T)]
        out$converged = converged
        out
    }
    out = rbindlist(mclapply(paths, iter.fun, tbl = pairs, mc.cores = mc.cores), fill = TRUE)
    setkeyv(out, id.field)
}

#' @export pairs.diagnose.jabba
pairs.diagnose.jabba = function(pairs, jabba.field = "jabba_rds", id.field = "pair", mc.cores = 1) {
    pairs= copy(pairs)
    pairs$opt.report = dig_dir(pairs[[jabba.field]], "opt.report.rds")
    paths = subset2(pairs$opt.report, file.exists(x))
    iter.fun = function(x, tbl) {
        ent = tbl[opt.report == x]
        id = ent[[id.field]]
        jab.path = ent[[jabba.field]]
        out = readRDS(x)
        out[[id.field]] = id
        out = copy(out)
        gg = gG(jabba = jab.path)
        kag = gG(jabba = readRDS(dig_dir(jab.path, "karyograph.rds$")))
        kag.rds = dig_dir(jab.path, "karyograph.rds$")
        ppfit.png = normalizePath(dig_dir(jab.path, "karyograph.rds.ppfit.png"))
        gg.raw = gG(jabba = readRDS(dig_dir(jab.path, "jabba.raw.rds")))
        converged = gg$nodes$dt[, sum(width[epgap < 0.1], na.rm = T) / sum(width, na.rm = T)]
        mat1 = as.matrix(factor(kag$nodes$gr$var < 0, levels = c(FALSE, TRUE)) %>% table3) %>% t
        colnames(mat1) = c("posvar", "negvar", "navar")
        top3conv = head(out, 3)[, all(epgap < 0.1)]
        mat2 = t(as.matrix(gg.raw$nodes$dt$cn.fix %>% is.na %>% table))
        colnames(mat2) = c("fixed", "unfixed")
        summary.stat = data.table(conv = converged) %>% cbind(mat1, top3conv = top3conv, mat2)
        summary.stat[[id.field]] = id
        summary.stat = copy(summary.stat)[, ppfit.png := ppfit.png][, kag.rds := kag.rds]
        summary.stat
    }
    out = rbindlist(mclapply(paths, iter.fun, tbl = pairs, mc.cores = mc.cores), fill = TRUE)
    setkeyv(out, id.field)
}

#' @export pairs.get.jabba.pp
pairs.get.jabba.pp = function(pairs, jabba.field = "jabba_rds", id.field = "pair", mc.cores = 1) {
    paths = subset2(pairs[[jabba.field]], file.exists(x))
    iter.fun = function(x, tbl) {
        ent = tbl[get(jabba.field) == x]
        purity = readRDS(ent[[jabba.field]])$purity
        ploidy = gG(jabba = ent[[jabba.field]])$nodes$dt[, sum(width * cn, na.rm = T) / sum(width, na.rm = T)]
        out = setnames(data.table(id.field = ent[[id.field]], purity, ploidy), "id.field", id.field)
        out
    }
    setkeyv(rbindlist(mclapply(paths, iter.fun, tbl = pairs, mc.cores = mc.cores), fill = TRUE), id.field)
}


#' force functions to load from all libraries
#'
#' A function to evaluate all functions in all loaded
#' and attached packages to prevent errors upon reinstallation of
#' a package
#'
#' @param envir environment to evaluate in (probably doesn't matter)
#' @export
forceload = function(envir = globalenv()) {
    force = function(x) x
    pkgs = gsub("package:", "", grep('package:', search(), value = TRUE))
    pkgs = c(pkgs, names(sessionInfo()$loadedOnly))
    for (pkg in pkgs) {
        tryCatch( {
            message("force loading ", pkg)
            invisible(eval(as.list((asNamespace(pkg))), envir = envir))
            invisible(eval(eapply(asNamespace(pkg), force, all.names = TRUE), envir = envir))
        }, error = function(e) message("could not force load ", pkg))
    }
}


## forceload = function(envir = globalenv()) {
##     pkgs = gsub("package:", "", grep('package:', search(), value = TRUE))
##     pkgs = c(pkgs, names(sessionInfo()$loadedOnly))
##     for (pkg in pkgs) {
##         tryCatch( {
##             message("force loading ", pkg)
##             invisible(eval(as.list((asNamespace(pkg))), envir = envir))
##             invisible(eval(eapply(asNamespace(pkg), base::force, all.names = TRUE), envir = envir))
##         }, error = function(e) message("could not force load ", pkg))
##     }
## }

#' force with a tryCatch
#'
#' evaluate with tryCatch
#'
#' @param x an object
#' @export
force2 = function(x)
    tryCatch(x, error = function(e) NULL)


#' force functions to load
#'
#' A function to evaluate all functions in a single environment
#'
#' @param envir environment to grab all functions from
#' @param evalenvir environment to evaluate in (probably doesn't matter)
#' @export
forcefun = function(envir = globalenv(), evalenvir = globalenv()) {
    funnames = as.character(lsf.str(envir = envir))
    for (fun in funnames) {
        tryCatch( {
            message("force loading ", fun)
            eval(force(get(fun, envir = envir)), envir = evalenvir)
        }, error = function(e) message("could not force load ", fun))
    }
}


#' force objects (including functions) to evaluate from environment
#'
#' A function to evaluate all objects in an environment
#' to be used within a function or some other environment
#'
#' @param invisible logical whether to print the objects in the environmnet or not
#' @param envir environment with objects to evaluate
#' @param evalenvir environment to evaluate in (probably doesn't matter)
#'
#' @export
forceall = function(invisible = TRUE, envir = parent.frame(), evalenvir = parent.frame()) {
    if (invisible)  {
        invisible(eval(as.list(envir), envir = evalenvir))
        invisible(eval(eapply(envir, force2, all.names = TRUE), envir = evalenvir))
    } else {
        print(eval(as.list(envir), envir = evalenvir))
        print(eval(eapply(envir, force2, all.names = TRUE), envir = evalenvir))
    }
}


#' version of utils::assignInNamespace
#'
#' can be used to reassign function into a namespace
#' USE WITH CAUTION
#'
#' @export
asn2 = function (x, value, ns, pos = -1, envir = as.environment(pos)) {
    nf <- sys.nframe()
    if (missing(ns)) {
        nm <- attr(envir, "name", exact = TRUE)
        if (is.null(nm) || substr(nm, 1L, 8L) != "package:")
            stop("environment specified is not a package")
        ns <- asNamespace(substring(nm, 9L))
    }
    else ns <- asNamespace(ns)
    ns_name <- getNamespaceName(ns)
    ## if (nf > 1L) {
    ##     if (ns_name %in% tools:::.get_standard_package_names()$base)
    ##         stop("locked binding of ", sQuote(x), " cannot be changed",
    ##             domain = NA)
    ## }
    if (bindingIsLocked(x, ns)) {
        in_load <- Sys.getenv("_R_NS_LOAD_")
        if (nzchar(in_load)) {
            if (in_load != ns_name) {
                msg <- gettextf("changing locked binding for %s in %s whilst loading %s",
                  sQuote(x), sQuote(ns_name), sQuote(in_load))
                if (!in_load %in% c("Matrix", "SparseM"))
                  warning(msg, call. = FALSE, domain = NA, immediate. = TRUE)
            }
        }
        else if (nzchar(Sys.getenv("_R_WARN_ON_LOCKED_BINDINGS_"))) {
            warning(gettextf("changing locked binding for %s in %s",
                sQuote(x), sQuote(ns_name)), call. = FALSE, domain = NA,
                immediate. = TRUE)
        }
        unlockBinding(x, ns)
        assign(x, value, envir = ns, inherits = FALSE)
        w <- options("warn")
        on.exit(options(w))
        options(warn = -1)
        lockBinding(x, ns)
    }
    else {
        assign(x, value, envir = ns, inherits = FALSE)
    }
    if (!isBaseNamespace(ns)) {
        S3 <- .getNamespaceInfo(ns, "S3methods")
        if (!length(S3))
            return(invisible(NULL))
        S3names <- S3[, 3L]
        if (x %in% S3names) {
            i <- match(x, S3names)
            genfun <- get(S3[i, 1L], mode = "function", envir = parent.frame())
            if (.isMethodsDispatchOn() && methods::is(genfun,
                "genericFunction"))
                genfun <- methods::slot(genfun, "default")@methods$ANY
            defenv <- if (typeof(genfun) == "closure")
                environment(genfun)
            else .BaseNamespaceEnv
            S3Table <- get(".__S3MethodsTable__.", envir = defenv)
            remappedName <- paste(S3[i, 1L], S3[i, 2L], sep = ".")
            if (exists(remappedName, envir = S3Table, inherits = FALSE))
                assign(remappedName, value, S3Table)
        }
    }
    invisible(NULL)
}



## .onLoad = function(libname, pkgname) {
##     message("khtools forcing functions to evaluate on load...")
##     forceall(T, envir = asNamespace("khtools"), evalenvir = globalenv())
## }

## .onAttach = function(libname, pkgname) {
##     message("khtools forcing functions to evaluate on attach...")
##     forceall(T, envir = asNamespace("khtools"), evalenvir = globalenv())
## }
kevinmhadi/khtools documentation built on Jan. 16, 2025, 4:18 p.m.