## parse UCSC track lines
## gokey grouped format
#
# RNA-based Data
# Start-seq
# track type=bigWig name="UL3 5p F" description="UL3 Start 5p F" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/UL3_Veh_Start_5p_forward.bw visibility=full alwaysZero=on maxHeightPixels=30 color=254,39,18
# track type=bigWig name="UL3 5p R" description="UL3 Start 5p R" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/UL3_Veh_Start_5p_reverse_flip.bw visibility=full alwaysZero=on maxHeightPixels=30 color=254,39,18
# track type=bigWig name="p4w4 5p F" description="p4w4 Start 5p F" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/p4w4_Veh_Start_5p_forward.bw visibility=full alwaysZero=on maxHeightPixels=30 color=254,39,18
# track type=bigWig name="p4w4 5p R" description="p4w4 Start 5p R" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/p4w4_Veh_Start_5p_reverse_flip.bw visibility=full alwaysZero=on maxHeightPixels=30 color=254,39,18
# RNA-seq
# track type=bigWig name="UL3 RNA F" description="UL3 RNA F" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/UL3_Veh_RNAseq.plus.mergeNorm.bw visibility=full alwaysZero=on maxHeightPixels=30 color=252,96,10
# track type=bigWig name="UL3 RNA R" description="UL3 RNA R" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/UL3_Veh_RNAseq.minus.mergeNorm_flip.bw visibility=full alwaysZero=on maxHeightPixels=30 color=252,96,10
#
# Chromatin Data
# ATAC data nucleosome deprived region
# track type=bigWig name="ATAC UL3 NDR" description="ATAC UL3 NDR" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/ATACv3_UL3_NDRnormAVE.bw visibility=full alwaysZero=on maxHeightPixels=30 color=128,0,0
# track type=bigWig name="ATAC p2w5 NDR" description="ATAC p2w5 NDR" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/ATACv3_p2w5_NDRnormAVE.bw visibility=full alwaysZero=on maxHeightPixels=30 color=128,0,0
# track type=bigWig name="ATAC p4w4 NDR" description="ATAC p4w4 NDR" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/ATACv3_p4w4_NDRnormAVE.bw visibility=full alwaysZero=on maxHeightPixels=30 color=128,0,0
# ATAC data full coverage
# track type=bigWig name="ATAC UL3" description="ATAC UL3" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/ATACv3_UL3_covNormAVE.bw visibility=full alwaysZero=on maxHeightPixels=30 color=128,128,128
# track type=bigWig name="ATAC p2w5" description="ATAC p2w5" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/ATACv3_p2w5_covNormAVE.bw visibility=full alwaysZero=on maxHeightPixels=30 color=128,128,128
# track type=bigWig name="ATAC p4w4" description="ATAC p4w4" db=hg19 bigDataUrl=https://snpinfo.niehs.nih.gov/ucscview/gokeyng/ATACv3_p4w4_covNormAVE.bw visibility=full alwaysZero=on maxHeightPixels=30 color=128,128,128
#
## Overlay track example
## ---------------------
#
# supergroup
# group
# track name="track name setA F"
# track name="track name setA R"
# track name="track name setB F"
# track name="track name setB R"
#
## Composite track example
## -----------------------
# supergroup
# group
# track type=bigWig name="track nameA"
# track type=bigWig name="track nameB"
# track type=bigWig name="track nameC"
#' Parse UCSC tracks that use the Gokey format
#'
#' Parse UCSC tracks that use the Gokey format
#'
#' Given a text file, or lines from a text file, representing
#' the `Gokey format`, this function will parse the track
#' lines into groups, and return a text string usable in
#' a UCSC genome browser track hub.
#'
#' In general, the intention is to convert a set of UCSC
#' track lines to a track hub format, where common track options
#' are converted to relevant track hub configuration lines.
#'
#' Tracks are generally divided into two types of groupings:
#'
#' ## multiWig Overlay Tracks
#'
#' Track name that matches `overlay_grep` regular expression pattern
#' are configured as `multiWig` overlay tracks. This configuration
#' uses the UCSC multiWig format as described here
#' https://genome.ucsc.edu/goldenPath/help/trackDb/trackDbHub.html#aggregate
#'
#' * Tracks must be named such that after `overlay_grep` is removed, the
#' resulting string defines a set of tracks. The tracks that share this
#' string are assigned to the same multiWig track.
#' * To customize the default aggregate method, supply for example
#' `aggregate="none"` in the `...` arguments to this function, which
#' will cause multiWig tracks to use `"none"` when tracks are displayed.
#' * Each parent track is configured as a `superTrack`, which contains
#' one or more multiWig tracks beneath it.
#' * It is recommended to have one heading `"ChIP-seq"` with sub-heading
#' `"coverage"`, then each track is grouped by name after removing the
#' pattern matched by `overlay_grep`. This configuration will create
#' one pulldown entry in the track hub configuration representing the
#' superTrack. The superTrack will contain one multiWig track for each unique
#' name (after removing `overlap_grep`), each of which contains all
#' tracks that match that name.
#' * Each track group is assigned priority in order of each unique track
#' group defined in the track config lines.
#' * Individual tracks are configured as child tracks to the track groups.
#'
#' ## Composite View Tracks
#'
#' All other tracks are grouped as composite tracks, specifically
#' using composite track view, as described here
#' https://genome.ucsc.edu/goldenPath/help/trackDb/trackDbHub.html#compositeTrack
#'
#' * More specifically, the parent track is configured as a `compositeTrack`,
#' including views as `"view Views COV=Coverage JUNC=Junctions PEAK=Peaks"`
#' by default.
#' * An intermediate track is created to represent each view, by default
#' `"JUNC"` however this value is not visible to users unless there are
#' multiple different view values.
#' * Each track is configured as a child to the relevant view track.
#' Track priority is assigned in the order it appears in the track config
#' lines. The priority allows peak tracks to be ordered directly after
#' or before the associated coverage track.
#'
#' ## Top-Level Parent Tracks
#'
#' Note that in both scenarios above, there is one top-level parent
#' track that contains a subset of tracks. The top-level grouping can
#' be defined in the track lines by supplying two header lines immediately
#' before each top-level grouping of tracks, referred to as `header1`
#' and `header2` for clarity.
#'
#' The first header line `header1` is used as the top-level track.
#' For composite tracks, one composite track view is created underneath
#' the top-level track for each secondary header `header2`.
#' Composite tracks can associate two views to the same parent by
#' using only second header line `header2` for subsequent track groups.
#' In this way, composite views can effectively contain a subgroup of
#' tracks within each top-level header `header1`.
#'
#' For multiWig overlay tracks, each overlay track is grouped into
#' the top-level header `header1` track. However, there is no additional
#' subgroup available.
#'
#' An example for two composite tracks, each with one view.
#'
#' ```
#' headingA1
#' headingA2
#' track name=trackname1
#' track name=trackname2
#'
#' headingB1
#' headingB2
#' track name=trackname5
#' track name=trackname6
#' ```
#'
#' In this case, there will be two top-level parent tracks, labeled
#' `"headingA1"` and `"headingB1"`, which appear inside the track hub.
#' Within each track, there will be one composite view:
#' for `headingA1` there is one internal track `headingA2`; and
#' for `headingB1` there is one internal track `headingB2`.
#'
#'
#' @return by default a character string suitable to `cat()`
#' directly into a text file, when `output_format="text"`.
#' When `output_format="list"` it returns a
#' list of `glue` objects, which can be concatenated into
#' one character string with `Reduce("+", trackline_list)`.
#'
#' @family jam ucsc browser functions
#'
#' @param track_lines `character` vector containing lines read from
#' a track file, or valid path or connection to a track file.
#' @param overlay_grep `character` vector containing valid regular
#' expression patterns used to recognize when a track should
#' be considered an overlay coverage track. For example
#' `track name="trackA F"` and `track name="trackA R"` would
#' be recognized as forward and reverse strand for a track
#' named `"trackA"`. Overlay tracks are handled using the UCSC
#' `"multiWig"` approach, and not the composite track approach.
#' To disable overlay_grep, use `overlay_grep="^$"`. To enable
#' overlay_grep for all tracks, use `overlay_grep="$"`.
#' @param priority `integer` value indicating the priority to
#' start when assigning priority to each track.
#' @param output_format `character` string indicating the
#' output format, where `"text"` will return one long character
#' string, and `"list"` will return a `list` with one track
#' per list element with class `"glue","character"`.
#' @param debug `character` indicating type of debug output:
#' * `df`: returns the intermediate `track_df` data.frame;
#' * `pri`: prints priority during track parsing;
#' * `none`: does no debug, the default.
#' @param multiwig_concat_header `logical` indicating whether
#' multiWig parent tracks should be named by concatenating
#' `header1` and `header2` values.
#' @param group_header_delim `character` string used as delimiter
#' between track group and track header label, where for example
#' `"headingA1"` and `"headingA2"` would be combined with
#' delimiter `": "` to form `"headingA1: headingA2"` as the
#' visible label for each group of tracks.
#' @param verbose `logical` indicating whether to print verbose
#' output during processing.
#' @param ... additional arguments are treated as a named list
#' of track parameters that override existing parameter values.
#' For example `scoreFilter=1` will override the default
#' for bigBed tracks `scoreFilter=5`.
#'
#' @examples
#' # example of two composite track top-level parent tracks
#' track_lines_text <- c("headingA1
#' headingA2
#' track name=trackname1 shortLabel=trackname1 bigDataUrl=some_url
#' track name=trackname2 shortLabel=trackname2 bigDataUrl=some_url
#' track name=trackname3 shortLabel=trackname3 bigDataUrl=some_url
#' track name=trackname4 shortLabel=trackname4 bigDataUrl=some_url
#'
#' headingB1
#' headingB2
#' track name=trackname5 shortLabel=trackname5 bigDataUrl=some_url
#' track name=trackname6 shortLabel=trackname6 bigDataUrl=some_url
#' track name=trackname7 shortLabel=trackname7 bigDataUrl=some_url
#' track name=trackname8 shortLabel=trackname8 bigDataUrl=some_url
#' ")
#' track_lines <- unlist(strsplit(track_lines_text, "\n"));
#' cat(parse_ucsc_gokey(track_lines))
#' track_df <- parse_ucsc_gokey(track_lines, debug="df")
#'
#' # example of two multiWig track top-level parent tracks
#' # each of which contain two tracks with positive/negative coverage
#' track_lines_text2 <- c("headingA1
#' headingA2
#' track name=trackname1_pos shortLabel=trackname1_pos bigDataUrl=some_url
#' track name=trackname1_neg shortLabel=trackname1_neg bigDataUrl=some_url
#' track name=trackname2_pos shortLabel=trackname2_pos bigDataUrl=some_url
#' track name=trackname2_neg shortLabel=trackname2_neg bigDataUrl=some_url
#'
#' headingB1
#' headingB2
#' track name=trackname3_pos shortLabel=trackname3_pos bigDataUrl=some_url
#' track name=trackname3_neg shortLabel=trackname3_neg bigDataUrl=some_url
#' track name=trackname4_pos shortLabel=trackname4_pos bigDataUrl=some_url
#' track name=trackname4_neg shortLabel=trackname4_neg bigDataUrl=some_url
#' ")
#' track_lines2 <- unlist(strsplit(track_lines_text2, "\n"));
#' track_text2 <- parse_ucsc_gokey(track_lines2);
#' cat(track_text2);
#'
#' # the final step is to save into a text file
#' if (FALSE) {
#' cat(track_text2, file="trackDb_platjam.txt")
#' }
#'
#' @export
parse_ucsc_gokey <- function
(track_lines,
overlay_grep=c("[ -._](plus|minus|pos|neg)($|[ -._])"),
priority=5000,
output_format=c("text", "list"),
debug=c("none"),
multiwig_concat_header=TRUE,
group_header_delim=": ",
default_env=new.env(),
verbose=FALSE,
...)
{
#
output_format <- match.arg(output_format);
# optional dots list
dotlist <- list(...);
# check if track_lines is a file
if (length(track_lines) == 1) {
if (file.exists(track_lines)) {
track_lines <- readLines(track_lines);
} else {
stop("track_lines should be a character vector of track lines, or a single entry filename.");
}
}
# detect inline newline characters, and split into lines
if (any(grepl("[\r]", track_lines))) {
# simply remove Windows \r (since it usually occurs as "\r\n"
track_lines <- gsub("\r", "", track_lines);
}
if (any(grepl("[\n]", track_lines))) {
track_lines <- unlist(strsplit(track_lines, "[\n]+"));
}
# Skip lines with comment
drop_lines <- grepl("^[ ]*[#]", track_lines);
if (any(drop_lines)) {
if (verbose) {
jamba::printDebug("parse_ucsc_gokey(): ",
"Dropped ", jamba::formatInt(sum(drop_lines)),
" comment lines.");
}
track_lines <- track_lines[!drop_lines]
}
# Get rid of any non-ASCII characters (for now)
track_lines <- gsub("[^-_ a-zA-Z0-9:/=\"'.,]", "", track_lines);
# remove empty track_lines
track_lines <- track_lines[nchar(enc2utf8(track_lines)) > 0];
## determine non-track lines
nontrack <- which(!grepl("^track", track_lines));
## determine supergroups
supergroup_lines <- nontrack[diff(c(nontrack, Inf)) == 1];
supergroup_labels <- jamba::makeNames(track_lines[supergroup_lines],
suffix="_set");
group_lines <- setdiff(nontrack, supergroup_lines);
## split tracks by supergroup
track_seq <- setdiff(seq_along(track_lines),
supergroup_lines);
track_supergroup <- split(track_seq,
cut(track_seq,
breaks=c(supergroup_lines, Inf),
labels=supergroup_labels))
track_supergroup
track_supergroup_dfs <- lapply(jamba::nameVectorN(track_supergroup), function(isupergroup_label){
i_lines <- track_supergroup[[isupergroup_label]];
igroup_lines <- intersect(group_lines, i_lines);
igroup_labels <- jamba::makeNames(track_lines[igroup_lines],
suffix="_set");
itrack_seq <- setdiff(i_lines, igroup_lines)
itrack_group <- split(itrack_seq,
cut(itrack_seq,
breaks=c(igroup_lines, Inf),
labels=igroup_labels));
data.frame(group=isupergroup_label,
header=rep(names(itrack_group), lengths(itrack_group)),
tracknum=unlist(itrack_group))
})
track_df <- jamba::rbindList(track_supergroup_dfs)
## convert each track line to data.frame
track_dfl <- lapply(track_lines[track_df$tracknum], function(i){
j <- gsub(" ([a-zA-Z]+)=", "!!\\1!", i);
k <- tail(jamba::rbindList(strsplit(gsub('"', '', strsplit(j, "!!")[[1]]), "!")), -1);
as.data.frame(as.list(jamba::nameVector(k[,2:1])))
});
## Parse track names, determine overlay tracks
track_names <- sapply(track_dfl, function(i){
i$name
});
track_names_dupe <- jamba::tcount(track_names, minCount=2);
if (length(track_names_dupe) > 0) {
stop(paste0(
"There are duplicated track names: ",
jamba::cPaste(names(track_names_dupe)))
);
}
names(track_dfl) <- track_names;
track_df$name <- track_names;
track_df$is_overlay <- (track_names %in% jamba::provigrep(overlay_grep, track_names) &
!grepl("[.](bigBed|bb|bed)",
ignore.case=TRUE,
track_lines[track_df$tracknum]));
## apply parent and header values
track_df$superTrack <- ifelse(
track_df$is_overlay &
!track_df$group == track_df$header &
multiwig_concat_header,
jamba::pasteByRow(track_df[,c("group","header")],
sep=group_header_delim),
track_df$group);
track_df$parent <- ifelse(track_df$is_overlay,
gsub(overlay_grep, "", track_df$name),
track_df$header);
## If the superTrack and parent have the same name, append " super" to the superTrack
track_df$superTrack_2 <- ifelse(track_df$superTrack == track_df$parent,
paste0(track_df$superTrack, " super"),
track_df$superTrack);
## If the superTrack and parent have the same name, append " set" to the parent
track_df$parent <- ifelse(track_df$superTrack == track_df$parent,
paste0(track_df$parent, " set"),
track_df$parent);
## add track url and isbed flag
if ("df" %in% debug) {
jamba::printDebug("jamba::sdim(track_dfl):");
print(jamba::sdim(track_dfl));
print(head(track_dfl, 2));
jamba::printDebug("head(track_df$name, 4):");
print(head(track_df$name, 4));
}
track_df$url <- sapply(track_dfl[track_df$name], function(idf){
idf$bigDataUrl
});
track_df$isbed <- grepl("[.](bed|bb|bigbed)$",
ignore.case=TRUE,
track_df$url);
show_env <- function(env){
ls_names <- ls(envir=env);
ls_values <- lapply(jamba::nameVector(ls_names), function(i){
get(i, envir=env)
});
ls_df <- data.frame(
name=format(justify="left", names(ls_values)),
value=format(justify="left", unlist(ls_values)));
print(ls_df);
invisible(ls_df);
}
## Get track defaults and templates
default_env <- get_track_defaults(default_env);
## overlay tracks
trackline_list <- list();
track_df$superTrack <- factor(track_df$superTrack,
levels=unique(track_df$superTrack));
track_df$parent <- factor(track_df$parent,
levels=unique(track_df$parent));
## Optional debug, return the data.frame
if ("df" %in% debug) {
return(track_df);
}
track_dfhs <- split(track_df, track_df$superTrack);
pri_env <- new.env();
assign("priority",
value=priority,
envir=pri_env);
# iterate each track set
for (hname in names(track_dfhs)) {
priority <- get("priority", envir=pri_env);
priority <- priority + 100;
assign("priority",
value=priority,
envir=pri_env);
if ("pri" %in% debug) jamba::printDebug("100 priority:", priority)
track_dfh <- track_dfhs[[hname]];
track_env <- new.env();
if (any(track_dfh$is_overlay)) {
if (verbose) {
jamba::printDebug("parse_ucsc_gokey(): ",
"recognized overlay bigWig tracks");
}
default_values <- default_env$overlay_defaults;
tmpl_header <- default_env$overlay_header;
tmpl_parent <- default_env$overlay_parent;
tmpl_track <- default_env$overlay_track;
} else if (any(track_dfh$isbed)) {
if (verbose) {
jamba::printDebug("parse_ucsc_gokey(): ",
"recognized bigBed tracks");
}
default_values <- default_env$composite_bed_defaults;
tmpl_header <- default_env$composite_bed_header;
tmpl_parent <- default_env$composite_bed_parent;
tmpl_track <- default_env$composite_bed_track;
} else {
if (verbose) {
jamba::printDebug("parse_ucsc_gokey(): ",
"recognized composite bigWig tracks");
}
default_values <- default_env$composite_defaults;
tmpl_header <- default_env$composite_header;
tmpl_parent <- default_env$composite_parent;
tmpl_track <- default_env$composite_track;
}
# define overall default track parameters
assign_track_defaults(env=track_env,
defaults=default_values);
# override with shortLabel and longLabel
assign_track_defaults(env=track_env,
defaults=list(
shortLabel=hname,
longLabel=hname));
# override with other values provided in the track lines
assign_track_defaults(env=track_env,
defaults=track_dfh);
# optionally override with dotlist values from ...
if (length(dotlist) > 0) {
assign_track_defaults(env=track_env,
defaults=dotlist);
}
if (verbose) {
jamba::printDebug("parse_ucsc_gokey(): ",
"show_env(superTrack):");
show_env(track_env);
}
new_trackline <- glue::glue(tmpl_header,
.trim=FALSE,
.envir=track_env);
trackline_list <- c(trackline_list,
list(new_trackline)
)
###############################################
## split track_dfh by parent (+/- strand)
track_dfhps <- split(track_dfh, track_dfh$parent);
for (pname in names(track_dfhps)) {
priority <- priority + 10;
if ("pri" %in% debug) jamba::printDebug("10 priority:", priority)
track_dfhp <- track_dfhps[[pname]];
track_env <- new.env();
assign_track_defaults(track_env,
defaults=default_values);
assign_track_defaults(track_env,
defaults=track_dfhp);
assign_track_defaults(track_env,
defaults=list(
shortLabel=pname,
longLabel=pname));
if (length(dotlist) > 0) {
assign_track_defaults(env=track_env,
defaults=dotlist);
}
if (verbose) {
jamba::printDebug("parse_ucsc_gokey(): ",
"show_env(parent):");
show_env(track_env);
}
new_trackline <- glue::glue(tmpl_parent,
.trim=FALSE,
.envir=track_env);
trackline_list <- c(trackline_list,
list(new_trackline)
)
###############################################
## split track_dfhp by parent (+/- strand)
for (irow in seq_len(nrow(track_dfhp))) {
priority <- priority + 2;
if ("pri" %in% debug) jamba::printDebug("2 priority:", priority)
track_dfhpt <- track_dfhp[irow,,drop=FALSE];
track <- track_dfhpt$name;
track_env <- new.env();
assign_track_defaults(track_env,
defaults=default_values);
assign_track_defaults(track_env,
defaults=track_dfhpt);
assign_track_defaults(track_env,
defaults=track_dfl[[track]]);
assign_track_defaults(track_env,
defaults=list(
track=track,
shortLabel=track,
longLabel=track));
if (length(dotlist) > 0) {
assign_track_defaults(env=track_env,
defaults=dotlist);
}
if (verbose) {
jamba::printDebug("parse_ucsc_gokey(): ",
"show_env(track):");
show_env(track_env);
}
new_trackline <- glue::glue(tmpl_track,
.trim=FALSE,
.envir=track_env);
trackline_list <- c(trackline_list,
list(new_trackline)
)
}
priority <- floor((priority + 10) / 10) * 10;
if ("pri" %in% debug) jamba::printDebug("floor 10 priority:", priority)
assign("priority",
value=priority,
envir=pri_env);
}
priority <- get("priority", envir=pri_env);
if ("pri" %in% debug) jamba::printDebug("floor 100 priority (before):", priority)
priority <- floor((priority + 100) / 100) * 100;
if ("pri" %in% debug) jamba::printDebug("floor 100 priority (after):", priority)
assign("priority",
value=priority,
envir=pri_env);
}
if ("text" %in% output_format) {
trackline_list <- do.call(paste, trackline_list);
}
return(trackline_list);
}
#' Make valid UCSC track name from character string
#'
#' Make valid UCSC track name from character string
#'
#' This function takes a character vector, and removes characters
#' which are considered invalid for a UCSC track name: whitespace
#' and non-ASCII characters.
#'
#' @return character vector modified to remove invalid characters.
#'
#' @family jam ucsc browser functions
#'
#' @param x character vector.
#'
#' @export
make_ucsc_trackname <- function
(x)
{
gsub("^[_]+|[_]+$", "",
gsub("[^a-zA-Z0-9]+", "_",
x));
}
#' Assign default UCSC track values to environment
#'
#' Assign default UCSC track values to environment
#'
#' This function assigns values by name, to the specified
#' environment. The values can be supplied as a list, or
#' data.frame.
#'
#' When `singlet_only=TRUE`, each named value must have only
#' one unique value, otherwise it is not assigned to the environment.
#'
#' The `track_types` argument is used to help handle track names
#' by removing invalid track characters, for example for `"track"`,
#' `"superTrack"`, and `"parent"`, these must be modified consistently
#' for the track hierarchy to remain valid.
#'
#' @return `environment`` is returned invisibly.
#'
#' @family jam ucsc browser functions
#'
#' @param env `environment` in which the default values will be
#' assigned. If the environment already has these values assigned,
#' those values will be replaced with values in `defaults`.
#' @param defaults `list` or `data.frame` of named values,
#' whose names are used for assignment in the environment `env`.
#' @param singlet_only logical indicating whether to perform assignment
#' only when there is one unique value to assign. Useful when passing
#' `defaults` as a `data.frame`, when `singlet_only=TRUE` it will
#' only assign columns containing one unique value.
#' @param track_types character vector containing regular expression
#' patterns, used to recognize track name fields. The values for
#' track name fields are passed to `make_ucsc_trackname()` which
#' replaces invalid characters with underscore.
#' @param ... additional arguments are passed to `base::assign()`.
#'
#' @export
assign_track_defaults <- function
(env=new.env(),
defaults,
singlet_only=TRUE,
track_types=c("track", "superTrack", "parent"),
type_mapping=c(description=c("longLabel")),
...)
{
if (!is.environment(env)) {
stop("env must be an environment.");
}
for (n in names(defaults)) {
vals <- unique(as.character(defaults[[n]]));
if (length(vals) > 1) {
if (singlet_only) {
next;
}
vals <- jamba::cPaste(vals);
}
if (length(jamba::provigrep(track_types, n)) > 0) {
vals <- make_ucsc_trackname(vals);
}
base::assign(x=n,
value=vals,
envir=env,
...);
if (n %in% names(type_mapping)) {
for (n1 in type_mapping[[n]]) {
base::assign(x=n1,
value=vals,
envir=env,
...);
}
}
}
invisible(env);
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.