# Misc. initializers ------------------------------------------------------
#' Loads biocLite() from bioconductor.org
#'
#' @return NULL Called for side effect
#' @export
#'
sourcebioc <- function () {
source("https://bioconductor.org/biocLite.R")
}
mygetmousemart <- function() {
mart <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
dataset="mmusculus_gene_ensembl",
host="www.ensembl.org")
return(mart)
}
installed_date <- function (lib_index = 1L, mtime_first = FALSE) {
if (Sys.info()["sysname"] == "Darwin" & !mtime_first) {
tibble(package = list.files(.libPaths()[lib_index], full.names = TRUE)) %>%
mutate(creation_date =
lubridate::mdy_hms(
system(
str_c(c(
"echo",
package,
"| xargs -n1 GetFileInfo -d"
), collapse = " "),
intern = TRUE))) %>%
mutate(across(package, basename)) %>%
arrange(desc(creation_date))
} else {
tibble(package = list.files(.libPaths()[lib_index], full.names = TRUE)) %>%
mutate(file.info(package)) %>%
dplyr::select(package, mtime) %>%
mutate(across(package, basename)) %>%
arrange(desc(mtime))
}
}
# String utilities --------------------------------------------------------
#' Convert Zeitgeber Time Points to Character Strings with Custom Prefix
#'
#' Translates numeric Zeitgeber time points into character strings formatted as
#' "{prefix}<time>_<replicate>".
#'
#' @param time_points A numeric vector of Zeitgeber time points.
#' @param prefix A character string to prefix each time point. Defaults to "ZT".
#'
#' @return A character vector with formatted Zeitgeber strings.
#'
#' @examples
#' ztct_string(c(0, 0, 2, 2, 4, 4))
#' # Returns: "ZT0_1" "ZT0_2" "ZT2_1" "ZT2_2" "ZT4_1" "ZT4_2"
#'
#' ztct_string(c(0, 0, 2, 2, 4, 4), prefix = "CT")
#' # Returns: "CT0_1" "CT0_2" "CT2_1" "CT2_2" "CT4_1" "CT4_2"
#'
#' @export
ztct_string <- function(time_points, prefix = "ZT") {
paste0(
prefix, time_points, "_",
stats::ave(time_points, time_points, FUN = seq_along)
)
}
returngoodmgi <- function(mgis) {
## assumes a vector, returns the "best" mgi symbol
good <- grep("[0-9]Rik$", mgis, invert=TRUE)
if (length(good) > 0) {
return(mgis[good[1]])
} else {
return(mgis[1])
}
}
weedriken <- function(mgi) {
riks <- grepl("\\w+Rik", mgi)
if (all(riks))
## return last one if only rikens
return(mgi[length(mgi)])
else {
## return last non-riken
nonriks <- mgi[!riks]
return(nonriks[length(nonriks)])
}
}
weedmgi <- function (mgi, reference_mgi="xxxxxx") {
## if looking to choose a symbol from alias, first check whether a
## symbol is equal to a reference symbol
if (any(mgi == reference_mgi))
return(mgi[mgi == reference_mgi])
## Function to weed out RIKEN and such mgi symbols, if possible, and to keep
## only one mgi symbol in the end
## Start by sorting alphabetically, will return first hit (below)
mgi <- sort(mgi)
## find out problematic mgi symbols
riks <- grepl("\\w+Rik$", mgi)
gms <- grepl("^Gm\\d+$", mgi)
locs <- grepl("^LOC\\d+$", mgi)
## handle case when only problematic symbols are present
if (all(riks | gms | locs)) {
## prefer returning Gm symbols
if (any(gms))
return(mgi[which(gms)[1]])
else
return(mgi[1])
}
## otherwise, let's eliminate problematic symbols
mgi <- mgi[!(riks | gms | locs)]
return(mgi[1])
}
weed_refseq <- function (refseq_ids) {
if (any(str_detect(refseq_ids, "^NM")))
refseq_ids <- refseq_ids[str_detect(refseq_ids, "^NM")]
if (any(str_detect(refseq_ids, "^NR")))
refseq_ids <- refseq_ids[str_detect(refseq_ids, "^NR")]
if (any(str_detect(refseq_ids, "^XM")))
refseq_ids <- refseq_ids[str_detect(refseq_ids, "^XM")]
if (any(str_detect(refseq_ids, "^XR")))
refseq_ids <- refseq_ids[str_detect(refseq_ids, "^XR")]
## choose only one
refseq_ids <- refseq_ids[which.min(nchar(refseq_ids))]
refseq_ids
}
strrev <- function(x)
sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
#' Converts parameters to a string of par__value pairs
#'
#' @param ...
#'
#' @return string of par__value pairs separated by ___
#' @export
#'
#' @examples
#' a <- 1
#' b <- 2
#' var_value_string(a, b)
var_value_string <- function (...) {
dots <- substitute(list(...))[-1]
varnames <- sapply(dots, deparse)
values <- mget(varnames, inherits=TRUE)
paste(varnames, values, sep="__", collapse="___")
}
# Misc. utilities ---------------------------------------------------------
#' Round to nearest integer multiple of n
#'
#' @param x A vector
#' @param n A number
#'
#' @return Rounded vector
#' @export
#'
#' @examples
#' roundn(runif(10)*10, 2)
roundn <- function (x, n=1) {
round(x/n)*n
}
#' Print the fraction of TRUEs in a vector of logicals
#'
#' @param vec Vector of logicals
#'
#' @return Fraction of TRUE values in vec
#' @export
#'
#' @examples
#' set.seed(123)
#' myfrac(rnorm(10) > 0)
#' myfrac(TRUE)
#' myfrac(FALSE)
myfrac <- function (vec) {
tally <- table(vec)
if (length(tally) == 2L)
return(tally["TRUE"]/sum(tally))
if (names(tally) == "FALSE")
return(stats::setNames(0, "TRUE"))
stats::setNames(1, "TRUE")
}
# curve of fraction true
myfraccurve <- function(pvec, limits)
sapply(limits, function (x) myfrac(pvec < x))
val.field <- function(df, field) {
vals <- df[, field]
return(vals[!is.na(vals)])
}
#' Make a vector of stretches of indices of contigs satisfying a predicate
#' vector
#'
#' Useful together with split(), see example
#'
#' @param vec A vector
#' @param predicate A TRUE/FALSE vector of same length as vec, defaults to
#' !is.na(vec)
#'
#' @return vector of indices
#' @export
#'
#' @examples
#' tosplit <- c(TRUE, TRUE, TRUE, NA, NA, TRUE, TRUE, NA,
#' TRUE, TRUE, TRUE, TRUE)
#' (index <- make_index_vector(tosplit))
#' split(tosplit[!is.na(tosplit)], index)
#'
#' set.seed(1234)
#' (tosplit2 <- rnorm(10))
#' make_index_vector(tosplit2, tosplit2 > 0)
#'
make_index_vector <- function (vec, predicate=!is.na(vec)) {
vrle <- rle(predicate)
## values will be TRUE or FALSE
vec_nonna_lengths <- vrle$lengths[vrle$values]
rep(seq(1, length(vec_nonna_lengths)), times=vec_nonna_lengths)
}
#' Converts character vector to sorted factor
#'
#' Sorting is carried out using "human" criteria with gtools::mixedsort.
#' Useful with e.g., ggplot or tidyr::spread
#'
#' @param charvec A character vector
#' @param ... Further arguments to gtools::mixedsort
#'
#' @return Sorted factor
#' @export
#'
#' @examples
#' df <- dplyr::data_frame(cells=paste0("cell", 1:15), vals=1:15)
#' df %>% tidyr::spread(cells, vals)
#' df %>% dplyr::mutate(cells=char2sorted_factor(cells)) %>%
#' tidyr::spread(cells, vals)
#'
char2sorted_factor <- function(charvec, ...) {
stopifnot(is.character(charvec))
factor(charvec, levels=gtools::mixedsort(unique(charvec), ...))
}
#' Shifts vector by cyclic permutation
#'
#' @param vec vector to cycle
#' @param n amount of cyclic permutation
#'
#' @return shifted vector
#' @export
#'
#' @references
#' https://stackoverflow.com/questions/30542128/
#' circular-shifting-arrays-in-r-by-distance-n
#' @examples
#' shifter(1:5, 2)
#' shifter(1:5, -2)
shifter <- function (vec, n=1L) {
if (n == 0) vec else c(tail(vec, -n), head(vec, n))
}
#' Create inverse function
#'
#' From
#' https://stackoverflow.com/questions/45616072/find-the-inverse-function-in-r
#'
#' @param fn Monotonous function
#' @param interval Vector defining the domain
#' @param lower Numeric, inferred from interval or given explicitly here
#' @param upper Numeric, inferred from interval or given explicitly here
#' @param ... Further parameters to uniroot()
#'
#' @return Function (vectorized) computing the inverse of fn
#' @export
#'
#' @examples
#' x <- 1:10
#' y <- sqrt(x)
#' sqrt.inv <- inverse(sqrt, lower = 1, upper = 10)
#' sqrt.inv(y)
inverse <- function(fn, interval = NULL, lower = min(interval),
upper = max(interval), ...) {
Vectorize(function(y) {
uniroot(f = function(x) {fn(x) - y}, lower = lower, upper = upper, ...)$root
})
}
# Matrix utilities --------------------------------------------------------
returnslice <- function(position, vec, posminus, posplus) {
return(vec[(position-posminus):(position+posplus)])
}
#' Matrix columnwise slice centered on different positions for each column
#'
#' @param mat A matrix
#' @param positions Center positions, vector. One element for each column in mat
#' @param posminus Scalar number of upstream positions in addition to center
#' @param posplus Scalar number of downstream positions in addition to center
#'
#' @return Matrix of column slices
#' @export
#'
#' @examples
#' A <- matrix(1:10, ncol=2)
#' matrixslices(A, c(4, 2), 1, 1)
#'
matrixslices <- function (mat, positions, posminus, posplus) {
toret <- mapply(returnslice, positions,
split(mat, col(mat)),
MoreArgs=list(posminus=posminus, posplus=posplus))
colnames(toret) <- colnames(mat)
return(toret)
}
#' Running average
#'
#' @param vec Vector to perform running average on
#' @param winlength Averaging window length
#' @param keep.na Should NAs at beginning and end be kept?
#' (They start at the end)
#'
#' @return Running average
#' @export
#'
#' @examples
#' vec <- 1:4
#' running_average(vec, 2)
#' running_average(vec, 3)
#' running_average(vec, 2, keep.na=FALSE)
running_average <- function (vec, winlength, keep.na=TRUE) {
runav <- stats::filter(vec, rep(1/winlength, winlength),
method="convolution") %>% as.vector()
if (keep.na)
runav
else
runav[!is.na(runav)]
}
compute.matrix.ratios <- function (mat1, mat2) {
toret <- mat1/mat2
toret[is.infinite(toret) | is.nan(toret)] <- NA
return(toret)
}
m.head <- function (mat, n=6) mat[1:min(nrow(mat), n), 1:min(ncol(mat), n)]
#' Interleave segments of a matrix based on specified indices
#'
#' @param matrix_data A matrix to interleave
#' @param ... Index vectors to extract segments (columns or rows) to interleave
#' @param use_columns Logical, whether to operate on columns. If TRUE, the
#' matrix is transposed before and after interleaving.
#'
#' @return Interleaved matrix
#'
#' @export
#' @examples
#' a <- matrix(1:8, ncol = 4)
#' interleave_segments(a, 1:2, 3:4, use_columns = TRUE)
#' interleave_segments(a, 1:2, 3:4, use_columns = FALSE)
interleave_segments <- function(matrix_data, ..., use_columns = TRUE) {
# Capture index vectors
segment_indices <- list(...)
# Extract specified segments based on the indices provided
extracted_segments <- lapply(segment_indices, function(indices) {
if (use_columns) {
matrix_data[, indices]
} else {
matrix_data[indices, ]
}
})
if (use_columns) {
# Transpose for column interleaving
interleaved_matrix <- t(
do.call(gdata::interleave, lapply(extracted_segments, t))
)
} else {
# Directly interleave rows
interleaved_matrix <- do.call(gdata::interleave, extracted_segments)
}
return(interleaved_matrix)
}
#' Calculate Empirical False Discovery Rate (FDR)
#'
#' This function computes the empirical false discovery rate (FDR) for a given
#' set of p-values or similar scores, assuming a subset of indices represent
#' true null hypotheses.
#'
#' @param val A numeric vector of p-values or test statistics.
#' @param true_null_indices A numeric vector of indices indicating the positions
#' in \code{val} that correspond to true null hypotheses.
#' @param ... Further arguments to \code{order()}; typically use
#' \code{decreasing = TRUE} if supplying test statistics.
#' @return A numeric vector of FDR values corresponding to each threshold in the
#' sorted \code{val}.
#' @details The FDR is computed as the cumulative count of true nulls up to a
#' given rank, divided by the rank itself. The function returns values sorted
#' in ascending order.
#'
#' @examples
#' set.seed(123)
#' val <- runif(10) # Generate 10 random uniform values
#' true_null_indices <- c(3, 6, 8) # Indices of true nulls
#' empirical_fdr(val, true_null_indices)
#'
#' @export
empirical_fdr <- function(val, true_null_indices, ...) {
is_true_null <- seq_along(val) %in% true_null_indices
cumsum(is_true_null[order(val, ...)])/seq_along(val)
}
# Circular helpers --------------------------------------------------------
#' Analyze Cosinor Design
#'
#' This function analyzes a vector of time points and computes various metrics
#' related to periodic sampling. It handles both absolute and modulo-per time
#' representations.
#'
#' @param timepoints A numeric vector of time points to analyze.
#' @param per The cosinor period
#' @param accuracy Absolute accuracy for determining centeredness and phase
#' invariance
#'
#' @return An object of class \code{cosinor_design} containing the following
#' elements:
#' \describe{
#' \item{n_points}{The total number of time points.}
#' \item{n_uniq}{The number of unique time points (sorted).}
#' \item{reps}{A vector indicating the number of replicates per unique time
#' point (sorted).}
#' \item{reps_uniq}{A vector of unique replicate counts across all time
#' points.}
#' \item{n_unique_modper}{The number of unique time points within a per-hour
#' cycle.}
#' \item{reps_modper}{A vector of replicate counts per unique time point
#' modulo per.}
#' \item{reps_modper_uniq}{A vector of unique replicate counts across all time
#' points modulo per.}
#' \item{sampling_int_uniq}{A vector of unique sampling intervals between
#' consecutive time points (sorted).}
#' \item{tps_sorted}{A vector of the input time points sorted in ascending
#' order.}
#' \item{tps_modper_sorted}{A vector of the time points modulo per, sorted.}
#' \item{input_sort_idx}{The index mapping the original input time points to
#' the sorted time points.}
#' \item{input_to_modper_idx}{The index mapping the original input time points
#' to the sorted modulo-per time points.}
#' \item{equal_reps}{Logical, indicating whether the number of replicates
#' per unique time point modulo per is identical.}
#' \item{evenly_spaced}{Logical, indicating whether time points are evenly
#' spaced within a cycle, also considering wrap-around from end to beginning.}
#' \item{is_centered}{Logical, indicating whether the design is centered.
#' Centered designs potentially reach the highest power.}
#' \item{is_phaseinv}{Logical, indicating whether the design is
#' phase-invariant. Phase-invariant designs lead to power independent of
#' phase.}
#' }
#'
#' @details The function performs several analyses on the provided time points:
#' \itemize{
#' \item Computes the number of replicates for each unique time point.
#' \item Computes the number of unique time points and replicates modulo per.
#' \item Checks for balance in replicates, sampling intervals, and
#' distribution within a cycle.
#' }
#'
#'
#' @examples
#' # Simulate a time point vector
#' set.seed(123)
#' timepoints <- rep(seq(0, 44, by = 4), each = 3) |> sample()
#'
#' # Analyze time points
#' results <- analyze_cosinor_design(timepoints)
#'
#' # View results
#' print(results)
#'
#' @export
analyze_cosinor_design <- function(timepoints, per = 24, accuracy = 1e-4) {
tps <- timepoints[!is.na(timepoints)]
if (!is.numeric(tps)) {
stop("Input must be a numeric vector.")
}
# Step 1: Sort the original time points and get sorting index
input_sort_idx <- order(tps) # Sort index for input time points
tps_sorted <- tps[input_sort_idx]
# Step 2: Compute modulo per of sorted time points
tps_modper <- tps_sorted %% per
# Step 3: Sort modulo per values and get sorting index
modper_sort_idx <- order(tps_modper) # Sort index for tps_modper
# Fully sorted modulo-per time points
tps_modper_sorted <- tps_modper[modper_sort_idx]
# Step 4: Compose indices to map input time points to sorted modulo-per time
# points
input_to_modper_idx <- input_sort_idx[modper_sort_idx]
# Unique time points (sorted by input time points)
tps_unique <- unique(tps_sorted)
n_unique <- length(tps_unique)
# Replicates per unique time point
reps <- as.vector(table(tps_sorted))
# Unique modulo per time points
tps_unique_modper <- sort(unique(tps_modper_sorted))
# Total number of unique time points within a per-hour period
n_unique_modper <- length(tps_unique_modper)
# Replicates per unique time point modulo per
reps_modper <- as.vector(table(tps_modper_sorted))
# Check if all replicates are identical for modulo per
reps_modper_uniq <- unique(reps_modper)
equal_reps <- length(reps_modper_uniq) == 1L
# Check if intervals modulo per are equidistant
modper_intervals <- diff(c(tps_unique_modper, tps_unique_modper[1])) %% per
evenly_spaced <- length(unique(modper_intervals)) == 1
# Check for centeredness (maximal sum of eigenvalues) and phase invariance
omega <- 2*pi/per
harms <- cbind(cos(omega*tps_sorted), sin(omega*tps_sorted))
csmeans <- colMeans(harms)
cscov <- crossprod(harms)/length(tps_sorted) - outer(csmeans, csmeans)
is_centered <- all(csmeans < accuracy)
is_phaseinv <- abs(diff(eigen(cscov)$values)) < accuracy
# Return results
structure(
list(
tspan = c(min(tps), max(tps)),
n_points = length(tps),
# number of unique time points, for jtkdist for example
n_uniq = n_unique,
reps = reps, # implicitly clear that this is sorted
reps_uniq = unique(reps),
# Or mod per, if treating e.g., 2 equivalent to 26 (usually correct
# approach)
n_uniq_modper = n_unique_modper,
reps_modper = reps_modper,
reps_uniq_modper = reps_modper_uniq,
# this is also needed for JTK, and we measure period by normalizing to
# this
sampling_int_uniq = unique(diff(tps_unique)),
sampling_int_uniq_modper = unique(modper_intervals),
tps_sorted = tps_sorted,
tps_modper_sorted = tps_modper_sorted,
input_sort_idx = input_sort_idx,
input_to_modper_idx = input_to_modper_idx,
equal_reps = equal_reps,
evenly_spaced = evenly_spaced,
is_centered = is_centered,
is_phaseinv = is_phaseinv
),
class = "cosinor_design"
)
}
print.cosinor_design <- function(x, ...) {
cat("\n")
cat("\t\tCosinor experimental design:\n")
cat("\t\t---------------------------\n")
cat("\n")
cat("Total time points: ", x$n_points, "\n")
cat("Spanning time points: ", paste(x$tspan, collapse = " - "), "\n")
cat("Unique time points: ", x$n_uniq, "\n")
cat("Replicates per unique time point: ",
paste(x$reps, collapse = ", "), "\n")
cat("Unique time points per cycle (mod per): ", x$n_uniq_modper, "\n")
cat("Replicates per unique time point per cycle (mod per): ",
paste(x$reps_modper, collapse = ", "), "\n")
cat("Unique sampling intervals: ",
paste(x$sampling_int_uniq, collapse = ", "), "\n")
cat("Unique sampling intervals mod per (wrapped): ",
paste(x$sampling_int_uniq_modper, collapse = ", "), "\n")
cat("Equal number of replicates per time point: ",
ifelse(x$equal_reps, "Yes", "No"), "\n")
cat("Evenly spaced design: ", ifelse(x$evenly_spaced, "Yes", "No"), "\n")
cat("Centered design: ", ifelse(x$is_centered, "Yes", "No"), "\n")
cat("Phase-invariant design: ", ifelse(x$is_phaseinv, "Yes", "No"), "\n")
invisible(x) # Ensure the object is returned invisibly
}
circ_mean_24 <- function (x) {
(as.numeric(circular::mean.circular(
circular::circular(x, units="hours")))) %% 24
}
circ_sd_24 <- function (x) {
as.numeric(circular::sd(circular::circular(x, units="hours")))*12/pi
}
circ_summary_24 <- function (x) {
sumvec <- circular:::summary.circular(circular::circular(x, units="hours"))
change_logic <- names(sumvec) %in% c("Min.", "1st Qu.", "Median",
"Mean", "3rd Qu.", "Max.")
sumvec[change_logic] <- sumvec[change_logic] %% 24
sumvec
}
circ_kappa_24 <- function (x) {
circular::circular(x, units="hours") %>%
circular::mle.vonmises(bias=TRUE) %>%
unclass %>%
magrittr::extract(c("kappa", "se.kappa")) %>%
unlist
}
circ_mu_24 <- function (x) {
circular::circular(x, units="hours") %>%
circular::mle.vonmises(bias=TRUE) %>%
unclass %>%
magrittr::extract(c("mu", "se.mu")) %>%
unlist
}
## simple circular permutation around index
mycycleperm <- function (vec, ind) {
if (!is.integer(ind)) ind <- as.integer(ind)
if (ind == 1L)
## same phase
return(vec)
else
## reorder to put time matching nascent bin first
return(vec[c(ind:length(vec), 1:(ind - 1))])
}
mycircpairdist.ct <- function(x, y) {
delta.deg <- (x - y) %% 24
delta.deg[delta.deg > 12] <- 24 - delta.deg[delta.deg > 12]
return(delta.deg)
}
## alternative unsigned circular diff
ct.diff <- function (ct1, ct2) {
cts <- cbind(ct1, ct2)
return(apply(cts, 1, function(x) range(circular::circular(x, units="hours"))))
}
#' Signed differences in phase (0-24 hrs scale)
#'
#' Computes phase difference between x and y, between -12 and 12.
#' A positive result means that x has earlier phase than y. See example.
#'
#' @param x Vector, phases between 0 and 24
#' @param y Vector, phases between 0 and 24
#'
#' @return Vector of signed phase differences
#' @export
#'
#' @examples
#' mysignedcircpairdist.ct(c(10, 22, 2), c(12, 2, 23))
mysignedcircpairdist.ct <- function(x, y) {
## positive sign == x is ahead
delta.deg <- (y - x)
## x more than 12h ahead == less than 12h behind
delta.deg[delta.deg > 12 & !is.na(delta.deg)] <-
-(24 - delta.deg[delta.deg > 12 & !is.na(delta.deg)])
## x more than 12h behind == less than 12h ahead
delta.deg[delta.deg <= -12 & !is.na(delta.deg)] <-
(24 + delta.deg[delta.deg <= -12 & !is.na(delta.deg)])
return(delta.deg)
}
my.watson <- function(x, y) {
xx <- circular::circular(x, units="hours")
yy <- circular::circular(y, units="hours")
circular::watson.two.test(xx, yy)
}
#'Split data frame containing ZTs over the 24/0 border
#'
#'@param df Data frame to split
#'@param grp Grouping variable (quosure), split will be made for each group
#'@param x Starting points, secondary variable in polar coordinates, e.g. length
#' or amplitude
#'@param y Primary polar variable, e.g. time of day, ZT
#'@param xend Corresponding x end points
#'@param yend y end points
#'
#'@return Split data frame
#'@export
#'
#' @examples
#' # From plot in our paper 10.1073/pnas.1613103114
#' PFK <- 10.626
#' PFKup <- PFK + 2.9822/2
#' PFKdown <- PFK - 2.9822/2
#' ciopt <- dplyr::data_frame(x=1.25,
#' y=c(PFKdown + 7, PFKdown + 12) %% 24,
#' xend=1.25,
#' yend=c(PFKup + 7, PFKup + 12) %% 24,
#' what=c("PDH", "GS"))
#' ciopt
#' split024intervals(ciopt)
split024intervals <- function (df, grp=rlang::quo(what),
x="x", y="y",
xend="xend", yend="yend") {
df %>% dplyr::group_by(!! grp) %>%
dplyr::do (
if (.[[yend]] > .[[y]])
.
else {
tochange <- .[c(x, xend, y, yend)]
therest <- setdiff(., tochange)
changed <- setNames(dplyr::data_frame(x=.[[x]], xend=.[[xend]],
y=c(.[[y]], 0),
yend=c(24, .[[yend]])),
c(x, xend, y, yend))
dplyr::bind_cols(dplyr::bind_rows(therest, therest), changed)
}
)
}
#' Constructs data_frame suitable for polar circadian phase arrows
#'
#' @param df input data_frame with:
#' @param amp_mean amplitudes (quosure or values overriding those in data frame)
#' @param phase_mean phases (quosure, see above)
#' @param grp grouping variable (quosure)
#'
#' @return data_frame suitable for ggplot
#' @export
#'
#' @examples
#' df <- dplyr::data_frame(what="test", amp=0.5, phase=12)
#' df %>% forge_polar_amp_df(rlang::quo(amp), rlang::quo(phase),
#' rlang::quo(what))
forge_polar_amp_df <- function (df, amp_mean, phase_mean,
grp=rlang::quo(what)) {
df %>%
dplyr::mutate(y = ((!! phase_mean)) %% 24,
yend = ((!! phase_mean)) %% 24,
x = 0, xend=(!! amp_mean)) %>%
dplyr::select(!! grp, x, xend, y, yend)
}
#' Constructs data_frame suitable for polar circadian phase intervals
#'
#' @param df input data_frame with:
#' @param rad radial distance(s) for interval segment (quosure)
#' @param phase_mean phase means (quosure)
#' @param phase_int e.g. phase SD, positive (quosure)
#' @param grp grouping variable (quosure)
#'
#' @return data_frame suitable for ggplot
#' @export
#'
#' @examples
#' df <- dplyr::data_frame(what="test", rad=1, phase_mean=23, phase_int=2)
#' df %>% forge_polar_phaseint_df(rlang::quo(rad),
#' rlang::quo(phase_mean), rlang::quo(phase_int))
forge_polar_phaseint_df <- function (df, rad, phase_mean,
phase_int,
grp=rlang::quo(what)) {
df %>%
dplyr::mutate(y = ((!! phase_mean) - (!! phase_int)) %% 24,
yend = ((!! phase_mean) + (!! phase_int)) %% 24,
x = (!! rad), xend=(!! rad)) %>%
dplyr::select(!! grp, x, xend, y, yend) %>%
split024intervals(grp=grp)
}
# parallel cores ----------------------------------------------------------
mycores <- function() {
return(min(30, parallel::detectCores()))
}
# ggplot: Basic publication themes ----------------------------------------
pw_theme <- ggplot2::theme_classic(base_size=7) +
ggplot2::theme(
#panel.background=element_rect(linetype="solid", color="black", size=1,
# fill=NA),
#panel.border=element_rect(linetype="solid",
# color="black", size=1, fill="red"),
#panel.grid.major.x=element_line(color="grey80"),
#panel.spacing.y=unit(-0.5, "mm"),
plot.margin=ggplot2::margin(3, 3, 3, 3),
legend.key.size=ggplot2::unit(2.5, "mm"),
legend.box.spacing=ggplot2::unit(0, "mm"),
legend.justification=c(1, 1),
legend.text=ggplot2::element_text(size=ggplot2::rel(1), color="black"),
legend.title=ggplot2::element_text(size=ggplot2::rel(1), color="black"),
axis.text=ggplot2::element_text(size=ggplot2::rel(1), color="black"),
axis.line=ggplot2::element_line(size=0.5, lineend="round"),
axis.ticks=ggplot2::element_line(color="black", lineend = "round")
)
pw_theme_lattice <- ggplot2::theme(
panel.background=ggplot2::element_rect(linetype="blank",
fill=NA),
panel.border=ggplot2::element_rect(linetype="solid",
color="black", size=1, fill=NA),
axis.line=ggplot2::element_blank(),
strip.background=ggplot2::element_rect(fill="grey91", size=1),
strip.placement="outside",
strip.text=ggplot2::element_text(size=ggplot2::rel(1)),
strip.text.x=ggplot2::element_text(margin=
ggplot2::margin(2, 0, 2, 0, unit="pt")),
strip.text.y=ggplot2::element_text(margin=
ggplot2::margin(2, 0, 2, 0, unit="pt")),
# panel.grid.major.x=element_line(color="grey80"),
panel.spacing.y=ggplot2::unit(-0.5, "mm"),
)
pw_phase_theme <- pw_theme +
ggplot2::theme(
plot.margin=ggplot2::margin(0, 3, 0, 0, unit="pt"),
panel.grid.major=ggplot2::element_line(color="grey92"),
panel.grid.major.x=ggplot2::element_blank(),
panel.grid.minor=ggplot2::element_blank(),
panel.border=ggplot2::element_blank(),
panel.spacing=ggplot2::unit(1, "cm"),
plot.title=ggplot2::element_text(size=ggplot2::rel(1)),
legend.margin=ggplot2::margin(0, 0, 0, 0, unit="lines"),
legend.spacing=ggplot2::unit(0.6, "lines"),
legend.key.width=ggplot2::unit(1, "lines"),
legend.key.height=ggplot2::unit(0.6, "lines"),
axis.line=ggplot2::element_blank(),
axis.ticks=ggplot2::element_blank(),
axis.text.y=ggplot2::element_blank(),
axis.text.x=ggplot2::element_text(size=ggplot2::rel(1),
color="black",
margin=ggplot2::
margin(0, 1, 0, 1, unit="cm")))
pw_theme_lattice_modern <-
pw_theme_lattice +
ggplot2::theme(
legend.position = "top", legend.justification = "right",
legend.title = ggplot2::element_text(face = "bold"),
legend.box.just = "center",
plot.title = ggplot2::element_text(face = "bold"),
strip.background = ggplot2::element_rect(fill = "gray93", color = NA),
panel.spacing.y = ggplot2::unit(1, "mm")
)
# ggplot: Log 10 scales ---------------------------------------------------
## https://stackoverflow.com/questions/30179442/
## plotting-minor-breaks-on-a-log-scale-with-ggplot
log10_minor_break = function (...) {
function (x) {
minx = floor(min(log10(x), na.rm=T)) - 1;
maxx = ceiling(max(log10(x), na.rm=T)) + 1;
n_major = maxx - minx + 1;
major_breaks = seq(minx, maxx, by=1)
minor_breaks =
rep(log10(seq(1, 9, by=1)), times=n_major) +
rep(major_breaks, each=9)
10^(minor_breaks)
}
}
my_no0trail <- scales::format_format(drop0trailing=TRUE,
scientific=FALSE)
my_x <- function (thebreaks=ggplot2::waiver(), ...) {
ggplot2::scale_x_continuous(...,
breaks=thebreaks,
labels=my_no0trail)
}
my_y <- function (thebreaks=ggplot2::waiver(), ...) {
ggplot2::scale_y_continuous(...,
breaks=thebreaks,
labels=my_no0trail)
}
my_xlog10 <- function (nbreaks=7,
thebreaks=scales::log_breaks(n=nbreaks), ...) {
ggplot2::scale_x_log10(...,
breaks=thebreaks,
labels=scales::format_format(drop0trailing=TRUE,
scientific=FALSE))
}
my_ylog10 <- function (nbreaks=7,
thebreaks=scales::log_breaks(n=nbreaks), ...) {
ggplot2::scale_y_log10(...,
breaks=thebreaks,
labels=scales::format_format(drop0trailing=TRUE,
scientific=FALSE))
}
my_xlog10p <- function (...) {
ggplot2::scale_x_log10(breaks=scales::trans_breaks("log10",
function (x) 10^x,
...),
labels=scales::
trans_format("log10", scales::math_format(10^.x)))
}
my_ylog10p <- function (...) {
ggplot2::scale_y_log10(breaks=scales::trans_breaks("log10",
function (x) 10^x,
...),
labels=scales::
trans_format("log10", scales::math_format(10^.x)))
}
## for e.g. kb
my_xlog10k <- function (nbreaks=7, scalefac=1000,
thebreaks=scales::log_breaks(n=nbreaks), ...) {
ggplot2::scale_x_log10(breaks=thebreaks,
labels=scales::
trans_format(function (x) x/scalefac,
scales::
format_format(drop0trailing=TRUE,
scientific=FALSE)))
}
my_ylog10k <- function (nbreaks=7, scalefac=1000,
thebreaks=scales::log_breaks(n=nbreaks), ...) {
ggplot2::scale_y_log10(breaks=thebreaks,
labels=scales::
trans_format(function (x) x/scalefac,
scales::
format_format(drop0trailing=TRUE,
scientific=FALSE)))
}
# ggplot: Circadian scales ------------------------------------------------
## returns function returning breaks with mystep intervals.
## limits gets rounded down and up, respectively, to multiples
## of mystep/rangedivfac. There is a possibility of offset, to.
my_circbreaks <- function (mystep=12, rangedivfac=1, offset=0) {
function (x) {
myrangegrid <- mystep/rangedivfac
minx <- x[1] - x[1] %% myrangegrid
maxx <- x[2] + myrangegrid - x[2] %% myrangegrid
seq(minx, maxx, by=mystep) + offset
}
}
## usage example
# my_x_circscale <- function (mystep=12, rangedivfac=1, ...) {
# thebreaks <- my_circbreaks(mystep, rangedivfac)
# scale_x_continuous(breaks=thebreaks,
# limits=c(thebreaks[1], thebreaks[length(thebreaks)]),
# ...)
# }
#
# my_y_circscale <- function (mystep=12, rangedivfac=1, ...) {
# thebreaks <- my_circbreaks(mystep, rangedivfac)
# scale_y_continuous(breaks=thebreaks,
# limits=c(thebreaks[1], thebreaks[length(thebreaks)]),
# ...)
# }
# ggplot: circadian extensions --------------------------------------------
log_relamp_2_relamp <- function (means, log_relamp)
(2^(2*means*log_relamp) - 1)/(2^(2*means*log_relamp) + 1)
log_amp_2_relamp <- function (log_amp)
(2^(2*log_amp) - 1)/(2^(2*log_amp) + 1)
compute_hrfit <- function (data, scales, params, n=101L, per=24) {
rng <- range(data$x, na.rm = TRUE)
grid <- data.frame(x = seq(rng[1], rng[2], length=n))
logdata <- data
logdata$y <- log2(data$y)
mod <- Rfit::rfit(y ~ cos(2*pi/per*x) + sin(2*pi/per*x),
data = logdata)
rmean_l <- coef(mod)[1]
rcos_l <- coef(mod)[2]
rsin_l <- coef(mod)[3]
amp_l <- sqrt(rcos_l^2 + rsin_l^2)
phi_l <- atan2(rsin_l, rcos_l) %% (2*pi)
rmean <- 2^rmean_l
relamp <- log_amp_2_relamp(amp_l)
grid$y <- rmean + rmean*relamp*cos(2*pi/per*grid$x - phi_l)
grid
}
# _________________
compute_fit_dlnorm <- function (data, scales, params,
na.rm = TRUE, n = 101L,
freq = TRUE, binwidth = 1) {
rng <- range(data$x, na.rm = na.rm)
grid <- data.frame(x = seq(rng[1], rng[2], length = n))
grid$y <- dlnorm(grid$x, meanlog = mean(log(data$x)), sdlog = sd(log(data$x)))
if(freq) {
grid$y <- grid$y*length(data$x)*binwidth
}
grid
}
StatFitDlnorm <- ggplot2::ggproto("StatFitDlnorm", ggplot2::Stat,
required_aes = "x",
compute_group = compute_fit_dlnorm)
stat_fit_dlnorm <- function (mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = TRUE,
show.legend = NA, inherit.aes = TRUE,
n = 101L, freq = TRUE, binwidth = 1, ...) {
ggplot2::layer(
stat = StatFitDlnorm, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, n = n, freq = freq, binwidth = binwidth, ...)
)
}
# _________________
compute_fit_dnorm <- function (data, scales, params,
na.rm = TRUE, n = 101L,
freq = TRUE, binwidth = 1) {
rng <- range(data$x, na.rm = na.rm)
grid <- data.frame(x = seq(rng[1], rng[2], length = n))
grid$y <- dnorm(grid$x, mean = mean(data$x), sd = sd(data$x))
if(freq) {
grid$y <- grid$y*length(data$x)*binwidth
}
grid
}
StatFitDnorm <- ggplot2::ggproto("StatFitDnorm", ggplot2::Stat,
required_aes = "x",
compute_group = compute_fit_dnorm)
stat_fit_dnorm <- function (mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = TRUE,
show.legend = NA, inherit.aes = TRUE,
n = 101L, freq = TRUE, binwidth = 1, ...) {
ggplot2::layer(
stat = StatFitDnorm, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, n = n, freq = freq, binwidth = binwidth, ...)
)
}
# _________________
compute_pfit_logtrans <- function (data, scales, params,
na.rm = TRUE, n = 101L) {
rng <- range(data$x, na.rm = na.rm)
grid <- data.frame(x = seq(rng[1], rng[2], length = n))
logdata <- data
logdata$x <- log(data$x)
logdata$y <- log(data$y)
# bootstrap weights for weighted linear least squares
boot_fit <- lm(y ~ x, data = logdata)
# obtain averaged standardized variance as function of y
residual_fit <- loess(rstandard(boot_fit)^2 ~ fitted(boot_fit))
# use inverse variance as weights for the final weighted fit
fit <- lm(y ~ x, data = logdata, weights = 1/fitted(residual_fit))
grid$y <- exp(predict(fit, data.frame(x = log(grid$x))))
grid
}
StatPfitLog <- ggplot2::ggproto("StatPfitLog", ggplot2::Stat,
required_aes = c("x", "y"),
compute_group = compute_pfit_logtrans)
stat_pfit <- function (mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = TRUE, show.legend = NA,
inherit.aes = TRUE, n = 101L, ...) {
ggplot2::layer(
stat = StatPfitLog, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, n = n, ...)
)
}
# _________________
compute_hrfit_logtrans <- function (data, scales, params, na.rm = TRUE,
n = 101L, per = 24) {
rng <- range(data$x, na.rm = na.rm)
grid <- data.frame(x = seq(rng[1], rng[2], length=n))
logdata <- data
logdata$y <- log2(data$y)
mod <- Rfit::rfit(y ~ cos(2*pi/per*x) + sin(2*pi/per*x),
data = logdata)
## there's no predict() in Rfit
grid$y <- 2^(coef(mod)[1] + coef(mod)[2]*cos(2*pi/per*grid$x) +
coef(mod)[3]*sin(2*pi/per*grid$x))
grid
}
StatHrfitLog <- ggplot2::ggproto("StatHrfitLog", ggplot2::Stat,
required_aes = c("x", "y"),
compute_group = compute_hrfit_logtrans)
stat_hrfit <- function (mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = TRUE, show.legend = NA,
inherit.aes = TRUE, n=101L, per=24, ...) {
ggplot2::layer(
stat = StatHrfitLog, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, n = n, per = per, ...)
)
}
# _________________
compute_hrhline_logtrans <- function (data, scales, params,
na.rm = TRUE, per = 24) {
rng <- range(data$x, na.rm = na.rm)
grid <- data.frame(x = c(rng[1], rng[2]))
logdata <- data
logdata$y <- log2(data$y)
mod <- Rfit::rfit(y ~ cos(2*pi/per*x) + sin(2*pi/per*x),
data = logdata)
## there's no predict() in Rfit
grid$y <- rep(2^(coef(mod)[1]), 2)
grid
}
StatHrhlineLog <- ggplot2::ggproto("StatHrhlineLog", ggplot2::Stat,
required_aes = c("x", "y"),
compute_group = compute_hrhline_logtrans)
stat_hrhline <- function (mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = TRUE,
show.legend = NA,
inherit.aes = TRUE, per=24, ...) {
ggplot2::layer(
stat = StatHrhlineLog, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm = na.rm, per = per, ...)
)
}
# _________________
compute_density_phase <- function (data, scales, params, bw=6) {
densest <-
circular::density.circular(circular::circular(data$x, units="hour"), bw=bw)
grid <- data.frame(x = unclass(densest$x),
y = densest$y*pi/12)
grid
}
StatDensPhase <- ggplot2::ggproto("StatDensPhase", ggplot2::Stat,
required_aes="x",
compute_group=compute_density_phase)
stat_density_phase <- function (mapping = NULL, data = NULL, geom = "line",
position = "identity", na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, bw = 6, ...) {
ggplot2::layer(
stat = StatDensPhase, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(na.rm=na.rm, bw=bw, ...)
)
}
# ggplot: other extensions ------------------------------------------------
## uperrorbar from
## https://stackoverflow.com/questions/27585776/
## error-bars-for-barplot-only-in-one-direction
geom_uperrorbar <- function (mapping = NULL, data = NULL,
stat = "identity", position = "identity",
...,
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE) {
ggplot2::layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomUperrorbar,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
...
)
)
}
GeomUperrorbar <-
ggplot2::ggproto("GeomUperrorbar", ggplot2::Geom,
default_aes = ggplot2::aes(colour = "black", size = 0.5,
linetype = 1, width = 0.5,
alpha = NA),
draw_key = ggplot2::draw_key_path,
required_aes = c("x", "y", "ymax"),
setup_data = function (data, params) {
data$width <- data$width %||%
params$width %||%
(ggplot2::resolution(data$x, FALSE) * 0.9)
transform(data,
xmin = x - width / 2,
xmax = x + width / 2, width = NULL
)
},
draw_panel = function (data, panel_scales, coord,
width = NULL) {
ggplot2::GeomPath$draw_panel(data.frame(
x = as.vector(rbind(data$xmin, data$xmax, NA,
data$x, data$x)),
y = as.vector(rbind(data$ymax, data$ymax, NA,
data$ymax, data$y)),
colour = rep(data$colour, each = 5),
alpha = rep(data$alpha, each = 5),
size = rep(data$size, each = 5),
linetype = rep(data$linetype, each = 5),
group = rep(1:(nrow(data)), each = 5),
stringsAsFactors = FALSE,
row.names = 1:(nrow(data) * 5)
), panel_scales, coord)
}
)
## NOTE
## can then use geom="uperrorbar" in stat_summary
# ggplot: custom plots ----------------------------------------------------
my_hist <- function (values, n = 10, breaks = NA,
binwidth = NA, boundary = floor(min(values)),
breaks_x = 6, breaks_y = 10,
color = "black", fill = "grey95") {
if (!all(is.na(breaks))) {
the_hist <- ggplot2::geom_histogram(
breaks = breaks,
color = color, fill = fill
)
} else if (!is.na(binwidth)) {
the_hist <- ggplot2::geom_histogram(
binwidth = binwidth, boundary = boundary,
color = color, fill = fill
)
} else {
the_hist <- ggplot2::geom_histogram(
bins = n,
color = color, fill = fill
)
}
if (is.function(breaks_x))
thebreaks_x <- breaks_x
else
thebreaks_x <- scales::breaks_extended(breaks_x)
if (is.function(breaks_y))
thebreaks_y <- breaks_y
else
thebreaks_y <- scales::breaks_extended(breaks_y)
dplyr::tibble(x = values) %>%
ggplot2::ggplot(ggplot2::aes(x)) +
the_hist +
my_x(expand = ggplot2::expand_scale(mult = 0.01),
thebreaks = thebreaks_x) +
my_y(expand = ggplot2::expand_scale(mult = c(0.002, 0.05)),
thebreaks = thebreaks_y) +
pw_theme +
ggplot2::theme(
axis.line.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
axis.line.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_line(color = "grey90"),
panel.grid.minor.y = ggplot2::element_line(color = "grey90")
)
}
# Graphics ----------------------------------------------------------------
#' Computes color components along a circular spectrum
#'
#' Uses RColorBrewer as basis
#'
#' @param ncolor Number of desired colors
#' @param ncolor.orig Number of colors in the original spectrum, defaults to 11,
#' the maximal number of available colors in RColorBrewer
#'
#' @return Desired colors in hex format
#' @export
#'
#' @examples
#' graphics::pie(rep(1, 24), col=circular_spectral_colors(24L))
circular_spectral_colors <- function (ncolor=24L, ncolor.orig=11L) {
all_cb_cols <- RColorBrewer::brewer.pal(ncolor.orig, "Spectral")
## get a cyclically defined ramp function
## It has to go all the way, otherwise a big gap between the endings will
## arise.
cyclic_ramp_fun <- grDevices::colorRampPalette(c(all_cb_cols, all_cb_cols[1]))
## return requested amount of colors, let the ramp go all the way around then
## discard the last value
cyclic_ramp_fun(ncolor + 1)[1:ncolor]
}
logexp2plotmath <- function(theseq) {
sapply(theseq,
function(x) as.expression(substitute(10^y, list(y=x))))
}
# Genomics ----------------------------------------------------------------
#' List of genes with introns
#'
#' @param txdb TxDb database object
#'
#' @return GRangesList with flattened genes with introns
#' @export
#'
#' @references See post on (https://stat.ethz.ch/pipermail/bioconductor/
#' 2013-February/050725.html)
#'
#' @examples
#' library(TxDb.Mmusculus.UCSC.mm9.refGene)
#' my_intronsByGene(TxDb.Mmusculus.UCSC.mm9.refGene)
#'
my_intronsByGene <- function (txdb) {
introns <- GenomicFeatures::intronsByTranscript(txdb, use.names=TRUE)
## this creates a GRanges with all introns, not ordered according to anything
ulst <- unlist(introns)
## for now, just remove dupes
intronsNoDups <- ulst[!duplicated(ulst)]
## obtains a gene id for each transcript name
suppressMessages(
themap <- AnnotationDbi::select(txdb,
keys=names(intronsNoDups),
keytype="TXNAME", columns="GENEID"))
## there must be no NAs
stopifnot(all(!is.na(themap$GENEID)))
## this works only when there is a 1:1 or many:1 mapping between txid and
## geneid
if (length(intronsNoDups) != nrow(themap))
stop(paste("Intron to gene annotation is not possible in this annotation,",
"one-to-many mapping(s) between transcript and gene present"))
S4Vectors::split(intronsNoDups, themap$GENEID)
}
#' List of genes with introns
#'
#' @param txdb TxDb database object
#'
#' @return GRangesList with flattened genes with introns
#' @export
#'
#' @references See post on (https://stat.ethz.ch/pipermail/
#' bioconductor/2013-February/050725.html)
#'
#' @examples
#' library(TxDb.Mmusculus.UCSC.mm9.refGene)
#' my_intronsByGene(TxDb.Mmusculus.UCSC.mm9.refGene)
#'
my_intronsByGene2 <- function (txdb) {
introns <- GenomicFeatures::intronsByTranscript(txdb, use.names=TRUE)
## this creates a GRanges with all introns, not ordered according to anything
ulst <- unlist(introns)
## for now, just remove dupes
intronsNoDups <- ulst[!duplicated(ulst)]
classic_names <- names(intronsNoDups) %>%
stringr::str_extract("^\\w+")
names(intronsNoDups) <- classic_names
## obtains a gene id for each transcript name
suppressMessages(
themap <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db,
keys=classic_names,
keytype="REFSEQ", columns="ENTREZID"))
## there must be no NAs
stopifnot(all(!is.na(themap$ENTREZID)))
## this works only when there is a 1:1 or many:1 mapping between txid and
## geneid
if (length(intronsNoDups) != nrow(themap))
stop(paste("Intron to gene annotation is not possible in this annotation,",
"one-to-many mapping(s) between transcript and gene present"))
S4Vectors::split(intronsNoDups, themap$ENTREZID)
}
#' List of genes with exons
#'
#' @param txdb TxDb database object
#'
#' @return GRangesList with flattened genes with introns
#' @export
#'
#' @references See post on (https://stat.ethz.ch/pipermail/
#' bioconductor/2013-February/050725.html)
#'
#' @examples
#' library(TxDb.Mmusculus.UCSC.mm9.refGene)
#' my_intronsByGene(TxDb.Mmusculus.UCSC.mm9.refGene)
#'
my_exonsByGene2 <- function (txdb) {
exons <- GenomicFeatures::exonsBy(txdb, by="tx", use.names=TRUE)
## this creates a GRanges with all introns, not ordered according to anything
ulst <- unlist(exons)
## for now, just remove dupes
exonsNoDups <- ulst[!duplicated(ulst)]
classic_names <- names(exonsNoDups) %>%
stringr::str_extract("^\\w+")
names(exonsNoDups) <- classic_names
## obtains a gene id for each transcript name
suppressMessages(
themap <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db,
keys=classic_names,
keytype="REFSEQ", columns="ENTREZID"))
## there must be no NAs
stopifnot(all(!is.na(themap$ENTREZID)))
## this works only when there is a 1:1 or many:1 mapping between txid and
## geneid
if (length(exonsNoDups) != nrow(themap))
stop(paste("Exon to gene annotation is not possible in this annotation,",
"one-to-many mapping(s) between transcript and gene present"))
S4Vectors::split(exonsNoDups, themap$ENTREZID)
}
m2e <- function (mgisyms) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=mgisyms,
keytype="SYMBOL",
columns="ENTREZID")
df$ENTREZID
}
e2m <- function (entrezids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=entrezids,
keytype="ENTREZID",
columns="SYMBOL")
df$SYMBOL
}
e2me <- function (entrezids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=entrezids,
keytype="ENTREZID",
columns=c("SYMBOL", "ENTREZID")) %>%
as_tibble %>% rlang::set_names("entrez_id", "mgi_symbol")
}
m2me <- function (mgisyms) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=mgisyms,
keytype="SYMBOL",
columns=c("SYMBOL", "ENTREZID")) %>%
as_tibble %>% rlang::set_names("mgi_symbol", "entrez_id")
}
a2ae <- function (mgisyms) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=mgisyms,
keytype="ALIAS",
columns=c("ALIAS", "ENTREZID")) %>%
as_tibble %>% rlang::set_names("alias", "entrez_id")
}
m2ens <- function (mgisyms) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=mgisyms,
keytype="SYMBOL",
columns="ENSEMBL")
df$ENSEMBL
}
ens2m <- function (ensemblids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=ensemblids,
keytype="ENSEMBL",
columns="SYMBOL")
df$SYMBOL
}
ens2mens <- function (ensemblids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=ensemblids,
keytype="ENSEMBL",
columns=c("SYMBOL", "ENSEMBL")) %>%
as_tibble %>% rlang::set_names("ensembl_id", "mgi_symbol")
}
m2mens <- function (mgisyms) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=mgisyms,
keytype="SYMBOL",
columns=c("SYMBOL", "ENSEMBL")) %>%
as_tibble %>% rlang::set_names("mgi_symbol", "ensembl_id")
}
e2ens <- function (entrezids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=entrezids,
keytype="ENTREZID",
columns="ENSEMBL")
df$ENSEMBL
}
ens2e <- function (ensemblids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=ensemblids,
keytype="ENSEMBL",
columns="ENTREZID")
df$ENTREZID
}
ens2eens <- function (ensemblids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=ensemblids,
keytype="ENSEMBL",
columns=c("ENTREZID", "ENSEMBL")) %>%
as_tibble %>% rlang::set_names("ensembl_id", "entrez_id")
}
e2eens <- function (entrezids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=entrezids,
keytype="ENTREZID",
columns=c("ENTREZID", "ENSEMBL")) %>%
as_tibble %>% rlang::set_names("entrez_id", "ensembl_id")
}
m2r <- function (mgisyms) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=mgisyms,
keytype="SYMBOL",
columns="REFSEQ")
df$REFSEQ
}
r2m <- function (refseqids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=refseqids,
keytype="REFSEQ",
columns="SYMBOL")
df$SYMBOL
}
r2mr <- function (refseqids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=refseqids,
keytype="REFSEQ",
columns=c("SYMBOL", "REFSEQ")) %>%
as_tibble %>% rlang::set_names("refseq_id", "mgi_symbol")
}
m2mr <- function (mgisyms) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=mgisyms,
keytype="SYMBOL",
columns=c("SYMBOL", "REFSEQ")) %>%
as_tibble %>% rlang::set_names("mgi_symbol", "refseq_id")
}
e2r <- function (entrezids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=entrezids,
keytype="ENTREZID",
columns="REFSEQ")
df$REFSEQ
}
r2e <- function (refseqids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=refseqids,
keytype="REFSEQ",
columns="ENTREZID")
df$ENTREZID
}
r2er <- function (refseqids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=refseqids,
keytype="REFSEQ",
columns=c("ENTREZID", "REFSEQ")) %>%
as_tibble %>% rlang::set_names("refseq_id", "entrez_id")
}
e2er <- function (entrezids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=entrezids,
keytype="ENTREZID",
columns=c("ENTREZID", "REFSEQ")) %>%
as_tibble %>% rlang::set_names("entrez_id", "refseq_id")
}
ens2r <- function (enstids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=enstids,
keytype="ENSEMBLTRANS",
columns="REFSEQ")
df$REFSEQ
}
r2ens <- function (refseqids) {
df <- AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=refseqids,
keytype="REFSEQ",
columns="ENSEMBLTRANS")
df$ENSEMBLTRANS
}
r2ensr <- function (refseqids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=refseqids,
keytype="REFSEQ",
columns=c("ENSEMBLTRANS", "REFSEQ")) %>%
as_tibble %>% rlang::set_names("refseq_id", "enst_id")
}
ens2ensr <- function (enstids) {
AnnotationDbi::select(org.Mm.eg.db::org.Mm.eg.db, keys=enstids,
keytype="ENSEMBLTRANS",
columns=c("ENSEMBLTRANS", "REFSEQ")) %>%
as_tibble %>% rlang::set_names("enst_id", "refseq_id")
}
keepsynhelper <- function (alias, sym) {
## if there is exact synonym, return the index for only these (can be many)
if (any(alias == sym, na.rm=TRUE))
which(alias == sym)
## otherwise, keep everything
else
seq_along(alias)
}
## keeps true synonyms in a symbol alias data frame
keepsyn <- function (ttab) {
do.call(rbind, by(ttab, ttab$ALIAS, function (df) {
df[keepsynhelper(df$ALIAS, df$SYMBOL), ]
}))
}
#' Search Entrez IDs for gene names that are not necessarily MGI symbols
#'
#' @param gene_names Character vector of gene names
#'
#' @return Data frame (tibble) with columns "alias" and "entrez_id" for unique
#' matches
#' @export
#'
#' @examples
#' gene_names_to_entrez(c("Bmal1", "Ciart", "Chrono"))
gene_names_to_entrez <- function (gene_names) {
if (any(duplicated(gene_names))) {
warning("Duplicated input names will be removed")
gene_names <- unique(gene_names)
}
alias_full <- try(
suppressMessages(a2ae(gene_names)),
silent = TRUE
)
# Handle case when no gene names are found
select_err <- "None of the keys entered are valid keys"
if (isa(alias_full, "try-error")) {
if (stringr::str_detect(alias_full, select_err)) {
alias_full <- tibble(alias = gene_names, entrez_id = NA)
} else {
stop("Invalid input")
}
}
# We only keep unambiguous aliases (occurring once in the translation table)
alias_full %>%
group_by(alias) %>%
dplyr::filter(n() == 1L)
}
# Helper function: removes part of element of larger named list full_list that
# occur in any other elements (For use with imap() or mapply())
list_element_reducer <- function (el, el_name, full_list) {
rest_list <- full_list[setdiff(names(full_list), el_name)]
setdiff(el, unique(unlist(rest_list)))
}
#' Remove some ambiguous entries in a gene translation data frame
#'
#' Assuming a data frame with at least 2 columns, we may split 1 column on the
#' other. We may then remove entries in multiple-entry elements that occur in
#' 1-entry elements - these are likely mis-assigned annotations.
#' @param gene_table Data frame
#' @param id1 Name of column to split (unquoted)
#' @param id2 Index column for the split
#'
#' @return Reduced data frame
#' @export
#'
#' @examples
#' test <- data.frame(ida = c("a", "b", "a"), idb = c("x", "x", "y"))
#' reduce_gene_table_ambiguity(test, ida, idb)
#'
reduce_gene_table_ambiguity <- function (gene_table, id1, id2) {
arg1 <- substitute(id1); arg2 <- substitute(id2)
mapping_list <- with(
gene_table,
split(eval(arg1), eval(arg2))
)
len1_list <- mapping_list[lengths(mapping_list) == 1L]
len1_list_elements <- unique(unlist(len1_list))
toreduce_list <- mapping_list[lengths(mapping_list) > 1L]
reduced_list <- purrr::map(
toreduce_list,
~ setdiff(.x, len1_list_elements)
)
stack(c(len1_list, reduced_list)) %>%
purrr::set_names(as.character(c(arg1, arg2)))
}
# Stats -------------------------------------------------------------------
my.fisher.test.num <- function(n1a, n1b, n2a, n2b, alternative="t",
verbose=FALSE, pvalando=TRUE) {
fm <- matrix(c(n1a, n1b, n2a, n2b), nrow=2)
ft <- fisher.test(fm, alternative=alternative)
if (verbose) {
return(list(ft=ft, fm=fm, fr=fm[1, ]/(fm[1, ] + fm[2, ])))
}
if (pvalando) {
return(c(`p-value`=ft$p.value, ft$estimate))
}
else {
print(ft$p.value)
}
}
#' Performs Fisher test for odds difference of condition 1 contingent on
#' condition 2.
#'
#' NAs must be taken into account when formulating the conditions below,
#' if desired.
#'
#' The matrix (returned if \code{verbose=TRUE}) is constructed with cond1
#' and !cond1 for the first and second row, respectively; likewise with cond2
#' and !cond2 for the first and second column, respectively
#'
#' @param df tibble
#' @param cond1 quosure evaluating to a logical for condition 1
#' @param cond2 quosure evaluating to a logical for condition 2
#' @param alternative default="two.sided", see fisher.test().
#' @param ... further arguments to \code{my.fisher.test.num()} such as
#' \code{verbose}
#'
#' @return p value and odds ratio (c1c2/!c1c2) / (c1!c2/!c1!c2)
#' @export
#'
#' @examples
#' df <- data_frame(a=1:10, b=4:13)
#' fisher_test_df(df, quo(a < 4), quo(b < 7))
#' fisher_test_df(df, quo(a > 2), quo(b < 8))
fisher_test_df <- function (df, cond1, cond2, alternative="two.sided",
...) {
# Contingency table for "outof" version
#
# cond2 | !cond2
# -----------------------------
# cond1 | i(df_1, df_2) | i(df_1, !df_2)
# !cond1 | i(!df_1, df_2) | i(!df_1, !df_2)
#
my.fisher.test.num(nrow(dplyr::filter(df, (!! cond1) & (!! cond2)) ),
nrow(dplyr::filter(df, (!(!! cond1)) & (!! cond2)) ),
nrow(dplyr::filter(df, (!! cond1) & (!(!! cond2)) ) ),
nrow(dplyr::filter(df, (!(!! cond1)) & (!(!! cond2)) ) ),
alternative=alternative, ...
)
}
#' Perform p value meta analysis using MetaDE
#'
#' @param ... 2 or more vectors of equal lengths of p values
#' @param metameth meta analysis method, defaluts to minP
#' (OR=union-intersection)
#'
#' @return vector of p values
#' @export
#'
#' @examples qmetade(runif(10), runif(10))
qmetade <- function (..., metameth="minP") {
# metaan <- MetaDE::MetaDE.pvalue(list(p=do.call(cbind, list(...)),
# bp=NULL),
# meta.method=metameth)
# as.numeric(metaan$meta.analysis$pval)
pvals <- do.call(cbind, list(...))
if (ncol(pvals) == 1L)
return(as.vector(pvals))
else {
switch(metameth,
minP = apply(pvals, 1, function (x)
ifelse(any(is.na(x)), NA, metap::minimump(x)$p)),
maxP = apply(pvals, 1, function (x)
ifelse(any(is.na(x)), NA, metap::maximump(x)$p)),
Fisher = apply(pvals, 1, function (x)
ifelse(any(is.na(x)), NA, metap::sumlog(x)$p)),
Stouffer = apply(pvals, 1, function (x)
ifelse(any(is.na(x)), NA, metap::sumz(x)$p)),
stop("Incorrect metameth argument"))
}
}
# Adjusted p values with pre-filtering
#' Compute adjusted p values with pre-filtering
#'
#' @param pvals vector of input p values
#' @param ind logical vector corresponding to pre-filtering condition. NAs are
#' allowed and reinterprated as FALSE.
#' @param method method selection for p.adjust
#'
#' @return vector of adjusted p values with NAs for non-satisfied pre-filtering
#' @export
#'
#' @examples
#' pvals <- runif(10)
#' effectsize <- runif(10)
#' sel_padj(pvals, effectsize > 0.5)
sel_padj <- function (pvals, ind, method="BH") {
indnona <- ind & !is.na(ind)
toret <- rep(NA, length(pvals))
toret[indnona] <- stats::p.adjust(pvals[indnona], method)
toret
}
# q values with pre-filtering
#' Compute q values with pre-filtering
#'
#' @param pvals vector of input p values
#' @param ind logical vector corresponding to pre-filtering condition. NAs are
#' allowed and reinterprated as FALSE. If NULL, all are interpreted as TRUE.
#' @param method method selection for qvalue::pi0est
#' @param ... passed on to qvalue::qvalue (and then to pi0est)
#'
#' @return vector of q values with NAs for non-satisfied pre-filtering
#' @export
#'
#' @examples
#' pvals <- runif(10)
#' effectsize <- runif(10)
#' sel_qval(pvals, effectsize > 0.5)
sel_qval <- function (pvals, ind = NULL, method = "bootstrap", ...) {
if (is.null(ind))
ind <- rep(TRUE, length(pvals))
indnona <- ind & !is.na(ind) & !is.na(pvals)
toret <- rep(NA, length(pvals))
if (sum(indnona) > 0L)
toret[indnona] <- qvalue::qvalue(pvals[indnona],
pi0.method = method,
...)$qvalues
toret
}
# proportion.true.disc <- function(qvals) {
# qvals <- qvals[order(qvals)]
# rho <- (1:length(qvals))/length(qvals)
# proportions <- (1 - qvals)*rho
# return(list(proportion=max(proportions), qvals.sorted=qvals,
# proportions.vec=proportions))
# }
# Circadian reference data ------------------------------------------------
core_clock_genes_noror <- c("Arntl", "Clock", "Npas2",
"Per1", "Per2", "Per3",
"Cry1", "Cry2",
"Nr1d1", "Nr1d2",
"Bhlhe40", "Bhlhe41",
"Dbp", "Nfil3", "Hlf", "Tef",
"Ciart")
core_clock_genes <- c("Arntl", "Clock", "Npas2",
"Per1", "Per2", "Per3",
"Cry1", "Cry2",
"Nr1d1", "Nr1d2", "Rora", "Rorc",
"Bhlhe40", "Bhlhe41",
"Dbp", "Nfil3", "Hlf", "Tef",
"Ciart")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.