library(dplyr)
library(Rcpp)
# Fit weighted circadian --------------------------------------------------
FitWeightedSCircadian <- function(dat, wake.collapsed, exprs.cname = "exprs", time.cname = "time",
condensed=FALSE, pseudo = 1, min.hl = 0.5, max.hl = 24,
include.mix.weight = FALSE,
include.intercept = FALSE,
do.lowpass=FALSE, min.hl.mrna=NA, max.hl.mrna=NA, low.pass.filter.times=NA,
jlambda = 0, jmaxit = 100){
# fit data to weighted Process S and Circadian
# return ML estimates, variance of estimates (from Hessian), and BIC
# pseudo: additional constant to allow for max/min expression in upper/lower limits
# include.mix.weight: add a mix weight to weigh sleep with circ model (useless I think)
# get inits
max.exprs <- max(dat[[exprs.cname]])
min.exprs <- min(dat[[exprs.cname]])
if (!do.lowpass){
inits.novar.S <- c(dat[[exprs.cname]][1], max.exprs, 10, min.exprs, 8)
lims.S <- GetLims.S(max.exprs, min.exprs, pseudo = pseudo, min.hl = min.hl, max.hl = max.hl)
} else {
inits.novar.S <- c(dat[[exprs.cname]][1], max.exprs, 10, min.exprs, 8, 2) # add mRNA lifetime
lims.S <- GetLims.S.LowPass(max.exprs, min.exprs, pseudo = pseudo, min.hl = min.hl, max.hl = max.hl, min.hl.mrna = min.hl.mrna, max.hl.mrna)
}
# inits for circadian fit by doing linear fit as initial guess
fit.circ <- FitRhythmic(dat, T.period = 24, use.weights = FALSE, get.bic = FALSE, condensed = FALSE) # params for circadian
if (include.intercept){
inits.novar.circ <- c(fit.circ$intercept, fit.circ$cos.part, fit.circ$sin.part)
} else {
inits.novar.circ <- c(fit.circ$cos.part, fit.circ$sin.part)
}
if (include.mix.weight){
# concatenate and add weights
w.init <- 0.5
inits.novar <- c(inits.novar.S, inits.novar.circ, w.init)
} else {
inits.novar <- c(inits.novar.S, inits.novar.circ)
}
# get lims
# sleep lims independent of whether or not we include mix weights
# circadian lims depend on whether or not we include mix weights?
lims.circ <- GetLims.circ(Inf, -Inf, include.intercept = include.intercept, lowerpart = -Inf) # max and min exprs only takes into effect if include.intercept is TRUE
if (include.mix.weight){
# concatenate and add weights
w.min <- 0; w.max <- 1
lims.lower <- c(lims.S$lower, lims.circ$lower, w.min)
lims.upper <- c(lims.S$upper, lims.circ$upper, w.max)
} else {
lims.lower <- c(lims.S$lower, lims.circ$lower)
lims.upper <- c(lims.S$upper, lims.circ$upper)
}
if (any(is.na(c(inits.novar, lims.lower, lims.upper)))){
print(c(inits.novar, lims.lower, lims.upper))
stop("Inits, limits, cannot contain NAs")
}
fit.novar.optim <- tryCatch({
fit.novar.optim <- optim(inits.novar, fn = logL.WeightedSCircadian,
x = dat[[exprs.cname]], wake.collapsed = wake.collapsed, filter.times = dat[[time.cname]],
has.reps = TRUE,
estimate.var=TRUE,
include.mix.weight = include.mix.weight,
include.intercept = include.intercept,
do.lowpass=do.lowpass, low.pass.filter.times = low.pass.filter.times,
lambda = jlambda,
hessian=TRUE, method = "L-BFGS-B",
lower = lims.lower, upper = lims.upper,
control = list(maxit = jmaxit))
# lower = rep(0, length(inits.novar)), upper = rep(Inf, length(inits.novar)))
fit.novar.optim
}, error = function(e) {
# print(paste("problem with:", dat$gene[[1]]))
fit.novar.optim <- list()
fit.novar.optim$par <- rep(NA, length(inits.novar))
fit.novar.optim$value <- NA
fit.novar.optim$convergence <- paste(dat$gene[[1]], e$message)
fit.novar.optim$hessian <- diag(NA, length(inits.novar))
fit.novar.optim
})
params <- fit.novar.optim$par # 5 params from sleep, 3 or 2 from circ, 1 from mixing (optional)
logL <- -1 * fit.novar.optim$value # optim minimizes -logL
bic <- GetBIC.logL(logL, n = nrow(dat), k = length(params))
# get param estimates: S
init <- params[[1]]; U <- params[[2]]; tau.w <- params[[3]]; L <- params[[4]]; tau.s <- params[[5]]
if (do.lowpass){
tau.mrna <- params[[6]]
circ.begin.i <- 7
} else {
circ.begin.i <- 6
}
# get param estimates: circadian
if (include.intercept){
intercept <- params[[circ.begin.i]]; cos.part <- params[[circ.begin.i + 1]]; sin.part <- params[[circ.begin.i + 2]]
mix.weight.i <- 9
} else {
cos.part <- params[[circ.begin.i]]; sin.part <- params[[circ.begin.i + 1]]
mix.weight.i <- 8
intercept <- NA
}
if (include.mix.weight){
# mixing weights
mix.weight <- params[[mix.weight.i]]
} else {
mix.weight <- NA
}
if (!condensed){
# get variance estimates
params.var <- diag(fit.novar.optim$hessian) # standard error is sqrt of this
# var estimates: S
init.var <- params.var[[1]]; U.var <- params.var[[2]]; tau.w.var <- params.var[[3]]; L.var <- params.var[[4]]; tau.s.var <- params.var[[5]]
if (do.lowpass){
tau.mrna.var <- params.var[[6]]
}
# var estimates: circ
if (include.intercept){
intercept.var <- params.var[[circ.begin.i]]; cos.part.var <- params.var[[circ.begin.i + 1]]; sin.part.var <- params.var[[circ.begin.i + 2]]
} else {
cos.part.var <- params.var[[circ.begin.i]]; sin.part.var <- params.var[[circ.begin.i + 1]]
intercept.var <- NA
}
}
if (!condensed){
if (!do.lowpass){
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
intercept = intercept,
cos.part = cos.part,
sin.part = sin.part,
mix.weight = mix.weight,
logL = logL,
init.var = init.var,
U.var = U.var,
tau.w.var = tau.w.var,
L.var = L.var,
tau.s.var = tau.s.var,
intercept.var = intercept.var,
cos.part.var = cos.part.var,
sin.part.var = sin.part.var,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
} else {
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
tau.mrna = tau.mrna,
intercept = intercept,
cos.part = cos.part,
sin.part = sin.part,
mix.weight = mix.weight,
logL = logL,
init.var = init.var,
U.var = U.var,
tau.w.var = tau.w.var,
L.var = L.var,
tau.s.var = tau.s.var,
intercept.var = intercept.var,
cos.part.var = cos.part.var,
sin.part.var = sin.part.var,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
}
} else {
if (!do.lowpass){
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
intercept = intercept,
amp = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$amp,
phase = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$phase,
mix.weight = mix.weight,
logL = logL,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
} else {
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
tau.mrna = tau.mrna,
intercept = intercept,
amp = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$amp,
phase = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$phase,
mix.weight = mix.weight,
logL = logL,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
}
}
return(dat.out)
}
logL.WeightedSCircadian <- function(th, x, wake.collapsed, filter.times, estimate.var=FALSE,
has.reps = TRUE, include.mix.weight=FALSE, include.intercept=FALSE,
do.lowpass=FALSE, low.pass.filter.times=NA, lambda=0){
# include.mix.weight: whether or not add additional parameter for weighing between sleep and circ (superfluous)
# include.intercept: whether we include another intercept, but with S(t) it should suffice?
if (!estimate.var){
sig2 <- th[1]
params <- th[-1] # first parameter is variance of the signal: replicates of gene expression?
} else {
params <- th
}
mu <- WeightedSCircadian(params, wake.collapsed, unique(filter.times),
include.mix.weight=include.mix.weight, include.intercept=include.intercept,
do.lowpass=do.lowpass, low.pass.filter.times = low.pass.filter.times)
if (has.reps){
# handle replicates because we fit with unique(filter.times)
mu <- DuplicateVector(mu, filter.times)
}
if (estimate.var){
# biased estimator of variance
sig2 <- sum((x - mu) ^ 2) / length(x)
}
if (length(x) != length(mu)) stop("x and mu must be equal lengths")
loss <- -sum(dnorm(x, mean=mu, sd=sqrt(sig2), log=T))
if (lambda != 0){
penal.factor <- PenaltyFactor(mu, filter.times, L=2)
loss <- PenalizeLoss(loss, lambda, penal.factor)
}
return(loss)
}
WeightedSCircadian <- function(params, wake.collapsed, filt.time, ampphase=FALSE,
include.mix.weight=FALSE, include.intercept=FALSE,
do.lowpass=FALSE, low.pass.filter.times=NA){
# Do weighted mixing of params.S and params.sine
# do params is concatenated vector of S-process params and sine wave params with weighted param
## S process params
# params[1] <- init
# params[2] <- U
# params[3] <- tau.w
# params[4] <- L
# params[5] <- tau.s
## Sine wave params
# params[6] <- intercept (optional)
# params[7] <- amp (if ampphase), cos.part if not
# params[8] <- phase if ampphase, sin.part if not
## Finally add weighted parameter
# params[9] <- mixing.weight (optional)
#
# Optionally pass S(t) though lowpass filter, with appropriate mRNA levels
if (include.mix.weight){
w <- unlist(params[9]) # mixing weight and optional
}
if (!do.lowpass){
params.s.end.i <- 5
} else {
params.s.end.i <- 6
}
params.circ.start.i <- params.s.end.i + 1
if (include.intercept){
params.circ.end.i <- params.circ.start.i + 2
} else {
params.circ.end.i <- params.circ.start.i + 1
}
params.S <- unlist(params[1:params.s.end.i])
params.sine <- unlist(params[params.circ.start.i:params.circ.end.i])
# if (!do.lowpass){
# params.S <- unlist(params[1:5])
# if (include.intercept){
# params.sine <- unlist(params[6:8])
# } else {
# # no intercept
# params.sine <- unlist(params[6:7])
# }
# } else {
# params.S <- unlist(params[1:6])
# if (include.intercept){
# params.sine <- unlist(params[7:9])
# } else {
# # no intercept
# params.sine <- unlist(params[7:8])
# }
# }
y.S <- unlist(S.process.collapsed(params.S, wake.collapsed, filt.time, do.lowpass, low.pass.filter.times), use.names = FALSE)
y.sine <- unlist(CosSine(params.sine,
time = filt.time,
ampphase = ampphase,
include.intercept = include.intercept),
use.names = FALSE)
if (include.mix.weight){
y <- w * y.S + (1 - w) * y.sine
} else {
y <- y.S + y.sine
}
return(y)
}
# Fit FreeAmp with S process ----------------------------------------------
FitWeightedSFreeAmp <- function(dat, AmpFunc, wake.collapsed, exprs.cname = "exprs", time.cname = "time",
T.period = 24, tswitch = 30, form = "switch",
pseudo = 0, min.hl = 0.5, max.hl = 24, include.intercept = FALSE,
do.lowpass = FALSE, min.hl.mrna = NA, max.hl.mrna = NA, low.pass.filter.times = NA,
jlambda = 0, jmaxit = 100){
# fit data to weighted Process S and Free Amp
# return ML estimates, variance of estimates (from Hessian), and BIC
# pseudo: additional constant to allow for max/min expression in upper/lower limits
# include.intercept: dont need to include generally if you use S.process as your mean
if (include.intercept){
warning("Warning: including intercept in mixed S model. Consider setting it to False and let S process define mean")
}
# get inits
max.exprs <- max(dat[[exprs.cname]])
min.exprs <- min(dat[[exprs.cname]])
if (!do.lowpass){
inits.novar.S <- c(dat[[exprs.cname]][1], max.exprs, 10, min.exprs, 8)
} else {
inits.novar.S <- c(dat[[exprs.cname]][1], max.exprs, 10, min.exprs, 8, 2)
}
# inits for circadian fit by doing linear fit as initial guess
fit.circ <- FitRhythmic(dat, T.period = 24, use.weights = FALSE, get.bic = FALSE, condensed = FALSE) # params for circadian
if (include.intercept){
inits.novar.circ <- c(fit.circ$intercept, fit.circ$cos.part, fit.circ$sin.part)
} else {
inits.novar.circ <- c(fit.circ$cos.part, fit.circ$sin.part)
}
# init amplitude functions
params.lst <- ParseFormGetParams(form)
amp.params <- params.lst$amp.params
amp.lims.lower <- params.lst$amp.lims.lower
amp.lims.upper <- params.lst$amp.lims.upper
inits.novar <- c(inits.novar.S, inits.novar.circ, amp.params)
# get lims
# sleep lims independent of whether or not we include mix weights
if (!do.lowpass){
lims.S <- GetLims.S(max.exprs, min.exprs, pseudo = pseudo, min.hl = min.hl, max.hl = max.hl)
} else {
lims.S <- GetLims.S.LowPass(max.exprs, min.exprs, pseudo = pseudo, min.hl = min.hl, max.hl = max.hl, min.hl.mrna = min.hl.mrna, max.hl.mrna = max.hl.mrna)
}
# circadian lims depend on whether or not we include mix weights?
lims.circ <- GetLims.circ(Inf, -Inf, include.intercept = include.intercept, lowerpart = -Inf) # max and min exprs only takes into effect if include.intercept is TRUE
# lims for amp parameter defined in if loop checking form
lims.lower <- c(lims.S$lower, lims.circ$lower, amp.lims.lower)
lims.upper <- c(lims.S$upper, lims.circ$upper, amp.lims.upper)
# check limits are sane
if (any(abs(lims.upper - lims.lower) < 1e-5)){
warning("Warninig, upper and lower limits are very similar. Consider redefining your limits")
}
fit.novar.optim <- tryCatch({
fit.novar.optim <- optim(inits.novar, fn = logL.WeightedSFreeAmp,
x = dat[[exprs.cname]], wake.collapsed = wake.collapsed, filter.times = dat[[time.cname]],
tswitch = tswitch,
form = form,
has.reps = TRUE,
include.intercept = include.intercept,
do.lowpass=do.lowpass,
low.pass.filter.times=low.pass.filter.times,
lambda = jlambda,
hessian=TRUE, method = "L-BFGS-B",
lower = lims.lower, upper = lims.upper,
control = list(maxit = jmaxit))
# lower = rep(0, length(inits.novar)), upper = rep(Inf, length(inits.novar)))
fit.novar.optim
}, error = function(e) {
# print(paste("problem with:", dat$gene[[1]]))
fit.novar.optim <- list()
fit.novar.optim$par <- rep(NA, length(inits.novar))
fit.novar.optim$value <- NA
fit.novar.optim$convergence <- paste(dat$gene[[1]], e$message)
fit.novar.optim$hessian <- diag(NA, length(inits.novar))
fit.novar.optim
})
params <- fit.novar.optim$par # 5 params from sleep, 3 or 2 from circ, 1 from mixing (optional)
logL <- -1 * fit.novar.optim$value # optim minimizes -logL
bic <- GetBIC.logL(logL, n = nrow(dat), k = length(params))
# get param estimates: S
init <- params[[1]]; U <- params[[2]]; tau.w <- params[[3]]; L <- params[[4]]; tau.s <- params[[5]]
if (do.lowpass){
tau.mrna <- params[[6]]
circ.begin.i <- 7
} else {
circ.begin.i <- 6
}
# get param estimates: circadian
if (include.intercept){
intercept <- params[[circ.begin.i]]; cos.part <- params[[circ.begin.i + 1]]; sin.part <- params[[circ.begin.i + 2]]
amp.param.i <- circ.begin.i + 3
} else {
cos.part <- params[[circ.begin.i]]; sin.part <- params[[circ.begin.i + 1]]
intercept <- NA
amp.param.i <- circ.begin.i + 2 # after sin part
}
if (form == "switch"){
amp.param <- params[[amp.param.i]] # amp.new
} else if (form == "decay"){
amp.param <- params[[amp.param.i]] # amp.decay
} else {
stop("Form must be switch or ecay")
}
# # get variance estimates: TODO
# params.var <- diag(fit.novar.optim$hessian) # standard error is sqrt of this
# # var estimates: S
# init.var <- params.var[[1]]; U.var <- params.var[[2]]; tau.w.var <- params.var[[3]]; L.var <- params.var[[4]]; tau.s.var <- params.var[[5]]
# # var estimates: circ
# if (include.intercept){
# intercept.var <- params.var[[6]]; cos.part.var <- params.var[[7]]; sin.part.var <- params.var[[8]]
# } else {
# cos.part.var <- params.var[[6]]; sin.part.var <- params.var[[7]]
# intercept.var <- NA
# }
# # var estimates: amp parameters
if (!do.lowpass){
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
intercept = intercept,
amp = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$amp,
phase = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$phase,
amp.param = amp.param,
form = form,
tswitch = tswitch,
logL = logL,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
} else {
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
tau.mrna = tau.mrna,
intercept = intercept,
amp = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$amp,
phase = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$phase,
amp.param = amp.param,
form = form,
tswitch = tswitch,
logL = logL,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
}
return(dat.out)
}
logL.WeightedSFreeAmp <- function(th, x, wake.collapsed, filter.times, tswitch, form,
has.reps = TRUE, include.intercept=FALSE,
do.lowpass=FALSE, low.pass.filter.times=NA,
lambda = 0){
# include.intercept: whether we include another intercept, but with S(t) it should suffice?
mu <- WeightedSFreeAmp(th,
wake.collapsed,
unique(filter.times),
tswitch,
form,
ampphase=FALSE,
include.intercept=include.intercept,
do.lowpass=do.lowpass, low.pass.filter.time=low.pass.filter.times)
if (has.reps){
# handle replicates because we fit with unique(filter.times)
# expand vector to handle replicates
mu <- DuplicateVector(mu, filter.times)
}
# biased estimator of variance (always estimate)
sig2 <- sum((x - mu) ^ 2) / length(x)
if (length(x) != length(mu)) stop("x and mu must be equal lengths")
loss <- -sum(dnorm(x, mean=mu, sd=sqrt(sig2), log=T))
if (lambda != 0){
penal.factor <- PenaltyFactor(mu, filter.times, L=2)
loss <- PenalizeLoss(loss, lambda, penal.factor)
}
return(loss)
}
WeightedSFreeAmp <- function(params, wake.collapsed, filt.time, tswitch, form, ampphase=FALSE, include.intercept=FALSE,
do.lowpass=FALSE, low.pass.filter.times=NA){
# Do weighted mixing of params.S and params.sine
# do params is concatenated vector of S-process params and sine wave params with weighted param
## S process params
# params[1] <- init
# params[2] <- U
# params[3] <- tau.w
# params[4] <- L
# params[5] <- tau.s
## Sine wave params
# params[6] <- intercept (optional)
# params[7] <- amp (if ampphase), cos.part if not
# params[8] <- phase if ampphase, sin.part if not
## Free amp params
# everything beyond last sine wave parameter (8 or 7 depending on include.intercept)
# tswitch: integer or possibly a string to indicate time at which amplitude can "float"
# form: form at which amplitude floats after tswitch (switch, decay)
if (!do.lowpass){
params.s.end.i <- 5
} else {
params.s.end.i <- 6
}
params.circ.start.i <- params.s.end.i + 1
if (include.intercept){
params.circ.end.i <- params.circ.start.i + 2
} else {
params.circ.end.i <- params.circ.start.i + 1
}
params.amp.start.i <- params.circ.end.i + 1
params.S <- unlist(params[1:params.s.end.i])
params.sine <- unlist(params[params.circ.start.i:params.circ.end.i])
params.amp <- unlist(params[params.amp.start.i:length(params)])
y.S <- unlist(S.process.collapsed(params.S, wake.collapsed, filt.time, do.lowpass, low.pass.filter.times), use.names = FALSE)
y.sine <- unlist(Rhythmic.FreeAmp(th = c(params.sine, params.amp),
times = filt.time,
AmpFunc = AmpFunc,
T.period = 24,
tswitch = tswitch,
form = form,
include.intercept = include.intercept,
amp.phase = ampphase),
use.names=FALSE)
y <- y.S + y.sine
return(y)
}
# Fit Rhythmic with Free Amp ----------------------------------------------
FitRhythmic.FreeAmp <- function(dat, AmpFunc, exprs.cname = "exprs", time.cname = "time",
T.period = 24, tswitch = 27, form = "switch", include.intercept=TRUE,
jmaxit = 100){
# Fit rhythmic mode but with a general parametric amplitude function
# use lapply to vectorize this code on long dat
#
# Always estimate variance from the fit
#
# include.intercept: whether we fit the mean or not, perhaps useful if we include sleep?
# Init parameters with standard sinusoid fit
fit.circ <- FitRhythmic(dat, T.period = T.period, use.weights = FALSE, get.bic = FALSE, condensed = FALSE) # params for circadian
if (include.intercept){
inits.novar.circ <- c(fit.circ$intercept, fit.circ$cos.part, fit.circ$sin.part)
} else {
inits.novar.circ <- c(fit.circ$cos.part, fit.circ$sin.part)
}
# init amplitude functions
params.lst <- ParseFormGetParams(form)
amp.params <- params.lst$amp.params
amp.lims.lower <- params.lst$amp.lims.lower
amp.lims.upper <- params.lst$amp.lims.upper
# if (form == "switch"){
# A.new <- 0.5 # switch to half amplitude at tswitch
# amp.params <- A.new
# amp.lims.lower <- 0
# amp.lims.upper <- 3
# } else if (form == "decay"){
# tau <- 12 # 12 hours after tswitch, amplitude goes to 1/e (~0.367)
# amp.params <- tau
# amp.lims.lower <- 1/3 / log(2) # 1/3h half life converted to natural scale
# amp.lims.upper <- 24 / log(2) # 24h half life converted to natural scale
# } else if (form == "stepfree"){
# # like switch, but the time at which the switch happens is not predefined
# A.new <- 0.5
# tswitch.param <- 35 # at ZT 30
# amp.params <- c(A.new, tswitch.param)
# # time switch must happen somehwere in 2nd day
# amp.lims.lower <- c(0, 24)
# amp.lims.upper <- c(3, 48)
# } else if (form == "decayfree"){
# tau <- 12 # 12 hours after tswitch, amplitude goes to 1/e (~0.367)
# tswitch.param <- 35 # time at which decay happens
# amp.min <- 0.5 # decays to a minimum?
# amp.params <- c(tau, tswitch.param, amp.min)
# amp.lims.lower <- c(1/3 / log(2), 24, 0) # 1/3h half life converted to natural scale
# amp.lims.upper <- c(24 / log(2), 48, 3) # 24h half life converted to natural scale
# } else if (form == "decaymin"){
# # similar to "decay" but include a minimum (does not decay to 0)
# tau <- 9
# amp.min <- 0.5
# amp.params <- c(tau, amp.min)
# amp.lims.lower <- c(1/3 / log(2), 0) # 1/3h half life converted to natural scale
# amp.lims.upper <- c(24 / log(2), 3) # 24h half life converted to natural scale
# } else {
# stop(paste("form:", form, "not yet coded"))
# }
inits.novar <- c(inits.novar.circ, amp.params)
lims.circ <- GetLims.circ(Inf, -Inf, include.intercept = include.intercept, lowerpart = -Inf)
lims.lower <- c(lims.circ$lower, amp.lims.lower)
lims.upper <- c(lims.circ$upper, amp.lims.upper)
fit.novar.optim <- tryCatch({
fit.novar.optim <- optim(inits.novar, fn = logL.Rhythmic.FreeAmp,
x = dat[[exprs.cname]], times = dat[[time.cname]],
AmpFunc = AmpFunc,
T.period = T.period,
tswitch = tswitch,
form = form,
include.intercept = include.intercept,
hessian=TRUE, method = "L-BFGS-B",
lower = lims.lower, upper = lims.upper,
control = list(maxit = jmaxit))
# lower = rep(0, length(inits.novar)), upper = rep(Inf, length(inits.novar)))
fit.novar.optim
# # diagnostics
# x <- dat[[exprs.cname]]
# times <- dat[[time.cname]]
# xhat <- Rhythmic.FreeAmp(fit.novar.optim$par, times, AmpFunc, T.period = 24, tswitch = tswitch, form = form, amp.phase=FALSE, include.intercept=TRUE)
# # check final plot
# plot(times, x); lines(times, xhat)
# plot(times, xhat); lines(times, x)
# # check inits
# plot(times, x)
# lines(lines(times, Rhythmic.FreeAmp(inits.novar, x, times, AmpFunc, T.period, tswitch, form, include.intercept=TRUE), type = "o"))
# logL.Rhythmic.FreeAmp(inits.novar, x, times, AmpFunc, T.period, tswitch, form, include.intercept=TRUE)
}, error = function(e) {
# print(paste("problem with:", dat$gene[[1]]))
fit.novar.optim <- list()
fit.novar.optim$par <- rep(NA, length(inits.novar))
fit.novar.optim$value <- NA
fit.novar.optim$convergence <- paste(dat$gene[[1]], e$message)
fit.novar.optim$hessian <- diag(NA, length(inits.novar))
fit.novar.optim
})
params <- fit.novar.optim$par # 5 params from sleep, 3 or 2 from circ, 1 from mixing (optional)
logL <- -1 * fit.novar.optim$value # optim minimizes -logL
bic <- GetBIC.logL(logL, n = nrow(dat), k = length(params))
# get param estimates: circadian
if (include.intercept){
intercept <- params[[1]]; cos.part <- params[[2]]; sin.part <- params[[3]]
amp.param.i <- 4 # after sin part
} else {
cos.part <- params[[1]]; sin.part <- params[[2]]
intercept <- NA
amp.param.i <- 3 # after sin part
}
if (form == "switch"){
amp.param <- params[[amp.param.i]] # amp.new
} else if (form == "decay"){
amp.param <- params[[amp.param.i]] # amp.decay
} else if (form == "stepfree"){
nparams <- 2
indx.add <- nparams - 1
amp.param.i.end <- amp.param.i + indx.add
amp.param <- params[amp.param.i:amp.param.i.end]
} else if (form == "decayfree"){
nparams <- 3
indx.add <- nparams - 1
amp.param.i.end <- amp.param.i + indx.add
amp.param <- params[amp.param.i:amp.param.i.end]
} else if (form == "decaymin"){
nparams <- 2
indx.add <- nparams - 1
amp.param.i.end <- amp.param.i + indx.add
amp.param <- params[amp.param.i:amp.param.i.end]
} else {
stop(paste("Form not yet coded:", form))
}
if (length(amp.param) > 1){
# concatenate by comma
amp.param <- as.character(paste0(signif(amp.param, 3), collapse=","))
}
# # get variance estimates: TODO
# params.var <- diag(fit.novar.optim$hessian) # standard error is sqrt of this
# # var estimates: circ
# if (include.intercept){
# intercept.var <- params.var[[1]]; cos.part.var <- params.var[[2]]; sin.part.var <- params.var[[3]]
# } else {
# cos.part.var <- params.var[[1]]; sin.part.var <- params.var[[2]]
# intercept.var <- NA
# }
# # var estimates: amplitude parameter
# # TODO
dat.out <- data.frame(intercept = intercept,
amp = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$amp,
phase = CosSineToAmpPhase(cos.part, sin.part, T.period = 24)$phase,
amp.param = amp.param,
form = form,
tswitch = tswitch,
logL = logL,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message,
stringsAsFactors = FALSE)
return(dat.out)
}
logL.Rhythmic.FreeAmp <- function(th, x, times, AmpFunc, T.period, tswitch, form, include.intercept=TRUE){
# log likelihood for free rhythmic amplitude
#
# thetas are variable depending on AmpFunc, but the base
# Parameters depend on include.intercept
#
# include.intercept=TRUE:
# th[[1]] <- mu
# th[[2]] <- cospart
# th[[3]] <- sinpart
# th[4 to end] <- parameters to put into AmpFunc
#
# include.intercept=FALSE (mu=0):
# th[[1]] <- cospart
# th[[2]] <- sinpart
# th[3 to end] <- parameters to put into AmpFunc
#
# parse parameters
if (include.intercept){
yint <- th[[1]]
cospart <- th[[2]]
sinpart <- th[[3]]
amp.params <- th[-c(1,2,3)]
} else {
yint <- 0
cospart <- th[[1]]
sinpart <- th[[2]]
amp.params <- th[-c(1,2)]
}
mu <- Rhythmic.FreeAmp(th, times, AmpFunc, T.period, tswitch, form, include.intercept, amp.phase=FALSE)
# biased estimator of variance
sig2 <- sum((x - mu) ^ 2) / length(x)
if (length(x) != length(mu)) stop("x and mu must be equal lengths")
return(-sum(dnorm(x, mean=mu, sd=sqrt(sig2), log=T)))
}
Rhythmic.FreeAmp <- function(th, times, AmpFunc, T.period = 24, tswitch=27, form="switch", include.intercept = TRUE,
amp.phase = FALSE){
# amp.phase: FALSE, assume cospart and sinpart
# amp.phase: TRUE, assume amp and phase
# parse parameters
if (include.intercept){
yint <- th[[1]]
if (!amp.phase){
cospart <- th[[2]]
sinpart <- th[[3]]
} else {
amppart <- th[[2]]
phasepart <- th[[3]]
}
amp.params <- th[-c(1,2,3)]
} else {
yint <- 0
if (!amp.phase){
cospart <- th[[1]]
sinpart <- th[[2]]
} else {
amppart <- th[[1]]
phasepart <- th[[2]]
}
amp.params <- th[-c(1,2)]
}
w <- 2 * pi / T.period
# calculate Amplitude given AmpFunc which is function of time
amp <- AmpFunc(times, amp.params, time.switch = tswitch, form=form)
if (!amp.phase){
mu <- yint + amp * (cospart * cos(w * times) + sinpart * sin(w * times))
} else {
mu <- yint + amp * (amppart * cos(w * times - w * phasepart))
}
return(mu)
}
AmpFunc <- function(times, amp.params, time.switch = 27, form = "switch"){
# times: vector of times
# amp.params: parameters, depends on "form"
# time.switch: time at which AmpFunc takes into effect, otherwise A=1
# form: type of amplitude function to be used. Possible forms are:
# "switch": A=1 if t <= t1. Otherwise A=A.new
# "decay": A=1 if t <= t1. Otherwise A=exp(-(t - t1) / tau)
# TODO other forms
A <- rep(1, length(times)) # init
if (is.numeric(time.switch)){
switch.i <- which(times >= time.switch) # index for replacing with AmpFunc
} else if (is.character(time.switch)){
switch.i <- NA # define it later, depending on form
} else {
stop(paste("Time switch must be numeric or character:", time.switch, class(time.switch)))
}
if (form == "switch"){
# amp.params need to be length 1.
# amp.params[[1]]: A.new (new amplitude after time.switch)
if (length(amp.params) != 1){
stop(paste("Expected one parameter for 'form=switch'", paste0(amp.params, ",")))
}
A.new <- amp.params[[1]]
A[switch.i] <- A.new
} else if (form == "decay"){
if (length(amp.params) != 1){
stop(paste("Expected one parameter for 'form=decay'", paste0(amp.params, ",")))
}
tau <- amp.params[[1]]
Avec.new <- exp(-(times[switch.i] - time.switch) / tau)
A[switch.i] <- Avec.new
} else if (form == "stepfree"){
# two amp parameters
if (length(amp.params) != 2){
stop(paste("Expected one parameter for 'form=stepfree'", paste0(amp.params, collapse=",")))
}
A.new <- amp.params[[1]]
# need to define switch.i
time.switch <- amp.params[[2]]
switch.i <- which(times >= time.switch)
A[switch.i] <- A.new
} else if (form == "decayfree"){
# three amp parameters
if (length(amp.params) != 3){
stop(paste("Expected one parameter for 'form=decayfree'", paste0(amp.params, collapse=",")))
}
tau <- amp.params[[1]]
# need to define switch.i
time.switch <- amp.params[[2]]
switch.i <- which(times >= time.switch)
A.min <- amp.params[[3]]
Avec.new <- A.min + (1 - A.min) * exp(-(times[switch.i] - time.switch) / tau)
A[switch.i] <- Avec.new
} else if (form == "decaymin"){
tau <- amp.params[[1]]
A.min <- amp.params[[2]]
Avec.new <- A.min + (1 - A.min) * exp(-(times[switch.i] - time.switch) / tau)
A[switch.i] <- Avec.new
} else {
stop(paste("Form not yet coded:", form))
}
return(A)
}
# Fit process S ----------------------------------------------------------
FitProcessS <- function(dat, wake.collapsed, exprs.cname = "exprs", time.cname = "time",
condensed=TRUE, pseudo = 0, min.hl = 2/3, max.hl = 24, do.lowpass = FALSE,
min.hl.mrna=NA, max.hl.mrna=NA, low.pass.filter.times=NA,
jlambda = 0, jmaxit = 100){
# fit data to Process S
# return ML estimates, variance of estimates (from Hessian), and BIC
# pseudo: additional constant to allow for max/min expression in upper/lower limits
# do.lowpass: FALSE does not consider exponential smoothing of s(t). TRUE adds effect of mRNA HL and timestep to estimate exponential smoothing.
# jlambda: allows penalization for fits that predict differences between ZT0 and ZT24. This lambda could be fit through cross-validation?
max.exprs <- max(dat[[exprs.cname]])
min.exprs <- min(dat[[exprs.cname]])
if (!do.lowpass){
n.params <- 5
inits.novar <- c(dat[[exprs.cname]][1], max.exprs, 10, min.exprs, 8) # 5 total parameters
lims.S <- GetLims.S(max.exprs, min.exprs, pseudo = pseudo, min.hl = min.hl, max.hl = max.hl) # No MRNA half-life
} else {
n.params <- 6
inits.novar <- c(dat[[exprs.cname]][1], max.exprs, 10, min.exprs, 8, 2) # 6 total parameters
lims.S <- GetLims.S.LowPass(max.exprs, min.exprs, pseudo = pseudo, min.hl = min.hl, max.hl = max.hl, min.hl.mrna = min.hl.mrna, max.hl.mrna = max.hl.mrna) # HL half-life in hrs
}
lims.lower <- lims.S$lower
lims.upper <- lims.S$upper
fit.novar.optim <- tryCatch({
fit.novar.optim <- optim(inits.novar, fn = logL.Scollapsed,
x = dat[[exprs.cname]], wake.collapsed = wake.collapsed, filter.times = dat[[time.cname]], do.lowpass=do.lowpass, low.pass.filter.times=low.pass.filter.times,
has.reps=TRUE,
lambda = jlambda,
estimate.var=TRUE, hessian=TRUE, method = "L-BFGS-B",
lower = lims.lower, upper = lims.upper,
control = list(maxit = jmaxit))
# lower = rep(0, length(inits.novar)), upper = rep(Inf, length(inits.novar)))
fit.novar.optim
}, error = function(e) {
# print(paste("problem with:", dat$gene[[1]]))
fit.novar.optim <- list()
fit.novar.optim$par <- rep(NA, n.params)
fit.novar.optim$value <- NA
fit.novar.optim$convergence <- paste(dat$gene[[1]], e$message)
fit.novar.optim$hessian <- diag(NA, n.params)
fit.novar.optim
})
params <- fit.novar.optim$par
logL <- -1 * fit.novar.optim$value # optim minimizes -logL
bic <- GetBIC.logL(logL, n = nrow(dat), k = length(params))
# get param estimates
init <- params[[1]]; U <- params[[2]]; tau.w <- params[[3]]; L <- params[[4]]; tau.s <- params[[5]]
if (do.lowpass){
tau.mrna <- params[[6]]
}
if (!condensed){
# get variance estimates
params.var <- diag(fit.novar.optim$hessian) # standard error is sqrt of this
init.var <- params.var[[1]]; U.var <- params.var[[2]]; tau.w.var <- params.var[[3]]; L.var <- params.var[[4]]; tau.s.var <- params.var[[5]];
if (do.lowpass){
tau.mrna.var <- params.var[[6]]
}
}
if (!condensed){
if (!do.lowpass){
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
logL = logL,
init.var = init.var,
U.var = U.var,
tau.w.var = tau.w.var,
L.var = L.var,
tau.s.var = tau.s.var,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
} else {
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
tau.mrna = tau.mrna,
logL = logL,
init.var = init.var,
U.var = U.var,
tau.w.var = tau.w.var,
L.var = L.var,
tau.s.var = tau.s.var,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
}
} else {
if (!do.lowpass){
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
logL = logL,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
} else {
dat.out <- data.frame(init = init,
U = U, # upperobund
tau.w = tau.w, # when awake
L = L, # lowerbound
tau.s = tau.s, # when asleep
tau.mrna = tau.mrna, # delay from transfering layers
logL = logL,
bic = bic,
convergence = fit.novar.optim$convergence,
msg = fit.novar.optim$message)
}
}
return(dat.out)
}
logL.Scollapsed <- function(th, x, wake.collapsed, filter.times, do.lowpass=FALSE, low.pass.filter.times=NA,
estimate.var=FALSE, has.reps=FALSE, lambda = 0){
# Can optioally do lowpass or not. The size of parameters changes depending on lowpass (6 parameters if lowpass, otherwise 5)
# here we minimize the negative log likelihood
if (!estimate.var){
sig2 <- th[1]
params <- th[-1] # first parameter is variance of the signal: replicates of gene expression?
} else {
params <- th
}
mu <- S.process.collapsed(params, wake.collapsed, unique(filter.times),
do.lowpass = do.lowpass, low.pass.filter.times = low.pass.filter.times)
if (has.reps){
# handle replicates
mu <- DuplicateVector(mu, filter.times)
}
if (estimate.var){
# biased estimator of variance
sig2 <- sum((x - mu) ^ 2) / length(x)
}
if (length(x) != length(mu)) stop("x and mu must be equal lengths")
loss = -sum(dnorm(x, mean=mu, sd=sqrt(sig2), log=T)) # minimize the loss function
if (lambda != 0){
# use lambda as penalty, assumes ZT0 and ZT24 exist
penal.factor <- PenaltyFactor(mu, filter.times, L=2)
loss <- PenalizeLoss(loss, lambda, penal.factor)
}
return(loss)
}
cppFunction('double GetNextS(NumericVector row, NumericVector th, double Sprev) {
double S;
if (row[0] == 1){
S = th[1] - (th[1] - Sprev) * exp(-row[1] / th[2]);
} else {
S = th[3] + (Sprev - th[3]) * exp(-row[1] / th[4]);
}
return S;
}')
S.process.collapsed <- function(th, wake.collapsed, filter.times, do.lowpass = FALSE, low.pass.filter.times = NA){
# th1 -> init value
# th2 -> Upper asymptote
# th3 -> Decay constant for wake
# th4 -> Lower asymptote
# th5 -> Decay constant for sleep
# th6 -> If do.lowpass==TRUE, then degradation rate of mRNA
#
# low.pass.filter.times -> If low.pass.filter.times != NA, then vector of evenly spaced times for filtering in order to filter S.out into
# equal timesteps. Timestep will be calculated from low.pass.filter.times, so it needs to be evenly spaced.
# low.pass.filter.times: filter.times must be subset of low.pass.filter.times
#
S.prev <<- th[1] # init
S.out <- apply(wake.collapsed, 1, function(row){
S <- GetNextS(row, th, S.prev)
# wake <- row[1]
# t.dur <- row[2]
# if (wake == 1){
# S <- th[2] - (th[2] - S.prev) * exp(-t.dur / th[3])
# } else if (wake == 0){
# S <- th[4] + (S.prev - th[4]) * exp(-t.dur / th[5])
# } else {
# warning(paste0("wake should be 0 or 1: ", wake))
# S <- NA
# }
S.prev <<- S
return(S)
})
if (do.lowpass){
tau.mrna <- th[6]
if (is.na(tau.mrna)){
stop("Tau mRNA is NA")
}
gamma.m <- 1 / tau.mrna
tstep <- unique(signif(diff(low.pass.filter.times), digits = 4)) # otherwise you don't get good uniques
if (length(tstep) != 1){
stop(paste("Time-step must be an evenly spaced numeric vector. Found:", paste(low.pass.filter.times, collapse=",")))
}
# filter S.out into evenly spaced times
S.evenly <- S.out[which(wake.collapsed$time.cum %in% low.pass.filter.times)]
# print("Debug")
# plot(low.pass.filter.times, S.evenly, type = "l", main = "before low pass")
if (length(S.evenly) != length(low.pass.filter.times)){
stop("S.out did not get filtered properly into evenly spaced timepoints")
}
# low-pass filter S.out signal
S.lowpass <- LowPass(S.evenly, tstep, gamma.m)
# plot(low.pass.filt.time, S.lowpass, type = "l", main = "after low pass")
# Integrate S.lowpass into vector of size |S.out| in order to
# allow easy filtering by filter.times
S.lowpass.long <- rep(NA, length(S.out))
S.lowpass.long[which(wake.collapsed$time.cum %in% low.pass.filter.times)] <- S.lowpass
S.out <- S.lowpass.long
}
if (!missing(filter.times)){
S.out <- S.out[which(wake.collapsed$time.cum %in% filter.times)]
}
if (filter.times[[1]] == 0){
# if starts at 0, first value is skipped. This is not the case if it starts at say 3 hr
S.out <- c(th[1], S.out) # add init value to output
}
return(S.out)
}
Sprocess <- function(wake.df, U = 1, L = 0, tau.i = 10 * 3600, tau.d = 10 * 3600, start = 5, tstep = 4, filter.time){
wake <- wake.df$wake
time <- wake.df$time.shift
S <- rep(NA, length(wake))
S[1] <- start
for (i in seq(2, length(wake))){
w <- wake[i]
if (w == 1){
# awake
S[i] <- U - (U - S[(i - 1)]) * exp(-tstep / tau.i)
} else if (w == 0){
# sleep
S[i] <- L + (S[(i - 1)] - L) * exp(-tstep / tau.d)
} else {
warning("w must be 0 or 1")
}
}
if (!missing(filter.time)){
S <- S[time %in% filter.time]
}
return(S)
}
# Linear models -----------------------------------------------------------
FitGeneric <- function(dat, use.weights=FALSE, get.bic=TRUE, condensed=FALSE){
# Fit generic model with parameters equal to number of timepoints
# return BIC
# Expect columns exprs and time
# Set up dat so it is sorted by time and make time a "character"
dat <- dat %>%
arrange(time) %>%
ungroup() %>%
mutate(time = as.character(time))
if (!use.weights){
jfit <- lm(exprs ~ 0 + time, data = dat)
} else {
jfit <- lm(exprs ~ 0 + time, data = dat, weights = sqrt(Var))
}
time.vec <- as.numeric(gsub("time", "", names(coef(jfit))))
if (!get.bic){
dat.out <- data.frame(coef(jfit))
}
if (get.bic){
n <- length(jfit$residuals)
k <- length(coef(jfit))
rss <- sum(jfit$residuals ^ 2)
sig2 <- rss / n
mu <- coef(jfit) # use means at each timepoint
logL <- sum(dnorm(dat$exprs, mean=mu, sd=sqrt(sig2), log=T))
bic.logL <- GetBIC.logL(logL, n, k)
if (!condensed){
dat.out <- data.frame(t(coef(jfit)),
rss = rss,
logL = logL,
bic = bic.logL)
} else {
dat.out <- data.frame(t(coef(jfit)),
rss = rss,
logL = logL,
bic = bic.logL)
}
}
return(dat.out)
}
FitRhythmic <- function(dat, T.period = 24, use.weights=FALSE, get.bic=FALSE, condensed=FALSE){
# Fit rhythmic model to long dat. Expect dat to be per tissue per gene.
# use lapply and ddply to vectorize this code.
# dat needs columns: tissue time experiment exprs
# Get parameters from complex model: used later
GetParamsRhythModel <- function(myfit){
model.params <- coef(myfit)
return(list(intercept = model.params[1],
a = model.params[2],
b = model.params[3]))
}
# tissue <- unique(dat$tissue)
w = 2 * pi / T.period
# Expect columns exprs and time
if (!use.weights){
rhyth.fit <- lm(exprs ~ 1 + cos(w * time) + sin(w * time), data = dat)
flat.fit <- lm(exprs ~ 1, data = dat)
} else {
rhyth.fit <- lm(exprs ~ 1 + cos(w * time) + sin(w * time), data = dat, weights = sqrt(Var))
flat.fit <- lm(exprs ~ 1, data = dat, weights = sqrt(Var))
}
compare.fit <- anova(flat.fit, rhyth.fit)
pval <- compare.fit["Pr(>F)"][[1]][2]
model.params <- GetParamsRhythModel(rhyth.fit) # y = experimentarray + experimentrnaseq + a cos(wt) + b sin(wt)
amp <- sqrt(model.params$a ^ 2 + model.params$b ^ 2)
phase.rad <- atan2(model.params$b, model.params$a)
phase.time <- (phase.rad / w) %% T.period
# print(model.params)
# print(amp); print(phase); print(pval)
if (!get.bic){
dat.out <- data.frame(cos.part = model.params$a, sin.part = model.params$b,
amp = amp, phase = phase.time, pval = pval, intercept = model.params$intercept)
}
if (get.bic){
n <- length(rhyth.fit$residuals)
k <- length(coef(rhyth.fit))
rss <- sum(rhyth.fit$residuals ^ 2)
sig2 <- rss / n
mu <- CosSine(c(model.params$intercept, model.params$a, model.params$b), dat$time)
logL <- sum(dnorm(dat$exprs, mean=mu, sd=sqrt(sig2), log=T))
bic.logL <- GetBIC.logL(logL, n, k)
if (!condensed){
dat.out <- data.frame(cos.part = model.params$a, sin.part = model.params$b,
amp = amp, phase = phase.time, pval = pval, intercept = model.params$intercept,
rss = rss,
logL = logL,
bic = bic.logL)
} else {
dat.out <- data.frame(amp = amp, phase = phase.time, pval = pval, intercept = model.params$intercept,
logL = logL,
bic = bic.logL)
}
}
return(dat.out)
}
FitFlat <- function(dat, use.weights=FALSE, get.bic=FALSE, condensed=FALSE){
# Fit flat and return BIC
# Expect columns exprs and time
if (!use.weights){
flat.fit <- lm(exprs ~ 1, data = dat)
} else {
flat.fit <- lm(exprs ~ 1, data = dat, weights = sqrt(Var))
}
intercept <- coef(flat.fit)[["(Intercept)"]]
if (!get.bic){
dat.out <- data.frame(intercept = intercept)
}
if (get.bic){
n <- length(flat.fit$residuals)
k <- length(coef(flat.fit))
rss <- sum(flat.fit$residuals ^ 2)
sig2 <- rss / n
mu <- rep(intercept, length(dat$exprs))
logL <- sum(dnorm(dat$exprs, mean=mu, sd=sqrt(sig2), log=T))
bic.logL <- GetBIC.logL(logL, n, k)
if (!condensed){
dat.out <- data.frame(intercept = intercept,
rss = rss,
logL = logL,
bic = bic.logL)
} else {
dat.out <- data.frame(intercept = intercept,
rss = rss,
logL = logL,
bic = bic.logL)
}
}
return(dat.out)
}
WeightedLm <- function(dat.sub, w = 2 * pi / 24){
return(lm(formula = exprs ~ 1 + sin(w * time) + cos(w * time), data = dat.sub, weights = sqrt(Var)))
}
# Get limits --------------------------------------------------------------
GetLims.S <- function(max.exprs, min.exprs, pseudo = 0, min.hl = 0.5, max.hl = 24){
# get limits for S process
# TODO: replace lims lower and upper for fitting S process
# min and max half lives (hl) in HOURS.
# convert time to 1/2 to time to 1/e (easier math)
min.g <- min.hl / log(2)
max.g <- max.hl / log(2)
lims.lower <- c(min.exprs, min.exprs - pseudo, min.g, min.exprs - pseudo, min.g) # init, U, tau.w, L, tau.s, intercept, cos.part, sin.part
lims.upper <- c(max.exprs, max.exprs + pseudo, max.g, max.exprs + pseudo, max.g) # init, U, tau.w, L, tau.s, intercept, cos.part, sin.part, add pseudo arbitrarily to limit U and L
return(list(lower = lims.lower, upper = lims.upper))
}
GetLims.S.LowPass <- function(max.exprs, min.exprs, pseudo = 0, min.hl = 0.5, max.hl = 24, min.hl.mrna = 1/4, max.hl.mrna = 48){
# get limits for S process
# min and max half lives (hl) in HOURS.
# convert time to 1/2 to time to 1/e (easier math)
# half-time of sleep-wake response to synthesis rate??
min.g <- min.hl / log(2)
max.g <- max.hl / log(2)
# half-time of mRNA
min.tau.mrna <- min.hl.mrna / log(2)
max.tau.mrna <- max.hl.mrna / log(2)
lims.lower <- c(min.exprs, min.exprs - pseudo, min.g, min.exprs - pseudo, min.g, min.tau.mrna) # init, U, tau.w, L, tau.s, tau.mrna
lims.upper <- c(max.exprs, max.exprs + pseudo, max.g, max.exprs + pseudo, max.g, max.tau.mrna) # init, U, tau.w, L, tau.s, tau.mrna
return(list(lower = lims.lower, upper = lims.upper))
}
GetLims.circ <- function(max.exprs, min.exprs, include.intercept=TRUE, lowerpart = -Inf){
# intercept, cos.part, sin.part
# TODO: replace lims lower and upper for fitting S process
# if include.intercept: max and min exprs needs to be defined. otherwise not needed
#
# Use lowerpart = -Inf
lims.lower <- c(); lims.upper <- c()
if (include.intercept){
lims.lower <- min.exprs
lims.upper <- max.exprs
}
lims.lower <- c(lims.lower, lowerpart, lowerpart)
lims.upper <- c(lims.upper, Inf, Inf)
return(list(lower = lims.lower, upper = lims.upper))
}
# Cos-Sine functions ------------------------------------------------------
CosSine <- Vectorize(function(params, time, w = 2 * pi / 24, ampphase=FALSE, include.intercept=TRUE){
# TODO: should unit test this
if (include.intercept){
intercept <- params[1]
amp.or.cos <- params[2]
phase.or.cos <- params[3]
} else {
intercept <- 0
amp.or.cos <- params[1]
phase.or.cos <- params[2]
}
if (!ampphase){
y <- intercept + amp.or.cos * cos(w * time) + phase.or.cos * sin(w * time)
} else {
y <- intercept + amp.or.cos * cos(w * time - w * phase.or.cos)
}
return(y)
}, vectorize.args = "time")
CosSineToAmpPhase <- function(cos.part, sin.part, T.period = 24){
# TODO: use this function in FitRhythmic
w <- 2 * pi / T.period
amp <- sqrt(cos.part ^ 2 + sin.part ^ 2)
phase.rad <- atan2(sin.part, cos.part)
phase.time <- (phase.rad / w) %% T.period
return(list(amp = amp, phase = phase.time))
}
AmpPhaseToCosSine <- function(amp, phase.time, T.period = 24){
# Get cos.part and sin.part from amp and phase.time
w <- 2 * pi / T.period
phase.rad <- phase.time * w
cos.part <- amp * cos(phase.rad)
sin.part <- amp * sin(phase.rad)
return(list(cos.part = cos.part, sin.part = sin.part))
}
# Auxiliary functions -----------------------------------------------------
PenalizeLoss <- function(loss, lambda, penal.factor){
# penalize loss function with lambda with optional exponents
# use lambda as penalty, assumes ZT0 and ZT24 exist
return(loss + lambda * penal.factor)
}
PenaltyFactor <- function(mu, filt.time, L = 2){
# penalty factor: initially coded as the difference between ZT0 and ZT24 (to ensure homeostatic)
if (all(24 %in% filt.time, 0 %in% filt.time)){
# multiple indices, but should be all identical
mu.0 <- unique(mu[which(filt.time == 0)])
mu.24 <- unique(mu[which(filt.time == 24)])
penal.factor <- abs(mu.0 - mu.24) ^ L # larger difference, the more you penalize
} else {
warning("Expect filt.time to contain 0 and 24")
print(unique(filt.time))
}
return(penal.factor)
}
DoTtest <- function(dat.sub, formula.str = "counts.norm ~trtmnt"){
# test.out <- t.test(counts.norm ~ trtmnt, dat.sub)
test.out <- tryCatch({
test.out <- t.test(as.formula(formula.str), dat.sub)
# return(test.out)
}, error = function(e) {
# output in form that conforms with test.out if no error
# print("Error:")
# print(e)
test.out <- list(p.value = NA, estimate = c(NA, NA)) # estimate is list of 2 because we do diff() to get NA
# return(test.out)
})
pval <- unname(test.out$p.value)
sd.minus.nsd <- unname(diff(test.out$estimate))
return(data.frame(pval=pval, sd.minus.nsd = sd.minus.nsd))
}
DuplicateVector <- function(x, p){
# Add duplicates based on number of counts of p
counts <- table(p)
if (length(x) != length(counts)) stop("Length of x must equal number of unique p")
x.long <- rep(NA, sum(counts))
j <- 1
for (i in seq(length(x))){
x.long[(j:(j + counts[i] - 1))] <- x[i]
j <- j + counts[i]
}
return(x.long)
}
ParseFormGetParams <- function(form){
if (form == "switch"){
A.new <- 0.5 # switch to half amplitude at tswitch
amp.params <- A.new
amp.lims.lower <- 0
amp.lims.upper <- Inf
} else if (form == "decay"){
tau <- 12 # 12 hours after tswitch, amplitude goes to 1/e (~0.367)
amp.params <- tau
amp.lims.lower <- 1/3 / log(2) # 1/3h half life converted to natural scale
amp.lims.upper <- 24 / log(2) # 24h half life converted to natural scale
} else if (form == "stepfree"){
# like switch, but the time at which the switch happens is not predefined
A.new <- 0.5
tswitch.param <- 33 # at ZT 30
amp.params <- c(A.new, tswitch.param)
# time switch must happen somehwere in 2nd day
amp.lims.lower <- c(0, 24)
amp.lims.upper <- c(3, 48)
} else if (form == "decayfree"){
tau <- 12 # 12 hours after tswitch, amplitude goes to 1/e (~0.367)
tswitch.param <- 35 # time at which decay happens
amp.min <- 0.5 # decays to a minimum?
amp.params <- c(tau, tswitch.param, amp.min)
amp.lims.lower <- c(1/3 / log(2), 24, 0) # 1/3h half life converted to natural scale
amp.lims.upper <- c(24 / log(2), 48, 3) # 24h half life converted to natural scale
} else if (form == "decaymin"){
# similar to "decay" but include a minimum (does not decay to 0)
tau <- 12
# tau <- 0.5 # for Tef
amp.min <- 0.5
amp.params <- c(tau, amp.min)
amp.lims.lower <- c(1/3 / log(2), 0) # 1/3h half life converted to natural scale
amp.lims.upper <- c(24 / log(2), 3) # 24h half life converted to natural scale
} else {
stop(paste("form:", form, "not yet coded"))
}
return(list(amp.params = amp.params, amp.lims.lower = amp.lims.lower, amp.lims.upper = amp.lims.upper))
}
LowPass <- function(s, dt, gamma.m){
# https://en.wikipedia.org/wiki/Low-pass_filter#Discrete-time_realization
# s: synthesis function
# dt: time step
# gamma.m: degradation rate of m
#
# define smoothing factor
alpha <- (dt * gamma.m) / (dt * gamma.m + 1)
m <- rep(NA, length(s))
# m[[1]] <- alpha * s[[1]] / gamma.m
m[[1]] <- s[[1]]
for (i in seq(2, length(s))){
m[[i]] <- m[i - 1] + alpha * (s[[i]] / gamma.m - m[i - 1])
}
return(m)
}
# BIC utilities -----------------------------------------------------------
GetBIC <- function(RSS, n, k){
return(n * log(RSS / n) + k * log(n))
}
GetBIC.logL <- function(logL, n, k){
# https://en.wikipedia.org/wiki/Bayesian_information_criterion
return(-2 * logL + k * log(n))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.