Nothing
# Model.R
######################################################################
# Statistical model of transcriptomic dynamics #
######################################################################
################################################################
### Model definition
#
# For each gene type i:
#
# log2(expression) = Norm(mu, sigma^2)
#
# where mu = a + b1*D + c*type + b2*C + b3*D*C + b4*E + b5*D*E
# and (a,b1,..,b5,c) are some regression parameters.
#
# (C,D,E,..) represent effects of environmental factors
# (ages of samples, time of the day etc.) and type takes into account
# the effect of gene types. See FIT.pdf for the details.
#
# Various parameters carry the following indices:
# mu_{i,s}: expression of gene i in the sample s (training data)
# a_{i} (s independent intercept)
# c_{i} * type_{i}
# b1_{i} * D_{s}
# b2_{i} * C_{i,s}
# b3_{i} * D_{s} * C_{i,s}
# b4_{i,e} * E_{ies}
# b5_{i,k,e} * D_{s} * E_{i,s,e}
# where
# i:1..ngenes (~3x10^4),
# e:1..length(env) (up to ~6; just 1 in FIT.R)
# s:1..nsamples (~500(2008), ~100(2009), size of microarrays)
#
################################################################
# An object representation of the way we construct and refine models
Model$Recipe <- setRefClass(
'Model$Recipe',
fields = list(
# limitation: at the moment, an env must be a single weather factor
envs = 'character', # the keyword 'all' or a vec of 'wind', 'temperature' etc.
init = 'character', # (gridsearch | manual)
optim = 'character', # (none | lm | lasso)+
fit = 'character', # (fit.lm | fit.lasso)
init.data = 'list', # init=gridsearch => grid.params | init=manual => init.params
time.step = 'integer',
gate.open.min = 'numeric',
opts = 'list'
)
)
################################################################
# The linear model for transcriptome in a field environment
#
# Note:
# - In FIT2, params were a 7*2 matrix (2: init, optim),
# but perhaps that was a bad idea..
# - It appears to be simpler to construct instances of models at the end of
# each phase (init, optim1, optim2, .., fit), and restrict params to be a 7-vector
# - If needed, user shall save the entire model at the end of each phase.
Model$LogNormal <- setRefClass(
Class = 'LogNormal',
fields = list(
gene = 'character', # gene name
env = 'character', # environmental factor (restricted to only one factor)
time.step = 'integer', # sampling interval of meteorological data (integral multiple of data.step in Weather object)
log = 'character', # recipe
params = 'numeric', # parameters (named numeric vector)
dev = 'numeric', # deviance
coefs = 'numeric', # regression coefficients (named numeric vector)
response.type = 'numeric', # response to environmental stimuli over the threshold (1) or under the threshold (-1)
input.mean = 'numeric', # mean values of inputs
input.sd = 'numeric' # standard deviations of inputs
),
methods = list(
# In the R5 class system, initialize() has to work with zero args
# to make a class instance to be copy()able.
# (copy() first calls initialize() and then copies field values.)
initialize = function(gene = character(0), env = character(0), time.step = 1,
log = character(0), params = NULL, dev = -1, coefs = NULL,
response.type = 1, input.mean = NULL, input.sd = NULL) {
if (is.null(names(params)) || !all(names(params) == Model$param.names(env))
|| (dev >=0
&& (is.null(names(coefs)) || !all(names(coefs) == Model$coef.names(env)))))
stop("Protocol bug: invalid names/ordering for params.")
gene <<- gene
env <<- env
time.step <<- time.step
log <<- log
params <<- params
dev <<- dev
coefs <<- coefs
response.type <<- sign(response.type)
if(is.null(input.mean)){
input.mean <<- rep(0, 1+length(env))
names(input.mean) <<- c('age', env)
}else{
input.mean <<- input.mean
}
if(is.null(input.sd)){
input.sd <<- rep(1, 1+length(env))
names(input.sd) <<- c('age', env)
}else{
input.sd <<- input.sd
input.sd[input.sd==0] <<- 1
}
},
# normalize inputs
normalize.attribute = function(attribute.data){
Model$normalize.attribute(attribute.data, input.mean['age'], input.sd['age'])
},
normalize.weather = function(weather.data){
Model$normalize.weather(weather.data, env, response.type, input.mean[env], input.sd[env])
},
# predict gene expression using attributes and meteorological data
predict = function(attribute.data, weather.data, data.step) {
# normally, mu = (1,D,type,C,E,..)*(optimized regression coefs)
# but inputs are NA when env == 'unexpressed' and
# we return the value of the first coef (intercept) as mu
mu <- if (env == 'unexpressed')
coefs[['intercept']]
else {
input <- inputVars(params, env,
normalize.attribute(attribute.data),
normalize.weather(weather.data),
data.step, time.step)
as.vector(input %*% coefs)
}
},
deviance = function(exprs, attribute.data, weather.data, data.step) {
diff <- exprs[, gene] - predict(attribute.data, weather.data, data.step)
sum(diff ^ 2)
}
)
)
# Wrapper for Model$LogNormal$new()
# - We provide this in case that we want optional args for the ctor.
# (It is difficult to have non-optional args in R5 ctors.)
# args
# - see the comments to Model$LogNormal$fields
Model$new <- function(gene, env, time.step, log, params, dev, coefs, response.type, input.mean, input.sd) {
Model$LogNormal(gene, env, time.step, log, params, dev, coefs, response.type, input.mean, input.sd)
}
# DOC:
# - caller of Model$new() must order params in this order:
Model$param.names <- function(env) {
vapply(env, function(e) {
c(sprintf('env.%s.period', e),
sprintf('env.%s.amplitude', e),
sprintf('env.%s.threshold', e),
sprintf('gate.%s.phase', e),
sprintf('gate.%s.amplitude', e),
sprintf('gate.%s.threshold', e))
}, rep("", 6))
}
# DOC:
# - caller of Model$new() must order coefs in this order:
Model$coef.names <- function(env) {
c('intercept', # 1
'coef.age', # D
'coef.genotype', # N
'coef.clock.cos', # C
'coef.clock.sin', # C
'coef.ageClock.cos', # D*C
'coef.ageClock.sin', # D*C
vapply(env, function(e) { # E(e) and D*E(e)
c(sprintf('coef.env.%s', e),
sprintf('coef.ageEnv.%s', e))
}, rep("", 2)))
}
# Normalizer of attribute:
Model$normalize.attribute <- function(attribute.data, age.mean, age.sd){
attribute.data[, 'age'] <- (attribute.data[, 'age'] - age.mean) / age.sd
attribute.data
}
# Normalizer of weather:
Model$normalize.weather <- function(weather.data, env, response.type, weather.mean, weather.sd){
for(e in env){
weather.data[, e] <- response.type * (weather.data[, e] - weather.mean[e]) / weather.sd[e]
}
weather.data
}
# Upper and lower bounds of parameters:
Model$param.bounds <- function(env, attribute.data, weather.data, data.step, gate.threshold.max=1){
period.max <- min(attribute.data$times.pickup) -1
env.n <- length(env)
threshold.bound <- sapply(env, function(e)
c(min(weather.data[, env]), max(weather.data[, env]))
)
colnames(threshold.bound) <- sprintf('env.%s.threshold', env)
cbind(
matrix(c(1, period.max), 2, env.n, dimnames = list(NULL, sprintf('env.%s.period', env))),
threshold.bound,
matrix(c(0, 1440), 2, env.n, dimnames = list(NULL, sprintf('gate.%s.phase', env))),
matrix(c(-1, gate.threshold.max), 2, env.n, dimnames = list(NULL, sprintf('gate.%s.threshold', env)))
)
}
# convert period to log-scale
Model$period.log <- function(params, env){
params[sprintf('env.%s.period', env)] <- log(params[sprintf('env.%s.period', env)])
params
}
Model$period.log.inv <- function(params, env){
params[sprintf('env.%s.period', env)] <- exp(params[sprintf('env.%s.period', env)])
params
}
# degree of freedom
Model$coef.deg.freedom <- function(env){
c(rep(1, 7), rep(7, 2*length(env)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.