Nothing
gprege <-
function(data, inputs, gpregeOptions) {
## GPREGE Gaussian process ranking and estimation of gene expression time-series.
## FORMAT
## DESC Fit two GPs with the an RBF (+ noise diagonal) kernel on each
## profile. One GP kernel is initialised wih a short lengthscale
## hyperparameter, signal variance as the observed variance and a zero noise
## variance. It is optimised via scaled conjugate gradients (netlab). The
## other GP has fixed hyperparameters with a zero inverse-width, zero signal
## variance and noise variance as the observed variance. The log-ratio of
## marginal likelihoods of the two hypotheses acts as a score of
## differential expression for the profile. Comparison via ROC curves is
## performed against BATS (Angelini et.al, 2007).
## See Kalaitzis & Lawrence (2011) for a detailed discussion of the
## ranking algorithm and dataset used.
## ARG data : The matrix of gene expression profiles; one profile per row.
## ARG inputs : Inputs to the GP.
## ARG gpregeOptions$explore: (LOGICAL) Operate in a user interactive mode.
## Used for examining individual gene expression profiles.
## ARG gpregeOptions$labels : Contains flags that specify whether the
## corresponding profile comes from a differentially expressed gene
## (usually from a ground truth).
## ARG gpregeOptions$indexRange : Range of indices of profiles on which the
## function should operate. Useful for selective exploration of specific
## profiles, e.g. only genes marked as differentially expressed in a ground
## truth list.
## ARG gpregeOptions$interpolatedT : New timepoints to interpolate for each
## profile, based on the estimated function values.
## ARG gpregeOptions$iters : The number of iterations for scaled-conjugate
## gradients (SCG) optimisation.
## ARG gpregeOptions$display : Display gradient and LML information on each
## SCG iteration.
## ARG gpregeOptions$inithypers : The matrix of hyperparameter
## configurations as its rows:
## [inverse-lengthscale percent-signal-variance percent-noise-variance]
## The first row corresponds to a (practically constant) function
## with a very large lengthscale. Such a function will account for 0 percent
## of the observed variance in the expression profile (hence 0 for signal)
## and explain it as noise (hence 1 for noise). Subsequent rows
## (initialisations for SCG optimisation) correspond to functions of various
## lengthscales that explain all the observed variance as signal. A
## reasonable lengthscale would be roughly in line with the time-point
## sampling intervals.
## ARG gpregeOptions$exhaustPlotRes : The search resolution. Used for
## interactive mode (explore == 1).
## ARG gpregeOptions$exhaustPlotLevels : # Exhaustive plot contour levels.
## Used for interactive mode (explore == 1).
## ARG gpregeOptions$exhaustPlotMaxWidth : maximum lengthscale to search
## for. Used for interactive mode (explore == 1).
## RETURN gpregeOutput$signalvar : The vertical lengthscales of the
## optimised RBF kernel for each profile.
## RETURN gpregeOutput$noisevar : Same, for the noise hyperparameter.
## RETURN gpregeOutput$width : Same, for the horizontal lengthscales of the RBF.
## RETURN gpregeOutput$LMLs : Log-marginal likelihood of the GP for each profile.
## RETURN gpregeOutput$interpolatedData : extended dataset with interpolated values.
## RETURN gpregeOutput$rankingScores : the ranking scores based on the
## log-ratio of marginal likelihoods.
##
## USAGE: gpregeOutput <- gprege(exprs_tp63_RMA, seq(0,240,by=20), gpregeOptions)
##
## SEEALSO : gpOptions, gpCreate, gpExpandParam, gpOptimise, gpExtractParam,
## gpLogLikelihood, gpPosteriorMeanVar
##
## COPYRIGHT: Alfredo Kalaitzis, 2010, 2011
##
## GPREGE
##
if (missing(data))
stop("Missing data.")
if (missing(inputs)) {
stop("Missing inputs.")
}
if (is.null(gpregeOptions$indexRange)) {
gpregeOptions$indexRange <- 1:dim(data)[1]
}
n = length(gpregeOptions$indexRange)
if (is.null(gpregeOptions$explore)) {
gpregeOptions$explore <- FALSE
}
else {
if ((gpregeOptions$explore) && is.null(gpregeOptions$exhaustPlotRes)) {
gpregeOptions$exhaustPlotRes <- 20
}
if ((gpregeOptions$explore) && is.null(gpregeOptions$exhaustPlotMaxWidth)) {
gpregeOptions$exhaustPlotMaxWidth <- 30
}
}
if (is.null(gpregeOptions$iters)) {
gpregeOptions$iters <- 100
}
if (is.null(gpregeOptions$display)) {
gpregeOptions$display <- FALSE
}
gpregeOutput <- list()
if (!is.null(gpregeOptions$interpolatedT)) {
interpolate <- TRUE
newLength = dim(data)[2] + length(gpregeOptions$interpolatedT)
gpregeOutput$interpolatedData <- matrix(0, newLength,
n)
}
else {
interpolate <- FALSE
}
if (is.null(gpregeOptions$inithypers)) {
gpregeOptions$inithypers <- matrix(c(1/1000, 0, 1, 1/8,
0.999, 0.001), ncol = 3, byrow = TRUE)
}
npsets <- dim(gpregeOptions$inithypers)[1]
if (is.null(gpregeOptions$labels)) {
gpregeOptions$labels <- rep(0, n)
}
options <- gpOptions()
options$kern$comp <- list("rbf", "white")
x <- inputs
xstar <- matrix(seq(min(x) - (2 * (max(x) - min(x))/100),
max(x) + ((max(x) - min(x))/100), length = 100), ncol = 1)
models <- list()
loghypers <- matrix(0, 3, npsets)
LMLs <- matrix(0, n, npsets)
signalvar <- matrix(0, n, 1)
noisevar <- matrix(0, n, 1)
width <- matrix(0, n, 1)
data <- t(as.matrix(scale(t(data), scale = FALSE)))
datamax <- max(data)
datamin <- min(data)
if (gpregeOptions$explore) {
}
for (ix in 1:n) {
i <- gpregeOptions$indexRange[ix]
y <- matrix(data[i, ], ncol = 1)
if (sum(is.nan(y)) > (length(y)/2)) {
cat("Majority of points in profile are NaN.\n")
next
}
options$isMissingData <- any(is.nan(y))
options$isSpherical <- !any(is.nan(y))
stdy = sd(c(y[!is.nan(y)]))
inithypers = t(log(gpregeOptions$inithypers %*% diag(c(1, stdy^2, stdy^2))))
if (gpregeOptions$explore) {
pdf(file = paste("gpPlot", ix, ".pdf", sep = ""),
paper = "special", width = 6, height = 4 * npsets)
close.screen(all.screens = TRUE)
split.screen(c(dim(inithypers)[2], 1))
}
for (h in 1:npsets) {
models[[h]] <- gpCreate(dim(x)[2], dim(y)[2], x,
y, options)
models[[h]] <- gpExpandParam(models[[h]], inithypers[,
h])
if (h != 1) {
models[[h]] <- gpOptimise(models[[h]], gpregeOptions$display,
gpregeOptions$iters)
loghypers[, h] <- gpExtractParam(models[[h]],
only.values = FALSE)
}
LMLs[ix, h] <- gpLogLikelihood(models[[h]])
if (gpregeOptions$explore) {
screen(h)
erase.screen(h)
gpPlot(models[[h]], xstar, col = "blue", xlim = range(xstar),
ylim = c(datamin, datamax), title = paste("Init. length-scale = ",
as.character(1/exp(inithypers[1, h]/2)),
sep = ""), xlab = "time(mins)", ylab = bquote(paste(log[2] ~
expression ~ (centred))))
}
}
maxLML <- max(LMLs[ix, ])
mi <- which.max(LMLs[ix, ])
bestLoghypers <- loghypers[, mi]
width[ix] <- 1/exp(bestLoghypers[1]/2)
signalvar[ix] <- exp(bestLoghypers[2]/2)
noisevar[ix] <- exp(bestLoghypers[3]/2)
if (interpolate) {
meanVar <- gpPosteriorMeanVar(models[[mi]], gpregeOptions$interpolatedT, varsigma.return = TRUE)
mu <- meanVar$mu
S <- meanVar$varsigma
mu <- mu + matrix(rnorm(length(mu)), ncol = 1) * noisevar[ix]
gpregeOutput$interpolatedData[, ix] <- c(y, mu)
sortedInputs = sort(c(x, gpregeOptions$interpolatedT), index.return=TRUE)
idx = sortedInputs$ix
gpregeOutput$interpolatedData[, ix] = gpregeOutput$interpolatedData[idx, ix]
}
if (gpregeOptions$explore) {
if (interpolate) {
points(gpregeOptions$interpolatedT, mu, pch = 4, cex = 0.5, lwd = 2, col = "blue")
}
cat("\n========================================================\n")
cat(" Profile ", as.character(i), "\t\t\t\tLabel: ",
as.character(as.numeric(gpregeOptions$labels[i])))
cat("\n========================================================\n")
cat(sprintf("%-20s %-20s %s\n", "Length-scale", "Signal",
"Noise"))
cat(sprintf("% -20.5f % -20.5f % .5f\n\n", width[ix],
signalvar[ix], noisevar[ix]))
cat(sprintf("%-20s %-20s %s\n", "Init.le", "LML",
"Best"))
best = replace(rep("", npsets), which(LMLs[ix, ] ==
maxLML), " <--")
for (j in 1:npsets) {
cat(sprintf("% -20.0f % -20.8f %s\n", 1/exp(inithypers[1,
j]/2), LMLs[ix, j], best[j], "\n", sep = "\t\t"))
}
cat(sprintf("\n%-20s %-20s\n", "Total st.dev.", "Estim.sig + noise"))
cat(sprintf("% -20f % -20f\n\n", sd(c(y)), sum(exp(bestLoghypers[2:3]/2))))
cat("Log-ratio (max(LML[2:end]) - LML[1])\n ", max(LMLs[ix,
2:npsets]) - LMLs[ix, 1], "\n")
dev.off()
pdf(file = paste("exhaustivePlot", ix, ".pdf", sep = ""),
paper = "special", width = 6, height = 6)
C = exhaustivePlot(y, x, xstar, options, gpregeOptions$exhaustPlotMaxWidth,
gpregeOptions$exhaustPlotRes, gpregeOptions$exhaustPlotLevels)
dev.off()
readline(prompt = "ENTER to continue")
}
else {
cat(" Profile ", as.character(i), "\n")
}
}
gpregeOutput$signalvar <- signalvar
gpregeOutput$noisevar <- noisevar
gpregeOutput$width <- width
gpregeOutput$LMLs <- LMLs
gpregeOutput$rankingScores = apply(as.matrix(LMLs[, 2:npsets]),
1, max) - LMLs[, 1]
return(gpregeOutput)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.