Nothing
# Aim: simulate data to test a Bayesian Latent Factor Model
# Persons : Gabrielle Weinrott [cre, aut]
# Function to generate a set of signals
#
# N : number of individuals
# P : number of observations
# b.range : range of location of first gaussian
# c.range : range of location for second gaussian
# b.sd : standard dev of first gaussian
# c.sd : standard dev of second gaussian
# a.range : range of values for slope
# y.range : range of loaction for y-intercept
# amp : amplitude of cosinus function
# per : period of cosinus function
# data.type : type of data to generate (options: sparse, sparse.tend, sparse.tend.cos)
#
gen_X <- function(N = 10,
P = 150,
b.range = c(0.2, 0.35),
c.range = c(0.65, 0.8),
b.sd = 2,
c.sd = 2,
a.range = c(-0.4, 0.4),
y.range = c(0, 10),
amp = 10,
per = 12,
data.type = "sparse"){
theta = c(0, 10, 1) # parameters for the O-U process
t = 0:P
X <- matrix(nrow = N, ncol = length(t))
amp.sample_b = c(-12:-8, 8:12)
amp.sample_c = c(-8:-4, 4:8)
if(data.type == "sparse"){
for(i in 1:N){
b_i <- stats::runif(1, min = b.range[1]*max(t), max = b.range[2]*max(t)) # location of first peak
c_i <- stats::runif(1, min = c.range[1]*max(t), max = c.range[2]*max(t)) # location of second peak
gauss_b <- sample(amp.sample_b, 1)*stats::dnorm(t, mean = b_i, sd = b.sd)
gauss_c <- sample(amp.sample_c, 1)*stats::dnorm(t, mean = c_i, sd = c.sd)
X[i, ] <- gauss_b*t+ gauss_c*t+ sde::rsOU(n = t, theta)
}
}
else if(data.type == "sparse.tend"){
for(i in 1:N){
y_i <- stats::runif(1, min = y.range[1], max = y.range[2]) # intercept
a_i <- stats::runif(1, min = a.range[1], max = a.range[2]) # slope
b_i <- stats::runif(1, min = b.range[1]*max(t), max = b.range[2]*max(t)) # location of first peak
c_i <- stats::runif(1, min = c.range[1]*max(t), max = c.range[2]*max(t)) # location of first peak
gauss_b <- sample(amp.sample_b, 1)*stats::dnorm(t, mean = b_i, sd = b.sd)
gauss_c <- sample(amp.sample_c, 1)*stats::dnorm(t, mean = c_i, sd = c.sd)
X[i, ] <- y_i + a_i*t + gauss_b*t+ gauss_c*t + sde::rsOU(n = t, theta)
}
}
else if(data.type == "sparse.tend.cos"){
for(i in 1:N){
y_i <- stats::runif(1, min = y.range[1], max = y.range[2]) # intercept
a_i <- stats::runif(1, min = a.range[1], max = a.range[2]) # slope
b_i <- stats::runif(1, min = b.range[1]*max(t), max = b.range[2]*max(t)) # location of first peak
c_i <- stats::runif(1, min = c.range[1]*max(t), max = c.range[2]*max(t)) # location of first peak
gauss_b <- sample(amp.sample_b, 1)*stats::dnorm(t, mean = b_i, sd = b.sd)
gauss_c <- sample(amp.sample_c, 1)*stats::dnorm(t, mean = c_i, sd = c.sd)
X[i, ] <- y_i + a_i*t + gauss_b*t+ gauss_c*t + amp*cos(t/per) + sde::rsOU(n = t, theta)
}
}
return(X = X[ , -1])
}
# Simulate a matrix of observation times
#
# N : number of individuals
# P : number of observations
# t.range : range of times for the P observations
# b.range : range of location of first gaussian
# c.range : range of location for second gaussian
#
simul.matrix.t <- function(N = 10, P = 150, t.range = c(0, 1000),
b.range = c(0.2, 0.4), c.range = c(0.6, 0.8)){
t <- data.frame()
for(i in 1:N){
t.1 <- sort(round(stats::runif(3*P/10, t.range[1], b.range[1]*t.range[2]), 2), decreasing = FALSE)
t.b <- sort(round(stats::runif(P/20, b.range[1]*t.range[2], b.range[2]*t.range[2]), 2), decreasing = FALSE)
t.12 <- sort(round(stats::runif(3*P/10, b.range[2]*t.range[2], c.range[1]*t.range[2]), 2), decreasing = FALSE)
t.c <- sort(round(stats::runif(P/20, c.range[1]*t.range[2], c.range[2]*t.range[2]), 2), decreasing = FALSE)
# correct for the number of times simulated depending on P
if(P/20 != floor(P/20)){
t.2 <- sort(round(stats::runif(3*P/10 + 1, c.range[2]*t.range[2], t.range[2]), 2), decreasing = FALSE)
}
else{
t.2 <- sort(round(stats::runif(3*P/10, c.range[2]*t.range[2], t.range[2]), 2), decreasing = FALSE)
}
ti <- c(t.1, t.b, t.12, t.c, t.2)
t <- rbind(t, ti)
}
return(t)
}
##' PCA data projected onto a histogram basis
##'
##' @param X the data matrix
##' @param t the matrix of observation times
##' @param t.range a vector specifying the observation time range (default : c(0, 1000))
##' @param breaks the number of breaks in the histogram basis (default : breaks = 15)
##' @return Xt.proj a matrix of projected observations
##' @return U a matrix of eigenvectors
##' @return lambda a vector of eigenvalues
##' @return lambda.perc the percentage of inertia captured by each axis
##'
##' @examples
##' res <- drbats.simul(N = 5, P = 100, t.range = c(5, 100), breaks = 8)
##' pca.proj.Xt(res$X, res$t.simul, t.range = c(0, 100), breaks = 8)
##'
##' @export
##'
##' @author Gabrielle Weinrott
##'
pca.proj.Xt <- function(X,
t,
t.range = c(0, 1000),
breaks = 15){
Xt.proj <- histoProj(X, t, t.range, breaks)
X.histo <- Xt.proj$X.proj
YC <- scale(X.histo, center = TRUE, scale = FALSE)
cov.YC <- stats::cov(YC)
eg <- eigen(cov.YC, symmetric = T)
lambda <- eg$values
lambda.perc <- lambda/sum(lambda)
U <- eg$vectors
res <- list(Xt.proj = Xt.proj,
U = U,
lambda = lambda,
lambda.perc = lambda.perc)
return(res)
}
##' Build and decompose a low-rank matrix W
##'
##' @description Build and decompose a low-rank matrix from
##' a matrix of eigenvectors and eigenvalues
##' from principal component analysis
##'
##'@param U a matrix of eigenvectors
##'@param lambda a vector of corresponding eigenvalues
##'
##'@return W a low-rank matrix
##'@return D the number of latent factors
##'@return Q the orthogonal matrix of the W = QR matrix decomposition
##'@return R the upper triangular matrix of the W = QR matrix decomposition
##'
##'@examples
##' res <- drbats.simul(N = 5, P = 100, t.range = c(5, 100), breaks = 8)
##' res.pca <- pca.Deville(res$X, res$t.simul, t.range = c(5, 100), breaks = 8)
##' Wres.pca <- W.QR(res.pca$U, res.pca$lambda)
##' Wres.pca
##'
##'@author Gabrielle Weinrott
##'@export
##'
W.QR <- function(U, lambda){
if(!is.data.frame(U) && !is.matrix(U)){
stop("U must be a matrix or data frame")
}
if(ncol(U) != nrow(U)){
stop("U must have the same number of rows and columns")
}
if(!is.vector(lambda)){
stop("lambda must be a vector")
}
if(length(lambda) != nrow(U)){
stop("lambda must be of same length as each column of U")
}
U <- as.matrix(U)
W <- matrix(nrow = nrow(U), ncol = ncol(U))
biglambda = c()
count = c()
# get rid of tiny eigenvalues
for(i in 1:length(lambda)){
if(lambda[i]/sum(lambda) < 1/length(lambda)){
count[i] = i
lambda[i] = 0
}
}
D <- min(which(lambda == 0)) - 1
if(D < 2) D = 2
biglambda <- as.matrix(lambda*diag(1, ncol(U)))
W <- U%*%biglambda[ , 1:D]
qrW <- Matrix::qr(t(W))
Q <- qr.Q(qrW) # rotation matrix
R <- qr.R(qrW) # upper triangular
res <- list(W = W, D = D, Q = Q, R = R)
return(res)
}
# Simulate a data matrix Y
#
# N : number of individuals
# W : a matrix of latent factors
# D : the number of latent factors (ncol(W))
# R : a rotation matrix
# sigma2 : variance of the error
#
simul.matrix.Y <- function(N = 10,
W,
D,
R,
sigma2 = 0.2){
W.lower <- t(R)
Y.simul <- matrix(nrow = N, ncol = nrow(W))
beta.simul <- matrix(nrow = D, ncol = N)
epsilon.simul <- matrix(nrow = N, ncol = nrow(W))
for(i in 1:N){
epsilon <- MASS::mvrnorm(nrow(W), 0, sigma2)
beta <- MASS::mvrnorm(D, 0, 1)
Y.simul.i <- t(W.lower%*%beta + epsilon)
Y.simul[i, ] <- Y.simul.i
beta.simul[ , i] <- beta
epsilon.simul[i , ] <- epsilon
}
res = list(Y = Y.simul, beta = beta.simul, epsilon = epsilon.simul)
return(res)
}
##' Main simulation function
##'
##' @param N integer number of functions to simulate (default = 10)
##' @param P a number of observation times (default = 150)
##' @param t.range a range of times in which to place the P observations (default = c(1, 1000))
##' @param b.range a vector giving the range of values for the mean of the first mode (default b.range = c(0.2, 0.4))
##' @param c.range a vector giving the range of values for the mean of the second mode (default c.range = c(0.6, 0.8))
##' @param b.sd the standard deviation for the first mode (default b.sd = 2)
##' @param c.sd the standard deviation for the second mode (default c.sd = 2)
##' @param a.range a vector giving the range of values for the slope (default a.range = c(-0.4, 0.4))
##' @param y.range a vector giving the range of values for the intercept (default y.range = c(0, 10))
##' @param amp the amplitude of the cosine function (default = 10)
##' @param per the periodicity of the cosine function (default = 12)
##' @param data.type string indicating type of functions (options :sparse, sparse.tend, sparse.tend.cos)
##' @param breaks number of breaks in the histogram basis
##' @param sigma2 the precision of the error terms (default = 0.2)
##' @param seed integer specification of a seed (default = NULL)
##'
##' @return Y.simul a list containing a matrix Y, a matrix beta, and a matrix epsilon
##' @return t.simul a matrix of simulated observation times
##' @return X the underlying signal to build the data, see DataSimulationandProjection vignette
##' @return proj.pca the outputs of the function pca.proj.Xt
##' @return wlu the outputs of the function W.QR
##'
##' @examples
##' res <- drbats.simul(N = 5, P = 100, t.range = c(5, 100), breaks = 8)
##' X <- res$X
##' t <- res$t.simul
##' # To plot the observations, ie the rows
##' matplot(t(t), t(X), type = 'l', xlab = "Time", ylab = "X")
##'
##' @author Gabrielle Weinrott
##' @export
##'
drbats.simul <- function(N = 10,
P = 150,
t.range = c(0, 1000),
b.range = c(0.2, 0.4),
c.range = c(0.6, 0.8),
b.sd = 2,
c.sd = 2,
a.range = c(-0.4, 0.4),
y.range = c(0, 10),
amp = 10,
per = 12,
data.type = "sparse",
breaks = 15,
sigma2 = 0.2,
seed = NULL){
N <- as.integer(N)
if(N <= 1){
stop("N must be greater than 1")
}
P <- as.integer(P)
if(P <= 0){
stop("P must be a positive integer")
}
if(!is.vector(t.range) | length(t.range) != 2){
stop("t.range must be a vector of length 2")
}
if(!is.vector(b.range) | length(b.range) != 2){
stop("b.range must be a vector of length 2")
}
if(!is.vector(c.range) | length(c.range) != 2){
stop("c.range must be a vector of length 2")
}
b.sd <- as.numeric(b.sd)
c.sd <- as.numeric(c.sd)
if(b.sd <= 0 | c.sd <= 0){
stop("b.sd and c.sd must be positive values")
}
if(!is.vector(a.range) | length(a.range) != 2){
stop("a.range must be a vector of length 2")
}
if(!is.vector(y.range) | length(y.range) != 2){
stop("y.range must be a vector of length 2")
}
amp <- as.integer(amp)
per <- as.integer(per)
if(amp <= 0 | per <= 0){
stop("amp and per must be positive integers")
}
if(data.type != "sparse" & data.type != "sparse.tend" &
data.type != "sparse.tend.cos"){
stop("invalid data.type")
}
breaks <- as.integer(breaks)
if(breaks <= 0){
stop("breaks must be a positive integer")
}
if(!is.numeric(sigma2) | sigma2 <= 0){
stop("sigma2 must be a positive number")
}
set.seed(seed)
X <- gen_X(N, P, b.range, c.range, b.sd, c.sd, a.range, y.range, amp, per, data.type)
t <- as.matrix(simul.matrix.t(N, P, t.range, b.range, c.range))
proj.pca <- pca.proj.Xt(X, t, t.range, breaks)
wlu <- W.QR(proj.pca$U, proj.pca$lambda)
Y.simul <- simul.matrix.Y(N, wlu$W, wlu$D, wlu$R, sigma2)
res <- list(Y.simul = Y.simul, t.simul = t, X = X, proj.pca = proj.pca, wlu = wlu)
return(res)
}
# Aim: project longitudinal data onto a histogram basis
# Use the rectangle method to calculate the area under a
# curve Xi(ti)
#
# Xi : a vector of observations
# ti : a vector of observation times corresponding to Xi
# windows : the windows of time on which to use the rectangle method
#
intRec <- function(Xi,
ti,
windows){
step <- windows[2] - windows[1]
area = NULL # a vector of length (length(windows) - 1)
max = NULL
min = NULL
count = NULL
for(i in 1:(length(windows) - 1)){
mask = (ti >= windows[i]) & (ti <= windows[i+1])
t.int <- ti[mask]
X.int <- Xi[mask]
count[i] = length(X.int)
if(count[i] >= 1){
area.i = sum(X.int)/count[i]
max.i = max(X.int)
min.i = min(X.int)
}
if(count[i] < 1){
area.i = NA
max.i = NA
min.i = NA
}
area[i] = area.i
max[i] = max.i
min[i] = min.i
}
# if the first element is NA
if(is.na(area[1])){
if(!is.na(area[2])){
area[1] <- area[2]
max[1] = max[2]
min[1] = min[2]
warning("the first two intervals have been merged")
}
else if(!is.na(area[3])) {
area[1] <- area[3]
area[2] <- area[3]
max[1] = max[3]
max[2] = max[3]
min[1] = min[3]
min[2] = min[3]
warning("the first three intervals have been merged")
}
else if(!is.na(area[4])) {
area[1] <- area[4]
area[2] <- area[4]
area[3] <- area[4]
max[1] = max[4]
max[2] = max[4]
max[3] = max[4]
min[1] = min[4]
min[2] = min[4]
min[3] = min[4]
warning("the first four intervals have been merged")
}
else if(!is.na(area[5])) {
area[1] <- area[5]
area[2] <- area[5]
area[3] <- area[5]
area[4] <- area[5]
max[1] <- max[5]
max[2] <- max[5]
max[3] <- max[5]
max[4] <- max[5]
min[1] <- min[5]
min[2] <- min[5]
min[3] <- min[5]
min[4] <- min[5]
warning("the first four intervals have been merged")
}
}
if(is.na(area[1])) stop("reduce number of breaks")
# if there are NAs elsewhere in the vector
for(i in 2:(length(area))){
if(is.na(area[i]) && !is.na(area[i+1]) && !is.na(area[i-1])){
area[i] = (area[i-1] + area[i+1])/2
max[i] = (max[i-1] + max[i+1])/2
min[i] = (min[i-1] + min[i+1])/2
}
if(is.na(area[i]) && is.na(area[i+1]) && !is.na(area[i-1]) && !is.na(area[i+2])){
area[i] <- (area[i-1] + area[i+2])/2
area[i+1] <- (area[i-1] + area[i+2])/2
max[i] <- (max[i-1] + max[i+2])/2
max[i+1] <- (max[i-1] + max[i+2])/2
min[i] <- (min[i-1] + min[i+2])/2
min[i+1] <- (min[i-1] + min[i+2])/2
}
else if(is.na(area[i]) && is.na(area[i+1]) && is.na(area[i+2])){
warning("increase window size")
}
}
# if the last element is NA
if(is.na(area[length(area)])){
if(!is.na(area[length(area)-1])){
area[length(area)] <- area[length(area) - 1]
max[length(max)] <- max[length(max) - 1]
min[length(min)] <- min[length(min) - 1]
warning("the last two intervals have been merged")
}
else if(is.na(area[length(area)-1]) && !is.na(length(area)[i-2])){
area[length(area)] <- area[length(area) - 2]
area[length(area) - 1] <- area[length(area) - 3]
max[length(max)] <- max[length(max) - 2]
max[length(max) - 1] <- max[length(max) - 3]
min[length(min)] <- min[length(min) - 2]
min[length(min) - 1] <- min[length(min) - 3]
warning("the last three intervals have been merged")
}
else warning("increase window size")
}
if(sum(is.na(area)) > 0){
stop("increase window size")
}
res <- list(area = area, count = count, max = max, min = min)
return(res)
}
##' Project a set of curves onto a histogram basis
##'
##' @param X a matrix
##' @param t a matrix of observation times
##' @param t.range a range of times in which to place the P projections (default = c(0, 1000))
##' @param breaks the number of intervals in the histogram basis
##'
##' @return X.proj the matrix X after projection
##' @return X.count a matrix containing the number of observations used to build the projection onto the histogram basis
##' @return windows a vector containing the first time of each window of the histogram intervals
##' @return X.max the matrix of minimum values in each window
##' @return X.min the matrix of maximum values in each window
##'
##' @examples
##' res <- drbats.simul(N = 5, P = 100, t.range = c(5, 100), breaks = 8)
##' res.proj <- histoProj(res$X, res$t.simul, t.range = c(5, 100), breaks = 8)
##' res.proj
##'
##' @author Gabrielle Weinrott
##' @export
##'
histoProj <- function(X, t, t.range, breaks){
X <- as.matrix(X)
t <- as.matrix(t)
if(nrow(X) != nrow(t) | ncol(X) != ncol(t)){
stop("X and t dimensions must match")
}
if(!is.vector(t.range) | length(t.range) != 2){
stop("t.range must be a vector of length 2")
}
breaks <- as.integer(breaks)
if(breaks <= 0){
stop("breaks must be a positive integer")
}
windows <- seq(min(t.range), max(t.range), length.out = breaks)
X.proj <- matrix(nrow = nrow(X), ncol = length(windows) - 1)
X.count <- matrix(nrow = nrow(X), ncol = length(windows) - 1)
X.max <- matrix(nrow = nrow(X), ncol = length(windows) - 1)
X.min <- matrix(nrow = nrow(X), ncol = length(windows) - 1)
for(i in 1:nrow(X)){
integ.X <- intRec(X[i, ], t[i, ], windows)
X.proj[i, ] <- integ.X$area
X.count[i, ] <- integ.X$count
X.max[i, ] <- integ.X$max
X.min[i, ] <- integ.X$min
}
res <- list(X.proj = X.proj, X.count = X.count, windows = windows,
X.max = X.max, X.min = X.min)
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.