# R/gslcca.R In gslcca: Generalized Semi-Linear Canonical Correlation Analysis

```gslcca <- function (Y, # matrix of power spectra
formula = "Double Exponential",
time, # time points
subject = NULL,
global = FALSE, ##fit one model for all subjects?
treatment = NULL,
ref = 1,
separate = FALSE,# use "proportional" instead c.f. hazard?
partial = ~1,
data = NULL,
subset = NULL,
global.smooth = FALSE,
subject.smooth = TRUE,
pct.explained = 0.96,
start = NULL,
method = "L-BFGS-B",
lower = 2,
upper = 15,
...) { # allow arguments to be passed to optim
## Formula must be a function of "time", possibly other var and parameters
if (is.character(formula)){
models <- c("Double Exponential", "Critical Exponential")
formula <- models[agrep(formula, models)]
if (!length(formula))
stop("\"formula\" not recognised, only \"Double Exponential\"",
"or \"Critical Exponential\" \n",
"can be specified as character strings")
if (is.null(start))
start <- switch(formula, #replicate later for multiple treatments
#K1 > K2 so increases from ref
"Double Exponential" = list(K1 = 9, K2 = 8.5),
"Critical Exponential" = list(K1 = 8.5))
formula <- switch(formula,
"Double Exponential" = ~ exp(-time/exp(K1))-exp(-time/exp(K2)),
"Critical Exponential" = ~ time*exp(-time/exp(K1)))
}

vars <- all.vars(formula)
if (!"time" %in% vars) stop("'formula' must be a function of 'time'")

## anything without starting values assumed to be a variable
vars <- setdiff(c(vars, all.vars(partial)), c("time", names(start)))
dummy <- reformulate(c("0", vars)) #don't need int for mf
formula <- as.expression(formula[[length(formula)]])

## Collate data to ensure equal length and deal with NAs
## get model frame inc. Y, time, subject & treatment if specified, omit NAs
mf <- match.call(expand.dots = FALSE)
m <- match(c("Y", "subject", "treatment", "time", "data", "subset"),
names(mf), 0L)
mf <- mf[c(1L, m)]
mf\$formula <- dummy
mf\$na.action <- na.omit
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())

## split up again for convenience
Y <- mf\$`(Y)`
if(!is.matrix(Y)) Y <- as.matrix(Y)
nr <- nrow(Y)
freq.nm <- colnames(Y)
f <- suppressWarnings(as.numeric(gsub("[^0-9.]", "", freq.nm)))
if (any(is.na(f))) f <- seq_along(f)

if (!is.null(mf\$`(subject)`)) {
subject <- as.factor(mf\$`(subject)`)
ind <- split(seq_len(nr), subject)
}
else {
subject <- NULL
ind <- list(seq_len(nr))
}
nind <- length(ind)

if (!is.null(mf\$`(treatment)`)) {
treatment <- as.factor(mf\$`(treatment)`)
if (!is.numeric(ref)) ref <- which(levels(treatment) == ref)
ntrt <- nlevels(treatment) - 1
}
else ntrt <- 1

if (is.null(mf\$`(time)`))
stop("'time' must be specified and have length equal to", nr, "\n")
names(mf)[match("(time)", names(mf))] <- "time"

## take out extras from mf
mf <- mf[, !(names(mf) %in% c("(Y)", "(subject)", "(treatment)")),
drop = FALSE]

## Pre-smoothing of the complete data set for artefacts removal
if (global.smooth) {
if (global.smooth == TRUE) {
Ytemp= scale(Y,scale=FALSE)
eig=eigen(crossprod(Ytemp), only.values = TRUE)\$values
nroots=seq_along(eig)
global.smooth=max(nroots[(eig/eig[1])>0.001 &
cumsum(eig)/sum(eig)<pct.explained]) + 1
}
SVD = svd( Y, nu=global.smooth, nv=global.smooth )
Y = SVD\$u%*% (SVD\$d[1:global.smooth] * t(SVD\$v))
}

cum.pct.explained <- matrix(, nrow = ncol(Y), ncol = nind)
r <- numeric(nind)
for (i in seq_len(nind)) {
Ytemp = scale(Y[ind[[i]],],scale=FALSE)
eig = eigen(crossprod(Ytemp), only.values = TRUE)\$values
cum.pct.explained[,i] <- cumsum(eig)/sum(eig)
r[i] = sum((eig/eig[1])>0.001 & cum.pct.explained[,i] < pct.explained)
}
if (isTRUE(subject.smooth)) {
## Set number of roots to satisfy pct.explained
subject.smooth <- max(r)+1
}
else if(!is.numeric(subject.smooth)) subject.smooth <- ncol(Y)
cum.pct.explained <- cum.pct.explained[subject.smooth,]

reps <- ifelse(separate, ntrt, 1)
if (is.null(start)) {
stop('No initial values have been specified for the nonlinear',
' parameters.')
}
else {
start <- mapply(function(start, label, reps) {
if (length(start) == 1) rep(start, reps)
else if (length(start) == reps) start
else stop("there should be 1 or", reps,
"starting values for parameter", label, "\n")
}, start = start, label = names(start), MoreArgs = list(reps = reps),
SIMPLIFY = FALSE)
}
nPar <- length(start)
group <- gl(nPar, reps, labels = names(start))

## evaluate RHS for first row of data
par <- as.list(rep(NA, length(start)))
names(par) <- names(start)
dat <- do.call("cbind", list(mf[1,, drop = FALSE], par))
val <- eval(formula, dat)
mt <- terms(partial)
CCA.roots = 1

ny <- length(f)*nind
if (nind > 1 & global == TRUE){
nind <- 1
ind <- list(seq_len(nr))
}
ycoef <- matrix(numeric(ny), ncol=nind)
xcoef <- matrix(nrow=length(val)*ntrt, ncol=nind)
nonlin.par <- matrix(nrow=length(unlist(start)), ncol=nind)

y.list <- x.list <- opt <- list()
yscores <- xscores  <- numeric(nr)
f.min <- cor <- numeric(nind)

for (i in 1:nind) {
## Do SVD on Y
Yr = Y[ind[[i]],]
nr=nrow(Yr)
if (subject.smooth) {
smoother <- function(ind, x, r){
SVD <- svd(x[ind,], nu=r, nv=r)
SVD\$u %*% (SVD\$d[1:r] * t(SVD\$v))
}
if (global) ## smooth subject data separately, then diagonalise
Yr <- bdiag(tapply(1:nrow(Yr), list(subject),
smoother, Yr, subject.smooth))
else
Yr <- smoother(1:nrow(Yr), Yr, subject.smooth)
}
## Calculate RYr
if (is.empty.model(partial)){
y.list[[i]] = Yr
}
else { ## N.B equiv to doing for each subject separately, bit inefficient
G <- model.matrix(mt, mf[ind[[i]], , drop = FALSE])
y.list[[i]] = lm.fit(G, as.matrix(Yr))\$residuals
}
## Reduce the dimensionality using svd
rank.RYr=rankMatrix(y.list[[i]])
SVD = svd(y.list[[i]], nu=rank.RYr, nv=rank.RYr )
D<-diag(SVD\$d[1:rank.RYr],ncol=rank.RYr)
U1<-SVD\$u
V<-SVD\$v
RYr=U1%*%D
S11=crossprod(RYr)
S11.eig <- eigen(S11)
S11.val=suppressWarnings(sqrt(S11.eig\$values))
S11.val[is.na(S11.val)]=0
S11.sqrt <- solve(S11.eig\$vectors %*% (S11.val * t(S11.eig\$vectors)))
dat <- mf[ind[[i]], , drop = FALSE]
if (!is.null(treatment)) {
##can ignore reference level when finding nonlin par
id <- unclass(factor(treatment[ind[[i]]]))

F <- class.ind(id)
if (!is.null(ref)) {
F <- F[,-ref]
nlev <- max(id)
ord <- numeric(nlev)
ord[ref] <- nlev
ord[-ref] <- seq(nlev - 1)
id <- ord[id]
}
if (!separate) id <- 1
}
else F <- id <- 1
nm <- names(start)
qy <- qr(RYr)
dy <- qy\$rank
obj.f=function(Kvector) {
## evaluate RFr
par <- split(Kvector, group)
dat[nm] <- lapply(par, "[", id)
val <- eval(formula, dat)
val[is.na(val)] <- 0
if (is.empty.model(partial)){
RFr <- val*F
}
else{
RFr <- lm.fit(G, val*F)\$residuals
}
## find cor as in MASS:::cancor
qx <- qr(RFr)
dx <- qx\$rank
z <- svd(qr.qty(qx, qr.qy(qy, diag(1, nr, dy)))[1L:dx, ,
drop = FALSE], dx, dy)
log(1-(z\$d[1])^2)
}
## Minimize ln(1-largesteigenvalue(Cor))
opt[[i]] <- suppressWarnings(optim(unlist(start), obj.f, method = method,
lower = lower, upper = upper, ...))
nonlin.par[,i]=opt[[i]]\$par
par <- split(nonlin.par[,i], group)
dat[nm] <- lapply(par, "[", id)
val <- eval(formula, dat)
val[is.na(val)] <- 0
RFr <- val*F
if (!is.empty.model(partial))
RFr <- lm.fit(G, RFr)\$residuals
## find cor as in MASS:::cancor
qx <- qr(RFr)
dx <- qx\$rank
z <- svd(qr.qty(qx, qr.qy(qy, diag(1, nr, dy)))[1L:dx, ,
drop = FALSE], dx, dy)
ycoef[,i]= V %*% backsolve((qy\$qr)[1L:dy, 1L:dy, drop = FALSE],
z\$v)[,CCA.roots]
B= backsolve((qx\$qr)[1L:dx, 1L:dx, drop = FALSE], z\$u)[,CCA.roots]
RFrB=as.matrix(RFr)%*%B
yscores[ind[[i]]] = as.matrix(y.list[[i]])%*%ycoef[,i]
cor[i] <- lm.fit(RFrB, yscores[ind[[i]]])\$coef
f.min[i]=Re(opt[[i]]\$value)
## Return RF, i.e. X partialling out G
xscores[ind[[i]]] = RFrB
x.list[[i]]=RFr
## Recalc fitted value based on starting values to get alignment right
xcoef[,i] <- as.vector(B)
par <- split(unlist(start), group)
dat[nm] <- lapply(par, "[", id)
val <- eval(formula, dat)
val[is.na(val)] <- 0
RFr <- val*F
if (!is.empty.model(partial))
RFr <- lm.fit(G, RFr)\$residuals
if (cor(xscores[ind[[i]]], rowSums(as.matrix(RFr))) < 0) {
xcoef[,i] <- -xcoef[,i]
xscores[ind[[i]]] <- -xscores[ind[[i]]]
ycoef[,i] <- -ycoef[,i]
yscores[ind[[i]]] <- -yscores[ind[[i]]]
}
}

if (!is.null(treatment))
treatment_c <- paste("", levels(treatment)[setdiff(seq(nlev), ref)])
else treatment_c <- ""
if (separate) rownames(nonlin.par) = paste(group, treatment_c, sep = "")
else rownames(nonlin.par) = group

# need to name xcoef by columns of X (covariate columns partialled out)
#if (!is.empty.model(partial))
#    rownames(xcoef)=c(paste('formula', treatment_c), colnames(G))
#else rownames(xcoef)=paste('formula', treatment_c)

## split signatures if necessary
if (global == TRUE){
ycoef <- matrix(ycoef, nrow=length(f))
}
rownames(ycoef) <- freq.nm

if (!is.null(subject)) {
if (!global){
colnames(nonlin.par) <- colnames(xcoef) <-
names(cum.pct.explained) <- names(cor) <- names(opt) <-
names(y.list) <- names(x.list) <-
paste('subject',levels(subject))
}
colnames(ycoef) <- paste('subject',levels(subject))
}

out <- list(call = match.call(),
ycoef = ycoef, # 'signatures' rows = subject, cols = freq
xcoef = xcoef, # linear parameters on RHS
yscores = yscores,
xscores = xscores,
subject = subject,
treatment = treatment,
time = mf\$time,
ref = ref, # reference level(could be NULL?)
nonlinear.parameters = nonlin.par,
cor = cor,
y = y.list,
x = x.list,
global.smooth = global.smooth,
subject.smooth = subject.smooth,
pct.explained = cum.pct.explained,
opt = opt)
class(out) <- "gslcca"
out
}
```

## Try the gslcca package in your browser

Any scripts or data that you put into this service are public.

gslcca documentation built on May 2, 2019, 5:46 p.m.