# R/CubicSplineBasisFuncs.R In trioGxE: A data smoothing approach to explore and test gene-environment interaction in case-parent trio data

```#-------------------------------------------------------------------------------
# November 2008
# Functions for cubic penalized regression spline which is represented as in
# equation (4.2) in Wood (2006): value-second-derivative (or cardinal) representation
#
# The functions are written based on the thoretical results shown in
# notes'CubicSplineNotes.pdf'
# (or Chapter 3 of the thesis...For now) and some of the functions are borrowed from
# Wood 2006, Chapter 3.
#-------------------------------------------------------------------------------
# Bmat(), Dmat(), Xmat(), Smat(), trio.Xmat(), trio.Xmat.mul(), trio.Smat(), trio.Smat.mul()
# Matrices Bmat and Dmat defines the relationship between
# beta_k (=f(x_k)) and delta_k (=f"(x_k)).
# The entries of Bmat and Dmat are as defined in Wood, pg 150 (2006).
Bmat <- function(xK){
#xK is the vector of K knots
# calculating h[k]=x[k+1]-x[k]
K <- length(xK)
h <- xK[-1] - xK[-K] #(h[1],h[2],...h[K])'
Bmat <- matrix(0, nrow=(K-2), ncol=(K-2))
for(i in 1:(K-2))
diag(Bmat)[i] <- (h[i]+h[i+1])*(1/3)

if(K>3){
for(i in 1:(K-3)){
Bmat[(i+1),i] <- Bmat[i,(i+1)] <- h[(i+1)]/6
}
}
Bmat
}

Dmat <- function(xK){
K <- length(xK)
h <- xK[-1] - xK[-K]
Dmat <- matrix(0, nrow=(K-2), ncol=K)
for(i in 1:(K-2)){
diag(Dmat)[i] <- 1/h[i]
Dmat[i,(i+1)] <- -(1/h[i]+1/h[(i+1)])
Dmat[i,(i+2)] <- 1/h[(i+1)]
}
Dmat
}

Xmat <- function(xK,x){
#calculate the row vector of basis function at x.
##cat("range of x:")
##print(range(x))
##cat("range of xK:")
##print(range(xK))

xK = as.vector(xK)
n = length(x)
K <- length(xK)
if(any(xK[1]>x)|any(xK[K]<x))
stop("some covariate values are not in the range of knots.")

##if(K>3){
h <- xK[-1] - xK[-K]

j.vec=NA
ind = x==xK[1]
j.vec[ind] = 1
rm(ind)
ind = x==xK[length(xK)]
j.vec[ind] = (length(xK)-1)
rm(ind)

for(j in 2:length(xK)){
ind = (x >= xK[(j-1)]) & (x < xK[j])
j.vec[ind] = (j-1)
}

##j.vec <- find.j2(x=x, xK=xK)

ax.neg <- (xK[(j.vec+1)]-x)/h[j.vec]
ax.pos <- (x-xK[j.vec])/h[j.vec]
cx.neg <- ((xK[(j.vec+1)]-x)^3/h[j.vec] - h[j.vec]*(xK[(j.vec+1)]-x))/6
cx.pos <- ((x-xK[j.vec])^3/h[j.vec] - h[j.vec]*(x-xK[j.vec]))/6

a.vec <- cf.neg.vec <- cf.pos.vec <- matrix(0,ncol=K,nrow=n)

## calculating Fmat
cBmat <- chol(Bmat(xK))
Binv <- chol2inv(cBmat)
Fmat <-  matrix(0, nrow=K, ncol=K)
Fmat[2:(K-1),] <- Binv%*%Dmat(xK)
## end calculating Fmat

for(j in 1:(K-1)){
ind <- j.vec==j
a.vec[ind,j] = ax.neg[ind]
a.vec[ind,(j+1)] = ax.pos[ind]
cf.neg.vec[ind,] <- kronecker(matrix(Fmat[j,],nrow=1),cx.neg[ind])
cf.pos.vec[ind,] <- kronecker(matrix(Fmat[(j+1),],nrow=1),cx.pos[ind])
##cf.neg.vec[ind,] <- kronecker(matrix(Fmat(xK)[j,],nrow=1),cx.neg[ind])
##cf.pos.vec[ind,] <- kronecker(matrix(Fmat(xK)[(j+1),],nrow=1),cx.pos[ind])
##rm(ind)
}

X <- a.vec+cf.neg.vec+cf.pos.vec
X
}

Smat <- function(xK){
#penalty matrix
K <- length(xK)
#cBmat <- chol(Bmat(xK)) #Bmat is positive-definite!!
#Binv <- chol2inv(cBmat)
#Smat <- t(Dmat(xK))%*%Binv%*%Dmat(xK) ## easy to calculate??
Smat <- t(Dmat(xK))%*%solve(Bmat(xK))%*%Dmat(xK) ## easy to calculate??
Smat
}

trio.Xmat <- function(x, mt, k1, k2, xk1, xk2, cenv){
#x and mt can be from formatted data set 'triodat' obtained from pre.triogam()
#non-genetic covariate x.
#y1=1 if the risk allele had been transmitted to the affected child and 0, otherwise.
#For mating type 3, (y1,y2); (0,0) if cgeno=++, (1,0) if +-, and (1,1) if --.
#The basis is set up assuming that the basis for the first smoother is the same as
#that of the second smoother; i.e., the lenghts and positions of the knots for the
#two smoother are assumed to be the same.
#rightnow, it is a little bit too slow;
#should I use gam()'s smooth.construct.cr.smooth.spec() instead?
n <-length(x) #i.e., n<-n+n(MT3)
n1=length(unique(x[mt==1|mt==3.1]));##print(n1)
n2=length(unique(x[mt==2|mt==3.2]));##print(n2)
ind1 <- mt==1; ind2 <- mt==2; ind31 <- mt==3.1; ind32 <- mt==3.2

if( !is.null(xk1) & !is.null(xk2) ){
if ( (n1<k1)|(n2<k2) ) {
if(n1<k1 & n2>=k2){ # not enough for GRR1
msg <- paste(cenv, " has insufficient unique values to support ",
k1, " knots: reduce k1.", sep = "")
stop(msg)
}

else if(n1>=k1 & n2<k2){ # not enough for GRR2
msg <- paste(cenv, " has insufficient unique values to support ",
k2, " knots: reduce k2.", sep = "")
stop(msg)
}

else{ # not enough for both GRR1 and GRR2
msg <- paste(cenv, " has insufficient unique values to support ",
k1, " and ", k2, " knots: reduce k1 and k2.", sep = "")
stop(msg)
}

}

##gamma=1 #needed for estimating the smoothing parameter from GCV
n.col <- k1 +k2 # for augmented X

##Constructing Design Matrix (i.e., cubic spline basis function matrix)

##Mating type 1
X.tilde <- matrix(0, nrow=n, ncol=n.col)

if(sum(ind1)==0) #
X.tilde <- X.tilde
else
X.tilde[ind1, (1:k1)] <- Xmat(xK=xk1, x=x[ind1])

##Mating type 2
if(sum(ind2)==0) #
X.tilde <- X.tilde
else
X.tilde[ind2,(1+k1):(k1+k2)] <- Xmat(xK=xk2, x=x[ind2])

##Mating type 3
if(sum(ind31)==0) # sum(ind31) must be equal to sum(ind32)
X.tilde <- X.tilde
else{ #x[ind31]=x[ind32]
X.tilde[ind31, 1:k1] <- Xmat(xK=xk1, x=x[ind31])
X.tilde[ind32, (1+k1):(k1+k2)] <- Xmat(xK=xk2, x=x[ind32])
}#else ends
}#if(!is.null(xk1) & !is.null(xk2))

else {#either xk1 or xk2 is null
if(!is.null(xk1)) {
if(n1<k1){ # not enough for GRR1
msg <- paste(cenv, " has insufficient unique values to support ",
k1, " knots: reduce k1.", sep = "")
stop(msg)
}

##gamma=1 #needed for estimating the smoothing parameter from GCV
ncol <- k1
##Constructing Design Matrix (i.e., cubic spline basis function matrix)

##Mating type 1
X.tilde <- matrix(0, nrow=n, ncol=ncol)

if(sum(ind1)==0) #
X.tilde <- X.tilde
else
X.tilde[mt==1,] <- Xmat(xK=xk1, x=x[ind1])

##Mating type 3
if(sum(ind31)==0) # sum(ind31) must be equal to sum(ind32)
X.tilde <- X.tilde
else{ #x[ind31]=x[ind32]
X.tilde[mt==3.1,] <- Xmat(xK=xk1, x=x[ind31])
}#else ends

} #if(is.null(xk1))

else if(!is.null(xk2)) {
if(n2<k2){ # not enough for GRR2
msg <- paste(cenv, " has insufficient unique values to support ",
k2, " knots: reduce k2.", sep = "")
stop(msg)
}
##gamma=1 #needed for estimating the smoothing parameter from GCV

ncol <- k2
##Constructing Design Matrix (i.e., cubic spline basis function matrix)

##Mating type 2
X.tilde <- matrix(0, nrow=n, ncol=ncol)

if(sum(ind2)==0) #
X.tilde <- X.tilde
else
X.tilde[mt==2,] <- Xmat(xK=xk2, x=x[ind2])

##Mating type 3
if(sum(ind32)==0) # sum(ind31) must be equal to sum(ind32)
X.tilde <- X.tilde
else{ #x[ind31]=x[ind32]
X.tilde[mt==3.2,] <- Xmat(xK=xk2, x=x[ind32])
}
} #else if(is.null(xk2))

else { #both xk1 and xk2 are null
stop("knots are null for both GxE functions")
} #else: both xk1 and xk2 are null
} # else: either xk1 or xk2 is null

X.tilde
}

trio.Xmat.mul <- function(x, xk, cenv){
#x and mt can be from formatted data set 'triodat' obtained from pre.triogam()
#non-genetic covariate x.
#y1=1 if the risk allele had been transmitted to the affected child and 0, otherwise.
#For mating type 3, (y1,y2); (0,0) if cgeno=++, (1,0) if +-, and (1,1) if --.
#The basis is set up assuming that the basis for the first smoother is the same as
#that of the second smoother; i.e., the lenghts and positions of the knots for the
#two smoother are assumed to be the same.
#rightnow, it is a little bit too slow;
#should I use gam()'s smooth.construct.cr.smooth.spec() instead?
n <-length(x); k <- length(xk) #i.e., n<-n+n(MT3)
stopifnot(n>k)
n.col <- k

##Constructing Design Matrix (i.e., cubic spline basis function matrix)

##Mating type 1
X.tilde <- matrix(0, nrow=n, ncol=n.col)
X.tilde <- Xmat(xK=xk, x=x)
X.tilde
}

trio.Smat <- function(xk1,xk2,pen.scale,X,mt){#sp1,sp2,
# have to make sure xk1 and xk2 are the same as those used for
# constructing X1 and X2!!

ind1 <- mt==1 | mt==3.1
ind2 <- mt==2 | mt==3.2

k1 <- length(xk1); k2 <- length(xk2)

if(k1>0 & k2>0){#both xk1 and xk2 are non null
X1 <- X[ind1,1:k1]
X2 <- X[ind2,(k1+1):(k1+k2)]
pen.mat1 <- Smat(xk1)
pen.mat2 <- Smat(xk2)
}#both xk1 and xk2 are non null
else if(k1>0 & k2==0){
X1 <- X; X2 <- NULL
pen.mat1 <- Smat(xk1); pen.mat2 <- NULL
}# xk1 is not null
else if(k1==0 & k2>0){
X1 <- NULL; X2 <- X
pen.mat <- NULL; pen.mat2 <- Smat(xk2)
} #else if(k1==0 & k2>0
else {#both k1 and k2 are zero
stop("knots are null for both GxE functions")
} #both k1 and k2 are bigger than zero

dim.smat <- k1+k2
pen.mat <- matrix(0, nrow=dim.smat, ncol=dim.smat)

if(pen.scale){
## taken from smoothCon():
## "The following is intended to make scaling `nice' for better gamm performance.
##  Note that this takes place before any resetting of the model matrix, and
##  any `by' variable handling. From a `gamm' perspective this is not ideal,
##  but to do otherwise would mess up the meaning of smoothing parameters
##  sufficiently that linking terms via `id's would not work properly (they
##  would have the same basis, but different penalties)
##  if (scale.penalty && length(sm\$S)>0 && is.null(sm\$no.rescale)) # then the penalty coefficient matrix is rescaled
##
##  {  maXX <- mean(abs(t(sm\$X)%*%sm\$X)) # `size' of X'X
##      for (i in 1:length(sm\$S)) {
##        maS <- mean(abs(sm\$S[[i]]))
##        sm\$S[[i]] <- sm\$S[[i]] * maXX / maS
##      }
##  }"
if(!is.null(X1)){
maXX <- mean(abs(t(X1) %*% X1))
maS <- mean(abs(pen.mat1))
pen.scale1 <- (maXX/maS)
pen.mat1 <- pen.mat1*pen.scale1
}

if(!is.null(X2)){
maXX <- mean(abs(t(X2) %*% X2))
maS <- mean(abs(pen.mat2))
pen.scale2 <- (maXX/maS)
pen.mat2 <- pen.mat2*pen.scale2
}
}#if(pen.scale)

else{
pen.mat1 <- pen.mat1
pen.mat2 <- pen.mat2
}

## multiply pen matrices by sp
##pen.mat1 <- sp1*pen.mat1
##pen.mat2 <- sp2*pen.mat2

if( (k1>0) & (k2>0) ){
pen.mat[1:k1,1:k1] <- pen.mat1
pen.mat[(k1+1):(k1+k2),(k1+1):(k1+k2)] <- pen.mat2
}

else if( (k1>0) & (k2==0) ){
pen.mat <- pen.mat1
}

else{
pen.mat <- pen.mat2
}

S <- pen.mat

S
}
trio.Smat.mul <- function(xk,X,pen.scale=TRUE){#sp1,sp2,
# have to make sure xk1 and xk2 are the same as those used for
# constructing X1 and X2!!

k <- length(xk)
stopifnot(k>0)
pen.mat <- Smat(xk)

if(pen.scale){
## taken from smoothCon():
## "The following is intended to make scaling `nice' for better gamm performance.
##  Note that this takes place before any resetting of the model matrix, and
##  any `by' variable handling. From a `gamm' perspective this is not ideal,
##  but to do otherwise would mess up the meaning of smoothing parameters
##  sufficiently that linking terms via `id's would not work properly (they
##  would have the same basis, but different penalties)
##  if (scale.penalty && length(sm\$S)>0 && is.null(sm\$no.rescale)) # then the penalty coefficient matrix is rescaled
##
##  {  maXX <- mean(abs(t(sm\$X)%*%sm\$X)) # `size' of X'X
##      for (i in 1:length(sm\$S)) {
##        maS <- mean(abs(sm\$S[[i]]))
##        sm\$S[[i]] <- sm\$S[[i]] * maXX / maS
##      }
##  }"
if(!is.null(X)){
maXX <- mean(abs(t(X) %*% X))
maS <- mean(abs(pen.mat))
pen.scale <- (maXX/maS)
pen.mat <- pen.mat*pen.scale
}
}#if(pen.scale)

else{
pen.mat <- pen.mat
}
pen.mat
}
```

## Try the trioGxE package in your browser

Any scripts or data that you put into this service are public.

trioGxE documentation built on May 2, 2019, 3:41 p.m.