FusionLearn.R
In FusionLearn: Fusion Learning

## ----include = FALSE----------------------------------------------------------
library(ggplot2)
#library(ggalt)
#library(ggfortify)
#library(ggpubr)
library(FusionLearn)
#library(lattice)
set.seed(123)
cex = 0.5

## ----eval=FALSE---------------------------------------------------------------
#  install.packages("FusionLearn",repos = "http://cran.us.r-project.org")

## -----------------------------------------------------------------------------
library(MASS)
library(mvtnorm)
m <- 4
N <- 100
p <- 10

## ----include=TRUE-------------------------------------------------------------
## parameters
rho <- 0.3
Rho <- rho*matrix(1,4,4) + diag(1-rho,4,4)
sigma <- c(2,1,3,2)
Sigma <- Rho * sigma * rep(sigma, each = nrow(Rho))
mu = c(-1,1,0.5,-0.5)

## data information
name_pf =  c("platform 1","platform 2", "platform 3","platform 4")
name_id =  paste("ID", seq(1:N))
name_cov = paste("M", seq(1:p))

## -----------------------------------------------------------------------------
x <- array(NA,dim = c(N,m,p))
for(k in 1:p){
  x[,,k] <- mvrnorm(N,mu,Sigma)
}

## ----include = FALSE----------------------------------------------------------
dimnames(x) = list(name_id, name_pf, name_cov)

## ----echo=FALSE---------------------------------------------------------------
print("Platform 1")
print(round(x[1:5,1,1:5],3))

print("Platform 2")
print(round(x[1:5,2,1:5],3))

print("Platform 3")
print(round(x[1:5,3,1:5],3))
print("Platform 4")
print(round(x[1:5,4,1:5],3))

## ----echo= FALSE--------------------------------------------------------------
tp <- 5
beta <- array(0,dim = c(p,m))
for (j in 1:m){
  beta[1:tp,j]<- c(1,1.5,1.2,2,1.8)*j
  beta[(tp+1):p,j]<-rep(0,(p-tp))
} 
rownames(beta) <- name_cov
e <- rmvnorm(N,mu,Sigma)

x1 = x[,1,]
x2 = x[,2,]
x3 = x[,3,]
x4 = x[,4,]
  
y1 = x1 %*% beta[,1] + e[,1]
y2 = x2 %*% beta[,2] + e[,2]
y3 = x3 %*% beta[,3] + e[,3]
y4 = x4 %*% beta[,4] + e[,4]

## -----------------------------------------------------------------------------
x <- list(x1, x2, x3, x4)
y <- list(y1, y2, y3, y4)

## -----------------------------------------------------------------------------
result <- fusionbase(x, y, lambda = 0.9, N = N ,p = p, m = m, methods = "scad", 
                     Complete = TRUE)
print(result) ## partial estimated parameters

## ----fig.align='center', fig.width= 4.5, fig.height = 3.5, echo = FALSE, fig.cap="Learning example of simulated continuous responses"----
para <- data.frame(result$beta)
rownames(para) <- name_cov
g <- ggplot(para, aes(x=1:10))
g + geom_area(aes(y=X3+0.05, fill="Platform 3"))  + 
  geom_area(aes(y=X4+0.02, fill="Platform 4"))   +
  geom_area(aes(y=X1*1.2+0.01, fill="Platform 1")) + 
  geom_area(aes(y=X2*0.7+0.06, fill="Platform 2"))  + 
labs(title="Estimated Coefficients", 
       subtitle="Non-zero v.s. zero coefficients", 
       caption="Continuous responses", 
       y = "Estimation",
       x = " ") +
 scale_x_continuous( name =  "Predictors", breaks = 1:10, labels = name_cov) +
 scale_y_discrete(labels = NULL) 
#+theme(panel.background = element_rect(fill = "white", colour = "grey50"))


## ----echo = FALSE-------------------------------------------------------------
y1. <- y1 > 0
y2. <- y2 > 0
y3. <- y3 > 0
y4. <- y4 > 0
y.bin <- list(y1. ,y2., y3., y4.)

## -----------------------------------------------------------------------------
result <- fusionbinary(x, y.bin, lambda = 0.15, N = N, p = p, m = m, methods = "scad",
                       link = "logit")
print(result)

## ----fig.align='center',fig.width= 4.5, fig.height = 3.5, echo = FALSE, fig.cap = "Learning example of simulated binary responses"----
para <- data.frame(result$beta)
rownames(para) <- name_cov
g <- ggplot(para, aes(x=1:10))
g + geom_area(aes(y=X1*1.4+0.01, fill="Platform 1")) + 
  geom_area(aes(y=X2*1.2+0.015, fill="Platform 2"))  + 
  geom_area(aes(y=X3*1.1+0.005, fill="Platform 3"))  + 
  geom_area(aes(y=X4*0.8+0.02, fill="Platform 4"))   +
labs(title="Estimated Coefficients", 
       subtitle="Non-zero v.s. zero coefficients", 
       caption="Binary responses", 
       y = "Estimation",
       x = " ") +
 scale_x_continuous( name =  "Predictors", breaks = 1:10, labels = name_cov) +
 scale_y_discrete(labels = NULL)  
 #+theme(panel.background = element_rect(fill = "white", colour = "grey50"))

table(beta[,1]!=0, result$beta[,1]!=0)

## ----echo = FALSE-------------------------------------------------------------
y.mixed <- list(y1,y2, y3>0, y4>0)

## -----------------------------------------------------------------------------
result <- fusionmixed(x, y.mixed, lambda = 0.4, N = N, p = p, m1 = 2, m2 = 2, methods 
                      = "scad", link = "logit")
print(result)

## ----fig.align='center', fig.width= 4.5, fig.height = 3.5,  echo = FALSE, fig.cap = "Learning example of simulated mixed responses"----
para <- data.frame(result$beta)
rownames(para) <- name_cov
g <- ggplot(para, aes(x=1:10))
g + geom_area(aes(y=X1+0.01, fill="Platform 1")) + 
  geom_area(aes(y=X2*0.7+0.015, fill="Platform 2"))  + 
  geom_area(aes(y=X3*0.7+0.005, fill="Platform 3"))  + 
  geom_area(aes(y=X4*0.7+0.02, fill="Platform 4"))   +
labs(title="Estimated Coefficients", 
       subtitle="Non-zero v.s. zero coefficients", 
       caption="Mixed responses", 
       y = "Estimation",
       x = " ") +
 scale_x_continuous( name =  "Predictors", breaks = 1:10, labels = name_cov) +
 scale_y_discrete(labels = NULL) 
 #+theme(panel.background = element_rect(fill = "white", colour = "grey50"))

table(beta[,1]!=0, result$beta[,1]!=0)

## ----echo=FALSE---------------------------------------------------------------
x2. <- x2
x2.[,2] <- NA
print(cbind(round(x2.[1:5,1:5],3)))

## ----echo = FALSE-------------------------------------------------------------
x <- list(x1, x2., x3, x4)
y <- list(y1, y2, y3, y4)

## -----------------------------------------------------------------------------
result <- fusionbase(x, y, lambda = 1.3, N = N, p = p, m = m, methods = "scad", 
                     Complete = FALSE)
print(result$beta)

## ----include = F--------------------------------------------------------------
m <- 4
N <- 500
p <- 1000

## parameters
rho <- 0.3
Rho <- rho*matrix(1,4,4) + diag(1-rho,4,4)
sigma <- c(2,1,3,2)
Sigma <- Rho * sigma * rep(sigma, each = nrow(Rho))
mu =c(0,0,0,0)

## data information
name_pf =  c("platform 1","platform 2", "platform 3","platform 4")
name_id =  paste("ID", seq(1:N))
name_cov = paste("M", seq(1:p))

## Main
x <- array(NA,dim = c(N,m,p))
for(k in 1:p){
  x[,,k] <- mvrnorm(N,mu,Sigma)
}

dimnames(x) = list(name_id, name_pf, name_cov)


tp <- 40
beta <- array(0,dim = c(p,m))
for (j in 1:m){
  beta[1:tp,j]<- runif(tp, 1,2)
  beta[(tp+1):p,j]<-rep(0,(p-tp))
} 
beta[c(2,4,6,10,16,20),] <- - beta[c(2,4,6,10,16,20),]
rownames(beta) <- name_cov

rho <- 0.3
Rho <- rho*matrix(1,4,4) + diag(1-rho,4,4)
sigma <- c(2,1,3,2)
Sigma <- Rho * sigma * rep(sigma, each = nrow(Rho))
e <- rmvnorm(N,mu,Sigma)

x1 = x[,1,]
x2 = x[,2,]
x3 = x[,3,]
x4 = x[,4,]
  
y1 = x1 %*% beta[,1] + e[,1]
y2 = x2 %*% beta[,2] + e[,2]
y3 = x3 %*% beta[,3] + e[,3]
y4 = x4 %*% beta[,4] + e[,4]

x <- list(x1, x2, x3, x4)
y <- list(y1, y2, y3, y4)

## ----fig.align='center', echo = TRUE, fig.width= 4.5, fig.height = 3.5, fig.cap = "Example of model selection process in the fusion learning algorithm", warning = F----
lambda <- c(0.19, 0.2, 0.3, 0.5, 0.6, 1.75, 2)
model <- fusionbase.fit(x, y, lambda, N = 500, p = 1000, m = 4, depen = "CORR")
print(model)

## -----------------------------------------------------------------------------
result <- fusionbase(x, y, lambda = 0.5, N = 500, p = 1000, m = 4)

## ----echo=F-------------------------------------------------------------------
result_table = table(beta[,1]!=0,result$beta[,1]!=0 )
colnames(result_table) <- c("Est zero", "Est non-zero")
rownames(result_table) <- c("True zero", "True non-zero")
result_table

## ----fig.align='center', fig.width= 4.5, fig.height = 3.5, fig.cap = "Model selection for the function `fusionbase.fit`"----
##analysis of the stock index data
#Responses contain indices "VIX","GSPC", and "DJI" 
y <- list(stockindexVIX[,1],stockindexGSPC[,1],stockindexDJI[,1]) 
#Predictors include 46 stocks
x <- list(stockindexVIX[,2:47],stockindexGSPC[,2:47],stockindexDJI[,2:47])  
##Implementing the model selection algorithm based on the psuedolikelihood 
##information criteria  
model <- fusionbase.fit(x,y,seq(0.03,5,length.out = 10),232,46,3,depen="CORR")
print(model)
lambda <- model[which.min(model[,2]),1]
result <- fusionbase(x,y,lambda,232,46,3)
##Identify the significant predictors for the three indices
id <- which(result$beta[,1]!=0)+1
colnames(stockindexVIX)[id]

## ----fig.align='center'-------------------------------------------------------
##Analysis of the gene data 
y = list(mockgene1[,2],mockgene2[,2])           ## responses "status"
x = list(mockgene1[,3:502],mockgene2[,3:502])   ## 500 predictors 
##Implementing fusion learning algorithm 
lambda <- seq(0.1,0.5, length.out = 10)
result <- fusionbinary(x,y,0.3,N=c(98,286),500,2) 
id <- which(result$beta[,1]!=0)+2
genename <- colnames(mockgene1)[id]
print(genename)