rm(list=ls()) ### To clear namespace
library(knitr)
opts_chunk$set(echo=TRUE, eval=FALSE)

Linear Scalar-on-Function Regression

$$ Y_i = \alpha + \int_\mathcal{T}X_i(t)\beta(t)dt + \varepsilon_i $$

set.seed(123)

library(MASS)
library(fcr)
# library(tidyverse)
library(dplyr)
library(CompQuadForm)
library(sparsefreg)

## Data generation
M <- 100 # grid size
N <- 800
m <- 5
J <- 5
nimps <- 10
w <- 10
var_eps <- 1
var_delt <- 0.5
grid <- seq(from=0,to=1,length.out = M)
mux <- rep(0,M)
Cx_f<-function(t,s,sig2=1,rho=0.5){ # Matern covariance function with nu = 5/2
  d <- abs(outer(t,s,"-"))
  tmp2 <- sig2*(1+sqrt(5)*d/rho + 5*d^2/(3*rho^2))*exp(-sqrt(5)*d/rho)}
Cx <- Cx_f(grid,grid)
lam <- eigen(Cx,symmetric = T)$values/M
phi <- eigen(Cx,symmetric = T)$vectors*sqrt(M)

beta <- w*sin(2*pi*grid)
alpha <- 0

X_s <- mvrnorm(N,mux,Cx)
X_comp <- X_s + rnorm(N*M,sd = sqrt(var_delt))
Xi <- (X_s-mux)%*%phi/M
eps <- rnorm(N,0,sd = sqrt(var_eps))
y <- c(alpha + X_s%*%beta/M + eps)

Cxy <- Cx%*%beta/M
muy <- c(t(mux)%*%beta/M)
var_y <- c(t(beta)%*%Cx%*%beta/(M^2)) + var_eps

X_mat<-matrix(nrow=N,ncol=m)
T_mat<-matrix(nrow=N,ncol=m)
ind_obs<-matrix(nrow=N,ncol=m)

for(i in 1:N){
  ind_obs[i,]<-sort(sample(1:M,m,replace=FALSE))
  X_mat[i,]<-X_comp[i,ind_obs[i,]]
  T_mat[i,]<-grid[ind_obs[i,]]
}

spt<-1
ind_obs[spt,1] = 1; ind_obs[spt,m] = M
X_mat[spt,]<-X_comp[spt,ind_obs[spt,]]
T_mat[spt,]<-grid[ind_obs[spt,]]

## Create data frame for observed data
obsdf <- data.frame("X" = c(t(X_mat)),"argvals" = c(t(T_mat)),
                    "y" = rep(y,each = m),"subj" = rep(1:N,each = m))

user_params <- list(Cx = Cx, mux = mux, var_delt = var_delt,
                    muy = muy,lam = lam, phi = phi, Cxy = Cxy,
                    var_y = var_y)

misfit_est <- misfit(obsdf,grid,nimps = nimps,J = J,
                     user_params = user_params,
                     family = "Gaussian",
                     impute_type = "Multiple",
                     cond.y = T)
misfit_est$alpha.hat
sum((misfit_est$beta.hat-beta)^2)/M
mean(rowMeans((X_s-misfit_est$Xest)^2))

plot(grid,beta,type = 'l')
lines(grid,misfit_est$beta.hat,lty = 2)

alpha
misfit_est$alpha.hat

par(mfrow = c(1,2))
matplot(t(misfit_est$Xhat),type = 'l')
matplot(t(X_s),type = 'l')
par(mfrow = c(1,1))

{ids <- c(1,10,100,300)
  # ids <- sample(1:N,size = 4,replace = F)
par(mfrow = c(2,2))
for(i in 1:4){
  sid <- ids[i]
  ylim <- range(c(X_s[sid,],X_mat[sid,],misfit_est$Xhat[sid,]))
  plot(grid,X_s[sid,],type = 'l',ylim = ylim,main = paste("Subject Number ",sid))
  points(T_mat[sid,],X_mat[sid,])
  lines(grid,misfit_est$Xhat[sid,],lty = 2)
}
}

Logistic Scalar-on-Function Regression

$$ \text{logit}(p_i) = \alpha + \int_\mathcal{T}X_i(t)\beta(t)dt + \varepsilon_i $$

set.seed(123)

library(MASS)
library(fcr)
library(dplyr)
library(CompQuadForm)
library(sparsefreg)

## Data generation
M <- 100 # grid size
N <- 800
m <- 5
J <- 2
nimps <- 10
w <- 1
var_delt <- 0.5
grid <- seq(from=0,to=1,length.out = M)
nfpc <- 2 # number of fpcs used for mu1
p <- 0.5
mu0 <- rep(0,M)
Cx_f<-function(t,s,sig2=1,rho=0.5){ # Matern covariance function with nu = 5/2
  d <- abs(outer(t,s,"-"))
  tmp2 <- sig2*(1+sqrt(5)*d/rho + 5*d^2/(3*rho^2))*exp(-sqrt(5)*d/rho)}
Cx <- Cx_f(grid,grid)
lam <- eigen(Cx,symmetric = T)$values/M
phi <- eigen(Cx,symmetric = T)$vectors*sqrt(M)
if(nfpc==1){
  mu1 <- phi[,1]*w
}else{
  mu1 <- rowSums(phi[,1:nfpc])*w
}

## Slope Function
if(nfpc==1){
  beta <- phi[,1]*(1/lam[1])*w
}else{
  beta <- phi[,1:nfpc]%*%(1/lam[1:nfpc])*w
}
alpha <- log(p) - log(1 - p) - sum(((t(phi)%*%(mu1 - mu0)/M)^2)/(lam^2))/2

## Simulate Data
y <- sort(rbinom(N,1,p))
N_0 <- sum(y==0)
N_1 <- sum(y==1)
Xs_0 <- mvrnorm(N_0,mu0,Cx)
Xs_1 <- mvrnorm(N_1,mu1,Cx)
Xn_0 <- Xs_0 + rnorm(N_0*M,sd = sqrt(var_delt))
Xn_1 <- Xs_1 + rnorm(N_1*M,sd = sqrt(var_delt))
X_s <- rbind(Xs_0,Xs_1)
X_comp <- rbind(Xn_0,Xn_1)

Xi0 <- (Xs_0 - mu0)%*%phi/M
Xi1 <- (Xs_1 - mu1)%*%phi/M
Xi <- rbind(Xi0,Xi1)

X_mat<-matrix(nrow=N,ncol=m)
T_mat<-matrix(nrow=N,ncol=m)
ind_obs<-matrix(nrow=N,ncol=m)

for(i in 1:N){
  ind_obs[i,]<-sort(sample(1:M,m,replace=FALSE))
  X_mat[i,]<-X_comp[i,ind_obs[i,]]
  T_mat[i,]<-grid[ind_obs[i,]]
}

spt<-c(1,N_0+1)
ind_obs[spt,1] = 1; ind_obs[spt,m] = M
X_mat[spt,]<-X_comp[spt,ind_obs[spt[1],]]
T_mat[spt,]<-rbind(grid[ind_obs[spt[1],]],grid[ind_obs[spt[1],]])

## Create data frame for observed data
obsdf <- data.frame("X" = c(t(X_mat)),"argvals" = c(t(T_mat)),
                    "y" = rep(y,each = m),"subj" = rep(1:N,each = m))

user_params <- list(Cx = Cx, mu0 = mu0, mu1 = mu1,
                    var_delt = var_delt, lam = lam, phi = phi)

misfit_est <- misfit(obsdf,grid,nimps = nimps,J = J,
                user_params = user_params,
                family = "Binomial",
                impute_type = "Multiple",
                cond.y = T)

plot(grid,beta,type = 'l')
lines(grid,misfit_est$beta.hat,lty = 2)

alpha
misfit_est$alpha.hat

par(mfrow = c(1,2))
matplot(x = grid,t(X_s[which(y==0),]),type = 'l',col = 'black')
lines(grid,mu0,col = 'black',lwd = 5)
matplot(x = grid,t(X_s[which(y==1),]),type = 'l',col = 'red',add = T)
lines(grid,mu1,col = 'red',lwd = 5)

matplot(x = grid,t(misfit_est$Xhat[which(y==0),]),type = 'l',col = 'black')
lines(grid,misfit_est$params$mu0,col = 'black',lwd = 5)
matplot(x = grid,t(misfit_est$Xhat[which(y==1),]),type = 'l',col = 'red',add = T)
lines(grid,mu1,col = 'red',lwd = 5)
par(mfrow = c(1,1))


justin-petrovich/sparsefreg documentation built on Aug. 20, 2020, 9:04 p.m.