misaem.R
In misaem: Linear Regression and Logistic Regression with Missing Covariates

## -----------------------------------------------------------------------------
library(misaem)

## -----------------------------------------------------------------------------
set.seed(1)
n <- 50 # number of rows
p <- 2 # number of explanatory variables

# Generate complete design matrix
library(MASS)
mu.X <- c(1, 1) 
Sigma.X <- matrix(c(1, 1, 1, 4), nrow = 2)
X.complete <- mvrnorm(n, mu.X, Sigma.X)

# Generate response
b <- c(2, 3, -1) # regression coefficient
sigma.eps <- 0.25 # noise variance
y <- cbind(rep(1, n), X.complete) %*% b + rnorm(n, 0, sigma.eps)

## -----------------------------------------------------------------------------
library(mice)
# Add missing values
yX.miss <- ampute(data.frame(y, X.complete), 0.15, patterns = matrix(
  c(0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0), 
  ncol = 3, byrow = TRUE), freq = c(1, 1, 1, 2, 2, 2) / 9, 
  mech = "MCAR", bycases = FALSE)
y.obs <- yX.miss$amp[, 1]              # responses
X.obs <- as.matrix(yX.miss$amp[, 2:3]) # covariates with NAs

## -----------------------------------------------------------------------------
head(X.obs)

## -----------------------------------------------------------------------------
sum(is.na(X.obs))/(n*p) 

## -----------------------------------------------------------------------------
# Estimate regression using EM with NA 
df.obs = data.frame(y, X.obs)
miss.list = miss.lm(y~., data = df.obs)

## -----------------------------------------------------------------------------
print(miss.list)
print(summary(miss.list)) 
summary(miss.list)$coef 

## -----------------------------------------------------------------------------
# Estimate regression using self-defined parameters
miss.list2 = miss.lm(y~., data = df.obs, print_iter = FALSE, maxruns = 500, tol_em = 1e-4)
print(miss.list2)

## -----------------------------------------------------------------------------
# Add null variable with NA
X.null <- mvrnorm(n, 1, 1)
patterns <- runif(n)<0.15 # missing completely at random
X.null[patterns] <- NA
X.obs.null <- cbind.data.frame(X.obs, X.null)

# Without model selection
df.obs.null = data.frame(y, X.obs.null)
miss.list.null = miss.lm(y~., data = df.obs.null)
print(miss.list.null)

# Model selection
miss.model = miss.lm.model.select(y, X.obs.null)
print(miss.model)

## -----------------------------------------------------------------------------
# Prediction
# Generate dataset
set.seed(200)
nt <- 20  # number of new observations
Xt <- mvrnorm(nt, mu.X, Sigma.X)

# Add missing values
Xt.miss <- ampute(data.frame(Xt), 0.15, patterns = matrix(
  c(0, 1, 1, 0), 
  ncol = 2, byrow = TRUE), freq = c(1, 1) /2, 
  mech = "MCAR", bycases = FALSE)
Xt.obs <- as.matrix(Xt.miss$amp) # covariates with NAs

## -----------------------------------------------------------------------------
#train with NA + test no NA
miss.comptest.pred = predict(miss.list2, data.frame(Xt), seed = 100)
print(miss.comptest.pred)

## -----------------------------------------------------------------------------
#both train & test with NA
miss.pred = predict(miss.list2, data.frame(Xt.obs), seed = 100)
print(miss.pred)

## -----------------------------------------------------------------------------
# Generate dataset
set.seed(200)
n <- 500  # number of subjects
p <- 5     # number of explanatory variables
mu.star <- 1:p  #rep(0,p)  # mean of the explanatory variables
sd <- 1:p # rep(1,p) # standard deviations
C <- matrix(c(   # correlation matrix
1,   0.8, 0,   0,   0,
0.8, 1,   0,   0,   0,
0,   0,   1,   0.3, 0.6,
0,   0,   0.3, 1,   0.7,
0,   0,   0.6, 0.7, 1), nrow=p)
Sigma.star <- diag(sd)%*%C%*%diag(sd) # covariance matrix
beta.star <- c(1, -1, 1, 1, -1) # coefficients
beta0.star <- 0  # intercept
beta.true = c(beta0.star,beta.star)

# Design matrix
X.complete <- matrix(rnorm(n*p), nrow=n)%*%chol(Sigma.star)+
              matrix(rep(mu.star,n), nrow=n, byrow = TRUE)

# Reponse vector
p1 <- 1/(1+exp(-X.complete%*%beta.star-beta0.star))
y <- as.numeric(runif(n)<p1)

## -----------------------------------------------------------------------------
# Generate missingness
set.seed(200)
p.miss <- 0.10
patterns <- runif(n*p)<p.miss # missing completely at random
X.obs <- X.complete
X.obs[patterns] <- NA

## -----------------------------------------------------------------------------
head(X.obs)

## -----------------------------------------------------------------------------
df.obs = data.frame(y, X.obs)

#logistic regression with NA 
miss.list = miss.glm(y~., data = df.obs, seed = 100)
print(miss.list)
print(summary(miss.list)) 
summary(miss.list)$coef 

## -----------------------------------------------------------------------------
# Add null variable with NA
X.null <- mvrnorm(n, 1, 1)
patterns <- runif(n)<0.10 # missing completely at random
X.null[patterns] <- NA
X.obs.null <- cbind.data.frame(X.obs, X.null)

# Without model selection
df.obs.null = data.frame(y, X.obs.null)
miss.list.null = miss.glm(y~., data = df.obs.null)
print(miss.list.null)

# model selection for SAEM
miss.model = miss.glm.model.select(y, X.obs.null)
print(miss.model)

## -----------------------------------------------------------------------------
# Generate test set with missingness
set.seed(200)
nt = 100
X.test <- matrix(rnorm(nt*p), nrow=nt)%*%chol(Sigma.star)+
          matrix(rep(mu.star,nt), nrow = nt, byrow = TRUE)

# Generate the test set
p1 <- 1/(1+exp(-X.test%*%beta.star-beta0.star))
y.test <- as.numeric(runif(nt)<p1)

# Generate missingness on test set
p.miss <- 0.10
X.test[runif(nt*p)<p.miss] <- NA

# Prediction on test set
pr.saem <- predict(miss.list, data.frame(X.test))

# Confusion matrix
pred.saem = (pr.saem>0.5)*1
table(y.test,pred.saem )

Any scripts or data that you put into this service are public.

misaem documentation built on April 12, 2021, 9:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

misaem
Linear Regression and Logistic Regression with Missing Covariates

inst/doc/misaem.R
In misaem: Linear Regression and Logistic Regression with Missing Covariates

Try the misaem package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

misaem Linear Regression and Logistic Regression with Missing Covariates

inst/doc/misaem.R In misaem: Linear Regression and Logistic Regression with Missing Covariates

Try the misaem package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

misaem
Linear Regression and Logistic Regression with Missing Covariates

inst/doc/misaem.R
In misaem: Linear Regression and Logistic Regression with Missing Covariates