inst/doc/real_data.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----load package-------------------------------------------------------------
# Allocate 10GB of memory for Java. Must be called before library(iBART)
options(java.parameters = "-Xmx10g") 
library(iBART)

## ----load data----------------------------------------------------------------
load("../data/catalysis.rda")    # load data
summary(catalysis)

## ----iBART, eval=FALSE--------------------------------------------------------
#  iBART_real_data <- iBART(X = catalysis$X, y = catalysis$y,
#                           head = catalysis$head,  # colnames of X
#                           unit = catalysis$unit,  # units of X
#                           opt = c("binary", "unary", "binary"), # binary operator first
#                           out_sample = FALSE,
#                           Lzero = TRUE,
#                           K = 5, # maximum descriptors in l-zero model
#                           standardize = FALSE,
#                           seed = 888)

## ----load result, echo=FALSE--------------------------------------------------
load("../data/iBART_real_data.rda")             # load full result

## ----size_plot, fig.width=7, fig.height=3.5-----------------------------------
library(ggplot2)
df_dim <- data.frame(dim = c(iBART_real_data$iBART_sel_size, iBART_real_data$iBART_gen_size),
                     iter = rep(0:3, 2),
                     type = rep(c("Selected", "Generated"), each = 4))
ggplot(df_dim, aes(x = iter, y = dim, colour = type, group = type)) +
  theme(text = element_text(size = 15), legend.title = element_blank()) +
  geom_line(size = 1) +
  geom_point(size = 3, shape = 21, fill = "white") +
  geom_text(data = df_dim, aes(label = dim, y = dim + 40, group = type),
            position = position_dodge(0), size = 5, colour = "blue") +
  labs(x = "Iteration", y = "Number of descriptors") +
  scale_x_continuous(breaks = c(0, 1, 2, 3))

## ----k-descriptor-------------------------------------------------------------
iBART_real_data$Lzero_names[[3]]
summary(iBART_real_data$Lzero_models[[3]])

## ----OIS vs non-OIS, fig.width=7, fig.height=3.5------------------------------
# Train a non-OIS model with 3 predictors
set.seed(123)
model_no_OIS <- k_var_model(X_train = catalysis$X, y_train = catalysis$y, k = 3, parallel = FALSE)

#### Figure 7 ####
library(ggpubr)
model_OIS <- iBART_real_data$Lzero_model[[3]]

# Prepare data for plotting
data_OIS <- data.frame(y = catalysis$y, y_hat = model_OIS$fitted.values)
data_no_OIS <- data.frame(y = catalysis$y, y_hat = model_no_OIS$models$fitted.values)

p1 <- ggplot(data_OIS, aes(x = y_hat, y = catalysis$y)) +
  geom_point() +
  geom_abline() +
  xlim(c(min(data_OIS$y_hat, data_OIS$y) - 0.2, max(data_OIS$y_hat, data_OIS$y) + 0.2)) +
  ylim(c(min(data_OIS$y_hat, data_OIS$y) - 0.2, max(data_OIS$y_hat, data_OIS$y) + 0.2)) +
  xlab("") +
  ylab("") +
  annotate("text", x = -12, y = -3, parse = TRUE,
           label = paste("R^{2} ==", round(summary(model_OIS)$r.squared, 4)))
p2 <- ggplot(data_no_OIS, aes(x = y_hat, y = catalysis$y)) +
  geom_point() +
  geom_abline() +
  xlim(c(min(data_no_OIS$y_hat, data_no_OIS$y) - 0.2, max(data_no_OIS$y_hat, data_no_OIS$y) + 0.2)) +
  ylim(c(min(data_no_OIS$y_hat, data_no_OIS$y) - 0.2, max(data_no_OIS$y_hat, data_no_OIS$y) + 0.2)) +
  xlab("") +
  ylab("") +
  annotate("text", x = -12, y = -3, parse = TRUE,
           label = paste("R^{2} ==", round(summary(model_no_OIS$models)$r.squared, 4)))
fig <- ggarrange(p1, p2,
                 labels = c("OIS", "non-OIS"),
                 ncol = 2, nrow = 1)
annotate_figure(fig,
                bottom = text_grob("Predicted binding energy from descriptors (eV)"),
                left = text_grob("DFT binding energy (eV)", rot = 90))

## ----sessioninfo--------------------------------------------------------------
sessionInfo()

Try the iBART package in your browser

Any scripts or data that you put into this service are public.

iBART documentation built on Nov. 15, 2023, 1:09 a.m.