knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Introduction

In this vignette, we apply the functions of the package olsgasp to a real case example. This example is an extract from the methylation dataset of the article mettre ref.

Install and load package

Install packages

install.packages("olsgasp")
install.packages("knitr")
install.packages("ggplot2")
install.packages("ggrepel")
install.packages("ggthemes")
install.packages("tidyr")

Note that ggplot2, ggrepel and ggthemes to plot graphics.

Load packages

suppressPackageStartupMessages(library(olsgasp))
library(knitr)
library(ggplot2)
library(ggrepel)
library(ggthemes)
library(DiceEval)
library(tidyr)

Load data

Load the dataset.

data(data_methyl)
Y = data_methyl$Y
sites = data_methyl$sites
X = data_methyl$X
colnames(X) = c("ES-cell","primary-cell")
K = nrow(Y)
N = ncol(Y)

Simule missing data

The missing values are located from sample 2 to 8 and 10 to 15 for the sites 20 to 100 and 120 to 150.

k_star = (1:K)[-c(1,9)]
n_star = c(20:100,120:150)
Y_obs = Y
Y_obs[k_star,n_star] = NA

Initialize, fit and pred

The model is initialized with the function svd_olsgasp, fitted with fit_olsgasp and the missing values are predicted with pred_olsgasp.

obj_olsgasp = svd_olsgasp(Y_obs,sites,X,tol_eig = 1e-6)
obj_olsgasp = fit_olsgasp(obj_olsgasp)
Y_pred = pred_olsgasp(obj_olsgasp)

Quality criteria

The Quality criteria are calculated only on the missing values.

ind_na = is.na(Y_obs)
R2(Y[ind_na],Y_pred[ind_na])
RMSE(Y[ind_na],Y_pred[ind_na])

Visualize results

We choose two samples to visualize the true values of the processes and the predicted values.

proc_a = 2
proc_b = 10
colorTitle = "black"
sizeTitle = 15
formeTitle = "bold.italic"
colorAxe = "black"
sizeAxe = 10
formeAxe = "bold"
textSize = 15
Title = "Two samples"
low = "#349be8"
high = "#cc0000"
point_size = 2 
size_point_graph = c(2,3)
x = X

df = data.table(sites = sites,
                  Y_A = Y[proc_a, ],
                  Y_A_obs = Y_pred[proc_a, ],
                  Y_B = Y[proc_b, ],
                  Y_B_obs = Y_pred[proc_b, ])


df = data.table(df %>% pivot_longer(!sites, names_to = "origine", values_to = "values"))
df = data.table(df, Type = factor(rep(c("Pred","True"),nrow(df)/2),labels = c("True","Pred")), 
                  X = as.factor(rep(rep(c(x[proc_a],x[proc_b]),each=2),nrow(df)/4)))

  p = ggplot(df, aes(
    x = sites,
    y = values,
    color = X,
    shape = Type,
    size = Type
  )) +
    geom_point(alpha = 1) +
    scale_shape_manual(values = c(1, 15), name=c("Values")) + 
    scale_size_manual(guide="none", values = size_point_graph) +
    guides(color = guide_legend(override.aes = list(size=point_size)),
           shape = guide_legend(override.aes = list(size=point_size))) +
    scale_color_manual(values = c(low,high),
                       name=c("Cell type"),
                       breaks=c("0", "1"),
                       labels=c("ES", "primary"))+

    labs(title = Title,
         x = "Sites",
         y = "Gaussian processes")+
    theme_minimal() +
    theme(
      text = element_text(size=textSize),
      plot.title = element_text(
        hjust = 0.5,
        color = colorTitle,
        size = sizeTitle,
        face = formeTitle
      ),
      axis.title.x = element_text(
        color = colorAxe,
        size = sizeAxe,
        face = formeAxe
      ),
      axis.title.y = element_text(
        color = colorAxe,
        size = sizeAxe,
        face = "bold"
      )
    )

p


melinaR/olsgasp documentation built on March 11, 2023, 12:10 a.m.