knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
In this vignette, we apply the functions of the package olsgasp to a real case example. This example is an extract from the methylation dataset of the article mettre ref.
install.packages("olsgasp") install.packages("knitr") install.packages("ggplot2") install.packages("ggrepel") install.packages("ggthemes") install.packages("tidyr")
Note that ggplot2, ggrepel and ggthemes to plot graphics.
suppressPackageStartupMessages(library(olsgasp)) library(knitr) library(ggplot2) library(ggrepel) library(ggthemes) library(DiceEval) library(tidyr)
Load the dataset.
data(data_methyl) Y = data_methyl$Y sites = data_methyl$sites X = data_methyl$X colnames(X) = c("ES-cell","primary-cell") K = nrow(Y) N = ncol(Y)
The missing values are located from sample 2 to 8 and 10 to 15 for the sites 20 to 100 and 120 to 150.
k_star = (1:K)[-c(1,9)] n_star = c(20:100,120:150) Y_obs = Y Y_obs[k_star,n_star] = NA
The model is initialized with the function svd_olsgasp, fitted with fit_olsgasp and the missing values are predicted with pred_olsgasp.
obj_olsgasp = svd_olsgasp(Y_obs,sites,X,tol_eig = 1e-6) obj_olsgasp = fit_olsgasp(obj_olsgasp) Y_pred = pred_olsgasp(obj_olsgasp)
The Quality criteria are calculated only on the missing values.
ind_na = is.na(Y_obs) R2(Y[ind_na],Y_pred[ind_na]) RMSE(Y[ind_na],Y_pred[ind_na])
We choose two samples to visualize the true values of the processes and the predicted values.
proc_a = 2 proc_b = 10
colorTitle = "black" sizeTitle = 15 formeTitle = "bold.italic" colorAxe = "black" sizeAxe = 10 formeAxe = "bold" textSize = 15 Title = "Two samples" low = "#349be8" high = "#cc0000" point_size = 2 size_point_graph = c(2,3) x = X df = data.table(sites = sites, Y_A = Y[proc_a, ], Y_A_obs = Y_pred[proc_a, ], Y_B = Y[proc_b, ], Y_B_obs = Y_pred[proc_b, ]) df = data.table(df %>% pivot_longer(!sites, names_to = "origine", values_to = "values")) df = data.table(df, Type = factor(rep(c("Pred","True"),nrow(df)/2),labels = c("True","Pred")), X = as.factor(rep(rep(c(x[proc_a],x[proc_b]),each=2),nrow(df)/4))) p = ggplot(df, aes( x = sites, y = values, color = X, shape = Type, size = Type )) + geom_point(alpha = 1) + scale_shape_manual(values = c(1, 15), name=c("Values")) + scale_size_manual(guide="none", values = size_point_graph) + guides(color = guide_legend(override.aes = list(size=point_size)), shape = guide_legend(override.aes = list(size=point_size))) + scale_color_manual(values = c(low,high), name=c("Cell type"), breaks=c("0", "1"), labels=c("ES", "primary"))+ labs(title = Title, x = "Sites", y = "Gaussian processes")+ theme_minimal() + theme( text = element_text(size=textSize), plot.title = element_text( hjust = 0.5, color = colorTitle, size = sizeTitle, face = formeTitle ), axis.title.x = element_text( color = colorAxe, size = sizeAxe, face = formeAxe ), axis.title.y = element_text( color = colorAxe, size = sizeAxe, face = "bold" ) ) p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.