Nothing
## ---- include=FALSE-----------------------------------------------------------
knitr::opts_chunk$set(
dpi=72,fig.width=5,fig.height=5.5
)
set.seed(57475)
## ---- eval=FALSE, include=TRUE------------------------------------------------
# # install BiocManager if not present
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# # install structToolbox and dependencies
# BiocManager::install("struct")
#
# # install ggplot if not already present
# if (!require('ggplot2')) {
# install.packages('ggplot2')
# }
## ---- message=FALSE,warning=FALSE---------------------------------------------
suppressPackageStartupMessages({
# load the package
library(struct)
# load ggplot
library(ggplot2)
})
## -----------------------------------------------------------------------------
S = struct_class()
S
## -----------------------------------------------------------------------------
S = struct_class(name = 'Example',
description = 'An example struct object')
S
## -----------------------------------------------------------------------------
# set the name
S$name = 'Basic example'
# get the name
S$name
## -----------------------------------------------------------------------------
DE = DatasetExperiment(
data = iris[,1:4],
sample_meta=iris[,5,drop=FALSE],
variable_meta=data.frame('idx'=1:4,row.names=colnames(data)),
name = "Fisher's iris dataset",
description = 'The famous one')
DE
## -----------------------------------------------------------------------------
DE = iris_DatasetExperiment()
DE
## -----------------------------------------------------------------------------
# number of columns
ncol(DE)
# number of rows
nrow(DE)
# subset the 2nd and 3rd column
Ds = DE[,c(2,3)]
Ds
## -----------------------------------------------------------------------------
# get data frame
head(DE$data)
# sample meta
head(DE$sample_meta)
## -----------------------------------------------------------------------------
# mean centre object
mean_centre = set_struct_obj(
class_name = 'mean_centre',
struct_obj = 'model',
stato = FALSE,
params = character(0),
outputs = c(centred = 'DatasetExperiment',
mean = 'numeric'),
private = character(0),
prototype=list(predicted = 'centred')
)
# PCA object
PCA = set_struct_obj(
class_name = 'PCA',
struct_obj = 'model',
stato = TRUE,
params = c(number_components = 'numeric'),
outputs = c(scores = 'DatasetExperiment',
loadings = 'data.frame'),
private = character(0),
prototype = list(number_components = 2,
stato_id = 'OBI:0200051',
predicted = 'scores')
)
## -----------------------------------------------------------------------------
M = mean_centre()
M
P = PCA(number_components=4)
P
## -----------------------------------------------------------------------------
# mean centre training
set_obj_method(
class_name = 'mean_centre',
method_name = 'model_train',
definition = function(M,D) {
# calculate the mean of all training data columns
m = colMeans(D$data)
# assign to output slot
M$mean = m
# always return the modified model object
return(M)
}
)
# mean_centre prediction
set_obj_method(
class_name = 'mean_centre',
method_name = 'model_predict',
definition = function(M,D) {
# subtract the mean from each column of the test data
D$data = D$data - rep(M$mean, rep.int(nrow(D$data), ncol(D$data)))
# assign to output
M$centred = D
# always return the modified model object
return(M)
}
)
## -----------------------------------------------------------------------------
# create instance of object
M = mean_centre()
# train with iris data
M = model_train(M,iris_DatasetExperiment())
# print to mean to show the function worked
M$mean
# apply to iris_data
M = model_predict(M,iris_DatasetExperiment())
# retrieve the centred data and show that the column means are zero
colMeans(M$centred$data)
## -----------------------------------------------------------------------------
# PCA training
set_obj_method(
class_name = 'PCA',
method_name = 'model_train',
definition = function(M,D) {
# get number of components
A = M$number_components
# convert to matrix
X=as.matrix(D$data)
# do svd
model=svd(X,A,A)
# loadings
P=as.data.frame(model$v)
# prepare data.frame for output
varnames=rep('A',1,A)
for (i in 1:A) {
varnames[i]=paste0('PC',i)
}
rownames(P)=colnames(X)
colnames(P)=varnames
output_value(M,'loadings')=P
# set output
M$loadings = P
# always return the modified model object
return(M)
}
)
# PCA prediction
set_obj_method(
class_name = 'PCA',
method_name = 'model_predict',
definition = function(M,D) {
## calculate scores using loadings
# get number of components
A = M$number_components
# convert to matrix
X=as.matrix(D$data)
# get loadings
P=M$loadings
# calculate scores
that=X%*%as.matrix(P)
# convert scores to DatasetExperiment
that=as.data.frame(that)
rownames(that)=rownames(X)
varnames=rep('A',1,A)
for (i in 1:A) {
varnames[i]=paste0('PC',i)
}
colnames(that)=varnames
S=DatasetExperiment(
data=that,
sample_meta=D$sample_meta,
variable_meta=varnames)
# set output
M$scores=S
# always return the modified model object
return(M)
}
)
## -----------------------------------------------------------------------------
# get the mean centred data
DC = M$centred
# train PCA model
P = model_apply(P,DC)
# get scores
P$scores
## -----------------------------------------------------------------------------
# create model sequence
MS = mean_centre() + PCA(number_components = 2)
# print summary
MS
## -----------------------------------------------------------------------------
# apply model sequence to iris_data
MS = model_apply(MS,iris_DatasetExperiment())
# get default output from sequence (PCA scores with 2 components)
predicted(MS)
## -----------------------------------------------------------------------------
# create iterator
I = iterator()
# add PCA model sequence
models(I) = MS
# retrieve model sequence
models(I)
## -----------------------------------------------------------------------------
# alternative to assign models for iterators
I = iterator() * (mean_centre() + PCA())
models(I)
## -----------------------------------------------------------------------------
I = iterator() * iterator() * (mean_centre() + PCA())
## -----------------------------------------------------------------------------
# new chart object
set_struct_obj(
class_name = 'pca_scores_plot',
struct_obj = 'chart',
stato = FALSE,
params = c(factor_name = 'character'),
prototype = list(
name = 'PCA scores plot',
description = 'Scatter plot of the first two principal components',
libraries = 'ggplot2'
)
)
## -----------------------------------------------------------------------------
# new chart_plot method
set_obj_method(
class_name = 'pca_scores_plot',
method_name = 'chart_plot',
signature = c('pca_scores_plot','PCA'),
definition = function(obj,dobj) {
if (!is(dobj,'PCA')) {
stop('this chart is only for PCA objects')
}
# get the PCA scores data
S = dobj$scores$data
# add the group labels
S$factor_name = dobj$scores$sample_meta[[obj$factor_name]]
# ggplot
g = ggplot(data = S, aes_string(x='PC1',y='PC2',colour='factor_name')) +
geom_point() + labs(colour = obj$factor_name)
# chart objects return the ggplot object
return(g)
}
)
## ----fig.width = 5, fig.height = 4--------------------------------------------
# create chart object
C = pca_scores_plot(factor_name = 'Species')
# plot chart using trained PCA object from model sequence
chart_plot(C,MS[2]) + theme_bw() # add theme
## -----------------------------------------------------------------------------
# define entity
npc = entity(
name = 'Number of principal components',
description = 'The number of principal components to calculate',
type = c('numeric','integer'),
value = 2,
max_length = 1
)
# summary
npc
## -----------------------------------------------------------------------------
# PCA object
PCA = set_struct_obj(
class_name = 'PCA',
struct_obj = 'model',
stato = TRUE,
params = c(number_components = 'entity'),
outputs = c(scores = 'DatasetExperiment',
loadings = 'data.frame'),
private = character(0),
prototype = list(number_components = npc,
stato_id = 'OBI:0200051',
predicted = 'scores')
)
# create PCA model
P = PCA(number_components = 3)
# get set value
P$number_components
# get description
param_obj(P,'number_components')$description
## -----------------------------------------------------------------------------
P = PCA()
# verify PCA is a stato object
is(P, 'stato')
# get stato id
stato_id(P)
# stato name
stato_name(P)
# stato definition
stato_definition(P)
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.