inst/doc/struct_templates_and_helper_functions.R

## ---- include=FALSE-----------------------------------------------------------
knitr::opts_chunk$set(
    dpi=72,fig.width=5,fig.height=5.5
)
set.seed(57475)

## ---- eval=FALSE, include=TRUE------------------------------------------------
#  # install BiocManager if not present
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#  
#  # install structToolbox and dependencies
#  BiocManager::install("struct")
#  
#  # install ggplot if not already present
#   if (!require('ggplot2')) {
#    install.packages('ggplot2')
#  }

## ---- message=FALSE,warning=FALSE---------------------------------------------
suppressPackageStartupMessages({
  # load the package
  library(struct)
  
  # load ggplot
  library(ggplot2)
})

## -----------------------------------------------------------------------------
S = struct_class()
S

## -----------------------------------------------------------------------------
S = struct_class(name = 'Example',
                 description = 'An example struct object')
S

## -----------------------------------------------------------------------------
# set the name
S$name = 'Basic example'

# get the name
S$name

## -----------------------------------------------------------------------------
DE = DatasetExperiment(
  data = iris[,1:4],
  sample_meta=iris[,5,drop=FALSE],
  variable_meta=data.frame('idx'=1:4,row.names=colnames(data)),
  name = "Fisher's iris dataset",
  description = 'The famous one')
DE

## -----------------------------------------------------------------------------
DE = iris_DatasetExperiment()
DE

## -----------------------------------------------------------------------------
# number of columns
ncol(DE)

# number of rows
nrow(DE)

# subset the 2nd and 3rd column
Ds = DE[,c(2,3)]
Ds

## -----------------------------------------------------------------------------
# get data frame 
head(DE$data)

# sample meta
head(DE$sample_meta)


## -----------------------------------------------------------------------------
# mean centre object
mean_centre = set_struct_obj(
  class_name = 'mean_centre', 
  struct_obj = 'model',       
  stato = FALSE,              
  params = character(0),            
  outputs = c(centred = 'DatasetExperiment',
              mean = 'numeric'),
  private = character(0),
  prototype=list(predicted = 'centred')
)

# PCA object
PCA = set_struct_obj(
  class_name = 'PCA', 
  struct_obj = 'model',       
  stato = TRUE,              
  params = c(number_components = 'numeric'),                
  outputs = c(scores = 'DatasetExperiment',
              loadings = 'data.frame'),
  private = character(0),
  prototype = list(number_components = 2,
                  stato_id = 'OBI:0200051',
                  predicted = 'scores')
)

## -----------------------------------------------------------------------------
M = mean_centre()
M

P = PCA(number_components=4)
P

## -----------------------------------------------------------------------------
# mean centre training
set_obj_method(
  class_name = 'mean_centre',
  method_name = 'model_train',
  definition = function(M,D) {
    # calculate the mean of all training data columns
    m = colMeans(D$data)
    # assign to output slot
    M$mean = m
    # always return the modified model object
    return(M)
  }
)

# mean_centre prediction
set_obj_method(
  class_name = 'mean_centre',
  method_name = 'model_predict',
  definition = function(M,D) {
    # subtract the mean from each column of the test data
    D$data = D$data - rep(M$mean, rep.int(nrow(D$data), ncol(D$data)))
    # assign to output
    M$centred = D
    # always return the modified model object
    return(M)
  }
)

## -----------------------------------------------------------------------------
# create instance of object
M = mean_centre()

# train with iris data
M = model_train(M,iris_DatasetExperiment())

# print to mean to show the function worked
M$mean

# apply to iris_data
M = model_predict(M,iris_DatasetExperiment())

# retrieve the centred data and show that the column means are zero
colMeans(M$centred$data)

## -----------------------------------------------------------------------------
# PCA training
set_obj_method(
  class_name = 'PCA',
  method_name = 'model_train',
  definition = function(M,D) {
    
    # get number of components
    A = M$number_components
    
    # convert to matrix
    X=as.matrix(D$data)
    
    # do svd
    model=svd(X,A,A)

    # loadings
    P=as.data.frame(model$v)
    
    # prepare data.frame for output
    varnames=rep('A',1,A)
    for (i in 1:A) {
      varnames[i]=paste0('PC',i)
    }
    rownames(P)=colnames(X)
    colnames(P)=varnames
    output_value(M,'loadings')=P
    
    # set output
    M$loadings = P
    
    # always return the modified model object
    return(M)
  }
)

# PCA prediction
set_obj_method(
  class_name = 'PCA',
  method_name = 'model_predict',
  definition = function(M,D) {
    ## calculate scores using loadings
    
    # get number of components
    A = M$number_components
    
    # convert to matrix
    X=as.matrix(D$data)
    
    # get loadings
    P=M$loadings
    
    # calculate scores
    that=X%*%as.matrix(P)
    
    # convert scores to DatasetExperiment
    that=as.data.frame(that)
    rownames(that)=rownames(X)
    varnames=rep('A',1,A)
    for (i in 1:A) {
      varnames[i]=paste0('PC',i)
    }
    colnames(that)=varnames
    S=DatasetExperiment(
      data=that,
      sample_meta=D$sample_meta,
      variable_meta=varnames)
    
    # set output
    M$scores=S
    
    # always return the modified model object
    return(M)
  }
)

## -----------------------------------------------------------------------------
# get the mean centred data
DC = M$centred

# train PCA model
P = model_apply(P,DC)

# get scores
P$scores

## -----------------------------------------------------------------------------
# create model sequence
MS = mean_centre() + PCA(number_components = 2)

# print summary
MS

## -----------------------------------------------------------------------------
# apply model sequence to iris_data
MS = model_apply(MS,iris_DatasetExperiment())

# get default output from sequence (PCA scores with 2 components)
predicted(MS)

## -----------------------------------------------------------------------------
# create iterator
I = iterator()

# add PCA model sequence
models(I) = MS

# retrieve model sequence
models(I)

## -----------------------------------------------------------------------------
# alternative to assign models for iterators
I = iterator() * (mean_centre() + PCA())
models(I)

## -----------------------------------------------------------------------------
I = iterator() * iterator() * (mean_centre() + PCA())

## -----------------------------------------------------------------------------
# new chart object
set_struct_obj(
  class_name = 'pca_scores_plot',
  struct_obj = 'chart',
  stato = FALSE,
  params = c(factor_name = 'character'),
  prototype = list(
    name = 'PCA scores plot',
    description = 'Scatter plot of the first two principal components',
    libraries = 'ggplot2'
  )
)

## -----------------------------------------------------------------------------
# new chart_plot method
set_obj_method(
  class_name = 'pca_scores_plot',
  method_name = 'chart_plot',
  signature = c('pca_scores_plot','PCA'),
  definition = function(obj,dobj) {
    
    if (!is(dobj,'PCA')) {
      stop('this chart is only for PCA objects')
    }
    
    # get the PCA scores data
    S = dobj$scores$data
    
    # add the group labels
    S$factor_name = dobj$scores$sample_meta[[obj$factor_name]]
    
    # ggplot
    g = ggplot(data = S, aes_string(x='PC1',y='PC2',colour='factor_name')) +
      geom_point() + labs(colour = obj$factor_name) 
    
    # chart objects return the ggplot object
    return(g)
  }
    
)

## ----fig.width = 5, fig.height = 4--------------------------------------------
# create chart object
C = pca_scores_plot(factor_name = 'Species')

# plot chart using trained PCA object from model sequence
chart_plot(C,MS[2]) + theme_bw() # add theme

## -----------------------------------------------------------------------------
# define entity
npc = entity(
  name = 'Number of principal components',
  description = 'The number of principal components to calculate',
  type = c('numeric','integer'),
  value = 2,
  max_length = 1
)

# summary
npc


## -----------------------------------------------------------------------------
# PCA object
PCA = set_struct_obj(
  class_name = 'PCA', 
  struct_obj = 'model',       
  stato = TRUE,              
  params = c(number_components = 'entity'),                
  outputs = c(scores = 'DatasetExperiment',
              loadings = 'data.frame'),
  private = character(0),
  prototype = list(number_components = npc,
                  stato_id = 'OBI:0200051',
                  predicted = 'scores')
)

# create PCA model 
P = PCA(number_components = 3)

# get set value
P$number_components

# get description
param_obj(P,'number_components')$description


## -----------------------------------------------------------------------------
P = PCA()

# verify PCA is a stato object
is(P, 'stato')

# get stato id
stato_id(P)

# stato name
stato_name(P)

# stato definition
stato_definition(P)

## -----------------------------------------------------------------------------
sessionInfo()

Try the struct package in your browser

Any scripts or data that you put into this service are public.

struct documentation built on Nov. 8, 2020, 8:14 p.m.