R/PCA_class.R

Defines functions PCA

Documented in PCA

#' @eval struct::get_description('PCA')
#' @import struct
#' @export PCA
PCA = function(number_components=2,...) {
    out=struct::new_struct('PCA',
        number_components=number_components,
        ...)
    return(out)
}


.PCA<-setClass(
    "PCA",
    contains=c('model'),
    slots=c(
        # INPUTS
        number_components='entity',

        # OUTPUTS
        scores='entity',
        loadings='data.frame',
        eigenvalues='data.frame',
        ssx='numeric',
        correlation='data.frame',
        that='DatasetExperiment'

    ),
    prototype = list(name='Principal Component Analysis (PCA)',
        description=paste0('PCA is a multivariate data reduction technique. ',
        'It summarises the data in a smaller number of Principal Components ',
        'that maximise variance.'),
        type="preprocessing",
        predicted='that',
        ontology='OBI:0200051',
        .params=c('number_components'),
        .outputs=c('scores','loadings','eigenvalues','ssx','correlation','that'),

        number_components=entity(
            name='Number of Principal Components',
            description='The number of Principal Components calculated.',
            ontology='STATO:0000555',
            value=2,
            type=c('numeric','integer')
        ),
        scores=entity(
            name='PCA scores DatasetExperiment',
            description='A matrix of PCA scores where each column corresponds to a Principal Component',
            type='DatasetExperiment'
        )
    )
)

#' @export
#' @template model_train
setMethod(f="model_train",
    signature=c("PCA",'DatasetExperiment'),
    definition=function(M,D)
    {
        A=param_value(M,'number_components')
        X=as.matrix(D$data)
        model=svd(X,A,A)
        if (A==1)
        {
            scores=model$u * model$d[A]
        } else {
            scores=model$u %*% diag(model$d[1:A])
        }

        scores=as.data.frame(scores)
        rownames(scores)=rownames(X)
        varnames=rep('A',1,A)
        for (i in 1:A)
        {
            varnames[i]=paste0('PC',i)
        }
        colnames(scores)=varnames
        S=DatasetExperiment(data=scores,sample_meta=D$sample_meta,variable_meta=varnames)
        output_value(M,'scores')=S

        P=as.data.frame(model$v)
        rownames(P)=colnames(X)
        colnames(P)=varnames
        output_value(M,'loadings')=P

        E=data.frame('Eigenvalues'=colSums(scores^2))
        rownames(E)=varnames
        output_value(M,'eigenvalues')=E

        output_value(M,'ssx')=sum(X*X) # sum of squares of x
        output_value(M,'correlation')=as.data.frame(cor(X,scores))

        return(M)
    }
)

#' @export
#' @template model_predict
setMethod(f="model_predict",
    signature=c("PCA",'DatasetExperiment'),
    definition=function(M,D)
    {
        A=param_value(M,'number_components')
        X=as.matrix(D$data)
        P=output_value(M,'loadings')
        that=X%*%as.matrix(P)

        that=as.data.frame(that)
        rownames(that)=rownames(X)
        varnames=rep('A',1,A)
        for (i in 1:A)
        {
            varnames[i]=paste0('PC',i)
        }
        colnames(that)=varnames

        # convert to DatasetExperiment for preprocessing output
        S=DatasetExperiment(data=that,sample_meta=D$sample_meta,variable_meta=varnames)
        output_value(M,'that')=S

        return(M)
    }
)
computational-metabolomics/structToolbox documentation built on Feb. 12, 2024, 2:15 a.m.