R/mv_feature_filter_class.R

Defines functions mv_feature_filter_hist mv_feature_filter

Documented in mv_feature_filter mv_feature_filter_hist

#' @eval get_description('mv_feature_filter')
#' @export mv_feature_filter
#' @examples
#' D = iris_DatasetExperiment()
#' M = mv_feature_filter(factor_name='Species',qc_label='versicolor')
#' M = model_apply(M,D)
mv_feature_filter = function(threshold=20,qc_label='QC',method='QC',factor_name,...) {
    out=struct::new_struct('mv_feature_filter',
        threshold=threshold,
        qc_label=qc_label,
        method=method,
        factor_name=factor_name,
        ...)
    return(out)
}

.mv_feature_filter<-setClass(
    "mv_feature_filter",
    contains = c('model'),
    slots=c(threshold='entity',
        qc_label='entity',
        method='enum',
        factor_name='entity',
        filtered='entity',
        flags='entity'
    ),
    prototype=list(name = 'Filter by fraction missing values',
        description = paste0('Filters features where the percent number ',
        'of missing values exceeds a predefined threshold.'),
        type = 'filter',
        predicted = 'filtered',
        libraries='pmp',
        .params=c('threshold','qc_label','method','factor_name'),
        .outputs=c('filtered','flags'),

        factor_name=ents$factor_name,

        threshold=entity(name = 'Missing value threshold (\\%)',
            description = 'The threshold for excluding features.',
            value = 20,
            type='numeric'),

        qc_label=entity(name = 'QC label',
            description = 'The label used to identify QC samples.',
            value = 'QC',
            type='character'),

        method=enum(name='Filtering method',
            description=c(
                "within_all" = 'The filter is applied within classes',
                "within_one" = 'The filter is applied within any one class',
                "QC" = 'The filter is applied within QC samples',
                "across" = 'The filter is applied across all samples'),
            value='QC',
            type='character',
            allowed=c('within_all','within_one','QC','across')),

        filtered=entity(name = 'Filtered DatasetExperiment',
            description = 'A DatasetExperiment object containing the filtered data.',
            type='DatasetExperiment',
            value=DatasetExperiment()
        ),
        flags=entity(name = 'Flags',
            description = '% missing values and a flag indicating whether the sample was rejected.',
            type='data.frame',
            value=data.frame()
        )
    )
)

#' @export
#' @template model_train
setMethod(f="model_train",
    signature=c("mv_feature_filter","DatasetExperiment"),
    definition=function(M,D)
    {
        opt=param_list(M)

        smeta=D$sample_meta
        x=D$data

        s=strsplit(opt$method,'_')[[1]][1]

        filtered = pmp::filter_peaks_by_fraction(t(x), min_frac = opt$threshold/100, classes=smeta[[M$factor_name]], method=s,qc_label=opt$qc_label,remove_peaks = FALSE)
        #D$data = as.data.frame(t(filtered$df))

        flags<-data.frame(attributes(filtered)$flags)

        output_value(M,'flags') = flags

        return(M)
    }
)

#' @export
#' @template model_predict
setMethod(f="model_predict",
    signature=c("mv_feature_filter","DatasetExperiment"),
    definition=function(M,D) {

        x=D$data
        smeta=D$sample_meta
        vmeta=D$variable_meta

        flags=M$flags

        if (M$method=='within_all') {
            L=levels(smeta[[M$factor_name]])
            IN=apply(flags[,(length(L)+1):ncol(flags)],MARGIN=1,function(x) all(x==1))

        } else if (M$method=='within_one') {
            L=levels(smeta[[M$factor_name]])
            IN=apply(flags[,(length(L)+1):ncol(flags)],MARGIN=1,function(x) any(x==1))
        } else {
            IN=flags[,2]==1

        }

        D=D[,IN,drop=FALSE]

        output_value(M,'filtered') = D
        return(M)
    }
)


##### plots
#' @eval get_description('mv_feature_filter_hist')
#' @import struct
#' @export mv_feature_filter_hist
#' @examples
#' C = mv_feature_filter_hist()
mv_feature_filter_hist = function(...) {
    out=struct::new_struct('mv_feature_filter_hist',...)
    return(out)
}


.mv_feature_filter_hist<-setClass(
    "mv_feature_filter_hist",
    contains='chart',
    prototype = list(name='Histogram of missing values per feature',
        description='A histogram of the proportion of missing values per feature.',
        type="histogram"
    )
)

#' @export
#' @template chart_plot
setMethod(f="chart_plot",
    signature=c("mv_feature_filter_hist",'mv_feature_filter'),
    definition=function(obj,dobj)
    {
        if (param_value(dobj,'method')=='within')
        {
            stop('plot not implemented for within class filter')
        }

        t=param_value(dobj,'threshold')
        A=output_value(dobj,'flags')
        n=colnames(A)
        A$x=100-((A[,1])*100) # filter report number of values, not number of missing values
        A$features=factor(A[,2],levels=c(1,0),labels=c('accepted','rejected'))
        out=ggplot(data=A, aes_(x=~x,fill=~features)) +
            geom_histogram(boundary=(100-t),color='white') +
            xlab('% missing values (per feature)') +
            ylab('Count') +
            scale_fill_Publication()+
            theme_Publication(base_size = 12) +
            ggtitle('Missing values')

        return(out)
    }
)

Try the structToolbox package in your browser

Any scripts or data that you put into this service are public.

structToolbox documentation built on Nov. 8, 2020, 6:54 p.m.