R/id_import.R

Defines functions .filter_id_data .preprocess_id_data import_identifications import_identifications.character import_identifications.data.frame

Documented in import_identifications import_identifications.character import_identifications.data.frame

# Filter identification data by an optional PSM-quality score.
#
# Arguments:
#   id_data       data.frame with identification data.
#   score_control NULL (no filtering) or a named list with elements
#                 `score_column` (name of the score column), `threshold`
#                 (numeric cut-off) and `direction` ("greater" keeps scores
#                 above the threshold; any other value keeps scores below it).
#
# Returns `id_data` unchanged when `score_control` is NULL. Otherwise returns
# a data.frame holding only the rows that pass the threshold, with the score
# column removed. Rows whose score is NA never pass the filter.
#
# Raises an error when the requested score column is not present in `id_data`.
.filter_id_data <- function(id_data, score_control) {
    if (is.null(score_control)) {
        return(id_data)
    }

    score_column <- score_control[["score_column"]]
    threshold <- score_control[["threshold"]]

    # Fail loudly: a missing column would otherwise yield an empty result.
    if (length(score_column) != 1L ||
        !(score_column %in% colnames(id_data))) {
        stop("Score column '", score_column,
             "' was not found in the identification data.",
             call. = FALSE)
    }

    scores <- id_data[[score_column]]
    if (score_control[["direction"]] == "greater") {
        keep <- scores > threshold
    } else {
        keep <- scores < threshold
    }
    # NA in a logical row index would insert all-NA rows, so treat NA as FALSE.
    keep <- !is.na(keep) & keep

    # drop = FALSE keeps a data.frame even when a single column remains.
    id_data[keep, colnames(id_data) != score_column, drop = FALSE]
}

# Filter, select and label identification data.
#
# Arguments:
#   id_data       data.frame with identification data.
#   columns       named list describing the input columns; elements
#                 `peptides` and `proteins` name the peptide and protein
#                 columns, further elements name additional columns to keep.
#   score_control NULL or a named list passed on to `.filter_id_data`.
#
# Returns a data.frame of class `shared_peptides_identifications` whose first
# two columns are named `peptides` and `proteins`; any additional selected
# columns keep their original names.
.preprocess_id_data <- function(id_data, columns, score_control) {
    id_data <- .filter_id_data(id_data, score_control)

    # The first two selected columns are renamed by position below, so make
    # sure they really are the peptide and protein columns even when the
    # caller listed the elements of `columns` in a different order. When the
    # expected names are absent, the order given by the caller is used as is.
    required <- c("peptides", "proteins")
    if (all(required %in% names(columns))) {
        is_required <- names(columns) %in% required
        columns <- c(columns[required], columns[!is_required])
    }

    selected <- unlist(columns, use.names = FALSE)
    id_data <- id_data[, selected, drop = FALSE]
    colnames(id_data)[1:2] <- c("peptides", "proteins")
    class(id_data) <- c("shared_peptides_identifications", class(id_data))
    id_data
}

#' Import protein identification data
#'
#' @param identifications either a character string or a data.frame. A
#' character string is treated as the path to a delimited file, which is read
#' with the `read.delim` function. A data.frame is processed directly by
#' selecting the relevant columns and, optionally, filtering its rows.
#' @param columns a named list of two to four elements describing the columns
#' of the input data. Element `peptides` names the column that holds the
#' identified peptides and `proteins` names the column that holds the
#' proteins. Element `intensities` can be given when abundance data are
#' included. Element `grouping_variables` can be added when some input columns
#' define a grouping, so that operations on the output can be performed in
#' groups.
#' @param score_control a named list of three elements describing an optional
#' filter on a chosen PSM-quality metric. `score_column` names the column with
#' the metric scores, `threshold` is the numeric cut-off for that metric, and
#' `direction` ("greater"/"smaller") states whether retained scores must be
#' greater or smaller than the threshold.
#' @param ... optional arguments passed on to the `read.delim` function when
#' the first argument is a path to a file.
#'
#' @return data.frame of class `shared_peptides_identifications`.
#'
#' @export
#'
#' @examples
#' df <- data.frame(peptide = c('AA', 'AAA'), protein = c('P1', 'P1,P2'))
#' import_identifications(df, list(peptides = 'peptide', proteins = 'protein'))
#'
#' @name import_identifications
import_identifications <- function(identifications, columns,
                                   score_control = NULL, ...) {
    # S3 dispatch happens on the class of the first argument.
    UseMethod("import_identifications")
}

#' @export
#' @describeIn import_identifications the `identifications` parameter is
#' interpreted as the path to a delimited file, which is read with
#' `read.delim`.
#' @importFrom utils read.delim
import_identifications.character <- function(identifications, columns,
                                             score_control = NULL, ...) {
    # Extra arguments in `...` are forwarded to the file reader only.
    raw_table <- read.delim(identifications, ...)
    .preprocess_id_data(raw_table, columns, score_control)
}

#' @export
#' @describeIn import_identifications the `identifications` parameter is a
#' `data.frame` of peptide-spectrum matches that is processed directly.
import_identifications.data.frame <- function(identifications, columns,
                                              score_control = NULL, ...) {
    # The input is already a table, so it goes straight to preprocessing;
    # arguments in `...` are not used by this method.
    .preprocess_id_data(identifications, columns, score_control)
}
mstaniak/SharedPeptides documentation built on Jan. 21, 2020, 7:29 p.m.