# R/correlate.R

#' Calculate correlations against top window.
#'
#' Calculates correlations of each window against the top, and stores them in a vector.
#'
#' The input of this function is usually the output of the \code{\link{bin_scdata}} function.
#'
#' The correlation vector for each window is generated by iterating the genes in the top window
#' and correlating each of them to every gene in the window being compared. As a result, the
#' vector for a window holds one correlation value per pair of top-window gene and window gene
#' (number of genes in the top window times number of genes in that window). Consequently, top
#' window size greatly impacts the computational efficiency of the process.
#'
#' The correlation method argument is passed on to the \code{cor} function in the \code{stats}
#' package, and therefore the same options that function provides are available. However, it
#' is advisable to use Pearson correlation, since it presents the most advantageous balance of
#' result quality and computational efficiency.
#'
#' @param dataset A data frame containing all the binned genes.
#'
#' @param cor_method A string indicating the type of correlation to use.
#'
#' @return A list with one element per window, each element being the vector of correlations
#' of that window against the top window.

correlate <- function(dataset, cor_method){

    # Correlate every gene of the top window (bin == 1) against every gene of
    # each window, returning one numeric vector per window.
    #
    # dataset:    data frame with one row per gene; must contain the columns
    #             mean, stdev, CV and bin, every other column is treated as an
    #             expression value.
    # cor_method: string forwarded to the `method` argument of cor().
    #
    # Returns a list of length max(dataset$bin). Element i holds the
    # correlations for window i, ordered top gene by top gene (all values for
    # the first top-window gene, then all values for the second, and so on).

    # columns that hold per-gene summaries rather than expression values
    meta_cols <- c("mean", "stdev", "CV", "bin")
    missing_cols <- setdiff(meta_cols, names(dataset))
    if (length(missing_cols) > 0) {
        stop("`dataset` is missing required column(s): ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
    }

    # an empty data set has no windows, so there is nothing to correlate
    if (nrow(dataset) == 0) {
        return(list())
    }

    # keep only the expression values and convert to a matrix a single time,
    # instead of re-converting the selected window for every top-window gene
    expression <- as.matrix(
        dataset[, !(names(dataset) %in% meta_cols), drop = FALSE]
    )
    bins <- dataset$bin

    # cor() correlates columns, so each window is transposed to put genes in
    # columns; which() drops NA bins and drop = FALSE keeps one-gene windows
    # as matrices
    top_window <- t(expression[which(bins == 1), , drop = FALSE])

    # iterate bins in the dataset
    lapply(seq_len(max(bins)), function(i) {

        # select the genes in the chosen window using the bin number
        selected_window <- t(expression[which(bins == i), , drop = FALSE])

        # a single call gives the full (top genes x window genes) matrix
        pairwise <- cor(top_window, selected_window, method = cor_method)

        # as.vector() reads column by column, so transpose first to emit the
        # values top gene by top gene
        as.vector(t(pairwise))
    })
}
# angelesarzalluz/scfilters documentation built on May 10, 2019, 11:46 a.m.