# R/cytoscape_functions.R
#
# Defines functions CytoscapeFilter CytoscapeFile
#
# Documented in CytoscapeFile CytoscapeFilter

#' CytoscapeFilter
#' @description Reduces the size of a file intended for Cytoscape by filtering
#'  out the genes/clusters which are not correlated
#' @param cytoscape.file A dataframe object generated by CytoscapeFile. It must
#'  contain a \code{correlation} column.
#' @param threshold Positive genes below this threshold and negative genes above
#'  the additive inverse of this threshold will be removed. Rows whose
#'  correlation is 1 or more are also removed.
#' @param save Logical. If TRUE then the dataframe will be saved as a csv file.
#' @param filename String. The name for the saved file (".csv" is appended).
#'  If NULL, the name is built from the expression supplied as
#'  \code{cytoscape.file} with "_filtered" appended.
#' @return A dataframe holding the retained rows: the significant positive
#'  interactions first, followed by the significant negative interactions.
#' @examples
#' filter.df <- CombiFilter(Laurasmappings)
#' pam.df <- PamClustering(filter.df, k = 10)
#' cor.df <- CorAnalysisClusterDataset(pam.df, save=FALSE)
#' cyto.df <- CytoscapeFile(cor.df, save = FALSE)
#' cyto.filtered.df <- CytoscapeFilter(cyto.df, threshold=0.95)
#'
#' @export

CytoscapeFilter <- function(cytoscape.file, threshold = 0.9,
                            save = TRUE, filename = NULL) {
    # Fail loudly rather than silently returning an empty dataframe when the
    # input is not in the expected format.
    if (!("correlation" %in% colnames(cytoscape.file))) {
        stop("cytoscape.file must contain a 'correlation' column",
             call. = FALSE)
    }
    cor.values <- cytoscape.file[["correlation"]]

    # which() drops NA comparisons, so rows with a missing correlation are
    # never selected.
    # Significant +ve interactions (values of 1 or more are excluded)
    pos.rows <- which(cor.values >= threshold & cor.values < 1)
    # Significant -ve interactions
    neg.rows <- which(cor.values <= (-1 * threshold))

    # Positive rows are listed first, then negative rows; original row names
    # are kept.
    sig.df <- rbind(cytoscape.file[pos.rows, , drop = FALSE],
                    cytoscape.file[neg.rows, , drop = FALSE])

    if (save) {
        if (is.null(filename)) {
            # If filename is not given then use the name of the object passed
            # as cytoscape.file. deparse() may split a long expression over
            # several strings, so collapse them into a single name.
            filename <- paste(deparse(substitute(cytoscape.file)),
                              collapse = "")
            filename <- paste0(filename, "_filtered")
        }
        filename <- paste0(filename, ".csv")
        # FileConflict is defined elsewhere in the package; it is called
        # before the file is written.
        FileConflict(filename)
        utils::write.csv(sig.df, filename, row.names = FALSE)
    }
    return(sig.df)
}


#' CytoscapeFile:
#' @description Converts a correlation dataframe object into a format suitable
#'  for cytoscape and saves as a csv file.
#' @param cor.dataset A NxN dataframe of correlation values created by
#'  \link{CorAnalysisDataset} or \link{CorAnalysisClusterDataset}
#' @param save Logical. If TRUE then the dataframe will be saved as a csv file.
#' @param filename String. The name for the saved file (".csv" is appended).
#'  If NULL, the name is built from the expression supplied as
#'  \code{cor.dataset} with "_cytoscape" appended.
#' @param nthreads Retained for backward compatibility. The conversion is
#'  vectorised and runs in the calling process, so this argument is ignored.
#' @examples
#' correlation.df <- CorAnalysisDataset(Laurasmappings, nthreads = 2)
#' CytoscapeFile(correlation.df, nthreads = 2)
#'
#' @return Dataframe object in the new format, with one row per cell of
#'  \code{cor.dataset} and the columns \code{source}, \code{target} and
#'  \code{correlation}.
#' @export

CytoscapeFile <- function(cor.dataset, save = TRUE, filename = NULL,
                          nthreads = NULL) {
    if (save) {
        if (is.null(filename)) {
            # Default to the name of the object passed as cor.dataset.
            # deparse() may split a long expression over several strings, so
            # collapse them into a single name.
            filename <- paste(deparse(substitute(cor.dataset)), collapse = "")
            filename <- paste0(filename, "_cytoscape")
        }

        filename <- paste0(filename, ".csv")

        # Checks if a file which will be created already exists &
        # Asks the user if this file should be overwritten.
        CircadianTools::FileConflict(filename)
    }

    n <- nrow(cor.dataset)
    if (n != ncol(cor.dataset)) {
        stop("cor.dataset must be a square (NxN) set of correlation values",
             call. = FALSE)
    }

    # The rows will act as source nodes in Cytoscape
    sourcelist <- rownames(cor.dataset)
    # The column will act as target nodes in Cytoscape
    targetlist <- colnames(cor.dataset)

    # Flattening the matrix walks down column 1, then column 2, and so on.
    # The j-th block of n values is therefore column j, which is paired with
    # the j-th source name and with every target name in turn.
    finaldf <- data.frame(
        source = rep(sourcelist, each = n),
        target = rep(targetlist, times = n),
        correlation = as.vector(as.matrix(cor.dataset))
    )

    if (save) {
        utils::write.csv(finaldf, filename, row.names = FALSE)  #Write to .csv
    }
    return(finaldf)
}
# nathansam/CircadianTools documentation built on Dec. 26, 2019, 11:30 a.m.