R/cvWrapper.R

Defines functions cvWrapper

Documented in cvWrapper

#' @title cvWrapper
#' @description A wrapper for the pathwayTalk workflow. Splits the dataset into a specified
#' number of folds (‘times’), each representing a specified proportion of the data (‘p’)
#' @export
cvWrapper <- function(expression_matrix,
                      groups,
                      platform,
                      gene_alpha,
                      pathways,
                      pathway_alpha,
                      lambda,
                      sampling_method = 'partition',
                      times = 10,
                      p = 0.6){

    # output list
    output_list <- c()

    expression_matrix_t <- t(expression_matrix)

    # split data into resamples
    if (sampling_method == 'partition'){

        # generate indices to subset data
        indices <- caret::createDataPartition(y  = groups$group,
                                              times = times,
                                              p = p)
        # subset data
        exprs_list <- purrr::map(indices, ~ t(expression_matrix_t[., ])) # weird format to preserve column names
        groups_list <- purrr::map(indices, ~ groups[., ])
        resample <- purrr::map(groups_list, ~.$sample_id)
        resample_summary <- purrr::map(groups_list, ~table(.$group))
    } else if(sampling_method == 'down'){
        indices <- purrr::map(1:times,
                                ~ caret::downSample(x = 1:nrow(expression_matrix_t),
                                                    y = groups$group,
                                                    list = TRUE))
        indices <- purrr::map(indices, ~ as.numeric(.$x$x))
        exprs_list <- purrr::map(indices, ~ t(expression_matrix_t[., ])) # weird format to preserve column names
        groups_list <- purrr::map(indices, ~ groups[., ])
        sample_summary <- purrr::map(groups_list, ~table(.$group))
        resample <- purrr::map(groups_list, ~.$sample_id)
        resample_summary <- purrr::map(groups_list, ~table(.$group))
    } else {
        return(paste0("Sampling method should be a string ",
                      "with the value 'partition' or 'down'"))
    }

    # save sampling results
    output_list[['resample']] <- resample
    output_list[['resample_summary']] <- resample_summary

    # # # #### TEMP ####
    # resample_exprs <- exprs_list[[1]]
    # resample_groups <- groups_list[[1]]

    internalWrapper <- function(resample_exprs, resample_groups, platform,
                                gene_alpha, pathways, pathway_alpha){

        internal_return_list <- c()

        # step 1: do differential expression analysis
        DEG <- diffExpression(expression_matrix = resample_exprs,
                                groups = resample_groups,
                                platform = platform)
        # internal_return_list[['DEG']] <- DEG
        # extract DEGs from diffExpression output
        DEG_dfs <- purrr::map(DEG, ~.$DEGs)
        internal_return_list[['DEG']] <- DEG_dfs

        # step 2: do Fisher test for pathway enrichment analysis
        enriched <- fisherPathwayEnrichment(DEGs = DEG_dfs,
                                            gene_alpha=gene_alpha,
                                            pathways = reactome_pathways)
        # remove infinite values, if present
        enriched_short <- purrr::map(enriched, ~filter(., !is.infinite(estimate)))
        internal_return_list[['enriched']] <- enriched_short

        # step 3 - generate matrix of discriminating scores
        # remove any duplicate gene ids from the pathways list
        pathways <- purrr::map(pathways, ~ unique(.))
        crosstalk_matrices <- purrr::map2(DEG, enriched_short,
                                         ~ pathwayCrosstalk(expression_matrix = .x$data,
                                                            groups = .x$group,
                                                            DEPs = .y,
                                                            pathways = pathways,
                                                            pathway_alpha = pathway_alpha))
        internal_return_list[['crosstalk_matrices']] <- crosstalk_matrices


        # # ### TEMP
        # sub_groups <- DEG$LUMB$groups
        # crosstalk_matrix <- crosstalk_matrices$LUMB

        # step 4 - classification
        model_results <- purrr::map2(DEG, crosstalk_matrices,
                                     ~ crosstalkNetwork(crosstalk_matrix = .y,
                                                        groups = .x$groups,
                                                        lambda = lambda,
                                                        alpha = 1,
                                                        output_network = TRUE,
                                                        output_model = FALSE))
        # internal_return_list[['model_results']] <- purrr::map(model_results, ~.$model)
        internal_return_list[['network_results']] <- purrr::map(model_results, ~.$dataframe)


        return(internal_return_list)


    }

    # future::plan(multisession, workers = 4)
    internal_wrapper_results <- purrr::map2(exprs_list, groups_list, ~internalWrapper(.x, .y,
                                            platform, gene_alpha, pathways, pathway_alpha))

    output_list[['DEG']] <- purrr::map(internal_wrapper_results, ~.[['DEG']])
    # get vertex list for each network
    extractVertices <- function(resample_results){

        net_results <- resample_results$network_results

        net_df <- purrr::map(net_results, ~as.data.frame(.)) %>%
            bind_rows(.id = 'contrast')
    }
    network_results <- purrr::map(internal_wrapper_results,
                                           ~extractVertices(.)) %>%
        bind_rows(.id = 'resample')
    network_results_list <- split(network_results, network_results$contrast)
    keeps <- purrr::map(network_results_list,
                                       ~ duplicated(.[,c('V1', 'V2')]))
    network_results_list <- purrr::map2(network_results_list, keeps,
                                        ~ .x[.y,c('V1', 'V2')])
    network_results_list <- purrr::map(network_results_list,
                                        ~ unique(.))


    # generate full network
    generateFullNetwork <- function(network_df){
        unique_vertices <- unique(network_df)
        network <- igraph::graph_from_data_frame(unique_vertices,
                                                 directed = FALSE)
        return(network)

    }

    full_networks <- purrr::map(network_results_list, ~generateFullNetwork(.))

    # step 6: crosstalk inhibition
    crosstalk_inhibition_results <- purrr::map(full_networks,
                                               ~ crosstalkInhibition(network = .))

    # save components
    # output_list[['internal_wrapper_results']] <- internal_wrapper_results
    # output_list[['full_networks']] <- full_networks
    output_list[['crosstalk_inhibition_results']] <- crosstalk_inhibition_results

    return(output_list)

}
hemoshear/pathwayTalk documentation built on July 16, 2022, 12:09 a.m.