#' Calculate correlations between features of two experiments.
#' 
#' @param x A
#' \code{\link[MultiAssayExperiment:MultiAssayExperiment-class]{MultiAssayExperiment}}
#' or
#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
#' object.
#'   
#' @param experiment1 \code{Character scalar} or \code{numeric scalar}.
#' Selects experiment 1 from \code{experiments(x)} of a
#' \code{MultiAssayExperiment} object. (Default: \code{1})
#'    
#' @param experiment2 \code{Character scalar} or \code{numeric scalar}.
#' Selects experiment 2 from \code{experiments(x)} of a
#' \code{MultiAssayExperiment} object or from
#' \code{altExp(x)} of a \code{TreeSummarizedExperiment} object. Alternatively,
#' \code{experiment2} can be a \code{TreeSE} object when \code{x} is a
#' \code{TreeSE} object. (Default: \code{2} when \code{x} is an \code{MAE} and
#' \code{x} when \code{x} is a \code{TreeSE})
#'    
#' @param assay.type1 \code{Character scalar}. Specifies the name of the assay
#' in experiment 1 to be used. (Default: \code{"counts"})
#'   
#' @param assay.type2 \code{Character scalar}. Specifies the name of the
#' assay in experiment 2 to be used. (Default: \code{"counts"})
#'    
#' @param assay_name1 Deprecated. Use \code{assay.type1} instead.
#' 
#' @param assay_name2 Deprecated. Use \code{assay.type2} instead.
#' 
#' @param altexp1 \code{Character scalar} or \code{numeric scalar}. Specifies
#' an alternative experiment from the \code{altExp} of experiment 1. If
#' \code{NULL}, the experiment itself is used and the altExp option is
#' disabled. (Default: \code{NULL})
#'   
#' @param altexp2 \code{Character scalar} or \code{numeric scalar}. Specifies
#' an alternative experiment from the \code{altExp} of experiment 2. If
#' \code{NULL}, the experiment itself is used and the altExp option is
#' disabled. (Default: \code{NULL})
#'   
#' @param col.var1 \code{Character scalar}. Specifies column(s) from
#' \code{colData} of experiment 1. If \code{col.var1} is used,
#' \code{assay.type1} is disabled. (Default: \code{NULL})
#' 
#' @param colData_variable1 Deprecated. Use \code{col.var1} instead.
#'   
#' @param col.var2 \code{Character scalar}. Specifies column(s) from
#' \code{colData} of experiment 2. If \code{col.var2} is used,
#' \code{assay.type2} is disabled. (Default: \code{NULL})
#' 
#' @param colData_variable2 Deprecated. Use \code{col.var2} instead.
#' 
#' @param by \code{Character scalar}. Determines whether associations are
#' calculated row-wise / for features ('rows') or column-wise / for samples
#' ('cols'). Must be \code{'rows'} or \code{'cols'}.
#' 
#' @param MARGIN Deprecated. Use \code{by} instead.
#'   
#' @param method \code{Character scalar}. Defines the association method
#' ('kendall', 'pearson', or 'spearman' for continuous/numeric data;
#' 'categorical' for discrete data). (Default: \code{"kendall"})
#' 
#' @param mode \code{Character scalar}. Specifies the output format.
#' Available formats are 'table' and 'matrix'. (Default: \code{"table"})
#' 
#' @param p.adj.method \code{Character scalar}. Specifies the adjustment method
#' for p-values. Passed to the \code{p.adjust} function.
#' (Default: \code{"fdr"})
#' 
#' @param p_adj_method Deprecated. Use \code{p.adj.method} instead.
#' 
#' @param p.adj.threshold \code{Numeric scalar}. A value between 0 and 1
#' specifying the adjusted p-value threshold for filtering.
#' (Default: \code{NULL})
#' 
#' @param p_adj_threshold Deprecated. Use \code{p.adj.threshold} instead.
#' 
#' @param cor.threshold \code{Numeric scalar}. A value between 0 and 1
#' specifying the correlation threshold for filtering.
#' (Default: \code{NULL})
#' 
#' @param cor_threshold Deprecated. Use \code{cor.threshold} instead.
#' 
#' @param sort \code{Logical scalar}. Specifies whether to sort features in the
#' result matrices using hierarchical clustering.
#' (Default: \code{FALSE})
#' 
#' @param filter.self.cor \code{Logical scalar}. Specifies whether to 
#' filter out correlations between identical items. Applies only when
#' correlation within a single experiment is tested, i.e., when the assays are
#' identical. (Default: \code{FALSE})
#' 
#' @param filter_self_correlations Deprecated. Use \code{filter.self.cor}
#' instead.
#' 
#' @param verbose \code{Logical scalar}. Specifies whether to print messages
#' about the progress of the calculation. (Default: \code{TRUE})
#' 
#' @param test.signif \code{Logical scalar}. Specifies whether to test
#' statistical significance of associations.
#' (Default: \code{FALSE})
#' 
#' @param test_significance Deprecated. Use \code{test.signif} instead.
#'    
#' @param show.warnings \code{Logical scalar}. Specifies whether to show
#' warnings that might occur when correlations and p-values are calculated.
#' (Default: \code{TRUE})
#' 
#' @param show_warnings Deprecated. Use \code{show.warnings} instead.
#'
#' @param paired \code{Logical scalar}. Specifies whether samples are paired.
#' \code{colnames} must match between the two experiments. \code{paired} is
#' disabled when \code{by = 1}. (Default: \code{FALSE})
#'
#' @param ... Additional arguments:
#' \itemize{
#'   \item \code{symmetric}: \code{Logical scalar}. Specifies whether the
#'   measure is symmetric. When \code{symmetric = TRUE}, associations
#'   are calculated only for unique variable-pairs and then assigned to the
#'   corresponding variable-pairs. This halves the number of calculations,
#'   resulting in faster execution. (Default: \code{FALSE})
#'   
#'   \item \code{association.fun}: A function used to calculate
#'   (dis-)similarity between features. The function must take a matrix as
#'   input and return numeric values as output. Adjust \code{method} and other
#'   parameters accordingly. Supported functions include, for example,
#'   \code{stats::dist} and \code{vegan::vegdist}.
#' }
#'    
#' @details
#' The function \code{getCrossAssociation} calculates associations between 
#' features of two experiments. By default, it only computes the associations;
#' setting \code{test.signif} to \code{TRUE} additionally tests their
#' significance.
#' 
#' We recommend the non-parametric Kendall's tau as the default method for
#' association analysis. Kendall's tau has desirable statistical properties and
#' robustness at lower sample sizes. Spearman rank correlation can provide
#' faster solutions when running times are critical.
#'
#' @return 
#' This function returns associations in table or matrix format. In table
#' format, the returned value is a data frame that includes features and
#' associations (and p-values) in columns. In matrix format, the returned value
#' is a single matrix when only associations are calculated. If significance is
#' also tested, the returned value is a list of matrices.
#'
#' @name getCrossAssociation
#' @export
#'
#' @examples
#' data(HintikkaXOData)
#' mae <- HintikkaXOData
#' 
#' # Subset so that there are fewer observations / quicker to run, for example
#' mae[[1]] <- mae[[1]][1:20, 1:10]
#' mae[[2]] <- mae[[2]][1:20, 1:10]
#' # Several rows in the counts assay have a standard deviation of zero
#' # Remove them, since they do not add useful information about
#' # cross-association
#' mae[[1]] <- mae[[1]][rowSds(assay(mae[[1]])) > 0, ]
#' # Transform data
#' mae[[1]] <- transformAssay(mae[[1]], method = "rclr")
#' 
#' # Calculate cross-correlations
#' result <- getCrossAssociation(mae, method = "pearson", assay.type2 = "nmr")
#' # Show first 5 entries
#' head(result, 5)
#' 
#' # Use altExp option to specify alternative experiment from the experiment
#' altExp(mae[[1]], "Phylum") <- agglomerateByRank(mae[[1]], rank = "Phylum")
#' # Transform data
#' altExp(mae[[1]], "Phylum") <- transformAssay(
#'     altExp(mae[[1]], "Phylum"), method = "relabundance")
#' # When mode = "matrix", the return value is a matrix
#' result <- getCrossAssociation(
#'     mae, experiment2 = 2, assay.type1 = "relabundance", assay.type2 = "nmr",
#'     altexp1 = "Phylum", method = "pearson", mode = "matrix")
#' # Show first 5 entries
#' head(result, 5)
#' 
#' # If test.signif = TRUE, then getCrossAssociation additionally returns 
#' # significances
#' # filter.self.cor = TRUE filters self correlations
#' # p.adj.threshold can be used to filter those features that do not
#' # have any correlations whose p-value is lower than the threshold
#' result <- getCrossAssociation(
#'     mae[[1]], experiment2 = mae[[1]], method = "pearson",
#'     filter.self.cor = TRUE, p.adj.threshold = 0.05, test.signif = TRUE)
#' # Show first 5 entries
#' head(result, 5)
#'                                         
#' # Returned value is a list of matrices
#' names(result)
#' 
#' # Calculate Bray-Curtis dissimilarity between samples. If dataset includes
#' # paired samples, you can use paired = TRUE.
#' result <- getCrossAssociation(
#'     mae[[1]], mae[[1]], by = 2, paired = FALSE,
#'     association.fun = getDissimilarity, method = "bray")
#'                                         
#' 
#' # If experiments are equal and measure is symmetric
#' # (e.g., taxa1 vs taxa2 == taxa2 vs taxa1),
#' # it is possible to speed up calculations by calculating associations only
#' # for unique variable-pairs. Use "symmetric" to calculate associations only
#' # for one half of the variable-pairs.
#' result <- getCrossAssociation(
#'     mae, experiment1 = "microbiota", experiment2 = "microbiota", 
#'     assay.type1 = "counts", assay.type2 = "counts", symmetric = TRUE)
#' 
#' # For big data sets, the calculations might take a long time.
#' # To speed them up, you can take a random sample from the data. 
#' # When dealing with complex biological problems, random samples can be 
#' # enough to describe the data. Here, our random sample is 30% of the data.
#' sample_size <- 0.3
#' tse <- mae[[1]]
#' tse_sub <- tse[ sample( seq_len( nrow(tse) ), sample_size * nrow(tse) ), ]
#' result <- getCrossAssociation(tse_sub)
#' 
#' # It is also possible to choose variables from colData and calculate
#' # association between assay and sample metadata or between variables of
#' # sample metadata
#' mae[[1]] <- addAlpha(mae[[1]])
#' # col.var works similarly to assay.type. Instead of fetching
#' # an assay named assay.type from assay slot, it fetches a column named
#' # col.var from colData.
#' result <- getCrossAssociation(
#'     mae[[1]], assay.type1 = "counts", 
#'     col.var2 = c("shannon_diversity", "coverage_diversity"),
#'     test.signif = TRUE)
#'                                         
NULL

#' @rdname getCrossAssociation
#' @export
setGeneric("getCrossAssociation", signature = c("x"),
    function(x, ...) standardGeneric("getCrossAssociation"))

#' @rdname getCrossAssociation
#' @export
setMethod("getCrossAssociation", signature = c(x = "MultiAssayExperiment"),
    function(
        x,
        experiment1 = 1,
        experiment2 = 2,
        assay.type1 = assay_name1, assay_name1 = "counts",
        assay.type2 = assay_name2, assay_name2 = "counts",
        altexp1 = NULL,
        altexp2 = NULL,
        col.var1 = colData_variable1,
        colData_variable1 = NULL,
        col.var2 = colData_variable2,
        colData_variable2 = NULL,
        by = MARGIN,
        MARGIN = 1,
        method = c("kendall", "spearman", "categorical", "pearson"),
        mode = "table",
        p.adj.method = p_adj_method,
        p_adj_method = c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm",
            "hommel", "none"),
        p.adj.threshold = p_adj_threshold,
        p_adj_threshold = NULL,
        cor.threshold = cor_threshold,
        cor_threshold = NULL,
        sort = FALSE,
        filter.self.cor = filter_self_correlations,
        filter_self_correlations = FALSE,
        verbose = TRUE,
        test.signif = test_significance,
        test_significance = FALSE,
        show.warnings = show_warnings,
        show_warnings = TRUE,
        paired = FALSE,
        ...){
        #
        .get_experiment_cross_association(
            x,
            experiment1 = experiment1,
            experiment2 = experiment2,
            assay.type1 = assay.type1,
            assay.type2 = assay.type2,
            altexp1 = altexp1,
            altexp2 = altexp2,
            col.var1 = col.var1,
            col.var2 = col.var2,
            by = by,
            method = method,
            mode = mode,
            p.adj.method = p.adj.method,
            p.adj.threshold = p.adj.threshold,
            cor.threshold = cor.threshold,
            sort = sort,
            filter.self.cor = filter.self.cor,
            verbose = verbose,
            test.signif = test.signif,
            show.warnings = show.warnings,
            paired = paired,
            ...)
    }
)

#' @rdname getCrossAssociation
#' @importFrom MultiAssayExperiment MultiAssayExperiment ExperimentList
#' @importFrom SingleCellExperiment altExps
#' @export
setMethod("getCrossAssociation", signature = "SummarizedExperiment",
    function(x, experiment2 = x, ...){
        ############################## INPUT CHECK #############################
        # If y is an SE or TreeSE object
        if( is(experiment2, "SummarizedExperiment") ){}
        # If y is a character value specifying the name of an altExp
        else if( is.character(experiment2) &&
            experiment2 %in% names(altExps(x)) ){}
        # If y is a numeric value specifying an altExp
        else if( is.numeric(experiment2) &&
            experiment2 <= length(altExps(x)) ){}
        # If y does not match, then give an error
        else{
            stop("'experiment2' must be an SE object, or a numeric or ",
                "character value specifying an experiment in altExps(x).",
                call. = FALSE)
        }
        ############################ INPUT CHECK END ###########################
        # Fetch data sets and create a MAE object
        exp1 <- x
        args <- list()
        # If y is character or numeric, it specifies altExp
        if ( is.character(experiment2) || is.numeric(experiment2) ){
            exp2 <- altExps(x)[[experiment2]]
        } else {
            exp2 <- experiment2
        }
        # Add colnames if not defined
        if (is.null(colnames(exp1))) {
            colnames(exp1) <- paste("c", seq_len(ncol(exp1)), sep = "")
            warning("Colnames not defined; arbitrary colnames added in ",
                "experiment1.", call. = FALSE)
        }
        if (is.null(colnames(exp2))) {
            colnames(exp2) <- paste("c", seq_len(ncol(exp2)), sep = "")
            warning("Colnames not defined; arbitrary colnames added in ",
                "experiment2.", call. = FALSE)
        }
        # Add rownames if not defined
        if (is.null(rownames(exp1))) {
            rownames(exp1) <- paste("r", seq_len(nrow(exp1)), sep = "")
            warning("Rownames not defined; arbitrary rownames added in ",
                "experiment1.", call. = FALSE)
        }
        if (is.null(rownames(exp2))) {
            rownames(exp2) <- paste("r", seq_len(nrow(exp2)), sep = "")
            warning("Rownames not defined; arbitrary rownames added in ",
                "experiment2.", call. = FALSE)
        }
        # Create a MAE
        experiments <- ExperimentList(exp1 = exp1, exp2 = exp2)
        x <- MultiAssayExperiment(experiments = experiments)
        # Call method with MAE object as an input
        res <- .get_experiment_cross_association(
            x = x, experiment1 = 1, experiment2 = 2, ...)
        return(res)
    }
)

############################## MAIN FUNCTIONALITY ##############################
# This function includes all the main functionality.
#' @importFrom S4Vectors unfactor
.get_experiment_cross_association <- function(
        x,
        experiment1 = 1,
        experiment2 = 2,
        assay.type1 = "counts",
        assay.type2 = "counts",
        altexp1 = NULL,
        altexp2 = NULL,
        col.var1 = colData_variable1, colData_variable1 = NULL,
        col.var2 = colData_variable2, colData_variable2 = NULL,
        by = 1,
        method = c("kendall", "spearman", "categorical", "pearson"),
        mode = c("table", "matrix"),
        p.adj.method = c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm",
            "hommel", "none"),
        p.adj.threshold = NULL,
        cor.threshold = NULL,
        sort = FALSE,
        filter.self.cor = FALSE,
        verbose = TRUE,
        test.signif = FALSE,
        show.warnings = TRUE,
        paired = FALSE,
        ...){
    ############################# INPUT CHECK ##############################
    # Check experiment1 and experiment2
    .test_experiment_of_mae(x, experiment1)
    .test_experiment_of_mae(x, experiment2)
    # Rename TreeSEs based on the sample map, i.e., based on what they are
    # linking to. In an MAE, colnames can differ between experiments even
    # though the samples link to the same patient.
    obj_list <- .rename_based_on_samplemap(x, experiment1, experiment2)
    # Fetch tse objects
    tse1 <- obj_list[[1]]
    tse2 <- obj_list[[2]]
    # Check and fetch tse objects
    tse1 <- .check_and_get_altExp(tse1, altexp1)
    tse2 <- .check_and_get_altExp(tse2, altexp2)
    # If variables from coldata are specified check them. Otherwise,
    # check assay.type1
    if( !is.null(col.var1) ){
        tse1 <- .check_and_subset_colData_variables(tse1, col.var1)
    } else{
        .check_assay_present(assay.type1, tse1)
    }
    if( !is.null(col.var2) ){
        tse2 <- .check_and_subset_colData_variables(tse2, col.var2)
    } else{
        .check_assay_present(assay.type2, tse2)
    }
    # Check by
    by <- .check_MARGIN(by) 
    # Check method
    # method is checked in .calculate_association. Otherwise association.fun
    # would not work. (It can be "anything", and it might also have a method
    # parameter.)
    # Check mode
    mode <- match.arg(mode, c("table", "matrix"))
    p.adj.method <- match.arg(
        p.adj.method, c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm",
            "hommel", "none"))
    # Check p.adj.threshold
    if( !(is.numeric(p.adj.threshold) &&
            (p.adj.threshold>=0 && p.adj.threshold<=1)  || 
            is.null(p.adj.threshold) ) ){
        stop("'p.adj.threshold' must be a numeric value [0,1].", call. = FALSE)
    }
    # Check cor.threshold
    if( !(is.numeric(cor.threshold) && 
            (cor.threshold>=0 && cor.threshold<=1)  || 
            is.null(cor.threshold) ) ){
        stop("'cor.threshold' must be a numeric value [0,1].", call. = FALSE)
    }
    # Check sort
    if( !.is_a_bool(sort) ){
        stop("'sort' must be a boolean value.", call. = FALSE)
    }
    # Check filter.self.cor
    if( !.is_a_bool(filter.self.cor) ){
        stop("'filter.self.cor' must be a boolean value.", call. = FALSE)
    }
    # Check test.signif
    if( !.is_a_bool(test.signif) ){
        stop("'test.signif' must be a boolean value.", call. = FALSE)
    }
    # Check verbose
    if( !.is_a_bool(verbose) ){
        stop("'verbose' must be a boolean value.", call. = FALSE)
    }
    # Check show.warnings
    if( !.is_a_bool(show.warnings) ){
        stop("'show.warnings' must be a boolean value.", call. = FALSE)
    }
    # Check paired
    if( !.is_a_bool(paired) ){
        stop("'paired' must be a boolean value.", call. = FALSE)
    }
    ############################ INPUT CHECK END ###########################
    # Fetch assays to correlate, if variables from coldata are specified, take 
    # coldata, otherwise take assay
    if( !is.null(col.var1) ){
        assay1 <- colData(tse1)
        assay1 <- as.matrix(assay1)
        assay1 <- t(assay1)
    } else{
        assay1 <- assay(tse1, assay.type1)
    }
    if( !is.null(col.var2) ){
        assay2 <- colData(tse2)
        assay2 <- as.matrix(assay2)
        assay2 <- t(assay2)
    } else{
        assay2 <- assay(tse2, assay.type2)
    }
    # Transpose tables to the right format if row-wise calculation is chosen
    if( by == 1 ){
        assay1 <- t(assay1)
        assay2 <- t(assay2)
        # Disable paired
        paired <- FALSE
    }
    # Check that assays match
    .check_that_assays_match(assay1, assay2, by)
    # If significance is not calculated, p.adj.method is NULL
    if( !test.signif ){
        p.adj.method <- NULL
    }
    # Calculate correlations
    result <- .calculate_association(
        assay1, assay2, method, 
        p.adj.method, 
        test.signif, 
        show.warnings, paired, 
        verbose, by,
        assay.type1, assay.type2,
        altexp1, altexp2,
        col.var1, col.var2,
        ...)
    # Disable p.adj.threshold if there is no adjusted p-values
    if( is.null(result$p_adj) ){
        p.adj.threshold <- NULL
    }
    # Disable cor.threshold if there is no correlations
    if( is.null(result$cor) ){
        cor.threshold <- NULL
    }
    # Disable filter_self_correlation if assays are not the same
    if( !identical(assay1, assay2) ){
        filter.self.cor <- FALSE
    }
    # Do filtering
    if( !is.null(p.adj.threshold) || 
        !is.null(cor.threshold) || 
        filter.self.cor ){
        # Filter associations
        result <- .association_filter(
            result,
            p.adj.threshold,
            cor.threshold,
            assay1, 
            assay2, 
            filter.self.cor,
            verbose)
    }
    # Matrix or table?
    if( mode == "matrix" && !is.null(result) ) {
        # Create matrices from table
        result <- .association_table_to_matrix(result, verbose)
        # Sort associations if specified
        if( sort ){
            result <- .association_sort(result, verbose)
        }
        # Adjust names of all matrices
        result <- lapply(result, FUN = function(x){
            rownames(x) <- colnames(assay1)[ as.numeric(rownames(x)) ]
            colnames(x) <- colnames(assay2)[ as.numeric(colnames(x)) ]
            return(x)
        })
        # If result list includes only one matrix, return only the matrix
        if( length(result) == 1 ){
            result <- result[[1]]
        }
    } else if( !is.null(result) ){
        # Sort associations if specified
        if( sort ){
            result <- .association_sort(result, verbose)
            # Get levels
            levels1 <- unique(
                colnames(assay1)[ as.numeric(levels(result$Var1)) ] )
            levels2 <- unique(
                colnames(assay2)[ as.numeric(levels(result$Var2)) ] )
            # Unfactor so that factors do not affect when names are adjusted
            result$Var1 <- unfactor(result$Var1)
            result$Var2 <- unfactor(result$Var2)
        }
        # Adjust names
        result$Var1 <- colnames(assay1)[ as.numeric(result$Var1) ]
        result$Var2 <- colnames(assay2)[ as.numeric(result$Var2) ]
        # Adjust factors
        if( sort ){
            # If the order is specified by sorting
            result$Var1 <- factor(result$Var1, levels = levels1)
            result$Var2 <- factor(result$Var2, levels = levels2)
        } else{
            # Otherwise the order is same as in input
            result$Var1 <- factor(result$Var1, levels = unique(result$Var1))
            result$Var2 <- factor(result$Var2, levels = unique(result$Var2))
        }
    }
    return(result)
}

################################ HELP FUNCTIONS ################################
# Rename experiments' colnames based on sample map linkages.
#' @importFrom MultiAssayExperiment sampleMap
.rename_based_on_samplemap <- function(mae, exp1, exp2){
    # Get name of experiments, if indices are provided
    if( is.numeric(exp1) ){
        exp1 <- names(mae)[[exp1]]
    }
    if( is.numeric(exp2) ){
        exp2 <- names(mae)[[exp2]]
    }
    # Get sample map
    sample_map <- sampleMap(mae)
    # Get sample map for experiment1
    map_sub <- sample_map[ sample_map[["assay"]] == exp1, ]
    # Rename TreeSE of experiment1 based on sample map info
    tse1 <- mae[[exp1]]
    ind <- match(colnames(tse1), map_sub[["colname"]])
    colnames(tse1) <- map_sub[ind, "primary"]
    
    # Do the same for experiment2
    map_sub <- sample_map[ sample_map[["assay"]] == exp2, ]
    # Rename TreeSE of experiment2 based on sample map info
    tse2 <- mae[[exp2]]
    ind <- match(colnames(tse2), map_sub[["colname"]])
    colnames(tse2) <- map_sub[ind, "primary"]
    
    # Check if samples match
    all1 <- all(colnames(tse1) %in% colnames(tse2))
    all2 <- all(colnames(tse2) %in% colnames(tse1))
    no_dupl1 <- anyDuplicated(colnames(tse1)) == 0
    no_dupl2 <- anyDuplicated(colnames(tse2)) == 0
    if( !(all1 && all2 && no_dupl1 && no_dupl2)  ){
        stop("Samples must match between experiments. Please check colnames.",
            call. = FALSE)
    }
    # Order samples
    tse2 <- tse2[, colnames(tse1)]
    res <- list(tse1, tse2)
    names(res) <- c(exp1, exp2)
    return(res)
}
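
# Minimal illustration of the idea (hypothetical experiment and sample names,
# not run): the sampleMap of an MAE links each experiment-specific colname to
# a shared "primary" sample identifier, and the colnames of both experiments
# are replaced with those primary names so that samples can be matched.
#   sampleMap(mae)
#   ## DataFrame with columns: assay, primary, colname
#   ## e.g. assay = "microbiota", primary = "patient1", colname = "sample1_16S"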

# This function is for testing if experiment can be found from MAE
#' @importFrom MultiAssayExperiment experiments
.test_experiment_of_mae <- function(x, experiment){
    # If experiment is numeric and bigger than the number of experiments
    if( is.numeric(experiment) && experiment > length(experiments(x)) ){
        stop("'", deparse(substitute(experiment)), "' is greater than the ",
            "number of experiments in MAE object.",
            call. = FALSE)
    }
    # Negation of "if value is character and can be found from experiments, or
    # if value is numeric and is smaller than or equal to the number of
    # experiments".
    if( !( is.character(experiment) && experiment %in% names(experiments(x)) || 
        is.numeric(experiment) && experiment <= length(experiments(x)) ) ){
        stop("'", deparse(substitute(experiment)), "' ", 
            "must be numeric or character value specifying ", 
            "experiment in experiment(x).", call. = FALSE)
    }
    # Check experiment's class
    obj <- x[[experiment]]
    if( !(is(obj, "SummarizedExperiment")) ){
        stop("The class of experiment specified by ", 
            deparse(substitute(experiment)), " must be 'SummarizedExperiment'.",
            call. = FALSE)
    }
    return(NULL)
}
###################### .check_and_subset_colData_variables #####################
# This function checks if columns can be found from colData. Additionally, 
# integers are converted into numeric and factors to character.

# Input: (Tree)SE and character
# Output: (Tree)SE
.check_and_subset_colData_variables <- function(tse, variables){
    # Get variable name
    variable_name <- deparse(substitute(variables))
    var_num <- substr(
        variable_name, 
        start = nchar(variable_name), stop = nchar(variable_name))
    # Check that variables can be found
    if( !(is.character(variables) &&
            all( variables %in% colnames(colData(tse))) ) ){
        stop("'", variable_name, "' must be a character value specifying ",
            "column(s) from colData of experiment ", var_num, ".",
            call. = FALSE)
    }
    # Get coldata
    coldata <- colData(tse)[ , variables, drop = FALSE]
    # Get the classes of variables
    classes <- unlist( lapply(coldata, class) )
    # IF there are factors, convert them into character
    if( any( classes == "factor") ){
        unfactor <- coldata[ , classes == "factor", drop = FALSE]
        unfactor <- lapply(unfactor, unfactor)
        coldata[ , classes == "factor"] <- as.data.frame(unfactor)
        classes[classes == "factor"] <- "character"
    }
    # If there are integers or doubles, convert them into numeric
    int_or_double <- classes == "integer" | classes == "double"
    if( any( int_or_double ) ){
        as_numeric <- coldata[ , int_or_double, drop = FALSE]
        as_numeric <- lapply(as_numeric, as.numeric)
        coldata[ , int_or_double] <- as.data.frame(as_numeric)
        classes[int_or_double] <- "numeric"
    }
    # Check that all the variables have the same class
    if( length(unique(classes)) > 1 ){
        stop(" Variables specified by '", variable_name,
            "' do not share a same class.", call. = FALSE)
    }
    # Replace the colData with new, subsetted colData
    coldata <- DataFrame(coldata)
    colData(tse) <- coldata
    return(tse)
}

####################### .cross_association_test_data_type ######################
# This function tests if values match the chosen method. With numeric methods,
# numeric values are expected, and with the categorical method, factor or
# character values are expected. Otherwise an error is given.

# Input: assay and method
# Output: assay
.cross_association_test_data_type <- function(
        assay, method, colData_variable){
    # Different available methods
    numeric_methods <- c("kendall", "pearson","spearman")
    categorical_methods <- c("categorical")
    # Get message
    if( !is.null(colData_variable) ){
        message <- "Variables of colData"
    } else{
        message <- "Assay, specified by 'assay.type',"
    }
    # Check if method match with values, otherwise give an error.
    # For numeric methods, expect only numeric values. For categorical methods, 
    # expect only factors/characters.
    if (method %in% numeric_methods && !is.numeric(assay)) {
        # If there are no numeric values, give an error
        stop(message, " of 'experiment' does not ",
            "include numeric values. Choose categorical method for 'method'.",
            call. = FALSE)
    } else if (method %in% categorical_methods && !is.character(assay)) {
        # If there are no factor or character values, give an error
        stop(message, " of 'experiment' does not ",
            "include factor or character values. Choose a numeric method ",
            "for 'method'.", call. = FALSE)
    }
    return(assay)
}

########################### .check_that_assays_match ###########################
# If correlations between features are analyzed, samples should match, and
# vice versa
.check_that_assays_match <- function(assay1, assay2, MARGIN){
    names <- ifelse(MARGIN == 2, "Features", "Samples")
    if( any(rownames(assay1) != rownames(assay2)) ){
        stop(names, " must match between experiments.", call. = FALSE)
    }
}

########################### .check_if_paired_samples ###########################
# Check if samples are paired
.check_if_paired_samples <- function(assay1, assay2){
    if( !all(colnames(assay1) == colnames(assay2)) ){
        stop("Experiments are not paired or samples are in wrong order.", 
            "Check that colnames match between experiments.", call. = FALSE)
    }
}

############################# .calculate_association ###########################
# This function calculates correlations between every feature-pair. For numeric
# values it uses cor.test() or cor(), and for categorical values Goodman and
# Kruskal's tau test.

# Input: Assays that share samples but that have different features and
# different parameters.
# Output: Correlation table including correlation values (and p-values and
# adjusted p-values)
#' @importFrom stats p.adjust
.calculate_association <- function(
        assay1,
        assay2,
        method = c("kendall", "spearman", "categorical", "pearson"),
        p.adj.method,
        test.signif,
        show.warnings,
        paired,
        verbose,
        by,
        assay.type1, assay.type2,
        altexp1, altexp2,
        col.var1, col.var2,
        association.fun = association_FUN,
        association_FUN = NULL,
        ...){
    # Check method if association.fun is not NULL
    if( is.null(association.fun) ){
        method <- match.arg(method)
        # Get function name for message
        function_name <- ifelse(
            method == "categorical", "mia:::.calculate_gktau", 
            ifelse(test.signif, "stats::cor.test", "stats::cor"))
        
        # Test if data is in right format
        .cross_association_test_data_type(assay1, method, col.var1)
        .cross_association_test_data_type(assay2, method, col.var2)
    } else{
        # Get name of function
        function_name <- deparse(substitute(association.fun))
        test.signif <- FALSE
        p.adj.method <- NULL
    }
    
    # Message details of calculation
    if(verbose){
        message( 
            "Calculating correlations...\n",
            "altexp1: ", ifelse(!is.null(altexp1), altexp1, "-"), 
            ", altexp2: ", ifelse(!is.null(altexp2), altexp2, "-"),
            ifelse(!is.null(col.var1), 
                paste0(", assay.type1: -, col.var1: ", 
                    paste(col.var1, collapse = " + ")), 
                paste0(", assay.type1: ", assay.type1, ", col.var1: -")),
            ifelse(!is.null(col.var2), 
                paste0(", assay.type2: -, col.var2: ", 
                    paste(col.var2, collapse = " + ")), 
                paste0(", assay.type2: ", assay.type2, ", col.var2: -")),
            "\nby: ", by, 
            ", function: ", function_name, 
            ", method: ", method,
            ", test.signif: ", test.signif,
            ", p.adj.method: ",
            ifelse(!is.null(p.adj.method), p.adj.method, "-"),
            ", paired: ", paired,
            ", show.warnings: ", show.warnings, "\n" 
            ) 
    }
    
    # If association.fun is provided by user, use appropriate function.
    # Otherwise, choose correct method for numeric and categorical data
    if( !is.null(association.fun) ){
        FUN_ <- .calculate_association_with_own_function
    } else if( method %in% c("kendall", "pearson","spearman") ) {
        FUN_ <- .calculate_association_for_numeric_values
    } else if( method == "categorical" ){
        FUN_ <- .calculate_association_for_categorical_values
    } 
    
    # Get all the sample/feature pairs
    if( paired ){
        .check_if_paired_samples(assay1, assay2)
        variable_pairs <- data.frame(
            Var1 = seq_len(ncol(assay1)), Var2 = seq_len(ncol(assay2)) )
    } else{
        # Get feature_pairs as indices
        variable_pairs <- expand.grid(
            seq_len(ncol(assay1)), seq_len(ncol(assay2)) )
    }
    
    # If function is stats::cor, then calculate associations directly with
    # matrices. Otherwise, loop through variable_pairs
    if( function_name == "stats::cor" ){
        correlations_and_p_values <- .calculate_stats_cor(
            assay1 = assay1, assay2 = assay2, method = method,
            show.warnings = show.warnings)
    } else{
        correlations_and_p_values <- .calculate_association_table(
            variable_pairs = variable_pairs, FUN_ = FUN_,
            test.signif = test.signif, assay1 = assay1, assay2 = assay2,
            method = method, show.warnings = show.warnings, 
            association.fun = association.fun, ...)
    }
    
    # Get the order based on original order of variable-pairs
    order <- match(
        paste0(variable_pairs$Var1, "_", variable_pairs$Var2),
        paste0(correlations_and_p_values$Var1, "_",
            correlations_and_p_values$Var2) )
    # Order the table
    correlations_and_p_values <- correlations_and_p_values[ order, ]
    # Remove rownames / make them to be in increasing order
    rownames(correlations_and_p_values) <- NULL
    
    # If there are p_values, adjust them
    if( !is.null(correlations_and_p_values$pval) ){
        correlations_and_p_values$p_adj <- p.adjust(
            correlations_and_p_values$pval, method = p.adj.method)
    }
    return(correlations_and_p_values)
}

######################### .sort_variable_pairs_row_wise ########################
# This function adds two columns to the variable-pair data.frame. Each variable
# pair is sorted so that the smaller index comes first: the first additional
# column contains the smaller index and the second column the larger index of
# each pair.

# Input: variable pair data.frame
# Output: variable pair data.frame with two additional columns
#' @importFrom DelayedMatrixStats rowMins rowMaxs
.sort_variable_pairs_row_wise <- function(variable_pairs){
    # Convert into matrix
    variable_pairs <- as.matrix(variable_pairs)
    # Get rowmins
    rowmins <- rowMins( variable_pairs )
    # Get rowmaxs
    rowmaxs <- rowMaxs( variable_pairs )
    # Convert back into data.frame
    variable_pairs <- as.data.frame( variable_pairs )
    # Add rowMins
    variable_pairs$Var1_sorted <- rowmins
    # Add rowMaxs
    variable_pairs$Var2_sorted <- rowmaxs
    return(variable_pairs)
}
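
# Illustrative usage with assumed toy index pairs (not run; values are for
# demonstration only). For a symmetric measure, pairs (1, 2) and (2, 1) get
# identical sorted columns, so the redundant pair can be dropped afterwards.
#   pairs <- expand.grid(Var1 = 1:3, Var2 = 1:3)
#   .sort_variable_pairs_row_wise(pairs)
#   # Adds Var1_sorted (row-wise minimum) and Var2_sorted (row-wise maximum)
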
######################### .calculate_association_table #########################
# This function calculates association by looping through feature/sample-pairs.

# Input: feature/sample_pairs, and assays
# Output: correlation table with variable names in Var1 and Var2, and
# correlation values in cor. Additionally, table can also include pval for
# p-values.
.calculate_association_table <- function(
        variable_pairs,
        FUN_, 
        test.signif, 
        assay1, 
        assay2,
        method,
        show.warnings, 
        association.fun, 
        symmetric = FALSE,
        ...){
    # Check symmetric
    if( !.is_a_bool(symmetric) ){
        stop("'symmetric' must be a boolean value.", call. = FALSE)
    }
    # 
    # If user has specified that the measure is symmetric
    if( symmetric ){
        # Are assays identical? If so, calculate only unique variable-pairs
        assays_identical <- identical(assay1, assay2)
    } else{
        # Calculate all variable-pairs
        assays_identical <- FALSE
    }
    
    # If they are identical, we can make the calculation faster:
    # var1 vs var2 equals var2 vs var1
    if( assays_identical ){
        variable_pairs_all <- variable_pairs
        # Sort features row-wise
        variable_pairs_all <- .sort_variable_pairs_row_wise(variable_pairs_all)
        # Get unique feature/sample pairs
        variable_pairs <- variable_pairs_all[
            !duplicated(variable_pairs_all[ , c("Var1_sorted", "Var2_sorted")]),
            ]
    }
    
    # Calculate correlations
    correlations_and_p_values <- apply(
        variable_pairs, 1, 
        FUN = FUN_, 
        test.signif = test.signif, 
        assay1 = assay1, 
        assay2 = assay2,
        method = method,
        show.warnings = show.warnings, 
        association.fun = association.fun, 
        ...)
    
    # Convert into data.frame if it is a vector;
    # otherwise transpose into the same orientation as the variable-pairs and
    # then convert it to a data.frame
    if( is.vector(correlations_and_p_values) ){
        correlations_and_p_values <- data.frame(correlations_and_p_values)
    } else{
        correlations_and_p_values  <- t(correlations_and_p_values)
        correlations_and_p_values <- as.data.frame(correlations_and_p_values)
    }
    # Give names
    if( ncol(correlations_and_p_values) == 1 ){
        colnames(correlations_and_p_values) <- c("cor")
    } else if( ncol(correlations_and_p_values) == 2 ){
        colnames(correlations_and_p_values) <- c("cor", "pval")
    } else{
        stop("Unexpected error occurred during calculation.",call. = FALSE)
    }
    
    # If assays were identical, and duplicate variable pairs were dropped
    if( assays_identical ){
        # Change names so that they do not clash with the colnames of
        # variable_pairs_all
        colnames(variable_pairs)[c(1,2)] <- c("Var1_", "Var2_")
        # Combine feature-pair names with correlation values and p-values
        correlations_and_p_values <- cbind(
            variable_pairs, correlations_and_p_values)
        
        # Combine two tables so that values are assigned to larger table with
        # all the variable pairs
        correlations_and_p_values <- dplyr::left_join(
            variable_pairs_all, 
            correlations_and_p_values, 
            by = c("Var1_sorted", "Var2_sorted"))
        
        # Drop off additional columns
        correlations_and_p_values <- correlations_and_p_values[
            ,
            !colnames(correlations_and_p_values) %in% 
                c("Var1_sorted", "Var2_sorted", "Var1_", "Var2_")
            ]
        
    } else{
        # Otherwise just add variable names
        correlations_and_p_values <- cbind(
            variable_pairs, correlations_and_p_values)
    }
    
    return(correlations_and_p_values)
}

############################# .calculate_stats_cor #############################
# This function calculates correlations with stats::cor. Compared to other
# functions, it can take whole matrices as an input.

# Input: assays
# Output: correlation table
#' @importFrom stats cor 
#' @importFrom tidyr pivot_longer
.calculate_stats_cor <- function(assay1, assay2, method, show.warnings){
    # If user does not want warnings, 
    # suppress warnings that might occur when calculating correlations (NAs...)
    # or p-values (ties, and exact p-values cannot be calculated...)
    if( show.warnings ){
        correlations <- stats::cor(
            assay1, assay2, method = method, use = "pairwise.complete.obs")
    } else{
        correlations <- suppressWarnings(stats::cor(
            assay1, assay2, method = method, use = "pairwise.complete.obs") )
    }
    # 
    correlations <- as.data.frame(correlations)
    # Convert row names and column names into indices, so that they match the
    # output of the other functions
    rownames(correlations) <- seq_len( nrow(correlations) )
    colnames(correlations) <- seq_len( ncol(correlations) )
    correlations$ID <- rownames(correlations)
    # Melt matrix into long format, so that it matches the output of other
    # functions
    correlations <- correlations %>% tidyr::pivot_longer(cols = !"ID")
    
    # Adjust names
    colnames(correlations) <- c("Var1", "Var2", "cor")
    # Convert into data.frame
    correlations <- as.data.frame( correlations )
    # Convert indices back to numeric
    correlations$Var1 <- as.numeric(correlations$Var1)
    correlations$Var2 <- as.numeric(correlations$Var2)
    
    return(correlations)
}
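
# Minimal sketch of the expected behaviour with assumed toy matrices (not run):
# columns are the variables to correlate, rows are the shared observations.
#   a1 <- matrix(rnorm(20), nrow = 5, dimnames = list(NULL, paste0("f", 1:4)))
#   a2 <- matrix(rnorm(15), nrow = 5, dimnames = list(NULL, paste0("g", 1:3)))
#   .calculate_stats_cor(a1, a2, method = "spearman", show.warnings = TRUE)
#   # Returns a long-format data.frame with columns Var1, Var2 and cor, where
#   # Var1/Var2 hold the column indices of a1/a2.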

################### .calculate_association_for_numeric_values ##################
# This function calculates the correlation between a feature-pair with
# cor.test(). It is used when significance testing is enabled.

# Input: Vector of indices that define the feature pair, and assays.
# Output: Correlation value or list that includes correlation value and p-value.
#' @importFrom stats cor.test
.calculate_association_for_numeric_values <- function(
        feature_pair, test.signif, assay1, assay2, method, show.warnings, ...){
    # Get features
    feature1 <- assay1[ , feature_pair[1]]
    feature2 <- assay2[ , feature_pair[2]]
    
    # Calculate correlation
    # If user does not want warnings, 
    # suppress warnings that might occur when calculating correlations (NAs...)
    # or p-values (ties, and exact p-values cannot be calculated...)
    if( show.warnings ){
        temp <- cor.test(
            feature1, feature2, method = method, use = "pairwise.complete.obs")
    } else {
        temp <- suppressWarnings( cor.test(
            feature1, feature2, method = method,
            use = "pairwise.complete.obs") )
    }
    
    # Take only correlation and p-value
    temp <- c(temp$estimate, temp$p.value)

    return(temp)
}
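
# Minimal sketch with assumed toy data (not run): 'feature_pair' holds column
# indices into assay1 and assay2, and the returned vector contains the
# correlation estimate and the p-value from stats::cor.test().
#   a1 <- matrix(rnorm(30), nrow = 10)
#   a2 <- matrix(rnorm(30), nrow = 10)
#   .calculate_association_for_numeric_values(
#       feature_pair = c(1, 2), test.signif = TRUE, assay1 = a1, assay2 = a2,
#       method = "spearman", show.warnings = TRUE)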

################# .calculate_association_for_categorical_values ################
# This function calculates correlation between feature-pair. Correlation value
# is calculated with Goodman and Kruskal's tau test. P-value is calculated
# with Pearson's chi-squared test.

# Input: Vector of indices that define the feature pair, and assays.
# Output: Correlation value or list that includes correlation value and p-value.
.calculate_association_for_categorical_values <- function(
        feature_pair,
        test.signif, 
        assay1,
        assay2,
        show.warnings,
        ...){
    # Get features
    feature1 <- assay1[ , feature_pair[1]]
    feature2 <- assay2[ , feature_pair[2]]
    # Keep only those samples that have values in both features
    keep <- rowSums2(is.na(cbind(feature1, feature2))) == 0
    feature1 <- feature1[keep]
    feature2 <- feature2[keep]
    # Calculate cross-correlation using Goodman and Kruskal tau
    temp <- .calculate_gktau(
        feature1, feature2, test.signif = test.signif, show.warnings)
    # Whether to test significance
    if( test.signif ){
        # Take correlation and p-value
        temp <- c(temp$estimate, temp$p.value)
    } else{
        # Take only correlation value
        temp <- temp$estimate
    }
    return(temp)
}

################### .calculate_association_with_own_function  ##################
# This function calculates (dis-)similarity between feature-pair with function 
# that is specified by user.

# Input: Vector of indices that define the feature pair, and assays.
# Output: Correlation value or list that includes correlation value and p-value.
.calculate_association_with_own_function <- function(
        feature_pair,
        assay1, assay2, 
        association.fun, 
        show.warnings,
        test.signif,
        ...){
    # Get features
    feature1 <- assay1[ , feature_pair[1]]
    feature2 <- assay2[ , feature_pair[2]]
    # Create a matrix 
    feature_mat <- rbind(feature1, feature2)
    
    # If user does not want warnings, 
    # suppress warnings that might occur when calculating correlations (NAs...)
    # or p-values (ties, and exact p-values cannot be calculated...)
    # Use try-catch to catch errors that might occur.
    if( show.warnings ){
        temp <- tryCatch({
            do.call(association.fun, args = c(list(feature_mat), list(...)))
        },
        error = function(cond) {
            stop("Error occurred during calculation. Check, e.g., that ",
                "'association.fun' fulfills requirements. 'association.fun' ",
                "threw a following error:\n",  cond,
                call. = FALSE)
        })
    } else {
        temp <- tryCatch({
            suppressWarnings( do.call(
                association.fun, args = c(list(feature_mat), list(...))) )
        },
        error = function(cond) {
            stop("Error occurred during calculation. Check, e.g., that ",
                "'association.fun' fulfills requirements. 'association.fun' ",
                "threw a following error:\n",  cond, call. = FALSE)
        })
    }
    
    # If temp's length is not 1, then function does not return single numeric
    # value for each pair
    if( length(temp) != 1 ){
        stop("Error occurred during calculation. Check that ", 
            "'association.fun' fulfills requirements.", call. = FALSE)
    } 
    return(temp)
}

############################## .association_filter #############################
# This function filters out feature-pairs that do not pass the specified
# thresholds.

# Input: Correlation table and thresholds
# Output: Filtered correlation table (or NULL if there are no observations
# after filtering)
.association_filter <- function(
        result,
        p.adj.threshold,
        cor.threshold, 
        assay1,
        assay2,
        filter.self.cor, 
        verbose){
    # Give message if verbose == TRUE
    if(verbose){
        message("Filtering results...\np_adj_threshold: ",
            ifelse(!is.null(p.adj.threshold),  p.adj.threshold, "-"), 
            ", cor.threshold: ", 
            ifelse(!is.null(cor.threshold), cor.threshold, "-"), 
            ", filter.self.cor: ", 
            ifelse(filter.self.cor, filter.self.cor, "-"), "\n" )
    }
    
    # Which features have significant correlations?
    if ( !is.null(result$p_adj) && !is.null(p.adj.threshold) ) {
        # Get those feature-pairs that have significant correlations
        result <- result[
            result$p_adj < p.adj.threshold & !is.na(result$p_adj), ]
    }
    # Which features have correlation over correlation threshold?
    if ( !is.null(cor.threshold) ) {
        # Get those feature-pairs that have correlations over threshold
        result <- result[abs(result$cor) > cor.threshold & !is.na(result$cor), ]
    }
    
    # If there are no significant correlations
    if ( nrow(result) == 0 ) {
        message("No significant correlations with the given criteria.\n")
        return(NULL)
    }
    
    # Filter self correlations if it's specified
    if ( filter.self.cor ) {
        # Take only those rows where features differ
        result <- result[result$Var1 != result$Var2, ]
    }
    return(result)
}
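
# Illustrative sketch with an assumed toy result table (not run):
#   res <- data.frame(
#       Var1 = c("a", "a", "b"), Var2 = c("a", "b", "b"),
#       cor = c(1, 0.1, 1), p_adj = c(0, 0.01, 0))
#   .association_filter(
#       res, p.adj.threshold = 0.05, cor.threshold = NULL,
#       assay1 = NULL, assay2 = NULL, filter.self.cor = TRUE, verbose = FALSE)
#   # Rows with p_adj >= 0.05 would be dropped first; then the self-pairs
#   # ("a" vs "a", "b" vs "b") are removed, leaving only the "a" vs "b" row.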

############################## .association_sort  ##############################
# This function sorts correlation, p-value, and adjusted p-value matrices based
# on hierarchical clustering

# Input: List of matrices (cor, p-values and adjusted p-values) or a matrix
# (cor)
# Output: List of sorted matrices (cor, p-values and adjusted p-values) or a
# matrix (cor)
#' @importFrom stats hclust
.association_sort <- function(result, verbose){
    # Give message if verbose == TRUE
    if(verbose){
        message("Sorting results...\n")
    }
    
    # Is the type of result table or matrix?
    is_dataframe <- is.data.frame(result)
    # If the type is data.frame, convert result first to matrix
    if( is_dataframe ){
        result_mat <- .association_table_to_matrix(result, verbose = FALSE)
        # Fetch data matrices
        correlations <- result_mat$cor
        p_values <- result_mat$pval
        p_values_adjusted <- result_mat$p_adj
    } else{
        # Fetch data matrices
        correlations <- result$cor
        p_values <- result$pval
        p_values_adjusted <- result$p_adj
    }
    
    # If the matrix contains rows or columns that have only NAs, an error
    # occurs in hclust
    if( (any(rowSums(is.na(correlations)) == ncol(correlations)) || 
            any(colSums(is.na(correlations)) == nrow(correlations))) ){
        message("Result cannot be sorted, because it ",
            "contains variable(s) whose correlation was not possible to ",
            "calculate since they all were NAs.\n")
        return(result)
    }
    # If there is only one variable, return as it is
    if( nrow(correlations) == 1 && ncol(correlations) == 1 ){
        return(result)
    }
    
    # Order in visually appealing order
    tmp <- correlations
    rownames(tmp) <- NULL
    colnames(tmp) <- NULL
    
    # Do hierarchical clustering, use try-catch to catch errors that might occur
    # if data contains NAs
    row_index <- tryCatch({
        hclust(as.dist(1 - cor(
            t(tmp), use="pairwise.complete.obs")))$order
    },
    error = function(cond) {
        stop("Error occurred during sorting. Possible reason is that ",
            "correlation matrix includes NAs. Try with 'sort = FALSE'.", 
            call. = FALSE)
    }
    )
    col_index <- tryCatch({
        hclust(as.dist(1 - cor(
            tmp, use="pairwise.complete.obs")))$order
    },
    error = function(cond) {
        stop("Error occurred during sorting. Possible reason is that ",
            "correlation matrix includes NAs. Try with 'sort = FALSE'.", 
            call. = FALSE)
    }
    )
    # Get the order of features from hierarchical clustering
    rownames <- rownames(correlations)[row_index]
    colnames <- colnames(correlations)[col_index]
    
    # If the format of result was data.frame
    if( is_dataframe ){
        # Add order as factor levels
        result$Var1 <- factor(result$Var1, levels = row_index)
        result$Var2 <- factor(result$Var2, levels = col_index)
        
    } else{
        # Otherwise, the format was matrix
        # Order the correlation matrix  based on order of hierarchical
        # clustering
        correlations <- correlations[row_index, col_index]
        # Add column and rownames
        colnames(correlations) <- colnames
        rownames(correlations) <- rownames
        
        # Create a result list
        result_list <- list(cor = correlations)
        
        # If p_values is not NULL. order and add to the list
        if( !is.null(p_values) ){
            # Sort the matrix
            p_values <- p_values[row_index, col_index]
            # Add column and rownames
            colnames(p_values) <- colnames
            rownames(p_values) <- rownames
            # Add matrix to result list
            result_list[["pval"]] <- p_values
        }
        # If p_values_adjusted is not NULL. order and add to the list
        if( !is.null(p_values_adjusted) ){
            # Sort the matrix
            p_values_adjusted <- p_values_adjusted[row_index, col_index]
            # Add column and rownames
            colnames(p_values_adjusted) <- colnames
            rownames(p_values_adjusted) <- rownames
            # Add matrix to result list
            result_list[["p_adj"]] <- p_values_adjusted
        }
        result <- result_list
    }
    
    return(result)
}

######################### .association_table_to_matrix #########################
# This function converts a correlation table into matrices

# Input: Correlation table
# Output: List of matrices (cor, p-values and adjusted p-values) or a matrix
# (cor)
#' @importFrom tidyr pivot_wider
#' @importFrom DelayedArray rowSums colSums
.association_table_to_matrix <- function(result, verbose){
    # Give message if verbose == TRUE
    if(verbose){
        message("Converting table into matrices...\n")
    }
    
    # Correlation matrix is done from Var1, Var2, and cor columns
    cor <- result %>%
        # Convert into wide format
        pivot_wider(
            id_cols = "Var1", names_from = "Var2", values_from = "cor") %>%
        # Convert into data.frame
        as.data.frame()
    # Give rownames and remove additional column
    rownames(cor) <- cor$Var1
    cor$Var1 <- NULL
    # Convert into matrix
    cor <- as.matrix(cor)
    # Remove empty rows and columns
    non_empty_rows <- rowSums(is.na(cor)) < ncol(cor)
    non_empty_cols <- colSums(is.na(cor)) < nrow(cor) 
    cor <- cor[ non_empty_rows, non_empty_cols, drop = FALSE ]
    # Create a result list
    result_list <- list(cor = cor)
    # If p_values exist, then create a matrix and add to the result list
    if( !is.null(result$pval) ){
        pval <- result %>%
            # Convert into wide format
            pivot_wider(
                id_cols = "Var1", names_from = "Var2", values_from = "pval") %>%
            # Convert into data.frame
            as.data.frame()
        # Adjust rownames and remove an additional column
        rownames(pval) <- pval$Var1
        pval$Var1 <- NULL
        # Convert into matrix
        pval <- as.matrix(pval)
        # Remove empty rows and columns
        pval <- pval[ non_empty_rows, non_empty_cols, drop = FALSE ]
        # Add it to result list
        result_list[["pval"]] <- pval
    } 
    # If adjusted p_values exist, then create a matrix and add to the result
    # list
    if( !is.null(result$p_adj) ){
        p_adj <- result %>%
            # Convert into wide format
            pivot_wider(
                id_cols = "Var1", names_from = "Var2",
                values_from = "p_adj") %>%
            # Convert into data.frame
            as.data.frame()
        # Adjust rownames and remove an  additional column
        rownames(p_adj) <- p_adj$Var1
        p_adj$Var1 <- NULL
        # Convert into matrix
        p_adj <- as.matrix(p_adj)
        # Remove empty rows and columns
        p_adj <- p_adj[ non_empty_rows, non_empty_cols, drop = FALSE ]
        # Add it to result list
        result_list[["p_adj"]] <- p_adj
    } 
    return(result_list)
}
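
# Illustrative sketch with an assumed toy correlation table (not run):
#   tab <- data.frame(
#       Var1 = c(1, 1, 2, 2), Var2 = c(1, 2, 1, 2), cor = c(1, 0.2, 0.2, 1))
#   .association_table_to_matrix(tab, verbose = FALSE)
#   # $cor is a 2 x 2 matrix with Var1 values as rownames and Var2 values as
#   # colnames; pval and p_adj matrices are added only if those columns exist.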

############################### .calculate_gktau ###############################
# This function calculates association with Goodman and Kruskal's tau test and
# the p-value with Pearson's chi-squared test

# Input: Two vectors, one representing feature1 and the other feature2, sharing
# the same samples
# Output: list of tau and p-value or just tau
#' @importFrom DelayedMatrixStats rowSums2 colSums2
#' @importFrom stats chisq.test
.calculate_gktau <- function(x, y, test.signif = FALSE, show.warnings){
    # First, compute the IxJ contingency table between x and y
    Nij <- table(x, y, useNA="ifany")
    # Next, convert this table into a joint probability estimate
    PIij <- Nij/sum(Nij)
    # Compute the marginal probability estimates
    #PIiPlus <- apply(PIij, MARGIN=1, sum)
    PIiPlus <- rowSums2(PIij)
    #PIPlusj <- apply(PIij, MARGIN=2, sum)
    PIPlusj <- colSums2(PIij)
    # Compute the marginal variation of y
    Vy <- 1 - sum(PIPlusj^2)
    # Compute the expected conditional variation of y given x
    # Because of the previous step, there should not be any NAs
    #InnerSum <- apply(PIij^2, MARGIN=1, sum)
    InnerSum <- rowSums2(PIij^2)
    VyBarx <- 1 - sum(InnerSum/PIiPlus)
    # Compute and return Goodman and Kruskal's tau measure
    tau <- (Vy - VyBarx)/Vy
    
    # If test significance is specified, then calculate significance with
    # chi-squared test. Are these two features independent or not?
    if ( !test.signif ){
        return(list(estimate = tau))
    } 
    # Do Pearson's chi-squared test.
    # If the user does not want warnings,
    # suppress warnings that might occur when there are ties and the exact
    # p-value cannot be calculated
    if( show.warnings ){
        temp <- chisq.test(x, y)
    } else {
        temp <- suppressWarnings( chisq.test(x, y) )
    }
    
    # Take the p-value
    p_value <- temp$p.value
    # Result is combination of tau and p-value
    return(list(estimate = tau, p.value = p_value))
}
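
# Illustrative usage with assumed toy categorical vectors (not run):
#   x <- c("a", "a", "b", "b", "b")
#   y <- c("u", "u", "v", "v", "u")
#   .calculate_gktau(x, y, test.signif = TRUE, show.warnings = FALSE)
#   # Returns list(estimate = <Goodman-Kruskal tau of y given x>,
#   #              p.value  = <Pearson's chi-squared p-value>)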