WGCCNA/R/functions.R

#' Iterative Louvain clustering across multiple resolutions
#'
#' Performs the FindClusters() Seurat function over
#' multiple resolutions
#' @param object a Seurat object.
#' @param resolutions numeric array of clustering resolutions
#' @param n.start number of random starts for Louvain clustering
#' @return List of Seurat objects labelled by clustering resolution
#' @examples
#' find_clusters(seurat_object, resolutions = c(0.1,1,10), n.start = 10)
find_clusters <- function(object, resolutions = c(0.1,1,10), n.start = 5){
    all_clusters = list()
    for(i in 1:length(resolutions)){
        clusters <- FindClusters(object, resolution = resolutions[i], n.start = n.start)
        all_clusters[[paste(resolutions[i])]] <- clusters
    }
    return(all_clusters)
}

#' Average expression of list of Seurat objects
#'
#' Performs the AverageExpression() Seurat function over
#' a list of multiple Seurat objects
#' @param objects a list of Seurat objects
#' @param features an array of strings. Array of genes to be filtered
#'     after averaging expression.
#' @return List of numeric matrices for each Seurat object provided
#'     of average gene expression for each cluster
#' @examples
#' average_expression(list_of_seurat_objects, features = c("Vldlr","Ldlr"))
average_expression <- function(objects, features = c()){
    all_avg_clusters <- list()
    for(i in 1:length(objects)){
        cat("Averaging gene expression in clusters from resolution",i,"\n")
        avg_cluster <- round(as.data.frame(AverageExpression(objects[[i]])),4)
        if(length(features)>0){
            avg_cluster <- avg_cluster[features,]
        }
        all_avg_clusters[[paste(names(objects)[i])]] <- t(avg_cluster)
    }
    return(all_avg_clusters)
}


#' Calculate Pearson correlations for a list of expression matrices
#'
#' Computes simple Pearson correlation R-squared values for
#' every gene pair in each average_expression matrix in a
#' list generated by average_expression
#' @param list A list of numeric expression matrices with
#'     genes as columns and clusters as rows
#' @return List of numeric correlation matrices
#' @examples
#' calculate_correlations(average_expression_list)
calculate_correlations <- function(list){
    cor <- list()
    neighborhoods <- c();
    for(i in 1:length(list)){
        cat("Calculating correlations for all genes in resolution",i,"matrix\n")
        cor[[paste(names(list)[i])]] <- round(cor(list[[i]], use = "pairwise.complete.obs"), 3)
        neighborhoods <- rbind(neighborhoods,c(names(list[i]),length(rownames(list[[i]]))))
    }
    cor[["neighborhoods"]] <- neighborhoods
    return(cor)
}



#' Map coexpression correlation trajectory of gene pairs
#' over multiple cluster resolutions
#'
#' Creates a matrix where each column corresponds to a
#' clustering resolution and each row corresponds to a
#' gene pair, where all gene pairs from calculate_correlations()
#' are assigned a unique row.
#'
#' @param corrs A list of correlation matrices
#' @param gene An array of genes to include in analysis.
#' @return List of numeric correlation matrices
#' @examples
#' map_trajectories(list_of_correlation_matrices_across_resolutions, gene = c("Lrp1","Lrp4","Lrp5","Lrp6"))
map_trajectories <- function(corrs, gene = c()){
        if(length(gene) == 0){
            gene <- colnames(corrs[[1]])
        }
        n <- as.numeric(corrs[["neighborhoods"]][,2])
        corrs[["neighborhoods"]] <- NULL
        trajectory <- c()
        for(i in 1:length(gene)){
            cat("Processing gene",i,"of",length(gene),"(",gene[i],")\n")
            for(j in i:length(gene)){
                if(gene[i]!=gene[j]){
                    t <- c()
                    for(k in corrs){
                        t <- c(t,k[gene[i],gene[j]])
                    }
                    avg <- round(mean(t),4)
                    sd <- round(sd(t),4)

                    # lm successfully runs, but need to access proper statistics (m, b, m_err, b_err, applicable uncertainties, Rsq)
                    # mat <- as.data.frame(cbind(t,n))
                    # lm <- summary(lm(t~n,mat))
                    # lin <- cor.test(t,n)
                    # linear <- lm(t~n,)
                    # lin.t <- as.numeric(lin$statistic)
                    # lin.df <- as.numeric(lin$parameter)
                    # lin.p <- as.numeric(lin$p.value)
                    # lin.rsq <- as.numeric(lin$estimate)
                    # b <- lin
                    # rsq <- lin

                    #after calculating variables above, append them here
                    t <- c(gene[i],gene[j],t,avg,sd)
                    trajectory <- rbind(trajectory,t)
                }
            }
        }
        gene1 <- trajectory[,1:2]
        trajectory <- data.frame(apply(trajectory, 2, function(x) as.numeric(as.character(x))))
        trajectory[,1:2] <- gene1
        colnames(trajectory) <- c("gene1","gene2",n,"avg","sd")
        return(trajectory)
}


#' Creates a single matrix of values from a specified
#' column in the trajectory matrix
#'
#' Creates a pairwise square matrix for all gene pairs
#' listed in the rows of the input trajectory matrix,
#' with values taken from the specified column in that
#' matrix.
#'
#' @param t Matrix generated by map_trajectories()
#' @param column Column name in t to extract values
#' @return Square numerical matrix for all gene pairs
#' @examples
#' trajectories_to_matrix(trajectories, column = "0.1")
#' trajectories_to_matrix(trajectories, column = "avg")
trajectories_to_matrix = function(t, column = "avg"){
    arr <- t[,c("gene1","gene2",column)]
    genes <- unique(c(t[,1],t[,2]))
    c <- c();
    for(i in 1:length(genes)){
        c <- c(c,1)
    }
    mat <- c()
    for(i in 1:length(genes)){
        mat <- rbind(mat,c)
    }
    rownames(mat) <- genes
    colnames(mat) <- genes
    for(i in 1:dim(t)[1]){
        gene1 <- t[i,1]
        gene2 <- t[i,2]
        val <- t[i,column]
        mat[gene1,gene2] <- val
        mat[gene2,gene1] <- val
    }
    return(mat)
}
ZacharyDeBruine/WGCCNA documentation built on Dec. 16, 2019, 12:33 a.m.