R/results-functions.R
In GSgalgoR: An Evolutionary Framework for the Identification and Study of Prognostic Gene Expression Signatures in Cancer

Documented in classify_multiple create_centroids non_dominated_summary

# GalgoR results functions
#' Summary of the non dominated solutions
#'
#' The function uses a \code{'galgo.Obj'} as input an the training dataset to
#' evaluate the non-dominated solutions found by GalgoR
#'
#' @param output An object of class \code{galgo.Obj}
#' @param prob_matrix a \code{matrix} or \code{data.frame}. Must be an
#' expression matrix with features in rows and samples in columns
#' @param OS a \code{survival} object (see \code{\link[survival]{Surv}}
#' function from the \code{\link{survival}} package)
#' @param distancetype a \code{character} that can be either \code{'pearson'},
#'  \code{'uncentered'}, \code{'spearman'} or \code{'euclidean'}
#'
#' @return Returns a \code{data.frame} with 5 columns and a number of rows
#' equals to the non-dominated solutions found by GalgoR.
#' The first column has the name of the non-dominated solutions, the second
#' the number of partitions found for each solution \code{(k)}, the third,
#' the number of genes, the fourth the mean silhouette coefficient of the
#' solution and the last columns has the estimated C.Index for each one.
#' @export
#' @usage non_dominated_summary (output, prob_matrix, OS,
#' distancetype = "pearson")
#' @examples
#' # load example dataset
#' library(breastCancerTRANSBIG)
#' data(transbig)
#' Train <- transbig
#' rm(transbig)
#'
#' expression <- Biobase::exprs(Train)
#' clinical <- Biobase::pData(Train)
#' OS <- survival::Surv(time = clinical$t.rfs, event = clinical$e.rfs)
#'
#' # We will use a reduced dataset for the example
#' expression <- expression[sample(1:nrow(expression), 100), ]
#'
#' # Now we scale the expression matrix
#' expression <- t(scale(t(expression)))
#'
#' # Run galgo
#' output <- GSgalgoR::galgo(generations = 5, population = 15,
#' prob_matrix = expression, OS = OS)
#' non_dominated_summary(
#'     output = output,
#'     OS = OS,
#'     prob_matrix = expression,
#'     distancetype = "pearson"
#' )
non_dominated_summary <-
    function(output, prob_matrix, OS, distancetype = "pearson") {
        if (!methods::is(output, "galgo.Obj")) {
            stop("object must be of class 'galgo.Obj'")
        }
        output_df <- to_dataframe(output)
        NonDom_solutions <- output_df[output_df$Rank == 1, ]
        calculate_distance <- select_distance(distancetype = distancetype
                                    )
        RESULT <- data.frame(
            solution = as.character(),
            k = as.numeric(),
            ngenes = as.numeric(),
            mean.Silhouette = as.numeric(),
            C.Index = as.numeric(),
            stringsAsFactors = FALSE
        )
        for (i in seq_len(nrow(NonDom_solutions))) {
            name <- rownames(NonDom_solutions)[i]
            genes <- NonDom_solutions[i, "Genes"][[1]]
            k <- NonDom_solutions[i, "k"]
            Sub_matrix <- prob_matrix[genes, ]
            D <- calculate_distance(Sub_matrix)
            true_class <- cluster_algorithm(D, k)
            Centroids <- k_centroids(Sub_matrix, true_class$cluster)
            predicted_class <- cluster_classify(Sub_matrix, Centroids,
                                        method = distancetype)
            predicted_class <- as.factor(predicted_class)
            predicted_classdf <- as.data.frame(predicted_class)
            surv_formula <- stats::as.formula("OS~ predicted_class")
            tumortotal <- survival::survfit(surv_formula)
            totalsdf <- survival::survdiff(surv_formula)
            tumortotalpval <- 1 - stats::pchisq(totalsdf$chisq,
                                        length(totalsdf$n) - 1)
            tumortotalpval <- format(tumortotalpval, digits = 4)
            coxsimple <- survival::coxph(surv_formula, data = predicted_classdf)
            CI <- survcomp::concordance.index(
                stats::predict(coxsimple, new = predicted_classdf),
                surv.time = OS[, 1],
                surv.event = OS[, 2],
                outx = FALSE
            )$c.index
            mean_Sil <- mean(cluster::silhouette(
                as.numeric(predicted_class), D)[, 3])
            row <- c(name, k, length(genes), mean_Sil, CI)
            RESULT[nrow(RESULT) + 1, ] <- row
        }
        return(RESULT)
    }

#' Create Centroids
#'
#' This functions create the signature centroids estimated from the
#' GalgoR output and the expression matrix of the training sets.
#'
#' @param output @param output An object of class \code{galgo.Obj}
#' @param solution_names A \code{character} vector with the names of the
#' solutions for which the centroids are to be calculated
#' @param trainset a \code{matrix} or \code{data.frame}. Must be an expression
#' matrix with features in rows and samples in columns
#' @param distancetype a \code{character} that can be either \code{'pearson'},
#' \code{'uncentered'}, \code{'spearman'} or \code{'euclidean'}
#'
#' @return Returns a list with the centroid matrix for each of the solutions
#' in \code{solution_names}, where each column represents the prototypic
#' centroid of a subtype and each row the constituents features of the
#' solution signature
#' @export
#' @usage create_centroids (output, solution_names, trainset,
#' distancetype = "pearson")
#'
#' @examples
#' # load example dataset
#' library(breastCancerTRANSBIG)
#' data(transbig)
#' Train <- transbig
#' rm(transbig)
#'
#' expression <- Biobase::exprs(Train)
#' clinical <- Biobase::pData(Train)
#' OS <- survival::Surv(time = clinical$t.rfs, event = clinical$e.rfs)
#'
#' # We will use a reduced dataset for the example
#' expression <- expression[sample(1:nrow(expression), 100), ]
#'
#' # Now we scale the expression matrix
#' expression <- t(scale(t(expression)))
#'
#' # Run galgo
#' output <- GSgalgoR::galgo(generations = 5, population = 15,
#' prob_matrix = expression, OS = OS)
#' outputDF <- to_dataframe(output)
#' outputList <- to_list(output)
#'
#' RESULTS <- non_dominated_summary(
#'     output = output, OS = OS,
#'     prob_matrix = expression,
#'     distancetype = "pearson"
#' )
#' CentroidsList <- create_centroids(output, RESULTS$solution,
#' trainset = expression)
create_centroids <-
    function(output,
            solution_names,
            trainset,
            distancetype = "pearson") {
        calculate_distance <-
            select_distance(distancetype = distancetype)

        CentroidsList <- vector("list",length(solution_names)) 
        names(CentroidsList) <- solution_names
        output_df <- to_dataframe(output)
        for (j in solution_names) {
            genes <- output_df[j, "Genes"][[1]]
            k <- output_df[j, "k"]
            name <- j
            Sub_matrix <- trainset[genes, ]

            D <- calculate_distance(Sub_matrix)

            true_class <- cluster_algorithm(D, k)

            Centroids <- k_centroids(Sub_matrix, true_class$cluster)
            CentroidsList[[name]] <- Centroids
        }
        return(CentroidsList)
    }


#' Classify samples from multiple centroids
#'
#' @param prob_matrix a \code{matrix} or \code{data.frame}. Must be an
#' expression matrix with features in rows and samples in columns
#' @param centroid_list a\code{list} with the centroid matrix for each of
#' the signatures to evaluate, where each column represents the prototypic
#' centroid of a subtype and each row the constituents features of the
#' solution signature. The output of
#' \code{\link[GSgalgoR:create_centroids]{create_centroids}} can be used.
#' @param distancetype  a \code{character} that can be either
#' \code{'pearson'} (default), \code{'spearman'} or \code{'kendall'}.
#'
#' @return Returns a \code{data.frame} with the classes assigned to
#' each sample in each signature, were samples are a rows and signatures
#' in columns
#' @export
#'
#' @examples
#' # load example dataset
#' library(breastCancerTRANSBIG)
#' data(transbig)
#' Train <- transbig
#' rm(transbig)
#'
#' expression <- Biobase::exprs(Train)
#' clinical <- Biobase::pData(Train)
#' OS <- survival::Surv(time = clinical$t.rfs, event = clinical$e.rfs)
#'
#' # We will use a reduced dataset for the example
#' expression <- expression[sample(1:nrow(expression), 100), ]
#'
#' # Now we scale the expression matrix
#' expression <- t(scale(t(expression)))
#'
#' # Run galgo
#' output <- GSgalgoR::galgo(generations = 5, population = 15,
#' prob_matrix = expression, OS = OS)
#' outputDF <- to_dataframe(output)
#' outputList <- to_list(output)
#'
#' RESULTS <- non_dominated_summary(
#'     output = output, OS = OS,
#'     prob_matrix = expression,
#'     distancetype = "pearson"
#' )
#' CentroidsList <- create_centroids(output, RESULTS$solution,
#' trainset = expression)
#' classes <- classify_multiple(prob_matrix = expression,
#' centroid_list = CentroidsList)
classify_multiple <-
    function(prob_matrix,
            centroid_list,
            distancetype = "pearson") {
        classes <-
            matrix(rep(NA, ncol(prob_matrix) * length(centroid_list)),
                ncol = length(centroid_list))
        as.data.frame <- classes
        colnames(classes) <- names(centroid_list)
        rownames(classes) <- colnames(prob_matrix)

        for (i in seq_len(length(centroid_list))) {
            centroids <- centroid_list[[i]]
            name <- names(centroid_list)[i]
            genes <- rownames(centroids)
            k <- ncol(centroids)
            Sub_matrix <- prob_matrix[genes, ]

            predicted_class <-
                cluster_classify(Sub_matrix, centroids, method = distancetype)

            predicted_class <- as.factor(predicted_class)
            classes[, name] <- predicted_class
        }

        return(classes)
    }

Any scripts or data that you put into this service are public.

GSgalgoR documentation built on Nov. 8, 2020, 6:57 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GSgalgoR
An Evolutionary Framework for the Identification and Study of Prognostic Gene Expression Signatures in Cancer

R/results-functions.R
In GSgalgoR: An Evolutionary Framework for the Identification and Study of Prognostic Gene Expression Signatures in Cancer

Defines functions classify_multiple create_centroids non_dominated_summary

Documented in classify_multiple create_centroids non_dominated_summary

Try the GSgalgoR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GSgalgoR An Evolutionary Framework for the Identification and Study of Prognostic Gene Expression Signatures in Cancer

R/results-functions.R In GSgalgoR: An Evolutionary Framework for the Identification and Study of Prognostic Gene Expression Signatures in Cancer

Defines functions classify_multiple create_centroids non_dominated_summary

Documented in classify_multiple create_centroids non_dominated_summary

Try the GSgalgoR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GSgalgoR
An Evolutionary Framework for the Identification and Study of Prognostic Gene Expression Signatures in Cancer

R/results-functions.R
In GSgalgoR: An Evolutionary Framework for the Identification and Study of Prognostic Gene Expression Signatures in Cancer