#' Clustering by PCA
#'
#' Perform PCA using prcomp function
#' @param DATA input matrices or seurat object. Row is genes, Column is samples
#' @param genes vector of gene list
#' @param pca_max how many principal component will be output (default=30)
#' @return pca object
#' @export
#' @examples
#' Clustering_PCA(DATA, genes=Gene_list)
Clustering_PCA <- function(DATA, genes=NULL, cells=NULL, pca_max = 30){
Data_type = as.character(class(DATA))
if(Data_type == "seurat"){
mat <- DATA@scale.data
}else if(Data_type != "matrix"){
mat <- as.matrix(DATA)
}else{
mat <- DATA
}
if(!is.null(genes)){
mat <- mat[genes,]
}
if(!is.null(cells)){
mat <- mat[,cells]
}
mat <- t(mat)
pca <- prcomp(mat, scale=FALSE, tol = 0, rank. = pca_max)
pca
}
RemoveOutlier <- function(x){
abs(x - median(x))/mad(x, constant=1)
}
#' remove outlier from PCA result
#'
#' Define outlier from PCA
#' @param pca pca object from Clustering_PCA
#' @param file if set file name, output which cells were removed.
#' @return vector of cell names
#' @export
#' @examples
#' Filter_PCA(pca, file="output.png")
Filter_PCA <- function(pca, t_outlier=20, file=NULL){
suppressPackageStartupMessages(library(dplyr))
D_table <- data.frame(Cell=rownames(pca$x), PC1=pca$x[,1], PC2=pca$x[,2], stringsAsFactors = FALSE)
mad_PC1 <- RemoveOutlier(D_table$PC1) < 20
mad_PC2 <- RemoveOutlier(D_table$PC2) < 20
cell_name <- rownames(pca$x)
ok_cell <- cell_name[mad_PC1 & mad_PC2]
### おかしなcellの名前をプロット
if(!is.null(file)){
suppressWarnings(suppressMessages(library(ggplot2)))
suppressWarnings(suppressMessages(library(cowplot)))
suppressWarnings(suppressMessages(library(ggrepel)))
D_pca <- D_table %>% select(PC1, PC2) %>% tidyr::gather(key = "PC", value = "score") %>% mutate(mad=c(mad_PC1, mad_PC2)) %>%
mutate(name=if_else(mad, "", rep(cell_name,2)))
p <- ggplot(D_pca, aes(x=PC, y=score, colour=PC, fill=PC, label =name)) + geom_violin() +
geom_jitter(colour=if_else(D_pca %>% pull(mad), "gray20", "purple"))+
geom_text_repel()+
theme(legend.position="none") + labs(x="", y="PCA score")
save_plot(file, p, base_height = 5, base_width = 4)
}
ok_cell
}
#' plot PCA result
#'
#' Custom PCA output using output from PCA
#' @param pca output from Clustering_PCA function
#' @param file graph image
#' @param legend_file output of legend graph
#' @param title title of graph
#' @param Xcom Specify principal component at X-axis. (Default = 1)
#' @param Ycom Specify principal component at Y-axis. (Default = 2)
#' @param cell_table data frame having cell informations. 1 of column should be "Cell". Other column could be use for specify coloring, shape, size information.
#' @param color_by specify color target column
#' @param shape_by specify shape target column
#' @param size_by specify size target column
#' @param width default = 5
#' @param height default = 5
#' @param pallete color vectors
#' @param option other options for ggplot drawing
#' @return graph file
#' @export
#' @examples
#' plot_PCA(pca, file="output.png")
plot_PCA <- function(pca, file=NULL, legend_file=NULL, title=NULL, Xcom=1, Ycom=2, cell_table=NULL, color_by=NULL, shape_by=NULL,
size_by=NULL, width=5, height=5, alpha=0.5, pallete = NULL, option=NULL, le_width = NULL, le_height = 4){
### cell_tableはdata.frame
# Cellというカラムが定義されていること!
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(cowplot))
D_table <- data.frame(Cell=rownames(pca$x), x=pca$x[,Xcom], y=pca$x[,Ycom], stringsAsFactors = FALSE)
if(!is.null(cell_table)){
D_table <- dplyr::left_join(D_table, cell_table, by="Cell", copy=FALSE)
}
# 寄与率
contribution <- pca$sdev^2/sum(pca$sdev^2)*100
if(is.null(color_by)){
p <- ggplot(D_table, aes_string(x="x", y="y", colour=color_by, size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
labs(x=paste("PC", Xcom, " (", format(contribution[Xcom], digits = 3), "%)", sep=""),
y=paste("PC", Ycom, " (", format(contribution[Ycom], digits = 3), "%)", sep=""), title=title)
}else{
if(length(unique(D_table[,color_by])) < 23){
if(is.null(pallete)){
pallete <-c("#00007F", "blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")
}
colors <- colorRampPalette(pallete)(length(unique(D_table[,color_by])))
p <- ggplot(D_table, aes_string(x="x", y="y", colour=factor(D_table[,color_by]), size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
scale_color_manual(values=colors, name=color_by) +
labs(x=paste("PC", Xcom, " (", format(contribution[Xcom], digits = 3), "%)", sep=""),
y=paste("PC", Ycom, " (", format(contribution[Ycom], digits = 3), "%)", sep=""), title=title)
}else{
mid <- median(D_table[,color_by], na.rm = TRUE)
p <- ggplot(D_table, aes_string(x="x", y="y", colour=color_by, size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
scale_color_gradient2(midpoint=mid, low="blue", mid="grey90", high="red", space ="Lab" )+
labs(x=paste("PC", Xcom, " (", format(contribution[Xcom], digits = 3), "%)", sep=""),
y=paste("PC", Ycom, " (", format(contribution[Ycom], digits = 3), "%)", sep=""), title=title)
}
}
if(!is.null(option)){
p <- p + option
}
if(!is.null(legend_file)){
save_plot(legend_file, get_legend(p), base_height = le_height, base_width = le_width)
}
if(!is.null(file)){
save_plot(file, p + theme(legend.position="none"), base_height = height, base_width = width)
}
p
}
#' perform tSNE clustering
#'
#' Perofrming tSNE clustering using PCA result as input
#' @param DATA pca result or data matrix
#' @param pca_use which principal component will be use for tSNE
#' @param theta 0.0 is original tSNE. 0.5 is quick version
#' @param perplexity perplexity. this value cannot exceed (nrow(mat) -1)/3
#' @param max_iter maximum number of iteration
#' @param seed seed
#' @return tsne result object
#' @export
#' @examples
#' tsne = Clustering_tSNE(pca, pca_use=1:10, perplexity=20)
Clustering_tSNE <- function(DATA, pca_use = 1:10, seed=1, perplexity = 20, theta = 0.0, max_iter=1000){
Data_type = as.character(class(DATA))
if(Data_type == "prcomp"){
mat <- DATA$x[,pca_use]
rownames(mat) <- rownames(DATA$x)
}else{
mat <- DATA
}
### check perplexity maximum
if(perplexity >= (nrow(mat) -1)/3){
cat(paste0(perplexity, " is too large\n"))
q()
}
library(Rtsne)
set.seed(seed) # 再現性の確保
tsne <- Rtsne(mat, dims = 2, verbose = FALSE, perplexity = perplexity, pca=FALSE,
theta = theta, max_iter=max_iter, check_duplicates=FALSE, num_threads=2)
tsne[["Cell"]] <- rownames(mat)
tsne
}
#' plot tSNE result
#'
#' plotting tSNE results
#' @param tsne result from Clustering_tSNE
#' @param file graph image
#' @param legend_file output of legend graph
#' @param title title of graph
#' @param cell_table data frame having cell informations. 1 of column should be "Cell". Other column could be use for specify coloring, shape, size information.
#' @param color_by specify color target column
#' @param shape_by specify shape target column
#' @param size_by specify size target column
#' @param width default = 5
#' @param height default = 5
#' @param pallete color vectors
#' @param option other options for ggplot drawing
#' @return graph of tSNE result
#' @export
#' @examples
#' plot_tSNE(tsne, file="output.png")
plot_tSNE <- function(tsne, file=NULL, legend_file=NULL, title=NULL, cell_table=NULL, color_by=NULL, shape_by=NULL, size_by=NULL,
width=5, height=5, alpha=0.5, pallete = NULL, option=NULL, le_width = NULL, le_height = 4){
### cell_tableはdata.frame
# Cellというカラムが定義されていること!
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(cowplot))
D_table <- data.frame(Cell=tsne$Cell, x=tsne$Y[,1], y=tsne$Y[,2], stringsAsFactors = FALSE)
if(!is.null(cell_table)){
D_table <- dplyr::left_join(D_table, cell_table, by="Cell", copy=FALSE)
}
if(is.null(color_by)){
p <- ggplot(D_table, aes_string(x="x", y="y", colour=color_by, size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
labs(x="tSNE_1", y="tSNE_2", title=title)
}else{
if(length(unique(D_table[,color_by])) < 23){
if(is.null(pallete)){
pallete <-c("#00007F", "blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")
}
colors <- colorRampPalette(pallete)(length(unique(D_table[,color_by])))
p <- ggplot(D_table, aes_string(x="x", y="y", colour=factor(D_table[,color_by]), size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
scale_color_manual(values=colors, name=color_by) +
labs(x="tSNE_1", y="tSNE_2", title=title)
}else{
mid <- median(D_table[,color_by], na.rm = TRUE)
p <- ggplot(D_table, aes_string(x="x", y="y", colour=color_by, size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
scale_color_gradient2(midpoint=mid, low="blue", mid="grey60", high="red", space ="Lab" )+
labs(x="tSNE_1", y="tSNE_2", title=title)
}
}
if(!is.null(option)){
p <- p + option
}
if(!is.null(legend_file)){
save_plot(legend_file, get_legend(p), base_height = le_height, base_width = le_width)
}
if(!is.null(file)){
save_plot(file, p + theme(legend.position="none"), base_height = height, base_width = width)
}
p
}
#' Perform clustering using SIMILR
#'
#' perform clustering using SIMILR method
#' @param DATA output from clustering_PCA
#' @return similr output object
#' @export
#' @examples
#' sim = Clustering_SIMILR(pca, pca_use = 1:10)
Clustering_SIMILR <- function(DATA, pca_use = 1:10, seed=1){
suppressWarnings(suppressMessages(library(SIMLR)))
suppressWarnings(suppressMessages(library(igraph)))
result <- list()
Data_type = as.character(class(DATA))
if(Data_type == "prcomp"){
mat <- DATA$x[,pca_use]
rownames(mat) <- rownames(DATA$x)
}else{
mat <- DATA
}
mat <- t(mat)
### 上位3位のclusterを出す
SIM_estimate <- SIMLR_Estimate_Number_of_Clusters(mat, 2:10, cores.ratio = 0)
SIM_estimate <- data.frame(cluster_num=2:10, value=SIM_estimate$K1)
SIM_estimate <- SIM_estimate %>% filter(min_rank(value) < 4) %>% arrange(desc(value)) %>% pull(cluster_num)
result[["SIM_estimate"]] <- SIM_estimate
result[["Cell"]] <- colnames(mat)
i=1
set.seed(seed)
for(cluster in SIM_estimate){
sim <- SIMLR(X = mat, c = cluster)
colnames(sim$ydata) <- paste0("SIMLR", i, "_", c("X", "Y"))
if(i==1){
result[["X"]] <- sim$ydata[,1]
result[["Y"]] <- sim$ydata[,2]
}else{
result[["X"]] <- cbind(result[["X"]], sim$ydata[,1])
result[["Y"]] <- cbind(result[["Y"]], sim$ydata[,2])
}
i <- i+1
}
result
}
#' make graph of SIMILR result
#'
#' make graph for SIMILR results
#' @param sim Clustering_SIMILR result object
#' @param target which rank of SIMILR reulst will be output (default : best result)
#' @param file graph image
#' @param legend_file output of legend graph
#' @param title title of graph
#' @param Xcom Specify principal component at X-axis. (Default = 1)
#' @param Ycom Specify principal component at Y-axis. (Default = 2)
#' @param cell_table data frame having cell informations. 1 of column should be "Cell". Other column could be use for specify coloring, shape, size information.
#' @param color_by specify color target column
#' @param shape_by specify shape target column
#' @param size_by specify size target column
#' @param width default = 5
#' @param height default = 5
#' @param pallete color vectors
#' @param option other options for ggplot drawing
#' @return graphs
#' @export
#' @examples
#' plot_SIMILR(sim, file="output.png")
plot_SIMILR <- function(sim, file=NULL, target=1, legend_file=NULL, title=NULL, cell_table=NULL, color_by=NULL, shape_by=NULL, size_by=NULL,
width=5, height=5, alpha=0.5, pallete = NULL, option=NULL, le_width = NULL, le_height = 4){
### cell_tableはdata.frame
# Cellというカラムが定義されていること!
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(cowplot))
D_table <- data.frame(Cell=sim$Cell, x=sim$X[,target], y=sim$Y[,target], stringsAsFactors = FALSE)
if(!is.null(cell_table)){
D_table <- dplyr::left_join(D_table, cell_table, by="Cell", copy=FALSE)
}
if(is.null(color_by)){
p <- ggplot(D_table, aes_string(x="x", y="y", colour=color_by, size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
labs(x="SIMILR_1", y="SIMILR_2", title=title)
}else{
if(length(unique(D_table[,color_by])) < 23){
if(is.null(pallete)){
pallete <-c("#00007F", "blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")
}
colors <- colorRampPalette(pallete)(length(unique(D_table[,color_by])))
p <- ggplot(D_table, aes_string(x="x", y="y", colour=factor(D_table[,color_by]), size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
scale_color_manual(values=colors, name=color_by) +
labs(x="SIMILR_1", y="SIMILR_2", title=title)
}else{
mid <- median(D_table[,color_by], na.rm = TRUE)
p <- ggplot(D_table, aes_string(x="x", y="y", colour=color_by, size=size_by, shape=shape_by, stroke=0)) + geom_point(alpha=alpha) +
scale_color_gradient2(midpoint=mid, low="blue", mid="grey90", high="red", space ="Lab" )+
labs(x="SIMILR_1", y="SIMILR_2", title=title)
}
}
if(!is.null(option)){
p <- p + option
}
if(!is.null(legend_file)){
save_plot(legend_file, get_legend(p), base_height = le_height, base_width = le_width)
}
if(!is.null(file)){
save_plot(file, p + theme(legend.position="none"), base_height = height, base_width = width)
}
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.