# Identification of G0-like cells
######################################################################################
require(car)
require(dplyr)
#' Kmeans Clustering - returns classification result
#'
#' This function performs imple kmeans clustering and prepares as input for identification of G0 cells
#'@param mean_scores The mean score results returned from the reCAT software, "get_score"
#'@param k The number of clusters, defaults to 5. ie G0, G1, S, G2, M cycle stages
#'@param nstart Defaults to 100. Number of times to start the kmeans clustering, ensure stability of clustering results.
#'@export
#'@keywords kmeans_clustering
#'@section G0 Identification:
#'@examples
#'cycle_mean_scores <- get_score(t(test_exp))$mean_score
#'kmean_classification <- kmeans_clustering(cycle_mean_scores)
#'statistical_test(cycle_mean_scores, kmean_classification, threshold = 0.001)
#'# Sample output
#'#"Possible group of G0-like cells is: 2"
kmeans_clustering <- function(mean_scores, k = 5, nstart = 100){
km <- kmeans(mean_scores, centers = k, nstart = nstart)
km_result <- km$cluster
return(km_result)
}
#' Identification of G0-like cells
#'
#' This function performs the simple statistical tests to determine the group of G0 like cells within single cell datasets.
#'@param mean_scores The mean score results returned from the reCAT software, "get_score"
#'@param kmean_result The classification of kmeans clustering result.
#'@param threshold Defaults to 0.001. The criterion that all p-values must be less than the threshold in order to determine G0-like cells
#'@export
#'@keywords statistical_test
#'@section G0 Identification:
#'@examples
#'cycle_mean_scores <- get_score(t(test_exp))$mean_score
#'kmean_classification <- kmeans_clustering(cycle_mean_scores)
#'statistical_test(cycle_mean_scores, kmean_classification, threshold = 0.001)
#'# Sample output
#'#"Possible group of G0-like cells is: 2"
#'
statistical_test <- function(mean_scores, kmean_result, threshold = 0.001){
# find cluster with lowest mean
require(car)
require(dplyr)
mean_val <- matrix(0, nrow = length(kmean_result), ncol = 6)
for(i in 1:length(kmean_result)){
mean_v <- colMeans(mean_scores[which(kmean_result == i),])
mean_val[i, ] <- mean_v
}
#print(which.min(rowMeans(mean_val)))
lowest_cluster <- which.min(rowMeans(mean_val))
#return(mean_val)
new_kmean_result <- kmean_result[-which(kmean_result == lowest_cluster)]
comparisons <- unique(new_kmean_result)
compare_group <- which(kmean_result == lowest_cluster)
c_group_length <- length(compare_group)
n_total <- nrow(mean_scores)
new_data_table <- group_by(mean_scores, as.factor(kmean_result))
colnames(new_data_table)[7] <- "cls_result"
store_results <- matrix(0, nrow = length(comparisons), ncol = 6)
store_levine <- matrix(0, nrow = length(comparisons), ncol = 6)
store_oneway <- matrix(0, nrow = length(comparisons), ncol = 6)
store_t_test <- matrix(0, nrow = length(comparisons), ncol = 6)
store_nonparam <- matrix(0, nrow = length(comparisons), ncol = 6)
for(i in 1:length(comparisons)){
for(j in 1:6){
tmp_group <- c(which(kmean_result == comparisons[i]), compare_group)
comp_group_length <- length(which(kmean_result == comparisons[i]))
tmp_data <- mean_scores[tmp_group, j]
tmp_group_label <- factor(c(rep("comp", comp_group_length), rep("control", c_group_length)))
model <- lm(tmp_data~tmp_group_label)
levene <- leveneTest(tmp_data~tmp_group_label)
oneway_model <- oneway.test(tmp_data~tmp_group_label)
ttest_model <- pairwise.t.test(tmp_data, tmp_group_label)
kruskal_model <- kruskal.test(tmp_data~tmp_group_label)
levene_stats <- levene$Pr[1]
anova_stats <- anova(model)$Pr[1]
oneway_stats <- oneway_model$p.value
ttest_stats <- ttest_model$p.value
kruskal_stats <- kruskal_model$p.value
store_results[i, j] <- anova_stats
store_levine[i, j] <- levene_stats
store_oneway[i, j] <- oneway_stats
store_t_test[i, j] <- ttest_stats
store_nonparam[i, j] <- kruskal_stats
}
#print(i)
}
rownames(store_results) <- make.names(comparisons)
colnames(store_results) <- colnames(mean_scores)
#print(round(store_nonparam, 3))
#print(store_oneway)
if(length(which(store_results > threshold))==0){
print(paste("Possible group of G0-like cells is: ",lowest_cluster, sep = ""))
}else{
print("None identified")
}
return(store_results)
}
######################################################################################################
#' Visualization of G0-like distribution in mean scores
#'
#' Provides boxplot for visualizing the cell cluster distribution across different cell cycle stages mean scores
#'@param mean_scores The mean score results returned from the reCAT software, "get_score"
#'@param kmean_result The classification of kmeans clustering result.
#'@export
#'@keywords distribution_plot
#'@section G0 Identification:
#'@examples
#'cycle_mean_scores <- get_score(t(test_exp))$mean_score
#'kmean_classification <- kmeans_clustering(cycle_mean_scores)
#'require(scater)
#'distribution_plot(cycle_mean_scores, kmean_classification)
distribution_plot <- function(mean_scores, kmean_result){
par(mfrow=c(3, 2))
# Plot for each mean score
g1 <- ggplot(mean_scores, aes(x = kmean_result, y = G1Score)) +
geom_boxplot(fill = "grey80", colour = "blue") +
scale_x_discrete() + xlab("Kmeans Cluster") + ggtitle("G1") +
ylab("Mean score")
g1s <- ggplot(mean_scores, aes(x = kmean_result, y = G1SScore)) +
geom_boxplot(fill = "grey80", colour = "blue") +
scale_x_discrete() + xlab("Kmeans Cluster") + ggtitle("G1S") +
ylab("Mean score")
s <- ggplot(mean_scores, aes(x = kmean_result, y = SScore)) +
geom_boxplot(fill = "grey80", colour = "blue") +
scale_x_discrete() + xlab("Kmeans Cluster") + ggtitle("S") +
ylab("Mean score")
g2 <- ggplot(mean_scores, aes(x = kmean_result, y = G2Score)) +
geom_boxplot(fill = "grey80", colour = "blue") +
scale_x_discrete() + xlab("Kmeans Cluster") + ggtitle("G2") +
ylab("Mean score")
g2m <- ggplot(mean_scores, aes(x = kmean_result, y = G2MScore)) +
geom_boxplot(fill = "grey80", colour = "blue") +
scale_x_discrete() + xlab("Kmeans Cluster") + ggtitle("G2M") +
ylab("Mean score")
m <- ggplot(mean_scores, aes(x = kmean_result, y = MScore)) +
geom_boxplot(fill = "grey80", colour = "blue") +
scale_x_discrete() + xlab("Kmeans Cluster") + ggtitle("M") +
ylab("Mean score")
multiplot(g1, g1s, s, g2, g2m, m, cols = 2)
}
#' Visualization of G0-like distribution in mean scores along a cell cycle time-series.
#' 6 plots for each of G1, S, G1/S, G2, M, G2/M mean scores.
#'
#' Provides mean score expression changees along the cell cycle time-series with classification of different cell clusters.
#'@param mean_scores The mean score results returned from the reCAT software, "get_score"
#'@param kmean_result The classification of kmeans clustering result.
#'@param ordIndex The order obtained from reCAT, "get_ordIndex" which returns the cell cycle time-series of the data.
#'@export
#'@keywords mean_score_plot
#'@section G0 Identification:
#'@examples
#'cycle_mean_scores <- get_score(t(test_exp))$mean_score
#'kmean_classification <- kmeans_clustering(cycle_mean_scores)
#'reCAT_ordIndex <- get_ordIndex(test_exp, threadnum = 2)
#'require(scater)
#'mean_score_plot(cycle_mean_scores, kmean_classification, reCAT_ordIndex)
mean_score_plot <- function(mean_scores, kmean_result, ordIndex){
par(mfrow=c(3, 2))
n_ <- nrow(mean_scores)
g1 <- qplot(x = c(1:n_), y = mean_scores$G1Score[ordIndex],
col = as.factor(kmean_result)[ordIndex])+#plot_labels[,2])+
labs(title = "G1", x = "Pseudotime", y = "Mean Scores", colour = "Selection")+
theme(axis.title=element_text(size=10),title=element_text(size = 10),legend.text = element_text(size = 10),
legend.title = element_text(size= 10), axis.text = element_text(size = 10))+geom_point(size = 2)
g1s <- qplot(x = c(1:n_), y = mean_scores$G1SScore[ordIndex],
col = as.factor(kmean_result)[ordIndex])+#plot_labels[,2])+
labs(title = "G1S", x = "Pseudotime", y = "Mean Scores", colour = "Selection")+
theme(axis.title=element_text(size=10),title=element_text(size = 10),legend.text = element_text(size = 10),
legend.title = element_text(size= 10), axis.text = element_text(size = 10))+geom_point(size = 2)
s <- qplot(x = c(1:n_), y = mean_scores$SScore[ordIndex],
col = as.factor(kmean_result)[ordIndex])+#plot_labels[,2])+
labs(title = "S", x = "Pseudotime", y = "Mean Scores", colour = "Selection")+
theme(axis.title=element_text(size=10),title=element_text(size = 10),legend.text = element_text(size = 10),
legend.title = element_text(size= 10), axis.text = element_text(size = 10))+geom_point(size = 2)
g2 <- qplot(x = c(1:n_), y = mean_scores$G2Score[ordIndex],
col = as.factor(kmean_result)[ordIndex])+#plot_labels[,2])+
labs(title = "G2", x = "Pseudotime", y = "Mean Scores", colour = "Selection")+
theme(axis.title=element_text(size=10),title=element_text(size = 10),legend.text = element_text(size = 10),
legend.title = element_text(size= 10), axis.text = element_text(size = 10))+geom_point(size = 2)
g2m <- qplot(x = c(1:n_), y = mean_scores$G2MScore[ordIndex],
col = as.factor(kmean_result)[ordIndex])+#plot_labels[,2])+
labs(title = "G2M", x = "Pseudotime", y = "Mean Scores", colour = "Selection")+
theme(axis.title=element_text(size=10),title=element_text(size = 10),legend.text = element_text(size = 10),
legend.title = element_text(size= 10), axis.text = element_text(size = 10))+geom_point(size = 2)
m <- qplot(x = c(1:n_), y = mean_scores$MScore[ordIndex],
col = as.factor(kmean_result)[ordIndex])+#plot_labels[,2])+
labs(title = "M", x = "Pseudotime", y = "Mean Scores", colour = "Selection")+
theme(axis.title=element_text(size=10),title=element_text(size = 10),legend.text = element_text(size = 10),
legend.title = element_text(size= 10), axis.text = element_text(size = 10))+geom_point(size = 2)
multiplot(g1, g1s, s, g2, g2m, m, cols = 2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.