#修改说明:
#2019——10-28 增加tsne_heatmap按照major_cond分组显示的功能
#2019_11_10 combined_data_sampled开始使用Raw Data,对density,tsne heatmap的代码进行适应性调整;
# tsne heatmap增加raw,0_to_max,min_to_max等bar展示方法
# 将phenograph_heatmap挪进此步骤
#2019_11_11 为tsne散点图增加等高线背景
#' @title dif_seq_rainbow
#'
#' @description
#' 生成一系列差异较大的颜色,适合于在散点图上显示不同population的细胞
#'
#' @param n 需要产生的颜色种类数量
#' @return 返回指定数量的颜色序列
#' @export
dif_seq_rainbow<-function(n){
library("colorspace")
div=11
depth= n %/% div+2
if (n %% div!=0) depth=depth+1
base_color<-c("red","#FF6B00","gold","greenyellow","green","cyan","#007FFF","blue","#4A00E9","darkviolet","#C90069")
color_matrix<-matrix(0,ncol=depth,nrow=0)
for(col_i in base_color){
seq_col<-colorRampPalette(c("white",col_i,"black"))(depth)
color_matrix<-rbind(color_matrix,seq_col)
}
color_matrix<-color_matrix[,c(-1,-1*depth)]
color_matrix_order<-data.frame(n=c(1:(depth-2)),t(color_matrix))
color_matrix_order$n<-abs(color_matrix_order$n-mean(color_matrix_order$n))
color_matrix_order2<-color_matrix_order[order(color_matrix_order[,"n"]),]
color_matrix_order3<-t(color_matrix_order2[,-1])
set.seed(456)
rdm<-function(var){sample(var,length(var))}
#rdm(1)
color_matrix_order4<-apply(color_matrix_order3,2,rdm)
color_vector<-as.vector(color_matrix_order4)[c(1:n)]
#if(n>30) cat("NOTE:You are using dif_seq_hcl to produce more than 30 colors, some of them may be difficult to distinguish\n")
return(color_vector)
}
#' @title 生成颜色系列
#'
#' @description
#' RColorBrewer Set1,Set2,Set3合在一起使用,生成包含29种差异较大的颜色的系列
#'
#' @param n 需要产生的颜色种类数量
#' @return 返回指定数量的颜色序列
#' @export
brewer_color_sets<-function(n)
{
library(RColorBrewer)
brewer_sets<-c(brewer.pal(9,"Set1"),brewer.pal(8,"Set2"),brewer.pal(12,"Set3"))[-c(6,19)] #剔除两个黄色
#colorset<-as.data.frame(matrix(ncol=1,nrow = n))
if(n>0) return(brewer_sets[1:n])
}
#' @title Elbow Test
#'
#' @description
#' Phenograph 的k值是指在计算k 近邻网络(knn)的所取得k值,它的意义是网络中每个细胞的最近邻居数。K值越大,最后得到的cluster越少,反之越大。
#' 因此过大的k值会引起“underclustering”,导致cluster偏少,过小的k值则会导致“overclustering”,导致cluster偏高。
#' 选择合适的k值就是在两者之间寻求平衡。比较直观的方法就是在k-cluster图中,找到曲线的 “elbow piont”。
#'
#'
#' @param x 需要进行测试的表达数据矩阵
#' @param k_from 自然数,测试中k的取值范围的起始点
#' @param k_to 自然数,测试中k的取值范围的结束点
#' @param k_step 自然数,k取值的步长
#' @return 返回值一个矩阵,记录每个k值对应的cluster数目
#' @export
PG_elbow<-function(x,k_from=5,k_to=100,k_step=5){
test_range<-seq(from=k_from,to=k_to,by=k_step)
PhenoGraph_elbow<-c(0,0)
for(k in test_range){
cluster_PhenoGraph <-as.numeric(membership(Rphenograph(data = x,k)))
if(k==5){PhenoGraph_elbow<-c(k,max(cluster_PhenoGraph))
}else{PhenoGraph_elbow<-rbind(PhenoGraph_elbow,c(k,max(cluster_PhenoGraph)))
}
}
colnames(PhenoGraph_elbow)<-c("PhenoGraph k","cluser number")
plot(PhenoGraph_elbow,type="b")
return(PhenoGraph_elbow)
}
#' @title metacluster_multi
#'
#' @description
#'利用FlowSOM或其他方法进行聚类以后,可以对产生的cluster进一步聚类,这些新产生的类被称为“metacluster”。
#'metacluster_multi可以利用FlowSOM自带的metacluster函数,以及PhenoGraph算法产生metacluster。
#'#'
#' @param data 包含所有cluster的表达矩阵,dataframe或者矩阵格式
#' @param method 要使用的聚类方法名称,取值有“PhenoGraph”,“metaClustering_consensus”,“metaClustering_hclust”, “metaClustering_kmeans”
#' @param nCluster 使用“metaClustering_consensus”和“metaClustering_kmeans”两种方法时,用其指定产生metaCluster的数量
#' @param max 使用“metaClustering_hclust”时,确定一个metacluster的上限,函数将在此下寻找合适的metacluster数量
#' @param k 使用“PhenoGraph"时,设置knn网络的k值,默认值为15
#'
#' @return 一个数组(Numeric array),包含对应每个cluster的metacluster数值
#' @export
metacluster_multi<-function(data,
method="metaClustering_hclust",
nCluster=NULL,
max=50,
k=15){
cat(paste0("Metaclustering with ",method))
if (method!="PhenoGraph"){
metaClusters <-suppressMessages(MetaClustering(data=data,
method=method,
nClus=nCluster,
max=max))
}else{
metaClusters <-as.numeric(membership(Rphenograph(data = data,k=k)))
}
return(metaClusters)
}
#' @title equal_sample
#'
#' @description
#' 从各个Meta cluster或文件中抽取定量细胞进行可视化
#' 可视化前的Sampling有两个功能,第一,减少后续用于降维分析的细胞总数,一般推荐在10万以内,以保证降维效果。
#' 第二, 解决原始数据中文件或者cluster存在细胞数目不均衡的情况。增加对小文件或cluster的可见性。
#' @param x 需要进行Sampleing表达数据矩阵,除了表达数据,还包括两个通道:“File_ID”,以及聚类产生的cluster通道。
#' @param sample_type Sample的方式,#sample_type="by_cluster",从每个cluster抽取相同细胞;sample_type="by_file",从每个样本抽取相同细胞
#' @param sample_method 默认"ceil", #两种取值:"ceil", "all",取"all"时,采取所有细胞
#' @param sample_num 每个样本或者每个cluster抽取的细胞数量;
#' @param cluster_name cluster通道的名称
#' @return 返回值一个矩阵,包含所有sample取得的表达数据
#' @export
equal_sample<-function( x,
sample_type="by_file", #,#sample_type="by_cluster",从每个cluster抽取相同细胞;sample_type="by_file",从每个样本(文件)抽取相同细胞; "by_cond",从每个major condition抽取相同细胞
sample_method="ceil", #四种取值:"ceil", "all"
sample_num=1000, #每个样本或者每个cluster抽取的细胞数量;
cluster_name="metaCluster",
groups=NULL,
sample_cond=NULL
)
{
#x=combined_data_analysed
x$row_name<-rownames(x)
if(sample_type=="by_cond") {
#sample_cond="Tissue_Type"
sample_per_cond<-unique(groups[,as.character(sample_cond)])
groups$File_ID<-as.character( groups$File_ID)
x$File_ID<-as.character(x$File_ID)
x<-dplyr::left_join(x,groups,by="File_ID")
}
if(sample_type=="by_cluster") sample_col=cluster_name
if(sample_type=="by_file") sample_col="File_ID"
if(sample_type=="by_cond") sample_col=sample_cond
combined_data_sampled<-as.data.frame(matrix(nrow=0,ncol=ncol(x)))
for(meta_i in unique(x[,sample_col])){
#meta_i=unique(x[,sample_col][1])
cell_n=length(which(x[,sample_col]==meta_i))
if (sample_method=="ceil"){
cell_n=min(cell_n,sample_num)
}
data_sampled<-x%>%
#dplyr::filter_(paste0(sample_col,"==meta_i"))%>%
dplyr::filter_at(vars(matches(sample_col)),all_vars(.==meta_i)) %>%
sample_n(cell_n)
data_sampled
combined_data_sampled<-rbind(data_sampled,combined_data_sampled)
}
rownames(combined_data_sampled)<-combined_data_sampled$row_name
combined_data_sampled$row_name=NULL
clustern<-length(unique(x[,sample_col]))
#cat(paste0("Total sampled events:",nrow(combined_data_sampled)))
layout(matrix(c(1,3,2,4),ncol=2))
smr_data_total<-x %>% count_(cluster_name) #smr is short for summerise
plot1<-barplot(smr_data_total$n,
names.arg=smr_data_total[,1,drop=T],
main="Events of clusters(Total)")
smr_data_sampled<-combined_data_sampled %>% count_(cluster_name)
plot2<-barplot(smr_data_sampled$n,
names.arg=smr_data_sampled[,1,drop=T],
main=paste0("Events of clusters (Sampled ",sample_type,")"))
smr_data_total<-x %>% count_("File_ID")
plot3<-barplot(smr_data_total$n,
#names.arg=smr_data_sampled[,1,drop=T],
main="Events of Files (Total)")
smr_data_sampled<-combined_data_sampled %>% count_("File_ID")
plot4<-barplot(smr_data_sampled$n,
#names.arg=smr_data_sampled[,1,drop=T],
#axis(side=1,lwd=0,las=1),
main=paste0("Events of Files (Sampled ",sample_type,")"))
layout(matrix(c(1),ncol=1))
return(combined_data_sampled)
}
#' @title draw_tsne_figs
#' @description
#' 自动生成系列tSNE图;
#'
#' @param combined_data_plot 数据框或者矩阵,附带有(meta)cluster聚类、降维结果(t_sne_1,t_sne_2)的表达矩阵
#' @param groups 实验组的Metadata信息,其中必需包含列名:”Short_name“
#' @param cluster_color tSNE图的亚群的配色方案,默认dif_seq_rainbow
#' @param cluster_name 选择groups中一个列名做为展现差异的major_cond
#' @param output_dir 输出数据文件夹名称
#' @param major_cond 选择groups中一个列名做为展现差异的major_cond
#' @param reduction_dm1 combined_data_plot降维产生的维度,默认"tsne_1"
#' @param reduction_dm2 combined_data_plot降维产生的维度,默认"tsne_2"
#' @param dot_size 散点图点的大小,默认是4
#' @param show_contour 布尔型变量,决定是否显示等高线背景
#' @param contour_line_size 设置等高线的宽度,默认0.5
#' @param contour_bins 设置等高线的层数,默认25
#' @param contour_colour 设置等高线的颜色,默认"grey"
#' @param group_seq 设置各组顺序,格式实例: group_seq=c("PBMC","Biopsy"),注:括号内为各组名称;
#' @param output_format 输出数据文件格式:"tiff"和“pdf”,默认"pdf"
#' @export
draw_tsne_figs<-function(
combined_data_plot,
groups,
cluster_color=dif_seq_rainbow,
cluster_name="metacluster",
output_dir="cluster_tsne_plots",
major_cond="Tissue_Type",
reduction_dm1="tsne_1",
reduction_dm2="tsne_2",
dot_size=4,
text_size=30,
show_contour=FALSE,
contour_line_size=0.5,
contour_bins=25,
contour_colour="grey",
group_seq=NULL,
output_format="pdf"){
if(0){
cluster_color=dif_seq_rainbow
cluster_name="metacluster"
output_dir="cluster_tsne_plots"
major_cond="Tissue_Type"
reduction_dm1="tsne_1"
reduction_dm2="tsne_2"
group_seq=NULL
}
library(colorRamps)
library(Rmisc)
if (!is.null(group_seq)){
groups[,major_cond]<-factor(groups[,major_cond,drop=T],
levels=group_seq,
ordered=T)
}
if (!dir.exists(paste0("./",output_dir))) {
dir.create(paste0("./",output_dir))
}
#head(combined_data_plot)
combined_data_plot$File_ID<-as.character(combined_data_plot$File_ID)
combined_data_plot<-full_join(combined_data_plot,groups,by="File_ID")
combined_data_plot2<-combined_data_plot
#%>%
#dplyr::filter(Merge_in_fig=="Yes")
#str(combined_data_plot2)
cluster_index<-colnames(combined_data_plot)==cluster_name
combined_data_plot[,cluster_index]<-as.factor(combined_data_plot[,cluster_index])
major_cond_index<-colnames(combined_data_plot)==major_cond
major_cond_n<-length(unique(combined_data_plot[,major_cond_index,drop=TRUE]))
combined_data_group<-combined_data_plot %>%
group_by_at(cluster_name)
centers<-eval(parse(text=paste0("summarise(combined_data_group,",reduction_dm1,"=median(",reduction_dm1,"),",reduction_dm2,"=median(",reduction_dm2,"))")))
for (file_id in groups$File_ID)
{
file_center<-centers
file_center$File_ID<-file_id
if (file_id==groups[1,"File_ID"]){
file_centers<-file_center
}else{
file_centers<-rbind(file_centers,file_center)
}
}
file_centers<-full_join(file_centers,groups,by="File_ID")
#str(file_centers)
mytheme <- theme(panel.background = element_rect(fill = "white", colour = "black", size = 0.2), #坐标系及坐标轴
legend.key = element_rect(fill = "white", colour = "white"), #图标
legend.background = (element_rect(colour= "white", fill = "white")))
head(combined_data_plot)
combined_data_plot<-combined_data_plot %>%
dplyr::arrange(Short_name) %>%
dplyr::arrange_(major_cond)
#1生成合并tSNE-Phenograph图像
if(output_format=="pdf")
pdf(file=paste0("./",output_dir,"/","tSNE-",cluster_name," merge.pdf"),width = 1300/72,height = 1200/72)
else if(output_format=="tiff")
tiff(filename = paste0("./",output_dir,"/","tSNE-",cluster_name," merge.tiff"),width=1300,height=1200)
else
{ message(paste0("Error-Output format ",output_format," is not supported\n"))
return(NULL)}
plot1<-ggplot(combined_data_plot,aes_string(x=reduction_dm1,y=reduction_dm2))
if(show_contour==TRUE){
plot1<-plot1+ geom_density_2d(colour=contour_colour,size=contour_line_size,bins=contour_bins)+
scale_x_continuous(limits=c(min(combined_data_plot[,reduction_dm1])*1.15,max(combined_data_plot[,reduction_dm1])*1.15))+
scale_y_continuous(limits=c(min(combined_data_plot[,reduction_dm2])*1.15,max(combined_data_plot[,reduction_dm2])*1.15))
}
plot1<-plot1+
geom_point(aes_string(colour=cluster_name),size=dot_size)+
mytheme+
theme(text=element_text(size=text_size),legend.title=element_blank())+
geom_text(data=centers,aes_string(x=reduction_dm1,y=reduction_dm2),label=centers[,1,drop=TRUE],colour="black",size=20)+
#scale_color_manual(values = primary.colors(nrow(centers)+1)[-1])#facet_wrap(~File_ID)
scale_color_manual(values = cluster_color(nrow(centers)))
# plot1<-plot1+ geom_density_2d(aes_string(x=reduction_dm1,y=reduction_dm2))
multiplot(plot1)
dev.off()
cat(paste0("Exported: ","tSNE-",cluster_name," merge.",output_format,"\n"))
#2生成合并tSNE-文件图像
if(output_format=="pdf")
pdf(file=paste0("./",output_dir,"/","tSNE-","Files"," merge.pdf"),width = 1300/72*(1+nrow(groups)/110),height = 1200/72)
else if(output_format=="tiff")
tiff(filename = paste0("./",output_dir,"/","tSNE-","Files"," merge.tiff"),width=1300*(1+nrow(groups)/110),height=1200)
else
{ message(paste0("Error-Output format ",output_format," is not supported\n"))
return(NULL)}
#jpeg(filename = paste0("tSNE-Phenograph merge.jpeg"),width=1000,height=1000,quality = 600)
plot1<-ggplot(combined_data_plot,aes_string(x=reduction_dm1,y=reduction_dm2))
if(show_contour==TRUE){
plot1<-plot1+ geom_density_2d(colour=contour_colour,size=contour_line_size,bins=contour_bins)+
scale_x_continuous(limits=c(min(combined_data_plot[,reduction_dm1])*1.15,max(combined_data_plot[,reduction_dm1])*1.15))+
scale_y_continuous(limits=c(min(combined_data_plot[,reduction_dm2])*1.15,max(combined_data_plot[,reduction_dm2])*1.15))
}
plot1<-plot1+
geom_point(aes_string(colour="Short_name"),size=dot_size)+
mytheme+
#geom_text(data=file_centers,aes_string(x=reduction_dm1,y=reduction_dm2),label=centers[,1,drop=TRUE],colour="black",size=5)+
#scale_color_manual(values = primary.colors(nrow(centers)+1)[-1])#facet_wrap(~File_ID)
scale_color_manual(values = cluster_color(nrow(groups)))
multiplot(plot1)
dev.off()
cat(paste0("Exported: ","tSNE-","Files"," merge.",output_format,"\n"))
#2生成单个文件的tSNE-Phenograph图像
if(output_format=="pdf")
pdf(file=paste0("./",output_dir,"/","tSNE-",cluster_name,"(sample name).pdf"),width = 1200*major_cond_n/72,height = 1200*ceiling(nrow(groups)/major_cond_n)/72)
else if(output_format=="tiff")
tiff(filename = paste0("./",output_dir,"/","tSNE-",cluster_name,"(sample name).tiff"),width = 1200*major_cond_n,height = 1200*ceiling(nrow(groups)/major_cond_n))
else
{ message(paste0("Error-Output format ",output_format," is not supported\n"))
return(NULL)}
#jpeg(filename = paste0("./",output_dir,"/","tSNE-",cluster_name,"(sample name).jpeg"),width=1200*major_cond_n,height=1200*ceiling(nrow(groups)/major_cond_n),quality = 100)
plot2<-ggplot(combined_data_plot)+
geom_point(aes_string(x=reduction_dm1,y=reduction_dm2,colour=cluster_name),size=4)+
geom_text(data=file_centers,aes_string(x=reduction_dm1,y=reduction_dm2),label=file_centers[,1,drop=TRUE],colour="black",size=7)+
mytheme+
theme(legend.text=element_text(size=text_size))+
#theme(legend.key.height=unit(10,"cm"))+
facet_wrap(~Short_name,ncol=major_cond_n)+
scale_color_manual(values = cluster_color(nrow(centers)))
multiplot(plot2)
dev.off()
#
# plot2<-ggplot(combined_data_plot,aes_string(x=reduction_dm1,y=reduction_dm2))
#
# if(show_contour==TRUE){
#
# plot2<-plot2+ geom_density_2d(colour=contour_colour,size=contour_line_size,bins=contour_bins)+
# scale_x_continuous(limits=c(min(combined_data_plot[,reduction_dm1])*1.15,max(combined_data_plot[,reduction_dm1])*1.15))+
# scale_y_continuous(limits=c(min(combined_data_plot[,reduction_dm2])*1.15,max(combined_data_plot[,reduction_dm2])*1.15))
# }
#
# plot2<-plot2+
# geom_point(aes_string(colour=cluster_name),size=dot_size)+
# geom_text(data=file_centers,aes_string(x=reduction_dm1,y=reduction_dm2),label=file_centers[,1,drop=TRUE],colour="black",size=7)+
# mytheme+
# theme(legend.text=element_text(size=text_size))+
# #theme(legend.key.height=unit(10,"cm"))+
# facet_wrap(~Short_name,ncol=major_cond_n)+
# scale_color_manual(values = cluster_color(nrow(centers)))
# multiplot(plot2)
# dev.off()
cat(paste0("Exported: ","tSNE-",cluster_name,"(sample name).",output_format,"\n"))
#生成单个分组的tSNE-Phenograph图像
if(output_format=="pdf")
pdf(file=paste0("./",output_dir,"/","tSNE-",cluster_name,"(",major_cond,").pdf"),width=1200*major_cond_n/72,height=1200/72)
else if(output_format=="tiff")
tiff(filename = paste0("./",output_dir,"/","tSNE-",cluster_name,"(",major_cond,").tiff"),width=1200*major_cond_n,height=1200)
else
{ message(paste0("Error-Output format ",output_format," is not supported\n"))
return(NULL)}
plot3<-ggplot(combined_data_plot,aes_string(x=reduction_dm1,y=reduction_dm2))
if(0){
plot3<-plot3+ geom_density_2d(colour=contour_colour,size=contour_line_size,bins=contour_bins)+
scale_x_continuous(limits=c(min(combined_data_plot[,reduction_dm1])*1.15,max(combined_data_plot[,reduction_dm1])*1.15))+
scale_y_continuous(limits=c(min(combined_data_plot[,reduction_dm2])*1.15,max(combined_data_plot[,reduction_dm2])*1.15))
}
plot3<-plot3+
geom_point(aes_string(colour=cluster_name),size=dot_size)+
mytheme+
geom_text(data=file_centers,aes_string(x=reduction_dm1,y=reduction_dm2),label=file_centers[,1,drop=TRUE],colour="black",size=7)+
theme(text = element_text(size=text_size))+
#theme(legend.key.height=unit(10,"cm"))+
facet_wrap(facets=major_cond,ncol=major_cond_n)+
#scale_color_manual(values = primary.colors(nrow(centers)+1)[-1])
scale_color_manual(values = cluster_color(nrow(centers)))
multiplot(plot3)
dev.off()
cat(paste0("Exported: ","tSNE-",cluster_name,"(",major_cond,").",output_format,"\n"))
#生成分组之间merge的tSNE-Phenograph图像
if(output_format=="pdf")
pdf(file=paste0("./",output_dir,"/","tSNE colored by ",major_cond,".pdf"),width=1300/72,height=1200/72)
else if(output_format=="tiff")
tiff(filename = paste0("./",output_dir,"/","tSNE colored by ",major_cond,".tiff"),width=1300,height=1200)
else
{ message(paste0("Error-Output format ",output_format," is not supported\n"))
return(NULL)}
plot4<-ggplot(combined_data_plot,aes_string(x=reduction_dm1,y=reduction_dm2))
if(show_contour==TRUE){
plot4<-plot4+ geom_density_2d(colour=contour_colour,size=contour_line_size,bins=contour_bins)+
scale_x_continuous(limits=c(min(combined_data_plot[,reduction_dm1])*1.15,max(combined_data_plot[,reduction_dm1])*1.15))+
scale_y_continuous(limits=c(min(combined_data_plot[,reduction_dm2])*1.15,max(combined_data_plot[,reduction_dm2])*1.15))
}
plot4<-plot4+
theme(text = element_text(size=text_size))+
#theme(legend.key.height=unit(10,"cm"))+
geom_point(aes_string(colour=major_cond),size=dot_size,alpha=0.6)+
mytheme+
scale_color_brewer(palette = "Set1")
multiplot(plot4)
dev.off()
cat(paste0("Exported: ","tSNE colored by ",major_cond,".",output_format,"\n"))
}
#' @title draw_density_plots
#' @description
#' 自动生成每个marker的直方图(密度图);
#'
#' @param combined_data_plot 数据框或者矩阵,附带有(meta)cluster、降维结果(t_sne_1,t_sne_2)的表达矩阵
#' @param groups 实验组的Metadata信息,其中必需包含列名:”Short_name“
#' @param cluster_color tSNE图的亚群的配色方案,默认dif_seq_rainbow
#' @param cluster_name 用过分析的cluster名称
#' @param output_dir 输出数据文件夹名称
#' @param major_cond 选择groups中一个列名做为展现差异的major_cond
#' @param cluster_id 指定显示的cluster
#' @param xlim 指定x轴范围,例如:c(0,5),只在free_x=F的时候有效
#' @param free_x True或者FALSE,设置是否每张图X-轴取值范围是否自动,建议设置为F
#' @param trans_method 数据转化方式,有四种:"CytofAsinh","simpleAsinh";"0_to_Max",所有Marker的信号强度除以最大值,线性转换,通过除以各通道信号的最大值把数值scale到0~1;"Min_to_Max",线性转换,最小值转换成0,最大值转换成1,最大限度展现population的表达差异
#' @export
draw_density_plots<-function(combined_data_plot,
groups,
all_markers,
cluster_color=dif_seq_rainbow,
cluster_name="metacluster",
output_dir="cluster_density_plots",
major_cond="Tissue_Type",
cluster_id=NULL,
xlim=NULL,
free_x=F,
trans_method="simpleAsinh"
)
{
library(colorRamps)
library(Rmisc)
if (!dir.exists(paste0("./",output_dir))) {
dir.create(paste0("./",output_dir))
}
combined_data_plot$File_ID<-as.character(combined_data_plot$File_ID)
combined_data_plot<-full_join(combined_data_plot,groups,by="File_ID")
density_plot_marker_id<-which(all_markers[,"density_plot"]==1)
density_plot_marker<-as.character(all_markers[density_plot_marker_id,"markers"])
if(trans_method=="CytofAsinh") trans_fun=CytofAsinh
if(trans_method=="simpleAsinh") trans_fun=simpleAsinh
if(trans_method=="0_to_Max") trans_fun=normalize_Zero_Max
if(trans_method=="Min_to_Max") trans_fun=normalize_min_max
combined_data_plot[, density_plot_marker_id] <- apply(combined_data_plot[, density_plot_marker_id,drop = FALSE],
2,trans_fun)
if (is.null(cluster_id)){
cluster_id=unique(combined_data_plot[,cluster_name])}
combined_data_plot2<-combined_data_plot%>%
#dplyr::filter(Merge_in_fig=="Yes")%>%
#dplyr::filter(cluster_name %in% cluster_id)%>%
dplyr::filter_at(vars(matches(cluster_name)),all_vars(.%in% cluster_id)) %>%
dplyr::select(one_of(c(cluster_name,density_plot_marker)))
head(combined_data_plot2)
#str(combined_data_plot2)
cluster_index<-colnames(combined_data_plot2)==cluster_name
combined_data_plot2[,cluster_index]<-as.factor(combined_data_plot2[,cluster_index])
combined_data_plot3<-tidyr::gather_(combined_data_plot2,key="Markers",value="exprs",density_plot_marker)
head(combined_data_plot3)
mytheme <- theme(panel.background = element_rect(fill = "white", colour = "black", size = 0.2), #坐标系及坐标轴
legend.key = element_rect(fill = "white", colour = "white"), #图标
legend.background = (element_rect(colour= "white", fill = "white")))
#2生成合并tSNE-文件图像
col_plot<-length(density_plot_marker)*3.5
row_plot<-length(unique(combined_data_plot3[,cluster_name]))*3.5
pdf(file=paste0("./",output_dir,"/","Density_plots",".pdf"),width = col_plot,height = row_plot)
plot1<-ggplot(combined_data_plot3)+
geom_density(aes_string(x= "exprs",fill=cluster_name),adjust =1)+
mytheme+
scale_fill_manual(values = cluster_color(length(unique(combined_data_plot3[,cluster_name]))))#+
#facet_wrap(as.formula(paste0(cluster_name," ~Markers") ),scales = "free_y",ncol = length(density_plot_marker))
if(!is.null(xlim) & free_x==F){
plot1<-plot1+
scale_x_continuous(limits=xlim)}
if(free_x==F){
plot1<-plot1+
facet_wrap(as.formula(paste0(cluster_name," ~Markers") ),scales = "free_y",ncol = length(density_plot_marker))}
if(free_x==T){
plot1<-plot1+
facet_wrap(as.formula(paste0(cluster_name," ~Markers") ),scales = "free",ncol = length(density_plot_marker))}
multiplot(plot1)
dev.off()
cat(paste0("Exported: ","tSNE-","Files"," merge.pdf\n"))
}
#' @title draw_tsne_heatmaps
#' @description
#' 生成各个marker的tSNE;
#'
#' @param combined_data_plot 数据框或者矩阵,附带有(meta)cluster、降维结果(t_sne_1,t_sne_2)的表达矩阵
#' @param heatmap_tsne_markers vector,指定marker的名称,默认NULL,此时将生成所有通道的heatmap
#' @param single_file 是否把所有heatmap合成单个文件,默认FALSE
#' @param groups 实验组的Metadata信息,其中必需包含列名:”Short_name“
#' @param major_cond 选择groups中一个列名做为展现差异的major_cond
#' @param trans_method 数据转化方式,有四种:"CytofAsinh","simpleAsinh";"0_to_Max",所有Marker的信号强度除以最大值,线性转换,通过除以各通道信号的最大值把数值scale到0~1;"Min_to_Max",线性转换,最小值转换成0,最大值转换成1,最大限度展现population的表达差异
#' @param show_legend 是否显示legend(colorbar)
#' @param show_scale 是否显示X 轴和Y轴的刻度,默认是TRUE
#' @param dot_size 图中点的大小,默认是4
#' @param dot_colours 设置colorbar的颜色,默认为jet color函数
#' @param output_dir 输出数据文件夹名称,默认是TRUE
#' @param output_format 输出数据文件格式:"tiff"和“pdf”
#' @param reduction_dm1 combined_data_plot降维产生的维度,默认"tsne_1"
#' @param reduction_dm2 combined_data_plot降维产生的维度,默认"tsne_2"
#' @export
draw_tsne_heatmaps<-function(combined_data_plot,
heatmap_tsne_markers=NULL,
trans_method="simpleAsinh",
single_file=FALSE,
groups=groups,
major_cond=NULL,
dot_size=4,
dot_colours=colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")),
show_legend=TRUE,
show_scale=TRUE,
output_dir="tsne_heatmap",
output_format="tiff",
reduction_dm1="tsne_1",
reduction_dm2="tsne_2"
){
if(0){
heatmap_tsne_markers=NULL
trans_method="simpleAsinh"
single_file=FALSE
groups=groups
major_cond=NULL
show_legend=TRUE
show_scale=TRUE
output_dir="tsne_heatmap"
output_format="JPEG"
reduction_dm1="tsne_1"
reduction_dm2="tsne_2"
}
if (!dir.exists(paste0("./",output_dir," ",trans_method))) {
dir.create(paste0("./",output_dir," ",trans_method))
}
if(trans_method=="CytofAsinh") trans_fun=CytofAsinh
if(trans_method=="simpleAsinh") trans_fun=simpleAsinh
if(trans_method=="0_to_Max") trans_fun=normalize_Zero_Max
if(trans_method=="Min_to_Max") trans_fun=normalize_min_max
if(is.null(heatmap_tsne_markers)) {
File_ID_num<-which(colnames(combined_data_plot)=="File_ID")
heatmap_tsne_markers<-colnames(combined_data_plot[2:File_ID_num-1])}
combined_data_plot[,heatmap_tsne_markers]<-apply(combined_data_plot[,heatmap_tsne_markers],
MARGIN = 2,
remove_extremum)
combined_data_plot[,heatmap_tsne_markers]<-apply(combined_data_plot[,heatmap_tsne_markers],MARGIN=2,trans_fun) #MAGIN=2 按照列进行Normalize,MAGIN=1按照行进行Normalize
xlim<-c(min(combined_data_plot$tsne_1)*1.1,max(combined_data_plot$tsne_1)*1.1)
ylim<-c(min(combined_data_plot$tsne_2)*1.1,max(combined_data_plot$tsne_2)*1.1)
add_cell<-combined_data_plot[1,,drop=F]
suppressWarnings(add_cell[1,]<-0)
add_cell[1,c(reduction_dm1,reduction_dm2)]=c(1000,1000)
add_cell$File_ID<-combined_data_plot[1,"File_ID"]
combined_data_plot<-rbind(combined_data_plot,
add_cell)
combined_data_plot$File_ID<-as.character(combined_data_plot$File_ID)
combined_data_plot<-full_join(combined_data_plot,groups,by="File_ID")
tail(combined_data_plot)
#jet.colors <- colorRampPalette(c("#00007F", "blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"))
mytheme <- theme(panel.background = element_rect(fill = "white", colour = "black", size = 0.2), #坐标系及坐标轴
legend.key = element_rect(fill = "white", colour = "white"), #图标
legend.background = (element_rect(colour= "white", fill = "white")))
heatmap_tsne_markers<-as.matrix(heatmap_tsne_markers)
single_heatmap_tsne<-function(marker_name){
single_heatmap<-ggplot(combined_data_plot)+
geom_point(aes_string(x=reduction_dm1,y=reduction_dm2,colour=marker_name),size=dot_size,alpha=1)+
mytheme+scale_color_gradientn(colours = dot_colours(7))+
#expand_limits(x=xlim,y=ylim)+
scale_y_continuous(limits=ylim)+
scale_x_continuous(limits=xlim)+
theme(text = element_text(size=50))+
theme(legend.title=element_blank())+
labs(title=marker_name)+
theme(legend.key.height=unit(5,"cm"))+
theme(plot.margin=unit(c(1,1,1,1),"cm"))
if(show_legend==F){
single_heatmap<-single_heatmap+
theme(legend.position = "none")
}
if(show_scale==F){
single_heatmap<-single_heatmap+
theme(axis.text = element_blank())
}
if(!is.null(major_cond))
{single_heatmap<-single_heatmap+
facet_wrap(facets=major_cond)
}
return(single_heatmap)
}
heatmap_tsne_list<-lapply(heatmap_tsne_markers,single_heatmap_tsne)
n_figure<-length(heatmap_tsne_list)
major_cond_n<-length(unique(groups[,major_cond,drop=TRUE]))
if(!is.null(major_cond)) fig_cond_n<-major_cond_n
else fig_cond_n=1
if(single_file==FALSE){
for(figure_i in 1:n_figure){
cat(paste0("Start to output heatmap of ",heatmap_tsne_markers[figure_i]),"\n")
if(output_format=="tiff"){
jpeg(filename = paste0("./",output_dir," ",trans_method,"/","Heatmap ",heatmap_tsne_markers[figure_i],".tiff"),width=1000*fig_cond_n,height=1000)}
else if(output_format=="pdf"){
pdf(file=paste0("./",output_dir," ",trans_method,"/","Heatmap ",heatmap_tsne_markers[figure_i],".pdf"),width=1000*fig_cond_n/72,height=1000/72)}
else{
message(paste0("Error-Output format ",output_format," is not supported\n"))
return(NULL)}
suppressWarnings( multiplot(heatmap_tsne_list[figure_i]))
dev.off()
}
}else{
fig_row<-ceiling(n_figure^0.5)
fig_col<-ceiling(n_figure/fig_row)
if(output_format=="tiff"){
jpeg(filename = paste0("./",output_dir," ",trans_method,"/","tSNE-heatmap.tiff"),width=1000*fig_row*fig_cond_n,height=1000*fig_col)}
else if(output_format=="pdf"){
jpeg(filename = paste0("./",output_dir," ",trans_method,"/","tSNE-heatmap.pdf"),width=1000*fig_row*fig_cond_n/72,height=1000*fig_col/72)}
else{
message(paste0("Error-Output format ",output_format," is not supported\n"))
return(NULL)}
suppressWarnings(multiplot(plotlist=heatmap_tsne_list,cols = fig_col))
dev.off()
}
}
#' @title One_SENSE_report
#' @description
#' 进行OneSense分析,并进行画出两个坐标轴方向的Heatmap;
#'
#' @param combined_data_sampled 数据框或者矩阵,待处理的表达数据
#' @param perplexity tsne降维参数,困惑度
#' @param max_iter tsne降维参数,迭代次数
#' @param all_markers 带有One_SENSE_F和One_SENSE_P的通道选择marker
#' @param output_dir 输出的目录名称
#' @export
One_SENSE_report<-function(combined_data_sampled,
#tSNE参数设置:
perplexity=30, #困惑度
max_iter=1000, #迭代次数
all_markers=all_markers,
output_dir="cluster_onesense_plots"
){
#One_SENSE_F分析
if (!dir.exists(paste0("./",output_dir))) {
dir.create(paste0("./",output_dir))
}
One_SENSE_F_id=which(all_markers$One_SENSE_F==1)
One_SENSE_F_name=colnames(combined_data_sampled[,One_SENSE_F_id])
One_SENSE_F_input_data=combined_data_sampled[,One_SENSE_F_name]
One_SENSE_F_result <- Rtsne(One_SENSE_F_input_data,
initial_dims = ncol(One_SENSE_F_input_data),
dims = 1,
check_duplicates = FALSE,
perplexity=perplexity,
max_iter=max_iter)$Y
#One_SENSE_P分析
One_SENSE_P_id=which(all_markers$One_SENSE_P==1)
One_SENSE_P_name=colnames(combined_data_sampled[,One_SENSE_P_id])
One_SENSE_P_input_data=combined_data_sampled[,One_SENSE_P_name]
One_SENSE_P_result <- Rtsne(One_SENSE_P_input_data,
initial_dims = ncol(One_SENSE_P_input_data),
dims = 1,
check_duplicates = FALSE,
perplexity=perplexity,
max_iter=max_iter)$Y
combined_data_plot <- data.frame(combined_data_sampled,
One_SENSE_F=One_SENSE_F_result,
One_SENSE_P=One_SENSE_P_result)
One_SENSE_result<-data.frame(One_SENSE_F=One_SENSE_F_result,
One_SENSE_P=One_SENSE_P_result)
#生成Heatmap
#统计分析
One_SENSE_P_Group<-cut(combined_data_plot$One_SENSE_P,50,labels = c(1:50))
One_SENSE_F_Group<-cut(combined_data_plot$One_SENSE_F,50,labels = c(1:50))
combined_data_plot<-data.frame(combined_data_plot,
P_Group=One_SENSE_P_Group,
F_Group=One_SENSE_F_Group)
statics_P_Groups<-aggregate(combined_data_plot[,One_SENSE_P_id],list(combined_data_plot$P_Group),median)[,-1]
statics_F_Groups<-aggregate(combined_data_plot[,One_SENSE_F_id],list(combined_data_plot$F_Group),median)[,-1]
head(statics_P_Groups)
head(statics_F_Groups)
#标准化:将所有数值Normalize到0~1
statics_P_Groups=BBmisc::normalize(statics_P_Groups, method = "range", range = c(0, 1), margin = 2)
statics_P_Groups=as.matrix(statics_P_Groups)
statics_F_Groups=BBmisc::normalize(statics_F_Groups, method = "range", range = c(0, 1), margin = 2)
statics_F_Groups=t(as.matrix(statics_F_Groups))
#绘图
cat(paste0("Start to output heatmap of P_Heatmap.pdf","\n"))
#jpeg(filename = paste0("./",output_dir,"/","Heatmap ",heatmap_tsne_markers[figure_i],".jpeg"),width=1000,height=1000,quality = 600)
pdf(file=paste0("./",output_dir,"/","P_Heatmap.pdf"),width = 8,height = 7)
heatmap3(x=statics_P_Groups,
method="average",
Rowv = NA,
Colv=NULL,
balanceColor=FALSE,
scale="none",
symm = FALSE,
showColDendro =F,
col=colorRampPalette(c("black","blue","green","yellow","Red"),space="rgb",interpolate="linear")(100),
#col=colorRampPalette(c("black","#00007F", "blue", "#007FFF", "cyan","#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000"))(100),
lasRow=3
)
dev.off()
#绘图
cat(paste0("Start to output heatmap of F_Heatmap.pdf","\n"))
#jpeg(filename = paste0("./",output_dir"Heatmap ",heatmap_tsne_markers[figure_i],".jpeg"),width=1000,height=1000,quality = 600)
pdf(file=paste0("./",output_dir,"/","F_Heatmap.pdf"),width = 8,height = 7)
heatmap3(x=statics_F_Groups,
method="average",
Rowv = NULL,
Colv=NA,
balanceColor=FALSE,
scale="none",
symm = FALSE,
showColDendro =T,
showRowDendro = T,
col=colorRampPalette(c("black","blue","green","yellow","Red"),space="rgb",interpolate="linear")(100),
lasRow=2
)
dev.off()
return(One_SENSE_result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.