#' mydplyr.percentage
#'
#' This function is an internal function
#' @param df
#' @param by
#' @return
#' @export
#' @examples
#' Primaryclutering(dgelist,txcut=500,cluster.resos=first.reso,RAWinput=israw)
mydplyr.percentage<-function(df,by="stage")
{
newdf<-c()
for (i in as.character(unique(df[,by])))
{
dftm<-df[which(as.character(df[,by])==i),]
current.sum<-sum(dftm[,2])
newdf<-rbind(newdf,cbind(dftm,percent=apply(dftm,1,function(x){as.numeric(as.character(x[2]))/current.sum})))
}
return(newdf)
}
#' Primaryclutering
#'
#' This function is an advanced clustering function that when for example input dgeA, dgeB. This will do cluster individually for A, B and also for merged AB
#' @param rawdatapackage a list of several dge files. At least 2 dge. in the case of TreCCA tree. It is alwasy two dge, one from early time, one form latert
#' @param txcut The transcripts cutoff for clustering analysis
#' @param RAWinput If true, it is a raw dge
#' @param cluster.resos A vector of number, each is the cluster resolution for each dge.
#' @param allsamereso If true, then take the first number of cluster.resos for clustering of all dges
#' @param Together.reso The resolution of merged dge(all samples together)
#' @return This will return a list of seurat result. It is used in function such as Tree.build.prepare
#' @export
#' @examples
#' Primaryclutering(dgelist,txcut=500,cluster.resos=first.reso,RAWinput=israw)
Primaryclutering<-function(rawdatapackage,txcut=500,RAWinput=T,cluster.resos=c(0.6,0.6),allsamereso=F,together.reso=0.6)
{
Seurat.list<-list()
for (i in 1:length(rawdatapackage))
{
if(allsamereso)
{
Seurat<-docluster(dgepreprocess(rawdatapackage[[i]],txcut,norowname=RAWinput),GetinformativeGene(dgepreprocess(rawdatapackage[[i]],txcut,norowname=RAWinput),500),names(rawdatapackage)[i],reso=cluster.resos[1])
}else
{
Seurat<-docluster(dgepreprocess(rawdatapackage[[i]],txcut,norowname=RAWinput),GetinformativeGene(dgepreprocess(rawdatapackage[[i]],txcut,norowname=RAWinput),500),names(rawdatapackage)[i],reso=cluster.resos[i])
}
Seurat.list<-c(Seurat.list,list(Seurat))
}
names(Seurat.list)<-names(rawdatapackage)
Sample.dict<-c()
Seurat.raw.list<-list()
for (i in 1:length(rawdatapackage))
{
donor_cell<-data.frame(Sample=Seurat.list[[i]]@data.info[,6],row.names=row.names(Seurat.list[[i]]@data.info))
Sample.dict<-rbind(Sample.dict,donor_cell)
Seurat.raw.list<-c(Seurat.raw.list,list(as.matrix(Seurat.list[[i]]@raw.data)))
}
names(Seurat.raw.list)<-names(Seurat.list)
#step4 Make original clustering
SeuratALL.origin<-docluster.multi(500,txcutoff=500,Seurat.raw.list,names(Seurat.list),reso=together.reso)
return(list(Seurat.list=Seurat.list,Sample.dict=Sample.dict,Seurat.raw.list=Seurat.raw.list,SeuratALL.origin=SeuratALL.origin))
}
#' myddply.center
#'
#' This is an internal function used by dist.matrix.prep and dist.matrix.prep.v3
#' @param df
#' @param Col
#' @param is.originalcluster
#' @param combinecol
#' @return This will return an object for dist.matrix.prep and dist.matrix.prep.v3
#' @export
#' @examples
#' Sample1.centers<-myddply.center(ob1.PCAS.clst,cluster.col1)
myddply.center<-function(df,Col,is.originalcluster=F,combinecol="Sample") # only when you want to make a combination of two columns into a new clusrer name# Note the myfun should be a function that return a vector
{
df<-df[complete.cases(df),]
Centers<-c()
cluster<-unique(df[,Col])
cellNumber<-c()
size.matrix<-c()
df<-df[complete.cases(df),]
for (i in unique(df[,Col]))
{
if(is.originalcluster)
{
cluster<-c(cluster,paste(unique(df[,combinecol]),i,sep="_"))
}
df.sub<-df[df[,Col]==i,]
curCenter<-colMeans(df.sub[,1:40])
Centers<-rbind(Centers,c(colMeans(df.sub[,1:40])))
cellNumber<-c(cellNumber,nrow(df.sub))
cur.size.matrix<-as.matrix(summary(apply(df.sub[,1:40],1,function(x){dist(rbind(x,curCenter))})))
colnames(cur.size.matrix)<-paste("",i,sep="")
size.matrix<-cbind(size.matrix,cur.size.matrix)
}
Centers<-cbind(Centers,cellNumber=cellNumber)
row.names(Centers)<-cluster
if (nrow(Centers)==1)
{
centers.dist.mtx<-"No distance matrix, because only one clust is found"
}else
{
centers.dist.mtx<-as.matrix(dist(Centers[,1:40]))
}
return(list(Centers=Centers,centers.dist.mtx=centers.dist.mtx,size.matrix=size.matrix))
}
#' myddply.center.v2
#'
#' This is another internal function used by dist.matrix.prep and dist.matrix.prep.v3
#' @param df.1
#' @param df.2
#' @param Col1
#' @param Col2
#' @return This will return an object for dist.matrix.prep and dist.matrix.prep.v3
#' @export
#' @examples
#' Sample1.centers<-myddply.center(ob1.PCAS.clst,cluster.col1)
myddply.center.v2<-function(df.1,df.2,Col1,Col2) # Note the myfun should be a function that return a vector
{
require(rdist)
Centers.1<-c()
cluster.1<-c()
cellNumber.1<-c()
size.matrix.1<-c()
for (i in unique(df.1[,Col1]))
{
cluster.1<-c(cluster.1,paste("",i,sep=""))
df.sub.1<-df.1[as.character(df.1[,Col1])==i,]
curCenter.1<-colMeans(df.sub.1[,1:40])
df.sub.1<-df.sub.1[complete.cases(df.sub.1),]
Centers.1<-rbind(Centers.1,c(colMeans(df.sub.1[,1:40])))
cellNumber.1<-c(cellNumber.1,nrow(df.sub.1))
cur.size.matrix.1<-as.matrix(summary(apply(df.sub.1[,1:40],1,function(x){dist(rbind(x,curCenter.1))})))
colnames(cur.size.matrix.1)<-paste("",i,sep="")
size.matrix.1<-cbind(size.matrix.1,cur.size.matrix.1)
}
Centers.1<-cbind(Centers.1,cellNumber.1=cellNumber.1)
row.names(Centers.1)<-cluster.1
#``` for df.2
Centers.2<-c()
cluster.2<-c()
cellNumber.2<-c()
size.matrix.2<-c()
for (i in unique(df.2[,Col2]))
{
cluster.2<-c(cluster.2,paste("",i,sep=""))
df.sub.2<-df.2[df.2[,Col2]==i,]
df.sub.2<-df.sub.2[complete.cases(df.sub.2),]
curCenter.2<-colMeans(df.sub.2[,1:40])
Centers.2<-rbind(Centers.2,c(colMeans(df.sub.2[,1:40])))
cellNumber.2<-c(cellNumber.2,nrow(df.sub.2))
cur.size.matrix.2<-as.matrix(summary(apply(df.sub.2[,1:40],1,function(x){dist(rbind(x,curCenter.2))})))
colnames(cur.size.matrix.2)<-paste("",i,sep="")
size.matrix.2<-cbind(size.matrix.2,cur.size.matrix.2)
}
Centers.2<-cbind(Centers.2,cellNumber.2=cellNumber.2)
row.names(Centers.2)<-cluster.2[cluster.2!="NA"]
center.1.PCs<-Centers.1[,1:40]
center.2.PCs<-Centers.2[,1:40]
if(!is.matrix(center.1.PCs))
{
center.1.PCs<-t(center.1.PCs)
}
if(!is.matrix(center.2.PCs))
{
center.2.PCs<-t(center.2.PCs)
}
if(is.null(row.names(center.1.PCs)))
{
row.names(center.1.PCs)<-cluster.1
}
if(is.null(row.names(center.1.PCs)))
{
row.names(center.2.PCs)<-cluster.2
}
centers.dist.mtx<-cdist(center.1.PCs,center.2.PCs)
row.names(centers.dist.mtx)<-cluster.1
colnames(centers.dist.mtx)<-cluster.2[cluster.2!="NA"]
df1_df2.relation<-paste(row.names(centers.dist.mtx),colnames(centers.dist.mtx)[apply(centers.dist.mtx,1,function(x){which(x==min(x))})],sep="--")
df2_df1.relation<-paste(colnames(centers.dist.mtx),row.names(centers.dist.mtx)[apply(centers.dist.mtx,2,function(x){which(x==min(x))})],sep="--")
##Creat the segData which is critical for plotting the connection plot seg.Data.main is a seg data seeking from later stage toward earlier stage which is a major considerarion
seg.Data.main<-c()
if(dim(centers.dist.mtx)[1]==1)
{
for (older.cluster.names in colnames(centers.dist.mtx))
{
average.dist<-mean(centers.dist.mtx)
if(min(centers.dist.mtx[,older.cluster.names])<=average.dist)
{
seg<-c(older.cluster.names,row.names(centers.dist.mtx))
seg.Data.main<-rbind(seg.Data.main,seg)
}
}
}else
{
seg.Data.main<-c()
for (older.cluster.names in colnames(centers.dist.mtx))
{
average.dist<-mean(centers.dist.mtx)
if(min(centers.dist.mtx[,older.cluster.names])<=average.dist)
{
seg<-c(older.cluster.names,names(which(centers.dist.mtx[,older.cluster.names]==min(centers.dist.mtx[,older.cluster.names]))))
seg.Data.main<-rbind(seg.Data.main,seg)
}
}
}
row.names(seg.Data.main)<-NULL
seg.Data.main<-as.data.frame(seg.Data.main)
names(seg.Data.main)<-c("SeekFrom.cluster.names","SeekToward.cluster.names")
seg.Data.main<-cbind(seg.Data.main,SeekFrom.stage.names=unlist(lapply(strsplit(as.character(seg.Data.main$SeekFrom.cluster.names),"_"),function(x){x[1]})))
seg.Data.main<-cbind(seg.Data.main,SeekToward.stage.names=unlist(lapply(strsplit(as.character(seg.Data.main$SeekToward.cluster.names),"_"),function(x){x[1]})))
##Creat the segData which is critical for plotting the connection plot seg.Data.main is a seg data seeking from earlier stage toward later stage which is a alternative considerarion
seg.Data.alt<-c()
if(dim(centers.dist.mtx)[2]==1)
{
for (earlier.cluster.names in row.names(centers.dist.mtx))
{
average.dist<-mean(centers.dist.mtx)
if(min(centers.dist.mtx[earlier.cluster.names,])<=average.dist)
{
seg<-c(earlier.cluster.names,colnames(centers.dist.mtx))
seg.Data.alt<-rbind(seg.Data.alt,seg)
}
}
}else
{
seg.Data.alt<-c()
for (earlier.cluster.names in row.names(centers.dist.mtx))
{
average.dist<-mean(centers.dist.mtx)
if(min(centers.dist.mtx[earlier.cluster.names,])<=average.dist)
{
seg<-c(earlier.cluster.names,names(which(centers.dist.mtx[earlier.cluster.names,]==min(centers.dist.mtx[earlier.cluster.names,]))))
seg.Data.alt<-rbind(seg.Data.alt,seg)
}
}
}
row.names(seg.Data.alt)<-NULL
seg.Data.alt<-as.data.frame(seg.Data.alt)
names(seg.Data.alt)<-c("SeekFrom.cluster.names","SeekToward.cluster.names")
seg.Data.alt<-cbind(seg.Data.alt,SeekFrom.stage.names=unlist(lapply(strsplit(as.character(seg.Data.alt$SeekFrom.cluster.names),"_"),function(x){x[1]})))
seg.Data.alt<-cbind(seg.Data.alt,SeekToward.stage.names=unlist(lapply(strsplit(as.character(seg.Data.alt$SeekToward.cluster.names),"_"),function(x){x[1]})))
return(list(Centers.1=Centers.1,Centers.2=Centers.2,centers.dist.mtx=centers.dist.mtx,size.matrix.1=size.matrix.1,size.matrix.2=size.matrix.2,df1_df2.relation=df1_df2.relation,df2_df1.relation=df2_df1.relation,seg.Data.main=seg.Data.main,seg.Data.alt=seg.Data.alt))
}
#' Tree.build.prepare
#'
#' This function is the first function to start calculate the subtree from dgeA, dgeB. This basically do the clustering and fullplot at low resolution
#' @param dge1 the dge file (usually from earlier timepoint), either raw or processed is acceptable
#' @param dge2 the dge file (usually from later timepoint), either raw or processed is acceptable
#' @param name1 The name for the dge1 (usually from earlier timepoint)
#' @param name2 The name for the dge2 (usually from later timepoint)
#' @param first.reso The cluster resolution for dge1 and dge2 respectively. A vector of two number , for example c(0.03,0.03)
#' @param treemake.dir A path. The workspace where the whole tree making project is on
#' @return A list of stuff including seurat clutsering result; fullplots; the resolution used and the path
#' @export
#' @examples
#' S4_S5ROCK.tree.prep<-Tree.build.prepare(dge1=s4.B.dge,dge2=S5.ROCKII.dge,name1="S4.B",name2="S5.rock",first.reso=c(0.03,0.03),treemake.dir="/mnt/NFS/homeGene/JinLab/cxw486/Dropseq/Entrance/Esderived/Esdrived_DGE/AttemptFrom17.8.28/ROCKII/Treemake/")
Tree.build.prepare<-function(dge1,dge2,name1,name2,first.reso=c(0.03,0.03),treemake.dir="./") # S4.B apply resolusion 0.015
{
setwd(treemake.dir)
if(!any(grepl(paste(name1,name2,sep="_"),list.files()))){
dir.create(paste(name1,name2,sep="_"))
}
setwd(paste(c("./",name1,"_",name2),collapse=""))
path<-getwd()
first.resos.used<-paste("first.reso is",paste(first.reso,collapse="_"))
print(first.resos.used)
# dir.create(paste(name1,name2,sep="_"))
# setwd(paste(c("./",name1,"_",name2),collapse=""))
#sink(paste(c(name1,"_",name2,".log"),collapse=""))
dgelist<-list(dge1,dge2)
names(dgelist)<-c(name1,name2)
print(paste(c("Start to do clustering between",name1,name2),collapse=" "))
## Primary clustering
#
israw<-colnames(dge1)[1]=="GENE"
primary.total<-Primaryclutering(dgelist,txcut=500,cluster.resos=first.reso,RAWinput=israw)
primary.total$Seurat.list[[1]]@data.info<-cbind(primary.total$Seurat.list[[1]]@data.info,Sample.2nd=paste(primary.total$Seurat.list[[1]]@data.info$Sample,primary.total$Seurat.list[[1]]@data.info[,paste("res.",first.reso[1],sep="")],sep="_"))
primary.total$Seurat.list[[2]]@data.info<-cbind(primary.total$Seurat.list[[2]]@data.info,Sample.2nd=paste(primary.total$Seurat.list[[2]]@data.info$Sample,primary.total$Seurat.list[[2]]@data.info[,paste("res.",first.reso[2],sep="")],sep="_"))
print("Total clustering with low resolution is completed")
print(paste(c("with low resolution ",name1," can be clustered into ",length(unique(primary.total$Seurat.list[[1]]@data.info[,paste("res.",first.reso[1],sep="")]))," ; ",name2," can be clustered into ",length(unique(primary.total$Seurat.list[[2]]@data.info[,paste("res.",first.reso[2],sep="")]))),collapse=""))
##
#saveRDS(primary.total,file=paste(c(name1,name2,"totalprimary.RDS"),collapse="_"))
## plot full plot for each two stage
dir.create(paste(c("LowresCluster",name1,name2),collapse="."))
setwd(paste(c("LowresCluster",name1,name2),collapse="."))
print("Start to plot clustering into pdf...")
total.fullplot.1<-Fullplot_v2(primary.total$Seurat.list[[1]],paste(c(name1,".total.",first.reso[1],".pdf"),collapse=""),signiture=NULL,resolusion=paste("res.",first.reso[1],sep=""),doreturn=T)
print(paste(c("The yonger: ",name1," Fullplot has completed, the file name is ",paste(c(name1,".total.",first.reso[1],".pdf"),collapse="")),collapse=""))
total.fullplot.2<-Fullplot_v2(primary.total$Seurat.list[[2]],paste(c(name2,".total.",first.reso[2],".pdf"),collapse=""),signiture=NULL,resolusion=paste("res.",first.reso[2],sep=""),doreturn=T)
print(paste(c("The older: ",name2," Fullplot has completed, the file name is ",paste(c(name2,".total.",first.reso[2],".pdf"),collapse="")),collapse=""))
return(list(primary.total=primary.total,total.fullplot.1=total.fullplot.1,total.fullplot.2=total.fullplot.2,path=path,first.resos.used=first.reso))
}
#' Prep.res.adjust
#'
#' This function is to adjust the clustering resolution generated from Tree.build.prepare
#' @param prep the output from Tree.build.prepare
#' @param first.reso.adjust This is the ADJUSTED CLUSTER RESOLUTION.The format is the same as First.reso in Tree.build.prepare
#' @return This will return the same object as that generated from Tree.build.prepare
#' @export
#' @examples
#' S4_S5ROCK.tree.prep<-Prep.res.adjust(S4_S5ROCK.tree.prep,first.reso.adjust=c(0.015,0.015))
Prep.res.adjust<-function(prep,first.reso.adjust=c(0.03,0.03)) # S4.B apply resolusion 0.015
{
setwd(prep$path)
path=prep$path
name1<-names(prep$primary.total$Seurat.list)[1]
name2<-names(prep$primary.total$Seurat.list)[2]
setwd(paste(c("LowresCluster",name1,name2),collapse="."))
prep$primary.total$Seurat.list[[1]]<- FindClusters(prep$primary.total$Seurat.list[[1]], pc.use = 1:10, resolution = first.reso.adjust[1], print.output = 0, save.SNN = T)
prep$primary.total$Seurat.list[[1]]@data.info<-prep$primary.total$Seurat.list[[1]]@data.info[,c("nGene","nUMI","orig.ident","percent.mito",paste("res",first.reso.adjust[1],sep="."),"Sample")] %>% cbind(.,Sample.2nd=paste(.$Sample,.[,paste("res",first.reso.adjust[1],sep=".")],sep="_"))
prep$primary.total$Seurat.list[[2]]<- FindClusters(prep$primary.total$Seurat.list[[2]], pc.use = 1:10, resolution = first.reso.adjust[2], print.output = 0, save.SNN = T)
prep$primary.total$Seurat.list[[2]]@data.info<-prep$primary.total$Seurat.list[[2]]@data.info[,c("nGene","nUMI","orig.ident","percent.mito",paste("res",first.reso.adjust[2],sep="."),"Sample")] %>% cbind(.,Sample.2nd=paste(.$Sample,.[,paste("res",first.reso.adjust[2],sep=".")],sep="_"))
total.fullplot.1<-Fullplot_v2(prep$primary.total$Seurat.list[[1]],paste(c(name1,".total.",first.reso.adjust[1],".pdf"),collapse=""),signiture=NULL,resolusion=paste("res.",first.reso.adjust[1],sep=""),doreturn=T)
print(paste(c("The yonger: ",name1," Fullplot has completed, the file name is ",paste(c(name1,".total.",first.reso.adjust[1],".pdf"),collapse="")),collapse=""))
total.fullplot.2<-Fullplot_v2(prep$primary.total$Seurat.list[[2]],paste(c(name2,".total.",first.reso.adjust[2],".pdf"),collapse=""),signiture=NULL,resolusion=paste("res.",first.reso.adjust[2],sep=""),doreturn=T)
print(paste(c("The older: ",name2," Fullplot has completed, the file name is ",paste(c(name2,".total.",first.reso.adjust[2],".pdf"),collapse="")),collapse=""))
return(list(primary.total=prep$primary.total,total.fullplot.1=total.fullplot.1,total.fullplot.2=total.fullplot.2,path=path,first.resos.used=first.reso.adjust))
}
#' dist.matrix.prep
#'
#' This is an important internal function to calculate distance matrix, it is used in Tree.build.1 and Tree.build.2nd.clustering
#' @param binary.primary This is the primary clustering result that can be retrieved from Tree.build.prepare object. (Because Tree.build.prepare mainly do the primary clustering.)The input here is tree.prep.ob$primary.total
#' @param datainfo.col1 default is c("res.0.6","nUMI","Sample")
#' @param cluster.col1 default is "res.0.6"
#' @param datainfo.col2 default is c("res.0.6","nUMI","Sample")
#' @param cluster.col2 default is "res.0.6"
#' @param res1 default is "res.0.06
#' @param res2 default is "res.0.06"
#' @param datainfo.col3 c("res.0.6","nUMI","Sample")
#' @return This will return an object used for Tree.build.1
#' @export
#' @examples
#' total.dist.matrxes<-dist.matrix.prep(primary.total,datainfo.col1=c(paste("res.",first.reso[1],sep=""),"nUMI","Sample"),cluster.col1=paste("res.",first.reso[1],sep=""),datainfo.col2=c(paste("res.",first.reso[2],sep=""),"nUMI","Sample"),cluster.col2=paste("res.",first.reso[2],sep=""),res1=paste("res.",first.reso[1],sep=""),res2=paste("res.",first.reso[2],sep=""))
dist.matrix.prep<-function(binary.primary,datainfo.col1=c("res.0.6","nUMI","Sample"),cluster.col1="res.0.6",datainfo.col2=c("res.0.6","nUMI","Sample"),cluster.col2="res.0.6",res1="res.0.06",res2="res.0.06",datainfo.col3=c("res.0.6","nUMI","Sample"))
{
print(paste("sample1 is:", names(binary.primary$Seurat.list)[1]))
print(paste("sample2 is:", names(binary.primary$Seurat.list)[2]))
samplename1<-names(binary.primary$Seurat.list)[1]
samplename2<-names(binary.primary$Seurat.list)[2]
##part1 to prepare the pca and information for each of the two sample
####prepare for sample1
ob1.PCAS.clst<-Tomerge_v2(binary.primary$Seurat.list[[1]]@pca.rot,binary.primary$Seurat.list[[1]]@data.info[,datainfo.col1])
ob1.info<-binary.primary$Seurat.list[[1]]@data.info[,datainfo.col1]
ob1.info<-ob1.info[!duplicated(substr(row.names(ob1.info),1,12)),]
row.names(ob1.info)<-paste(samplename1,substr(row.names(ob1.info),1,12),sep="_")
ob1.info[,cluster.col1]<-paste(samplename1,ob1.info[,res1],sep="_")
####prepare for sample2
ob2.PCAS.clst<-Tomerge_v2(binary.primary$Seurat.list[[2]]@pca.rot,binary.primary$Seurat.list[[2]]@data.info[,datainfo.col2])
ob2.info<-binary.primary$Seurat.list[[2]]@data.info[,datainfo.col2]
ob2.info<-ob2.info[!duplicated(substr(row.names(ob2.info),1,12)),]
row.names(ob2.info)<-paste(samplename2,substr(row.names(ob2.info),1,12),sep="_")
ob2.info[,cluster.col2]<-paste(samplename2,ob2.info[,res2],sep="_")
####prepare for sample1--sample2
obALL.PCAS.clst<-Tomerge_v2(binary.primary$SeuratALL.origin@pca.rot,binary.primary$SeuratALL.origin@data.info[,datainfo.col3])
obALL.PCAS.clst.SP1<-subset(obALL.PCAS.clst,Sample==samplename1)
obALL.PCAS.clst.SP2<-subset(obALL.PCAS.clst,Sample==samplename2)
obALL.PCAS.clst.SP1<-obALL.PCAS.clst.SP1[which(!duplicated(paste(samplename1,substr(row.names(obALL.PCAS.clst.SP1),1,12),sep="_"))),]
row.names(obALL.PCAS.clst.SP1)<-paste(samplename1,substr(row.names(obALL.PCAS.clst.SP1),1,12),sep="_")
obALL.PCAS.clst.SP2<-obALL.PCAS.clst.SP2[which(!duplicated(paste(samplename2,substr(row.names(obALL.PCAS.clst.SP2),1,12),sep="_"))),]
row.names(obALL.PCAS.clst.SP2)<-paste(samplename2,substr(row.names(obALL.PCAS.clst.SP2),1,12),sep="_")
obALL.PCAS.clst.SP1<-Tomerge_v2(obALL.PCAS.clst.SP1[,1:40],ob1.info[,1:2])
obALL.PCAS.clst.SP2<-Tomerge_v2(obALL.PCAS.clst.SP2[,1:40],ob2.info[,1:2])
##part2 to calculate the center and distance matrix
Sample1.centers<-myddply.center(ob1.PCAS.clst,cluster.col1)
Sample2.centers<-myddply.center(ob2.PCAS.clst,cluster.col2)
Sample1cross2.centers<-myddply.center.v2(obALL.PCAS.clst.SP1,obALL.PCAS.clst.SP2,cluster.col1,cluster.col2)
return(list(Sample1.centers=Sample1.centers,Sample2.centers=Sample2.centers,Sample1cross2.centers=Sample1cross2.centers))
}
#' dist.matrix.prep.v3
#'
#' This is an important internal function to calculate distance matrix, it is used particularly in Tree.build.2nd.treemaking
#' @param binary.primary This is the primary clustering result that can be retrieved from Tree.build.prepare object. (Because Tree.build.prepare mainly do the primary clustering.)The input here is tree.prep.ob$primary.total
#' @param datainfo.col1 default is c("res.0.6","nUMI","Sample")
#' @param cluster.col1 default is "res.0.6"
#' @param datainfo.col2 default is c("res.0.6","nUMI","Sample")
#' @param datainfo.col.dog c("res.0.6","nUMI","Sample")
#' @param cluster.col2 default is "res.0.6"
#' @param cluster.col.dog "res.0.6"
#' @param withsingledog Default is F
#' @param dog.seurat.ob =NULL
#' @param neighborwithdog.seurat.ob =NULL
#' @return This will return an object used for Tree.build.2nd.treemaking
#' @export
#' @examples
#' total.dist.matrxes<-dist.matrix.prep(primary.total,datainfo.col1=c(paste("res.",first.reso[1],sep=""),"nUMI","Sample"),cluster.col1=paste("res.",first.reso[1],sep=""),datainfo.col2=c(paste("res.",first.reso[2],sep=""),"nUMI","Sample"),cluster.col2=paste("res.",first.reso[2],sep=""),res1=paste("res.",first.reso[1],sep=""),res2=paste("res.",first.reso[2],sep=""))
dist.matrix.prep.v3<-function(binary.primary,datainfo.col1=c("res.0.6","nUMI","Sample"),datainfo.col2=c("res.0.6","nUMI","Sample"),datainfo.col.dog=c("res.0.6","nUMI","Sample"),cluster.col1="res.0.6",cluster.col2="res.0.6",cluster.col.dog="res.0.6",withsingledog=F,dog.seurat.ob=NULL,neighborwithdog.seurat.ob=NULL)
{
require(ggdendro)
#require(reshape2)
print(paste("sample1 is:", names(binary.primary$Seurat.list)[1]))
print(paste("sample2 is:", names(binary.primary$Seurat.list)[2]))
samplename1<-names(binary.primary$Seurat.list)[1]
samplename2<-names(binary.primary$Seurat.list)[2]
##part1 to prepare the pca and information for each of the two sample
####prepare for sample1
ob1.PCAS.clst<-Tomerge_v2(binary.primary$Seurat.list[[1]]@pca.rot,binary.primary$Seurat.list[[1]]@data.info[,datainfo.col1])
ob1.info<-binary.primary$Seurat.list[[1]]@data.info[,datainfo.col1]
ob1.info<-cbind(ob1.info,Sample.cluster=paste(as.character(ob1.info$Sample),ob1.info[,cluster.col1],sep="_"))
####prepare for sample2
ob2.PCAS.clst<-Tomerge_v2(binary.primary$Seurat.list[[2]]@pca.rot,binary.primary$Seurat.list[[2]]@data.info[,datainfo.col2])
ob2.info<-binary.primary$Seurat.list[[2]]@data.info[,datainfo.col2]
row.names(ob2.info)<-paste(row.names(ob2.info),"b",sep="_")
ob2.info<-cbind(ob2.info,Sample.cluster=paste(as.character(ob2.info$Sample),ob2.info[,cluster.col2],sep="_"))
####prepare for singledog if it is there
if (withsingledog)
{
print(paste("There is a single dog---",unique(dog.seurat.ob@data.info$Sample)))
ob.dog.PCAS.clst<-Tomerge_v2(dog.seurat.ob@pca.rot,dog.seurat.ob@data.info[,datainfo.col.dog])
ob.dog.info<-dog.seurat.ob@data.info
row.names(ob.dog.info)<-paste(row.names(ob.dog.info),"c",sep="_")
ob.dog.info<-cbind(ob.dog.info,Sample.cluster=paste(as.character(ob.dog.info$Sample),ob.dog.info[,cluster.col.dog],sep="_"))
### put together with dog
ob.1.2.dog.info<-rbind(ob1.info[,c("nUMI","Sample.cluster")],ob2.info[,c("nUMI","Sample.cluster")],ob.dog.info[,c("nUMI","Sample.cluster")])
####prepare for sample1--sample2 plus singledog
obALL.dog.PCAS.clst<-Tomerge_v2(neighborwithdog.seurat.ob@pca.rot,ob.1.2.dog.info)
SampleALL.centers.dog<-myddply.center(obALL.dog.PCAS.clst,"Sample.cluster")
hc.dog<-hclust(as.dist(SampleALL.centers.dog$centers.dist.mtx))
p.dendro.dog<-ggdendrogram(hc.dog)
label.order.dog<-hc.dog$labels[hc.dog$order]
#Adjust the order of heatmap
matrix.dog.m<-reshape2::melt(SampleALL.centers.dog$centers.dist.mtx)
matrix.dog.m$Var1<-factor(matrix.dog.m$Var1,levels=label.order.dog)
matrix.dog.m$Var2<-factor(matrix.dog.m$Var2,levels=label.order.dog)
p.heat.dog<-ggplot(matrix.dog.m)+aes(Var1,Var2,fill=value)+geom_tile()+scale_fill_gradient(low="red",high="white")+geom_text(aes(label=format(value,digits=2)))+theme(axis.text.x=element_text(angle=45,hjust=1))
}
#### put together for that without singledog
ob.1.2.info<-rbind(ob1.info[,c("nUMI","Sample.cluster")],ob2.info[,c("nUMI","Sample.cluster")])
####prepare for sample1--sample2
obALL.PCAS.clst<-Tomerge_v2(binary.primary$SeuratALL.origin@pca.rot,ob.1.2.info)
##part2 to calculate the center and distance matrix
#obALL.PCAS.clst<-cbind(obALL.PCAS.clst,sample_cluster=paste(as.character(obALL.PCAS.clst$Sample),obALL.PCAS.clst[,"Sample.cluster"],sep="_"))
SampleALL.centers<-myddply.center(obALL.PCAS.clst,"Sample.cluster")
# Do the dendro
hc<-hclust(as.dist(SampleALL.centers$centers.dist.mtx))
p.dendro<-ggdendrogram(hc)
label.order<-hc$labels[hc$order]
#Adjust the order of heatmap
matrix.m<-reshape2::melt(SampleALL.centers$centers.dist.mtx)
matrix.m$Var1<-factor(matrix.m$Var1,levels=label.order)
matrix.m$Var2<-factor(matrix.m$Var2,levels=label.order)
p.heat<-ggplot(matrix.m)+aes(Var1,Var2,fill=value)+geom_tile()+scale_fill_gradient(low="red",high="white")+geom_text(aes(label=format(value,digits=2)))+theme(axis.text.x=element_text(angle=45,hjust=1))
#
if(withsingledog)
{
part1.withoutdog<-list(p.dendro=p.dendro,p.heat=p.heat,matrix=SampleALL.centers$centers.dist.mtx)
part2.withdog=list(p.dendro.dog=p.dendro.dog,p.heat.dog=p.heat.dog,matrix=SampleALL.centers.dog$centers.dist.mtx)
return(list(part1.withoutdog=part1.withoutdog,part2.withdog=part2.withdog))
}else
{
part1.withoutdog=list(p.dendro=p.dendro,p.heat=p.heat,matrix=SampleALL.centers$centers.dist.mtx)
return(list(part1.withoutdog=part1.withoutdog))
}
}
#' Tree.build.1
#'
#' This is the subtree relationship builder for low resolution
#' @param tree.prep.ob This is an object generated from Tree.build.prepare or Prep.res.adjust
#' @return This will return an object used for Tree.build.1
#' @export
#' @examples
#' S4_S5ROCK.tree.1.ob<-Tree.build.1(S4_S5ROCK.tree.prep)
Tree.build.1<-function(tree.prep.ob)
{
setwd(tree.prep.ob$path)
name1<-names(tree.prep.ob$primary.total$Seurat.list)[1]
name2<-names(tree.prep.ob$primary.total$Seurat.list)[2]
setwd(paste(c("LowresCluster",name1,name2),collapse="."))
primary.total<-tree.prep.ob$primary.total
first.reso<-tree.prep.ob$first.resos.used
name1<-names(tree.prep.ob$primary.total$Seurat.list)[1]
name2<-names(tree.prep.ob$primary.total$Seurat.list)[2]
total.dist.matrxes<-dist.matrix.prep(primary.total,datainfo.col1=c(paste("res.",first.reso[1],sep=""),"nUMI","Sample"),cluster.col1=paste("res.",first.reso[1],sep=""),datainfo.col2=c(paste("res.",first.reso[2],sep=""),"nUMI","Sample"),cluster.col2=paste("res.",first.reso[2],sep=""),res1=paste("res.",first.reso[1],sep=""),res2=paste("res.",first.reso[2],sep="")) # This gave back distance matrix as well as segData that is critical for relationship drawing. and importantly, the segData determines what will be further explored. I reccomend manual inspection although integrated should be able to well tell the relationship
dendro.heap.p<-dist.matrix.prep.v3(primary.total,datainfo.col1=c(paste("res.",first.reso[1],sep=""),"nUMI","Sample"),datainfo.col2=c(paste("res.",first.reso[2],sep=""),"nUMI","Sample"),cluster.col1=paste("res.",first.reso[1],sep=""),cluster.col2=paste("res.",first.reso[2],sep=""))
##
# to creat internal relationship
unpaired.cluster.names<-setdiff(colnames(dendro.heap.p$part1.withoutdog$matrix)[grepl(name1,colnames(dendro.heap.p$part1.withoutdog$matrix))],total.dist.matrxes$Sample1cross2.centers$seg.Data.main$SeekToward.cluster.names)
total.dist.matrxes$Sample1cross2.centers$seg.Data.main[,1]<-factor(total.dist.matrxes$Sample1cross2.centers$seg.Data.main[,1],levels=colnames(dendro.heap.p$part1.withoutdog$matrix))
total.dist.matrxes$Sample1cross2.centers$seg.Data.main[,2]<-factor(total.dist.matrxes$Sample1cross2.centers$seg.Data.main[,2],levels=colnames(dendro.heap.p$part1.withoutdog$matrix))
total.dist.matrxes$Sample1cross2.centers$seg.Data.main.internal<-c()
total.dist.matrxes<-Refine.relationship(total.dist.matrxes,dendro.heap.p$part1.withoutdog$matrix)
total.relation.plots<-tree.plot.v2(dendrodata=dendro.heap.p,matrixes=total.dist.matrxes,textsize=2.5,youngername=name1,maturername=name2,nocellnumber=T) # This will give back a relationship network (low resolution)
#dist.cutoff<-mean(dendro.heap.p$part1.withoutdog$matrix) # Take the whole matrix, including 0
dist.cutoff<-mean(dendro.heap.p$part1.withoutdog$matrix[dendro.heap.p$part1.withoutdog$matrix>0]) # Calculate a distance cutoff, out of which will be considered no connection
for (unclust in unpaired.cluster.names)
{
if(min(dendro.heap.p$part1.withoutdog$matrix[unclust,][dendro.heap.p$part1.withoutdog$matrix[unclust,]>0])<dist.cutoff)
{
internaltarget.current<-names(dendro.heap.p$part1.withoutdog$matrix[unclust,][dendro.heap.p$part1.withoutdog$matrix[unclust,]>0])[dendro.heap.p$part1.withoutdog$matrix[unclust,][dendro.heap.p$part1.withoutdog$matrix[unclust,]>0]==min(dendro.heap.p$part1.withoutdog$matrix[unclust,][dendro.heap.p$part1.withoutdog$matrix[unclust,]>0])]
}else
{
internaltarget.current<-NA
}
total.dist.matrxes$Sample1cross2.centers$seg.Data.main.internal<-rbind(total.dist.matrxes$Sample1cross2.centers$seg.Data.main.internal,c(SeekFrom.cluster.names=internaltarget.current,SeekToward.cluster.names=unclust,SeekFrom.stage.names=levels(total.dist.matrxes$Sample1cross2.centers$seg.Data.main[,3]),SeekToward.stage.names=levels(total.dist.matrxes$Sample1cross2.centers$seg.Data.main[,4])))
}
##
pdf(paste(c(name1,".",first.reso[1],".dendrogam.pdf"),collapse=""),height=9,width=9)
print(dendro.heap.p$part1.withoutdog[[1]])
print(dendro.heap.p$part1.withoutdog[[2]])
grid.newpage()
grid.table(as.data.frame(total.dist.matrxes$Sample1cross2.centers$df2_df1.relation))
grid.newpage()
grid.draw(tableGrob(total.dist.matrxes$Sample1cross2.centers$seg.Data.main, theme = ttheme_default(base_size = 9), vp = NULL))
print(total.relation.plots$p1)
print(total.relation.plots$p2)
dev.off()
return(list(total.dist.matrxes=total.dist.matrxes,total.relation.plots=total.relation.plots,dendro.heap.p=dendro.heap.p))
}
#' Refine.relationship
#'
#' This is and internal function used in Tree.build.2nd.treemaking to refine the relationship if one upstreme nod is connected to more than one nods in the downstreme stage. Input is Input\ deeper.dist.matrxes.current and p.dendro
#' @param dist.matrix deeper.dist.matrxes.current
#' @param all.matrix p.dendro[[length(p.dendro)]]$matrix
#' @return This will return an object used in Tree.build.2nd.treemaking. return the new "XX.dist.matrxes.current", assign it back
#' @export
#' @examples
#' Refine.relationship(deeper.dist.matrxes.current,p.dendro[[length(p.dendro)]]$matrix)
Refine.relationship<-function(dist.matrix=deeper.dist.matrxes.current,all.matrix=p.dendro[[length(p.dendro)]]$matrix)
{
reverse.seg<-dist.matrix$Sample1cross2.centers$seg.Data.alt
main.seg=dist.matrix$Sample1cross2.centers$seg.Data.main
cutoff<-sum(all.matrix)/(nrow(all.matrix)*(ncol(all.matrix)-1))
main.seg.new<-c()
internal.seg.new<-c()
for (seekFrom in as.character(main.seg$SeekFrom.cluster.names)) # to loop around the seek from cluster(aka maturer cluster which supposedto find its upstreme in most cases)
{
current.toward.clust<-main.seg$SeekToward.cluster.names[main.seg$SeekFrom.cluster.names==seekFrom] # To show the seek-toward clusters that is before refine. It couild be multiple seekToward
if(length(which(main.seg$SeekToward.cluster.names==current.toward.clust))==1) # If the seektoward is only one, which means only one line is connected. Then this will directly pass my refine.
{
main.seg.new<-rbind(main.seg.new,main.seg[which(main.seg$SeekToward.cluster.names==current.toward.clust),])
}else # else, firstly to check if the current seekfrom is also the closest in terms of reverse relationship, if it is then this relationship will be maintained
{
multi.strongest.current<-reverse.seg[reverse.seg$SeekFrom.cluster.names==as.character(current.toward.clust),]
multi.strongest.current<-multi.strongest.current[,c(2,1,4,3)]
names(multi.strongest.current)<-names(multi.strongest.current)[c(2,1,4,3)]
if(seekFrom==as.character(multi.strongest.current$SeekFrom.cluster.names)) # This is the situation where tis current seekfrom is exactly the closest in terms of reverse relationship
{
main.seg.new<-rbind(main.seg.new,multi.strongest.current)
}else if(min(all.matrix[,seekFrom][all.matrix[,seekFrom]>0])<=cutoff) # If the current seekfrom cluster is not the closest then check out the all matrix, to see if the closest to this current seekfrom cluster is upstreme or internal
{
if( grepl(unique(main.seg$SeekToward.stage.names),names(all.matrix[,seekFrom][all.matrix[,seekFrom]>0])[all.matrix[,seekFrom][all.matrix[,seekFrom]>0]==min(all.matrix[,seekFrom][all.matrix[,seekFrom]>0])]))
{
main.seg.new<-rbind(main.seg.new,main.seg[which(main.seg$SeekFrom.cluster.names==seekFrom),] )
}else
{
internal.seg.current<-c(SeekToward.cluster.names=names(all.matrix[,seekFrom][all.matrix[,seekFrom]>0])[all.matrix[,seekFrom][all.matrix[,seekFrom]>0]==min(all.matrix[,seekFrom][all.matrix[,seekFrom]>0])],SeekFrom.cluster.names=seekFrom,SeekFrom.stage.names=strsplit(seekFrom,"_")[[1]][1],SeekToward.stage.names=strsplit(seekFrom,"_")[[1]][1])
internal.seg.new<-rbind(internal.seg.new,internal.seg.current)
}
}
}
}
row.names(internal.seg.new)<-NULL
internal.seg.new<-as.data.frame(internal.seg.new)
if(!all(dim(internal.seg.new)==0))
{
dist.matrix$Sample1cross2.centers$seg.Data.main.internal<-internal.seg.new
}
dist.matrix$Sample1cross2.centers$seg.Data.main<-main.seg.new
return(dist.matrix)
}
#' tree.plot.v2
#'
#' This is and internal function used in Tree.build.2nd.treemaking To plot the tree
#' @param maturername =downstremename,
#' @param youngername =upstremename,
#' @param withsingledog =F,dendrodata,
#' @param matrixes =deeper.dist.matrxes.current,
#' @param print.later.names =T,
#' @param print.earlier.names =T,
#' @param print.stage.level =T,
#' @param inputorder.mature r=F,
#' @param inputorder.yonger =F,
#' @param order.maturer =NULL,
#' @param order.yonger =NULL,
#' @param textsize =2,
#' @param ballsize =5,
#' @param percent =2,
#' @param nocellnumber =F,
#' @param thecellnumber =cellnumber
#' @return This will return an object used in Tree.build.2nd.treemaking. return the new "XX.dist.matrxes.current", assign it back
#' @export
#' @examples
#' Refine.relationship(deeper.dist.matrxes.current,p.dendro[[length(p.dendro)]]$matrix)
tree.plot.v2<-function(maturername=downstremename,youngername=upstremename,withsingledog=F,dendrodata,matrixes=deeper.dist.matrxes.current,print.later.names=T,print.earlier.names=T,print.stage.level=T,inputorder.maturer=F,inputorder.yonger=F,order.maturer=NULL,order.yonger=NULL,textsize=2,ballsize=5,percent=2,nocellnumber=F,thecellnumber=cellnumber)
{
strsplit(youngername,"_") %>% unlist ->youngername
strsplit(maturername,"_") %>% unlist ->maturername
#### First of all I will fix the self loop problem here on seg.Data.main.weak
if(!is.null(matrixes$Sample1cross2.centers$seg.Data.main.weak))
{
loop.df<-c()
for (i in 1:nrow(matrixes$Sample1cross2.centers$seg.Data.main.weak))
{
if(matrixes$Sample1cross2.centers$seg.Data.main.weak[i,2] %in% matrixes$Sample1cross2.centers$seg.Data.main.weak[,1])
{
if(which(matrixes$Sample1cross2.centers$seg.Data.main.weak[,1]==matrixes$Sample1cross2.centers$seg.Data.main.weak[i,2]) %>% matrixes$Sample1cross2.centers$seg.Data.main.weak[.,2] %>% as.character(.)==as.character(matrixes$Sample1cross2.centers$seg.Data.main.weak[i,1]))
loopcur<-c(as.character(matrixes$Sample1cross2.centers$seg.Data.main.weak[i,1]),as.character(matrixes$Sample1cross2.centers$seg.Data.main.weak[i,2]))
loop.df<-rbind(loop.df,loopcur)
}
}
if(!is.null(loop.df))
{
loop.df<-t(apply(loop.df,1,function(x){x[order(x)]})) %>% .[!duplicated(.)] %>% as.matrix %>% t
for(pair in 1: nrow(loop.df))
{
dendrodata$part2.withdog$matrix[loop.df[pair,1],!colnames(dendrodata$part2.withdog$matrix) %in% loop.df[pair,]] %>% min -> A.out.dist
dendrodata$part2.withdog$matrix[loop.df[pair,2],!colnames(dendrodata$part2.withdog$matrix) %in% loop.df[pair,]] %>% min -> B.out.dist
toOut<-c(A.out.dist,B.out.dist) %>% which.min ## 1 is A ; 2 is B
Out.seektoward<-dendrodata$part2.withdog$matrix[loop.df[pair,toOut],!colnames(dendrodata$part2.withdog$matrix) %in% loop.df[pair,]] %>% which.min %>% names
matrixes$Sample1cross2.centers$seg.Data.main.weak[matrixes$Sample1cross2.centers$seg.Data.main.weak[,1]== loop.df[pair,toOut] ,2]<-Out.seektoward
}
}
}
### Do the same same on seg.Data.main.internal
if(!is.null(matrixes$Sample1cross2.centers$seg.Data.main.internal))
{
loop.df<-c()
for (i in 1:nrow(matrixes$Sample1cross2.centers$seg.Data.main.internal))
{
if(matrixes$Sample1cross2.centers$seg.Data.main.internal[i,2] %in% matrixes$Sample1cross2.centers$seg.Data.main.internal[,1])
{
if(which(matrixes$Sample1cross2.centers$seg.Data.main.internal[,1]==matrixes$Sample1cross2.centers$seg.Data.main.internal[i,2]) %>% matrixes$Sample1cross2.centers$seg.Data.main.internal[.,2] %>% as.character(.)==as.character(matrixes$Sample1cross2.centers$seg.Data.main.internal[i,1]))
{
loopcur<-c(as.character(matrixes$Sample1cross2.centers$seg.Data.main.internal[i,1]),as.character(matrixes$Sample1cross2.centers$seg.Data.main.internal[i,2]))
loop.df<-rbind(loop.df,loopcur)
}
}
}
if(!is.null(loop.df))
{
loop.df<-t(apply(loop.df,1,function(x){x[order(x)]})) %>% .[!duplicated(.)] %>% as.matrix %>% t
for(pair in 1: nrow(loop.df))
{
dendrodata$part2.withdog$matrix[loop.df[pair,1],!colnames(dendrodata[[length(dendrodata)]]$matrix) %in% loop.df[pair,]] %>% min -> A.out.dist
dendrodata$part2.withdog$matrix[loop.df[pair,2],!colnames(dendrodata[[length(dendrodata)]]$matrix) %in% loop.df[pair,]] %>% min -> B.out.dist
toOut<-c(A.out.dist,B.out.dist) %>% which.min ## 1 is A ; 2 is B
Out.seektoward<-dendrodata$part2.withdog$matrix[loop.df[pair,toOut],!colnames(dendrodata$part2.withdog$matrix) %in% loop.df[pair,]] %>% which.min %>% names
matrixes$Sample1cross2.centers$seg.Data.main.internal[matrixes$Sample1cross2.centers$seg.Data.main.internal[,2]== loop.df[pair,toOut] ,1]<-Out.seektoward
}
}
}
### ------------------------------------------------------------------------------------------------------------------------------------------------ self loopo problem solved.
if (!nocellnumber)
{
cellnumber.info=thecellnumber
}
#merge.end<-"end.cluster.names"
#merge.start<-"start.cluster.names"
#merge.end.stage<-"end.stage.names"
#merge.start.stage<-"start.stage.names"
#if(length(matrixes)==3)
#{
matrixes<-matrixes$Sample1cross2.centers
names(matrixes)[3]<-"dist.matrix"
names(matrixes)[8]<-"segData.2_1" # This was the seg.Data.main
names(matrixes)[9]<-"segData.1_2"
merge.end<-"SeekFrom.cluster.names"
merge.start<-"SeekToward.cluster.names"
merge.end.stage<-"SeekFrom.stage.names"
merge.start.stage<-"SeekToward.stage.names"
#}
#Calculate the tree dataframe
if(withsingledog)
{
tree.df<-data.frame(cluster.names=colnames(dendrodata$part2.withdog$matrix),order=1:length(colnames(dendrodata$part2.withdog$matrix))) # here the order is just to make a column. it will be corrected later
}else
{
tree.df<-data.frame(cluster.names=colnames(dendrodata$part1.withoutdog$matrix),order=1:length(colnames(dendrodata$part1.withoutdog$matrix)))
}
tree.df<-cbind(tree.df,stage=unlist(lapply(strsplit(as.character(tree.df$cluster.names),"_"),function(x){x[1]})))
tree.df.dn<-subset(tree.df,stage==maturername)
tree.df.up<-subset(tree.df,stage!=maturername)
tree.df.dn$order<-1:nrow(tree.df.dn)
tree.df.up$order<-1:nrow(tree.df.up)
if(inputorder.maturer)
{
tree.df.up$cluster.names=order.maturer
}
if(inputorder.yonger)
{
tree.df.dn$cluster.names=order.yonger
}
matrixes$tree.df<-rbind(tree.df.up,tree.df.dn)
#To prepare the segData(the point to point link informations)
matrixes$segData.2_1.ordered<-merge(matrixes$segData.2_1,matrixes$tree.df[,1:2],by.x=merge.end,by.y="cluster.names") ## order.x indicate the order for "seekFrom"
matrixes$segData.2_1.ordered<-merge(matrixes$segData.2_1.ordered,matrixes$tree.df[,1:2],by.x=merge.start,by.y="cluster.names") ## order.x indicate the order for "seekToward"
if(any(grepl("internal",names(matrixes))))
{
matrixes$segData.2_1.internal.ordered<-matrixes$seg.Data.main.internal[complete.cases(matrixes$seg.Data.main.internal),]
if(is.atomic(matrixes$segData.2_1.internal.ordered))
{
matrixes$segData.2_1.internal.ordered<-rbind(matrixes$segData.2_1.internal.ordered,matrixes$segData.2_1.internal.ordered)
matrixes$segData.2_1.internal.ordered<-merge(matrixes$segData.2_1.internal.ordered,matrixes$tree.df[,1:2],by.x=merge.end,by.y="cluster.names")
matrixes$segData.2_1.internal.ordered<-merge(matrixes$segData.2_1.internal.ordered,matrixes$tree.df[,1:2],by.x=merge.start,by.y="cluster.names")
matrixes$segData.2_1.internal.ordered<-unique(matrixes$segData.2_1.internal.ordered)
}else
{
matrixes$segData.2_1.internal.ordered<-merge(matrixes$segData.2_1.internal.ordered,matrixes$tree.df[,1:2],by.x=merge.end,by.y="cluster.names")
matrixes$segData.2_1.internal.ordered<-merge(matrixes$segData.2_1.internal.ordered,matrixes$tree.df[,1:2],by.x=merge.start,by.y="cluster.names")
}
}
if(any(grepl("weak",names(matrixes))))
{
matrixes$segData.2_1.weak.ordered<-matrixes$seg.Data.main.weak[complete.cases(matrixes$seg.Data.main.weak),]
if(is.atomic(matrixes$segData.2_1.weak.ordered))
{
matrixes$segData.2_1.weak.ordered<-rbind(matrixes$segData.2_1.weak.ordered,matrixes$segData.2_1.weak.ordered)
matrixes$segData.2_1.weak.ordered<-merge(matrixes$segData.2_1.weak.ordered,matrixes$tree.df[,1:2],by.x=merge.end,by.y="cluster.names")
matrixes$segData.2_1.weak.ordered<-merge(matrixes$segData.2_1.weak.ordered,matrixes$tree.df[,1:2],by.x=merge.start,by.y="cluster.names")
matrixes$segData.2_1.weak.ordered<-unique(matrixes$segData.2_1.weak.ordered)
}else
{
matrixes$segData.2_1.weak.ordered<-merge(matrixes$segData.2_1.weak.ordered,matrixes$tree.df[,1:2],by.x=merge.end,by.y="cluster.names")
matrixes$segData.2_1.weak.ordered<-merge(matrixes$segData.2_1.weak.ordered,matrixes$tree.df[,1:2],by.x=merge.start,by.y="cluster.names")
}
}
matrixes$segData.1_2.ordered<-merge(matrixes$segData.1_2,matrixes$tree.df[,1:2],by.x=merge.end,by.y="cluster.names")
matrixes$segData.1_2.ordered<-merge(matrixes$segData.1_2.ordered,matrixes$tree.df[,1:2],by.x=merge.start,by.y="cluster.names")
#To store the stage level into stage.levels. the mature group order; the yonger group order
stage.level<-c(maturername,youngername)
matrixes$tree.df$stage<-factor(matrixes$tree.df$stage,levels=stage.level)
yonger.order<-as.character(tree.df.up$cluster.names)
maturer.order<-as.character(tree.df.dn$cluster.names)
matrixes$tree.df<-cbind(matrixes$tree.df,First.cluster.tag=unlist(lapply(strsplit(as.character(matrixes$tree.df$cluster.names),"_"),function(x){x[2]}))) # Got a tag to tell the first cluster, which will be coded by color.
# Print out the order information
if(print.later.names)
{
print("Here are the ordered cluster names that are relatively maturer")
print(maturer.order)
print("")
}
if(print.earlier.names)
{
print("Here are the ordered cluster names that are relatively yonger\n")
print(yonger.order)
print("")
}
if(print.stage.level)
{
print("Here is the level of stages\n:The first is on bottom, the laste is on top")
print(stage.level)
print("")
}
if (!nocellnumber)
{
matrixes$tree.df<-merge(matrixes$tree.df,cellnumber.info[,c(1,2,4)],by.x="cluster.names",by.y="Var1")
}
p1<-ggplot(matrixes$tree.df)+aes(order,stage,color=First.cluster.tag,size=percent)+geom_point()+geom_text(aes(label=cluster.names),vjust=1.5,hjust="left",color="blue",size=textsize,fontface="bold",nudge_y=0.1)+geom_segment(data=matrixes$segData.2_1.ordered,aes_string(x="order.x",xend="order.y",y=merge.end.stage,yend=merge.start.stage),arrow=arrow(),lwd=1,col="red")+xlim(1,max(matrixes$tree.df$order)+1)
if(any(grepl("internal",names(matrixes))))
{
p1<-p1+geom_curve(data=matrixes$segData.2_1.internal.ordered,aes_string(x="order.x",xend="order.y",y=merge.end.stage,yend=merge.start.stage),arrow=arrow(length=unit(0.03, "npc"),ends="last", type = "closed"),lwd=0.5,linetype=1,col="black")
}
if(any(grepl("weak",names(matrixes))))
{
p1<-p1+geom_curve(data=matrixes$segData.2_1.weak.ordered,aes_string(x="order.x",xend="order.y",y=merge.end.stage,yend=merge.start.stage),arrow=arrow(length=unit(0.03, "npc"),ends="last", type = "closed"),lwd=0.5,linetype=2,col="black")
}
p2<-ggplot(matrixes$tree.df)+aes(order,stage,color=First.cluster.tag)+geom_point(size=8)+geom_text(aes(label=cluster.names),vjust=1.5,hjust="left",color="blue",size=textsize,fontface="bold",nudge_y=0.1)+geom_segment(data=matrixes$segData.1_2.ordered,aes_string(x="order.x",xend="order.y",y=merge.end.stage,yend=merge.start.stage),arrow=arrow(),lwd=1,col="black")+xlim(1,max(matrixes$tree.df$order)+1)
return(list(p1=p1,p2=p2,matrix.advance=matrixes))
}
############################################### Tree.build.2nd.clustering #################################################################
### Introduction\ This function did seurat primary clustering for the second layer
### Input\ tree.prep,tree.1.ob, which is the output from Tree.build.1; the secondary resolution for clustering
### Output\ primaries.deeper.lst which is a list primary objects, including multiple pairs of connections
### Example\ s4_s5.tree.2nd.primary.list<-Tree.build.2nd.clustering(s4_s5.tree.prep.test,s4_s5.tree.1.test,second.reso=c(0.3,0.3))
### Function_define
#' Tree.build.2nd.clustering
#'
#' This function did seurat primary clustering for the second layer, which is higher resolution . Input\ tree.prep,tree.1.ob, which is the output from Tree.build.1; thesecondary resolution for clustering. ### Output\ primaries.deeper.lst which is a list primary objects, including multiple pairs of connections.
#' @param tree.prep This is an object generated from Tree.build.prepare or Prep.res.adjust. This is basically the clustering result.
#' @param tree.1.ob This is an object generated from Tree.build.1. This is the low resulotion subtree
#' @param second.reso This is the resolution for the high resolution. For example, c(0.3,0.3)
#' @return This will return the secondary clustering result. It will be used for Tree.build.2nd.clustering.patch and Tree.build.2nd.treemaking
#' @export
#' @examples
#' S4_S5ROCK.tree.2nd.primary_0.3_0.3.list<-Tree.build.2nd.clustering(S4_S5ROCK.tree.prep,S4_S5ROCK.tree.1.ob,second.reso=c(0.3,0.3))
Tree.build.2nd.clustering<-function(tree.prep,tree.1.ob,second.reso=c(0.3,0.3))
{
primaries.deeper.lst<-list()
all.list.names<-c()
for (i in 1:nrow(tree.1.ob$total.dist.matrxes$Sample1cross2.centers$seg.Data.main))
{
cur.list.name<-paste(tree.1.ob$total.dist.matrxes$Sample1cross2.centers$seg.Data.main$SeekToward.cluster.names[i],tree.1.ob$total.dist.matrxes$Sample1cross2.centers$seg.Data.main$SeekFrom.cluster.names[i],sep="_")
all.list.names<-c(all.list.names,cur.list.name)
# To assign the dge data
dge.pair.lst<-list(as.matrix(tree.prep$primary.total$Seurat.list[[1]]@raw.data)[,row.names(tree.prep$primary.total$Seurat.list[[1]]@data.info)[which(tree.prep$primary.total$Seurat.list[[1]]@data.info$Sample.2nd==as.character(tree.1.ob$total.dist.matrxes$Sample1cross2.centers$seg.Data.main[i,2]))]],as.matrix(tree.prep$primary.total$Seurat.list[[2]]@raw.data)[,row.names(tree.prep$primary.total$Seurat.list[[2]]@data.info)[which(tree.prep$primary.total$Seurat.list[[2]]@data.info$Sample.2nd==as.character(tree.1.ob$total.dist.matrxes$Sample1cross2.centers$seg.Data.main[i,1]))]])
names(dge.pair.lst)<-c(as.character(tree.1.ob$total.dist.matrxes$Sample1cross2.centers$seg.Data.main[i,2]),as.character(tree.1.ob$total.dist.matrxes$Sample1cross2.centers$seg.Data.main[i,1]))
primary.current<-Primaryclutering(dge.pair.lst,txcut=500,cluster.resos=second.reso,RAWinput=F)
primaries.deeper.lst<-c(primaries.deeper.lst,list(primary.current))
}
names(primaries.deeper.lst)<-all.list.names
###Print information about how I did the secondary clustering
print (paste(c("There are ",length(primaries.deeper.lst)," pairs of connection branches"),collapse=""))
return(primaries.deeper.lst=primaries.deeper.lst)
}
############################################### Tree.build.2nd.clustering.patch #################################################################
### Introduction\ This function is to address single dog problem. It will Identify and do seurat clustering for the single dog & dog+neighbours
### Input\ the clustered result from Tree.build.2nd.clustering as well as tree.prep and tree.1.ob. singledog.reso should be specified
### Output\ If there was no single dog, the out put is exactly the same as imput, otherwise, two seurat object would be added at the end, forming a list as whole
### Example\ S2_S2.24.tree.2nd.primary_0.06.list.patched<-Tree.build.2nd.clustering.patch(S2_S2.24.tree.2nd.primary_0.06.list,S2_S2.24.tree.prep,S2_S2.24.tree.1.ob,singledog.reso=0.06)
### Function_define
#' Tree.build.2nd.clustering.patch
#'
#' Introduction\ This function is to address single dog problem. It will Identify and do seurat clustering for the single dog & dog+neighbours. ### Input\ the clustered result from Tree.build.2nd.clustering as well as tree.prep and tree.1.ob. singledog.reso should be specified. ### Output\ If there was no single dog, the out put is exactly the same as imput, otherwise, two seurat object would be added at the end, forming a list as whole
#' @param primaries.deeper.lst This is generated by Tree.build.2nd.clustering and Second.primary.adjust
#' @param tree.prep This is the low resolution clustering result from Tree.build.prepare
#' @param tree.1.ob This is the low reslution subtree result from Tree.build.1
#' @param singledog.reso This is the resolution singledog cluster for 0.3
#' @return This will return the high resolution result with a pach that consider the single dog. This is going to be used for Tree.build.2nd.treemaking
#' @export
#' @examples
#' S4_S5ROCK.tree.2nd.primary_0.3_0.3.list<-Tree.build.2nd.clustering(S4_S5ROCK.tree.prep,S4_S5ROCK.tree.1.ob,second.reso=c(0.3,0.3))
Tree.build.2nd.clustering.patch<-function(primaries.deeper.lst,tree.prep,tree.1.ob,singledog.reso=0.3)
{
Singledog.clusters<-setdiff(tree.1.ob$total.relation.plots$matrix.advance$tree.df$cluster.names[as.character(tree.1.ob$total.relation.plots$matrix.advance$tree.df$stage)==unique(as.character(tree.1.ob$total.relation.plots$matrix.advance$segData.2_1.ordered$SeekFrom.stage.names))],as.character(tree.1.ob$total.relation.plots$matrix.advance$segData.2_1.ordered$SeekFrom.cluster.names)) # To get the cluster name who has no any relationship with upstream clusters. I will also do clustering for it AND seperartely store it in single.seurat.list
# single.seurat.list<-c() ### I annotated this part out because most commonly there would be only one single dog cluster.
# for(i in length(Singledog.clusters))
# {
#
# }
print(paste("Number of single dog is",length(Singledog.clusters)))
if (length(Singledog.clusters)>0)
{
# First to decide which lineage the dog should consider with
mindist=999
minlineage<-c()
for (n in 1:nrow(tree.1.ob$total.relation.plots$matrix.advance$segData.2_1.ordered)){
tree.1.ob$total.relation.plots$matrix.advance$segData.2_1.ordered[n,1:2] %>% t() %>% as.character -> lineage
if (mean(tree.1.ob$dendro.heap.p$part1.withoutdog$matrix[Singledog.clusters,lineage])<mindist)
{
mindist<-mean(tree.1.ob$dendro.heap.p$part1.withoutdog$matrix[Singledog.clusters,lineage])
minlineage<-paste(lineage,collapse="_")
}
}
singledog.dge<-as.matrix(tree.prep$primary.total$Seurat.list[[2]]@raw.data[,row.names(tree.prep$primary.total$Seurat.list[[2]]@data.info)[tree.prep$primary.total$Seurat.list[[2]]@data.info$Sample.2nd==Singledog.clusters]])
singledog.seurat.ob<-docluster(dgepreprocess(singledog.dge,500,norowname=F),GetinformativeGene(dgepreprocess(singledog.dge,500,norowname=F),500),Singledog.clusters,reso=singledog.reso)
singledog.seurat.ob@data.info<-cbind(singledog.seurat.ob@data.info,Sample.2nd=paste(singledog.seurat.ob@data.info$Sample,singledog.seurat.ob@data.info[,paste("res",singledog.reso,sep=".")],sep="_"))
neighbourWithsingledog.seurat.ob<-docluster.multi(500,sets=list(as.matrix(primaries.deeper.lst[[minlineage]]$Seurat.list[[1]]@raw.data),as.matrix(primaries.deeper.lst[[minlineage]]$Seurat.list[[2]]@raw.data),singledog.dge),nms=c(names(primaries.deeper.lst[[1]]$Seurat.list)[1:2],Singledog.clusters))
singledog.twoobs<-list(singledog.singledog.seurat.ob=singledog.seurat.ob,singledog.neighbourWithsingledog.seurat.ob=neighbourWithsingledog.seurat.ob,singledog.minlineage=minlineage)
primaries.deeper.lst<-c(primaries.deeper.lst,singledog.twoobs)
}
return(primaries.deeper.lst)
}
#' Second.primary.adjust
#'
#' This is to adjust the clustering resolution for secondary clusters.
#' @param second.pr This is the secondary clustering object from Tree.build.2nd.clustering
#' @param prep this is the low resolution clutsering object. from Tree.build.prepare
#' @param second.reso.adjust This is the adjusted resolution for example: c(0.3,0.6)
#' @return This will return. The adjusted secondary clustering, assign back to the same object generated from Tree.build.2nd.clustering
#' @export
#' @examples
#' S4_S5ROCK.tree.2nd.primary_0.3_0.3.list<-Second.primary.adjust(S4_S5ROCK.tree.2nd.primary_0.3_0.3.list,S4_S5ROCK.tree.prep,second.reso.adjust=c(0.3,0.6))
Second.primary.adjust<-function(second.pr,prep,second.reso.adjust=c(0.3,0.6))
{
setwd(prep$path)
name1<-names(prep$primary.total$Seurat.list)[1]
name2<-names(prep$primary.total$Seurat.list)[2]
setwd(paste(c("Hires",name1,name2),collapse="."))
for (i in 1: length(second.pr))
{
second.pr[[i]]$Seurat.list[[1]]<-FindClusters(second.pr[[i]]$Seurat.list[[1]], pc.use = 1:10, resolution = second.reso.adjust[1], print.output = 0, save.SNN = T)
second.pr[[i]]$Seurat.list[[1]]@data.info<-second.pr[[i]]$Seurat.list[[1]]@data.info[,c("nGene","nUMI","orig.ident","percent.mito",paste("res",second.reso.adjust[1],sep="."),"Sample")] %>% cbind(.,Sample.2nd=paste(.$Sample,.[,paste("res",second.reso.adjust[1],sep=".")],sep="_"))
second.pr[[i]]$Seurat.list[[2]]<-FindClusters(second.pr[[i]]$Seurat.list[[2]], pc.use = 1:10, resolution = second.reso.adjust[2], print.output = 0, save.SNN = T)
second.pr[[i]]$Seurat.list[[2]]@data.info<-second.pr[[i]]$Seurat.list[[2]]@data.info[,c("nGene","nUMI","orig.ident","percent.mito",paste("res",second.reso.adjust[2],sep="."),"Sample")] %>% cbind(.,Sample.2nd=paste(.$Sample,.[,paste("res",second.reso.adjust[2],sep=".")],sep="_"))
}
return(second.pr)
}
#' DOCLEAN
#'
#' This is to delete the past files with not-so-good resolution.
#' @param prep This is the object from Tree.build.prepare
#' @param hiorlo if == "hi", then go delete the files in hi resolution folder. if == "lo" then go to low resolution folder to delete
#' @param res.tokeep This is th3e final resolution to keep. for example c(0.3,0.6)
#' @return No return
#' @export
#' @examples
#' S4_S5ROCK.tree.2nd.primary_0.3_0.3.list<-Second.primary.adjust(S4_S5ROCK.tree.2nd.primary_0.3_0.3.list,S4_S5ROCK.tree.prep,second.reso.adjust=c(0.3,0.6))
DOCLEAN<-function(prep,hiorlo="hi",res.tokeep=c(0.3,0.6)){
setwd(prep$path)
name1<-names(prep$primary.total$Seurat.list)[1]
name2<-names(prep$primary.total$Seurat.list)[2]
if(hiorlo=="hi"){
setwd(paste(c("Hires",name1,name2),collapse="."))
c(grep(paste(res.tokeep,collapse=" _ "),list.files()),grep(paste(name1,"...",res.tokeep[1],sep=""),list.files()),grep(paste(name2,"...",res.tokeep[2],sep=""),list.files()))->indextokeep
print(paste("To removing following",list.files()[setdiff(1:length(list.files()),indextokeep)]))
file.remove(list.files()[setdiff(1:length(list.files()),indextokeep)])
}else if(hiorlo=="lo"){
setwd(paste(c("LowresCluster",name1,name2),collapse="."))
c(grep(paste(c(name1,"total",res.tokeep[1]),collapse="."),list.files()), grep(paste(c(name2,"total",res.tokeep[2]),collapse="."),list.files()), grep(paste(c(name1,res.tokeep[1]),collapse="."),list.files()))->indextokeep
print(paste("To removing following",list.files()[setdiff(1:length(list.files()),indextokeep)]))
file.remove(list.files()[setdiff(1:length(list.files()),indextokeep)])
}
}
#' Tree.build.2nd.treemaking
#'
#' Introduction\ Similar with Tree.build.1, this function after getting the clustering primary object for the second layer. this one is literally building the tree and generate plots
#' @param primaries.deeper.lst This is the adjusted secondary clustering results from Tree.build.2nd.clustering.patch
#' @param second.reso This should be consistent with what eventually used in secondary clustering, it should follow Second.primary.adjust
#' @param singledog.reso This should follow that in Tree.build.2nd.clustering.patch
#' @param downstremename ="s1.24"
#' @param upstremename ="s1"
#' @param relationtext =2.5
#' @param dorefine =T
#' @param dir The path of the whole tree generating. keep consistent with Tree.build.prepare
#' @return No return
#' @export
#' @examples
#' S4_S5ROCK.tree.2nd.primary_0.3_0.3.list<-Second.primary.adjust(S4_S5ROCK.tree.2nd.primary_0.3_0.3.list,S4_S5ROCK.tree.prep,second.reso.adjust=c(0.3,0.6))
Tree.build.2nd.treemaking<-function(primaries.deeper.lst,second.reso=c(0.3,0.3),singledog.reso=0.06,downstremename="s1.24",upstremename="s1",relationtext=2.5,dorefine=T,dir){
setwd(paste(dir,paste(upstremename,downstremename,sep="_"),sep=""))
if(!any(grepl(paste("Hires",upstremename,downstremename,sep="."),list.files()))){
dir.create(paste("Hires",upstremename,downstremename,sep="."))
}
setwd(paste("Hires",upstremename,downstremename,sep="."))
deeper.dist.matrxes.lst<-list()
deeper.relation.plots.lst<-list()
deeper.fullplots.1.lst<-list()
deeper.fullplots.2.lst<-list()
deeper.dendro.lst<-list()
deeper.heat.lst<-list()
deeper.allmatrix.lst<-list()
thereisdog<-any(grepl("singledog",names(primaries.deeper.lst)))
Iscurlineage<-F
for (i in which(!grepl("singledog",names(primaries.deeper.lst))))
{
print(paste(c(i,":", names(primaries.deeper.lst[[i]]$Seurat.list)),collapse=" "))
primaries.deeper.lst[[i]]$Seurat.list[[1]]@data.info<-primaries.deeper.lst[[i]]$Seurat.list[[1]]@data.info[,1:6] # This is to reset
primaries.deeper.lst[[i]]$Seurat.list[[2]]@data.info<-primaries.deeper.lst[[i]]$Seurat.list[[2]]@data.info[,1:6]
primaries.deeper.lst[[i]]$Seurat.list[[1]]@data.info<-cbind(primaries.deeper.lst[[i]]$Seurat.list[[1]]@data.info,Sample.2nd=paste(primaries.deeper.lst[[i]]$Seurat.list[[1]]@data.info$Sample,primaries.deeper.lst[[i]]$Seurat.list[[1]]@data.info[,paste("res.",second.reso[1],sep="")],sep="_")) # give a
primaries.deeper.lst[[i]]$Seurat.list[[2]]@data.info<-cbind(primaries.deeper.lst[[i]]$Seurat.list[[2]]@data.info,Sample.2nd=paste(primaries.deeper.lst[[i]]$Seurat.list[[2]]@data.info$Sample,primaries.deeper.lst[[i]]$Seurat.list[[2]]@data.info[,paste("res.",second.reso[2],sep="")],sep="_"))
if(thereisdog)
{
Iscurlineage<-primaries.deeper.lst$singledog.minlineage==paste(names(primaries.deeper.lst[[i]]$Seurat.list),collapse="_")
primaries.deeper.lst$singledog.singledog.seurat.ob@data.info<-primaries.deeper.lst$singledog.singledog.seurat.ob@data.info[,1:6]
primaries.deeper.lst$singledog.singledog.seurat.ob@data.info<-cbind(primaries.deeper.lst$singledog.singledog.seurat.ob@data.info,Sample.2nd=paste(primaries.deeper.lst$singledog.singledog.seurat.ob@data.info$Sample,primaries.deeper.lst$singledog.singledog.seurat.ob@data.info[,paste("res.",singledog.reso,sep="")],sep="_"))
print("Printing single dog fullplot into pdf")
if(length(unique(primaries.deeper.lst$singledog.singledog.seurat.ob@data.info[,5]))>1)
{
fullplot.current.dog<-Fullplot_v2(primaries.deeper.lst$singledog.singledog.seurat.ob,paste(unique(primaries.deeper.lst$singledog.singledog.seurat.ob@data.info$Sample),singledog.reso,".pdf",collapse=""),signiture=NULL,resolusion=paste("res.",singledog.reso,sep=""),heatmapannosize=0.5,doreturn=T)
}else
{
fullplot.current.dog<-Fullplot_v2(primaries.deeper.lst$singledog.singledog.seurat.ob,paste(unique(primaries.deeper.lst$singledog.singledog.seurat.ob@data.info$Sample),singledog.reso,".pdf",collapse=""),signiture=NULL,resolusion=paste("res.",singledog.reso,sep=""),heatmapannosize=0.5,doreturn=T,darwPCdrive=F)
}
}
# Print out the cluster
print("Start to plot clustering into pdf...")
doPCAdrive<-nrow(primaries.deeper.lst[[i]]$Seurat.list[[1]]@data.info)>500
fullplot.current.1<-Fullplot_v2(primaries.deeper.lst[[i]]$Seurat.list[[1]],paste(names(primaries.deeper.lst[[i]]$Seurat.list)[1],second.reso[1],".pdf",collapse=""),signiture=NULL,resolusion=paste("res.",second.reso[1],sep=""),heatmapannosize=0.5,doreturn=T,darwPCdrive=doPCAdrive)
fullplot.current.2<-Fullplot_v2(primaries.deeper.lst[[i]]$Seurat.list[[2]],paste(names(primaries.deeper.lst[[i]]$Seurat.list)[2],second.reso[2],".pdf",collapse=""),signiture=NULL,resolusion=paste("res.",second.reso[2],sep=""),heatmapannosize=0.5,doreturn=T,darwPCdrive=doPCAdrive)
deeper.fullplots.1.lst<-c(deeper.fullplots.1.lst,list(fullplot.current.1))
deeper.fullplots.2.lst<-c(deeper.fullplots.2.lst,list(fullplot.current.2))
deeper.dist.matrxes.current<-dist.matrix.prep(binary.primary=primaries.deeper.lst[[i]],datainfo.col1=c(paste("res.",second.reso[1],sep=""),"nUMI","Sample"),cluster.col1=paste("res.",second.reso[1],sep=""),datainfo.col2=c(paste("res.",second.reso[2],sep=""),"nUMI","Sample"),cluster.col2=paste("res.",second.reso[2],sep=""),res1=paste("res.",second.reso[1],sep=""),res2=paste("res.",second.reso[2],sep=""))
# This gave back distance matrix as well as segData that is critical for relationship drawing. and importantly, the segData determines what will be further explored. I reccomend manual inspection although integrated should be able to well tell the relationship
if(thereisdog & Iscurlineage)
{
p.dendro<-dist.matrix.prep.v3(binary.primary=primaries.deeper.lst[[i]],datainfo.col1=c(paste("res.",second.reso[1],sep=""),"nUMI","Sample"),datainfo.col2=c(paste("res.",second.reso[2],sep=""),"nUMI","Sample"),cluster.col1=paste("res.",second.reso[1],sep=""),cluster.col2=paste("res.",second.reso[2],sep=""),cluster.col.dog=paste("res.",singledog.reso,sep=""),datainfo.col.dog=c(paste("res.",singledog.reso,sep="")),dog.seurat.ob=primaries.deeper.lst$singledog.singledog.seurat.ob,neighborwithdog.seurat.ob=primaries.deeper.lst$singledog.neighbourWithsingledog.seurat.ob,withsingledog=thereisdog)
print("ok1012")
}else
{
p.dendro<-dist.matrix.prep.v3(binary.primary=primaries.deeper.lst[[i]],datainfo.col1=c(paste("res.",second.reso[1],sep=""),"nUMI","Sample"),datainfo.col2=c(paste("res.",second.reso[2],sep=""),"nUMI","Sample"),cluster.col1=paste("res.",second.reso[1],sep=""),cluster.col2=paste("res.",second.reso[2],sep=""))
}
# to creat internal relationship, or (if not < cutoff) weak relationship which could be inter or intra for unpaired clusters
unpaired.cluster.names<-as.character(setdiff(colnames(p.dendro$part1.withoutdog$matrix)[grepl(downstremename,colnames(p.dendro$part1.withoutdog$matrix))],deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main$SeekFrom.cluster.names))
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main[,1]<-factor(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main[,1],levels=colnames(p.dendro$part1.withoutdog$matrix))
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main[,2]<-factor(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main[,2],levels=colnames(p.dendro$part1.withoutdog$matrix))
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.internal<-c()
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak<-c()
if(dorefine)
{
deeper.dist.matrxes.current<-Refine.relationship(deeper.dist.matrxes.current,p.dendro[[length(p.dendro)]]$matrix)
} #dist.cutoff<-mean(p.dendro$part1.withoutdog$matrix) # Take the whole matrix, including 0
dist.cutoff<-mean(p.dendro$part1.withoutdog$matrix[p.dendro$part1.withoutdog$matrix>0]) # Calculate a distance cutoff, out of which will be considered no connection
for(j in 1:4)
{
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.internal[,j]<-as.character(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.internal[,j])
}
for (unclust in unpaired.cluster.names)
{
current.unclust.distances<-p.dendro$part1.withoutdog$matrix[unclust,][p.dendro$part1.withoutdog$matrix[unclust,]>0] # a vector of distances from this uncluster to either other cluster in this stage or the one from corresponding upstreme stage clusters(accept itself)
current.unclust.distances.intra<-current.unclust.distances[grepl(downstremename,names(current.unclust.distances))] # Only the same stage distances are left
if(min(current.unclust.distances.intra)<dist.cutoff)
{
internaltarget.current<-names(current.unclust.distances.intra)[which(current.unclust.distances.intra==min(current.unclust.distances.intra))]
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.internal<-rbind(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.internal,c(SeekToward.cluster.names=internaltarget.current,SeekFrom.cluster.names=unclust,SeekFrom.stage.names=unlist(strsplit(unclust,"_"))[1],SeekToward.stage.names=unlist(strsplit(internaltarget.current,"_"))[1]))
}else
{
weaktarget.current<-names(current.unclust.distances)[which(current.unclust.distances==min(current.unclust.distances))]
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak<-rbind(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak,c(SeekFrom.cluster.names=unclust,SeekToward.cluster.names=weaktarget.current,SeekFrom.stage.names=unlist(strsplit(unclust,"_"))[1],SeekToward.stage.names=unlist(strsplit(weaktarget.current,"_"))[1]))
}
}
if(thereisdog & Iscurlineage)
{
dist.cutoff<-mean(p.dendro$part2.withdog$matrix[p.dendro$part2.withdog$matrix>0])
for(doggy in as.character(unique(primaries.deeper.lst$singledog.singledog.seurat.ob@data.info$Sample.2nd)))
{
current.doggy.distances<-p.dendro$part2.withdog$matrix[doggy,][p.dendro$part2.withdog$matrix[doggy,]>0] # a vector of distances from this uncluster to either other cluster in this stage or the one from corresponding upstreme stage clusters(accept itself)
current.doggy.distances.intra<-current.doggy.distances[grepl(downstremename,names(current.doggy.distances))] # Only the same stage distances are left
if(grepl(upstremename,names(which(current.doggy.distances==min(current.doggy.distances))))) # If the minimum relationship is from the previous stage
{
if(min(current.doggy.distances)<dist.cutoff) # if the minimum relationship is from the the same stage stage
{
target.current<-names(current.doggy.distances)[which(current.doggy.distances==min(current.doggy.distances))]
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main$SeekFrom.cluster.names<-factor(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main$SeekFrom.cluster.names,levels=unique(c(levels(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main$SeekFrom.cluster.names),doggy)))
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main<-rbind(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main,c(SeekFrom.cluster.names=doggy,SeekToward.cluster.names=target.current,SeekFrom.stage.names=unlist(strsplit(doggy,"_"))[1],SeekToward.stage.names=unlist(strsplit(target.current,"_"))[1]))
}else
{
if(!is.null(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak)){
if(nrow(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak)>1)
{
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak$SeekFrom.cluster.names<-factor(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak$SeekFrom.cluster.names,levels=unique(c(levels(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak$SeekFrom.cluster.names),doggy)))
}
}
weaktarget.current<-names(current.doggy.distances)[which(current.doggy.distances==min(current.doggy.distances))]
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak<-rbind(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak,c(SeekFrom.cluster.names=doggy,SeekToward.cluster.names=weaktarget.current,SeekFrom.stage.names=unlist(strsplit(doggy,"_"))[1],SeekToward.stage.names=unlist(strsplit(weaktarget.current,"_"))[1]))
}
}else
{
if(min(current.doggy.distances.intra)<dist.cutoff) # if the minimum relationship is from the the same stage stage
{
internaltarget.current<-names(current.doggy.distances.intra)[which(current.doggy.distances.intra==min(current.doggy.distances.intra))]
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.internal<-rbind(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.internal,c(SeekFrom.cluster.names=doggy,SeekToward.cluster.names=internaltarget.current,SeekFrom.stage.names=unlist(strsplit(doggy,"_"))[1],SeekToward.stage.names=unlist(strsplit(internaltarget.current,"_"))[1]))
}else
{
weaktarget.current<-names(current.doggy.distances)[which(current.doggy.distances==min(current.doggy.distances))]
deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak<-rbind(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main.weak,c(SeekFrom.cluster.names=doggy,SeekToward.cluster.names=weaktarget.current,SeekFrom.stage.names=unlist(strsplit(doggy,"_"))[1],SeekToward.stage.names=unlist(strsplit(weaktarget.current,"_"))[1]))
}
}
}
}
## Here, to summarize the cell numbers
cellnumber<-c()
for (x in setdiff(names(primaries.deeper.lst),c("singledog.neighbourWithsingledog","singledog.minlineage")))
{
if(grepl("singledog",x))
{
cellnumber<-rbind(cellnumber,as.data.frame(table(primaries.deeper.lst[[x]]@data.info$Sample.2nd)))
}else
{
cellnumber<-rbind(cellnumber,as.data.frame(table(primaries.deeper.lst[[x]]$Seurat.list[[1]]@data.info$Sample.2nd)))
cellnumber<-rbind(cellnumber,as.data.frame(table(primaries.deeper.lst[[x]]$Seurat.list[[2]]@data.info$Sample.2nd)))
}
}
cellnumber<-cbind(cellnumber,stage=unlist(lapply(strsplit(as.character(cellnumber$Var1),"_"),function(x){x[[1]]})))
cellnumber<-mydplyr.percentage(cellnumber,by="stage")
deeper.relation.plots<-tree.plot.v2(withsingledog=thereisdog & Iscurlineage,dendrodata=p.dendro,deeper.dist.matrxes.current,textsize=relationtext,maturername=downstremename,youngername=upstremename,thecellnumber=cellnumber) # This will give back a relationship network (low resolution)
deeper.dist.matrxes.lst<-c(deeper.dist.matrxes.lst,list(deeper.dist.matrxes.current))
deeper.relation.plots.lst<-c(deeper.relation.plots.lst,list(deeper.relation.plots))
names(deeper.dist.matrxes.lst)[i]<-paste(c(names(primaries.deeper.lst[[i]]$Seurat.list)),collapse="__")
names(deeper.relation.plots.lst)[i]<-paste(c(names(primaries.deeper.lst[[i]]$Seurat.list)),collapse="__")
pdf(paste(paste(c(names(primaries.deeper.lst[[i]]$Seurat.list)),collapse="__"),second.reso[1],"_",second.reso[2],"dendrogam.pdf",collapse=""),height=9,width=9)
if(thereisdog & Iscurlineage)
{
print(p.dendro$part2.withdog$p.heat.dog)
print(p.dendro$part2.withdog$p.dendro.dog)
}else
{
print(p.dendro$part1.withoutdog$p.heat)
print(p.dendro$part1.withoutdog$p.dendro)
}
grid.newpage()
grid.table(as.data.frame(deeper.dist.matrxes.current$Sample1cross2.centers$df1_df2.relation))
grid.newpage()
grid.draw(tableGrob(deeper.dist.matrxes.current$Sample1cross2.centers$seg.Data.main, theme = ttheme_default(base_size = 9), vp = NULL))
print(deeper.relation.plots$p1)
print(deeper.relation.plots$p2)
dev.off()
if(thereisdog)
{
deeper.dendro.lst<-c(deeper.dendro.lst,list(p.dendro$part2.withdog$p.dendro.dog))
deeper.heat.lst<-c(deeper.heat.lst,list(p.dendro$part2.withdog$p.heat.dog))
deeper.allmatrix.lst<-c(deeper.allmatrix.lst,list(p.dendro$part2.withdog$p.heat.dog$matrix))
}else
{
deeper.dendro.lst<-c(deeper.dendro.lst,list(p.dendro$part1.withoutdog$p.dendro))
deeper.heat.lst<-c(deeper.heat.lst,list(p.dendro$part1.withoutdog$p.heat))
deeper.allmatrix.lst<-c(deeper.allmatrix.lst,list(p.dendro$part1.withoutdog$p.heat$matrix))
}
###
#pdf(paste(c(name1,name2,"tree.pdf"),collapse="_"),height=14,width=14)
#grid.arrange(total.fullplot.1[[1]],total.fullplot.2[[1]],dendro.heap.p[[1]],dendro.heap.p[[2]],total.relation.plots$p1,total.relation.plots$p2)
#for (i in 1:length(primaries.deeper.lst))
#{
#grid.arrange(deeper.fullplots.1.lst[[i]][[1]],deeper.fullplots.2.lst[[i]][[1]],deeper.dendro.lst[[i]],deeper.heat.lst[[i]],deeper.relation.plots.lst[[i]][[1]],deeper.relation.plots.lst[[i]][[2]])
#}
#dev.off()
## For ones with S5 involved. DO NOT DO IT
#tm<-deeper.dist.matrxes.lst[[1]]
#deeper.dist.matrxes.lst[[1]]<-deeper.dist.matrxes.lst[[2]]
#deeper.dist.matrxes.lst[[2]]<-tm
#tm<-deeper.relation.plots.lst[[1]]
#deeper.relation.plots.lst[[1]]<-deeper.relation.plots.lst[[2]]
#deeper.relation.plots.lst[[2]]<-tm
}
if(thereisdog)
{
return(list(deeper.dist.matrxes.lst=deeper.dist.matrxes.lst,deeper.relation.plots.lst=deeper.relation.plots.lst,deeper.fullplots.1.lst=deeper.fullplots.1.lst,deeper.fullplots.2.lst=deeper.fullplots.2.lst,deeper.dendro.lst=deeper.dendro.lst,deeper.heat.lst=deeper.heat.lst,fullplot.current.dog=fullplot.current.dog))
}else
{
return(list(deeper.dist.matrxes.lst=deeper.dist.matrxes.lst,deeper.relation.plots.lst=deeper.relation.plots.lst,deeper.fullplots.1.lst=deeper.fullplots.1.lst,
deeper.fullplots.2.lst=deeper.fullplots.2.lst,deeper.dendro.lst=deeper.dendro.lst,deeper.heat.lst=deeper.heat.lst))
}
}
#' Tree.build.2nd.treemaking
#'
#' Introduction\ ## To dertermine the corrdinate of each cluster
#' @param nodes.data the all.nodes
#' @param edges.data the all.edges
#' @param manu.xaxis =nametransform
#' @param by default is "V2", the column used for merging
#' @return
#' @export
#' @examples
#' hESCdiff.beta.allcell.mygraph<-makelayout(nodes.data=all.nodes,edges.data=all.edges,manu.xaxis=nametransform.rock,byy="finalname")
makelayout<-function(nodes.data,edges.data,manu.xaxis=nametransform,byy="V2") # V2 is based on new name system. V1 is based on original name system
{
nodes.data<-cbind(nodes.data,x=0,y=0)
### First loop to order the dataframe
for (i in nrow(edges.data):1)
{
if(length(which(as.character(edges.data[,1])==as.character(edges.data[i,2])))>0)
{
change.idx<-min(which(as.character(edges.data[,1])==as.character(edges.data[i,2])))
if(change.idx<i)
{
tmp<-edges.data[i,]
edges.data[i,]<-edges.data[change.idx,]
edges.data[change.idx,]<-tmp
}
}
}
### Second loop to determine the y position of all the nodes
for (i in 1:nrow(edges.data))
{
print (i)
start.pos<-which(nodes.data$nodes==as.character(edges.data[i,1])) %>% nodes.data[.,"y"]
print(paste(as.character(edges.data[i,1])," start y:",start.pos))
end.pos<-start.pos-edges.data[i,"distance"]
nodes.data[which(nodes.data$nodes==as.character(edges.data[i,2])),"y"] <-end.pos
}
merge(nodes.data,manu.xaxis,by.x="nodes",by.y=byy) %>% .[,c(1,2,6,4)] ->nodes.data.x.justed
colnames(nodes.data.x.justed)[3]<-"x"
merge(edges.data,nodes.data.x.justed[,c(1,3,4)],by.x="SeekToward.cluster.names",by.y="nodes") %>% merge(.,nodes.data.x.justed[,c(1,3,4)],by.x="SeekFrom.cluster.names",by.y="nodes") -> segment.data
treeplot<-ggplot(nodes.data.x.justed)+aes(x,y)+geom_point()+geom_text(data=nodes.data.x.justed,aes(label=nodes),vjust=-0.1)+geom_segment(data=segment.data,aes(x=x.x,y=y.x,xend=x.y,yend=y.y),arrow=arrow(length=unit(0.1,"inch")))
return(list(treeplot=treeplot,nodes.data.x.justed=nodes.data.x.justed,edges.data=edges.data,segment.data=segment.data))
}
#' Get.pseudotime.byStage.pairing
#'
#' Introduction\
#' @param pcawithinfo pca.withinfo
#' @param edges.data Take all.edges or from mygraph
#' @param hESCdiff.beta.allcell.seurate all cluster seurat object
#' @param PCnumber default is 10
#' @return
#' @export
#' @examples
#'
Get.pseudotime.byStage.pairing<-function(pcawithinfo=hESCdiff.beta.allcell.pca.withinfo,edges.data=all.edges,object=hESCdiff.beta.allcell.seurate,PCnumber=10,nametransformfile)
{
bin.data.normed_UMI.list<-list()
for (i in 1:nrow(edges.data))
{
print(edges.data[i,4])
object@data.info$Sample.trans<-plyr::mapvalues(object@data.info$Sample,from=as.character(nametransformfile$originalname),to=as.character(nametransformfile$finalname))
print(table(subset(object@data.info,Sample.trans %in% as.character(unlist(edges.data[i,c(1,2)])))$Sample.trans)) # To print the cell number in current pair
binnumber=as.integer(10*edges.data[i,5]/min(edges.data$distance))
cur.pair<-c(as.character(edges.data[i,1]),as.character(edges.data[i,2]))
cur.cellnames<-object@data.info %>% cbind(.,cell=row.names(.)) %>% subset(.,Sample.trans %in% cur.pair) %>% .$cell
if(length(cur.cellnames)/binnumber<50)
{
binnumber<-as.integer(length(cur.cellnames)/50)
}
cur.rawdata<-object@raw.data %>% .[,cur.cellnames] %>% as.matrix
trj.edge.cutoff<-49
PCA.curpair<-subset(pcawithinfo,Sample.trans %in% cur.pair)
PCA.curpair$Sample.trans<-factor(PCA.curpair$Sample.trans,levels=cur.pair)
traj.info<-traj.make(PCA.curpair,PCnumber,colname="Sample.trans")
traj.info<-traj.info[complete.cases(traj.info),]
PCA.curpair.trajinfo<-Tomerge_v2(PCA.curpair,traj.info)
PCA.curpair.trajinfo<-PCA.curpair.trajinfo[complete.cases(PCA.curpair.trajinfo),]
PCA.curpair.trajinfo$pos<-as.numeric(as.character(PCA.curpair.trajinfo$pos))
PCA.curpair.trajinfo<-cbind(PCA.curpair.trajinfo,cell=row.names(PCA.curpair.trajinfo))
traj.ob.rawwithinfo<-t(cur.rawdata) %>% as.data.frame %>% cbind(.,cell=row.names(.)) %>% merge(.,PCA.curpair.trajinfo[,c("nUMI","pos","cell")],by="cell")
traj.ob.rawwithinfo<-traj.ob.rawwithinfo[complete.cases(traj.ob.rawwithinfo),]
raw.bin<-givebintag.2(traj.ob.rawwithinfo,ordername="pos",bin=binnumber,genenumbercut=trj.edge.cutoff)
print(table(raw.bin$tag))
bin.data<-as.data.frame(mydplyr(raw.bin[,-which(colnames(raw.bin) %in% c("nUMI","cell"))])) # bin.data is take the mean directly from raw UMI reads number
bin.data.normed_UMI<-cbind(t(apply(bin.data,1,function(x){(x[2:(length(x)-2)])/sum(x[2:(length(x)-2)])*1000})),bin.data[,(ncol(bin.data)-2):ncol(bin.data)])
bin.data.normed_UMI<-bin.data.normed_UMI[order(bin.data.normed_UMI$pos),]
print(data.frame(bin.data.normed_UMI$pos,bin.data.normed_UMI$wdth))
bin.data.normed_UMI.list<-c(bin.data.normed_UMI.list,list(bin.data.normed_UMI))
}
names(bin.data.normed_UMI.list)<-edges.data$identifier
return(bin.data.normed_UMI.list)
}
#' traj.make
#'
#' Introduction\
#' @param pcdata
#' @param PCtouse
#' @param colname
#' @return
#' @export
#' @examples
#'
traj.make<-function(pcdata,PCtouse,colname)
{
calculate.pos<-function(x,pre.cen=previous.center,next.cen=next.center,pre.trj=pre.traj.axis,cur.trj=cur.traj.axis,pre.mag=pre.traj.axis.mag,cur.mag=cur.traj.axis.mag) # this function is to calculate the position of an individual cell, either project to previous trajectory axis or current axis depending on the relative distance
{
dist.pre<-sqrt(sum((x-previous.center)^2)) # Calculate the distance from a individual cell in current cluster to the center of previous center
dist.next<-sqrt(sum((x-next.center)^2)) # Calculate the distance from a individual cell in current cluster to the center of next center
pos.on.preaxis<-((as.numeric(x)-previous.center) %*% pre.trj)/pre.mag
pos.on.nextaxis<-((as.numeric(x)-current.center) %*% cur.trj)/cur.mag
if (pos.on.preaxis<pre.mag & pos.on.nextaxis<0)
{
return(list(pos.on.preaxis,"previous","clean"))
}else if(pos.on.preaxis>pre.mag & pos.on.nextaxis>0)
{
return(list(pos.on.nextaxis,"current","clean"))
}else if (pos.on.preaxis<pre.mag & pos.on.nextaxis>0)
{
if (dist.pre<dist.next)
{
return(list(pos.on.preaxis,"previous","bydistance"))
}else
{
return(list(pos.on.nextaxis,"current","bydistance"))
}
}else
{
return(list(NA,NA,NA))
}
}
for (i in 1:length(levels(pcdata[,colname])))
{
current.pcs<-subset(pcdata,get(colname)==levels(pcdata[,colname])[i])
current.center<-colMeans(current.pcs[,1:PCtouse])
if(i==1)
{
next.pcs<-subset(pcdata,get(colname)==levels(pcdata[,colname])[i+1])
next.center<-colMeans(next.pcs[,1:PCtouse])
traj.axis<-next.center-current.center
traj.axis.mag<-sqrt(sum(traj.axis^2))
first.positions<-apply((current.pcs[,1:PCtouse]),1,function(x){((as.numeric(x)-current.center) %*% traj.axis)/traj.axis.mag})
starting.pos<-0
pos.info.all<-data.frame(pos=first.positions,where="first",how="first")
print(paste("i==",i,"Starting",collapse=""))
}else if (i==length(levels(pcdata[,colname])))
{
pre.pcs<-subset(pcdata,get(colname)==levels(pcdata[,colname])[i-1])
pre.center<-colMeans(pre.pcs[,1:PCtouse])
traj.axis<-current.center-pre.center
traj.axis.mag<-sqrt(sum(traj.axis^2))
last.positions<-apply((current.pcs[,1:PCtouse]),1,function(x){((as.numeric(x)-pre.center) %*% traj.axis)/traj.axis.mag})
last.positions<-data.frame(pos=last.positions,where="last",how="last")
last.positions[,"pos"]<-last.positions[,"pos"]+starting.pos
pos.info.all<-rbind(pos.info.all,last.positions)
print(paste("i==",i,"---all finished",collapse=""))
}else
{
previous.pcs<-subset(pcdata,get(colname)==levels(pcdata[,colname])[i-1])
previous.center<-colMeans(previous.pcs[,1:PCtouse])
next.pcs<-subset(pcdata,get(colname)==levels(pcdata[,colname])[i+1])
next.center<-colMeans(next.pcs[,1:PCtouse])
pre.traj.axis<-current.center-previous.center
pre.traj.axis.mag<-sqrt(sum(pre.traj.axis^2))
cur.traj.axis<-next.center-current.center
cur.traj.axis.mag<-sqrt(sum(traj.axis^2))
previous.staring.pos<-starting.pos
starting.pos<-starting.pos+pre.traj.axis.mag
cur.bi.positions<-apply(current.pcs[,1:PCtouse],1,calculate.pos)
cur.pos.info<-data.frame(pos=t(as.data.frame(lapply(cur.bi.positions,function(x){x[[1]]}))),where=t(as.data.frame(lapply(cur.bi.positions,function(x){x[[2]]}))),how=t(as.data.frame(lapply(cur.bi.positions,function(x){x[[3]]}))))
cur.pos.info[which(cur.pos.info$where=="previous"),"pos"]<-cur.pos.info[which(cur.pos.info$where=="previous"),"pos"]+previous.staring.pos
cur.pos.info[which(cur.pos.info$where=="current"),"pos"]<-cur.pos.info[which(cur.pos.info$where=="current"),"pos"]+starting.pos
pos.info.all<-rbind(pos.info.all,cur.pos.info)
print(paste("i==",i,"",collapse=""))
}
}
return(pos.info.all)
}
#' givebintag.2
#'
#' Introduction\
#' @param df
#' @param ordername
#' @param bin
#' @param enenumbercut
#' @return
#' @export
#' @examples
#'
givebintag.2<-function(df,ordername="pos",bin=20,genenumbercut=50){
bin.initial<-bin
v<-sort(df[,ordername])
current.max<-max(v)
current.min<-min(v)
current.stepwidth<-(current.max-current.min)/bin
leftsideset<-F
rightsideset<-F
leftsides<-c()
rightsides<-c()
n=0
while(!all(leftsideset & rightsideset))
{
print(n)
if(n>=0.6*bin.initial)
{
break
}
current.stepwidth<-(current.max-current.min)/bin
if(length(which(v>=current.min & v<(current.min+current.stepwidth)))<genenumbercut)
{
firtpoint<-v[which(v==current.min)+genenumbercut-1]
current.min<-firtpoint
leftsides<-c(leftsides,firtpoint)
bin=bin-1
n=n+1
}else
{
leftsideset=T
}
if(length(which(v<=current.max & v>(current.max-current.stepwidth)))<genenumbercut)
{
lastpoint<-v[which(v==current.max)-genenumbercut+1]
current.max<-lastpoint
rightsides<-c(rightsides,lastpoint)
bin=bin-1
n=n+1
}else
{
rightsideset=T
}
}
tags<-c()
current.stepwidth<-(current.max-current.min)/bin
for (i in 1:(bin-1))
{
leftsides<-c(leftsides,leftsides[length(leftsides)]+current.stepwidth)
}
if(length(leftsides)==0)
{
leftsides<-min(v)+current.stepwidth
for (i in 1:(bin-2))
{
leftsides<-c(leftsides,leftsides[length(leftsides)]+current.stepwidth)
}
}
all.points<-unique(c(leftsides,rev(rightsides)))
tag<-c()
point.prev<-min(v)
point.last<-max(v)
all.points<-c(all.points,point.last)
for (i in 1:length(all.points))
{
point=all.points[i]
tag[which(df[,ordername]>=point.prev & df[,ordername]<point)]<-i
point.prev<-point
}
tag[which(df[,ordername]==point)]<-i
df.tag<-cbind(df,tag=tag)
df.tag<-df.tag[order(df.tag$tag),]
wdth<-c()
n=1
for (i in 1:bin.initial)
{
n=n+1
print(n)
cur.pos<-subset(df.tag,tag==i)$pos
wdth<-c(wdth,max(cur.pos)-min(cur.pos))
}
df.tag<-merge(df.tag,data.frame(tag=1:bin.initial,wdth=wdth),by="tag")
return(df.tag)
}
#' mydplyr
#'
#' Introduction\
#' @param df
#' @param by
#' @param thefunction
#' @return
#' @export
#' @examples
#'
mydplyr<-function(df,by="tag",thefunction=mean)
{
newdf<-c()
for (i in as.character(unique(df[,by])))
{
dftm<-df[which(as.character(df[,by])==i),]
newdf<-rbind(newdf,apply(dftm,2,thefunction))
}
return(newdf)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.