# inst/doc/CancerSubtypes-vignette.R

## ----message = FALSE,warning=FALSE--------------------------------------------
### Prepare a TCGA gene expression dataset for analysis.
library(CancerSubtypes)
library("RTCGA.mRNA")
# NOTE: the former `rm(list = ls())` call was dropped on purpose. It wiped
# every object in the calling environment whenever this script was sourced
# into an existing session, and nothing below relies on an empty workspace.
data(BRCA.mRNA)
# Drop the first column of BRCA.mRNA, convert the rest to a matrix and
# transpose it, so each column of `mRNA` corresponds to one row of BRCA.mRNA.
mRNA <- t(as.matrix(BRCA.mRNA[, -1]))
# The dropped first column supplies the column labels.
colnames(mRNA) <- BRCA.mRNA[, 1]

### Inspect the distribution of the mean, variance and median absolute
### deviation (MAD) of the dataset. This helps users understand the
### distribution characteristics of the data, e.g. to evaluate whether the
### dataset fits a normal distribution or not.
data.checkDistribution(mRNA)

## ----eval = FALSE-------------------------------------------------------------
#  index=which(is.na(mRNA))
#  res1=data.imputation(mRNA,fun="median")
#  res2=data.imputation(mRNA,fun="mean")
#  res3=data.imputation(mRNA,fun="microarray")

## -----------------------------------------------------------------------------
# Normalize the expression matrix twice, once per `type` setting;
# `log2 = FALSE` is passed in both calls.
result1 <- data.normalization(mRNA, type = "feature_Median", log2 = FALSE)
result2 <- data.normalization(mRNA, type = "feature_zscore", log2 = FALSE)

## ----eval = FALSE-------------------------------------------------------------
#  ###The 1000 features with the highest variance will be selected.
#  data1=FSbyVar(mRNA, cut.type="topk",value=1000)
#  ###The features with (variance>0.5) are selected.
#  data2=FSbyVar(mRNA, cut.type="cutoff",value=0.5)

## ----eval = FALSE-------------------------------------------------------------
#  data1=FSbyMAD(mRNA, cut.type="topk",value=1000)
#  data2=FSbyMAD(mRNA, cut.type="cutoff",value=0.5)

## ----eval = FALSE-------------------------------------------------------------
#  mRNA1=data.imputation(mRNA,fun="microarray")
#  data1=FSbyPCA(mRNA1, PC_percent=0.9,scale = TRUE)

## -----------------------------------------------------------------------------
# Load the three example objects used by the Cox-based feature selection.
data(GeneExp, time, status)
# Run FSbyCox on the expression data with a cutoff of 0.05.
data1 <- FSbyCox(GeneExp, time, status, cutoff = 0.05)

## ----eval = FALSE-------------------------------------------------------------
#  ### The input dataset is a single gene expression matrix.
#  data(GeneExp)
#  result=ExecuteCC(clusterNum=3,d=GeneExp,maxK=10,clusterAlg="hc",distance="pearson",title="GBM")
#  
#  ### The input dataset is multi-genomics data as a list
#  data(GeneExp)
#  data(miRNAExp)
#  GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp)
#  result=ExecuteCC(clusterNum=3,d=GBM,maxK=10,clusterAlg="hc",distance="pearson",title="GBM")

## ----eval = FALSE-------------------------------------------------------------
#  ### The input dataset is a single gene expression matrix.
#  data(GeneExp)
#  result=ExecuteCNMF(GeneExp,clusterNum=3,nrun=30)
#  
#  ### The input dataset is multi-genomics data as a list
#  data(GeneExp)
#  data(miRNAExp)
#  GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp)
#  result=ExecuteCNMF(GBM,clusterNum=3,nrun=30)

## ----eval = FALSE-------------------------------------------------------------
#  data(GeneExp)
#  data(miRNAExp)
#  data1=FSbyVar(GeneExp, cut.type="topk",value=1000)
#  data2=FSbyVar(miRNAExp, cut.type="topk",value=300)
#  GBM=list(GeneExp=data1,miRNAExp=data2)
#  result=ExecuteiCluster(datasets=GBM, k=3, lambda=list(0.44,0.33,0.28))

## ----eval = FALSE-------------------------------------------------------------
#  data(GeneExp)
#  data(miRNAExp)
#  GBM=list(GeneExp=GeneExp,miRNAExp=miRNAExp)
#  result=ExecuteSNF(GBM, clusterNum=3, K=20, alpha=0.5, t=20)

## ----eval = FALSE-------------------------------------------------------------
#  data(GeneExp)
#  data(miRNAExp)
#  data(time)
#  data(status)
#  data1=FSbyCox(GeneExp,time,status,cutoff=0.05)
#  data2=FSbyCox(miRNAExp,time,status,cutoff=0.05)
#  GBM=list(GeneExp=data1,miRNAExp=data2)
#  result=ExecuteSNF.CC(GBM, clusterNum=3, K=20, alpha=0.5, t=20,maxK = 10, pItem = 0.8,reps=500,
#                       title = "GBM", plot = "png", finalLinkage ="average")

## ----eval = FALSE-------------------------------------------------------------
#  data(GeneExp)
#  data(miRNAExp)
#  data(Ranking)
#  ####Retrieve the feature ranking for genes
#  gene_Name=rownames(GeneExp)
#  index1=match(gene_Name,Ranking$mRNA_TF_miRNA.v21_SYMBOL)
#  gene_ranking=data.frame(gene_Name,Ranking[index1,],stringsAsFactors=FALSE)
#  index2=which(is.na(gene_ranking$ranking_default))
#  gene_ranking$ranking_default[index2]=min(gene_ranking$ranking_default,na.rm =TRUE)
#  
#  ####Retrieve the feature ranking for miRNAs
#  miRNA_ID=rownames(miRNAExp)
#  index3=match(miRNA_ID,Ranking$mRNA_TF_miRNA_ID)
#  miRNA_ranking=data.frame(miRNA_ID,Ranking[index3,],stringsAsFactors=FALSE)
#  index4=which(is.na(miRNA_ranking$ranking_default))
#  miRNA_ranking$ranking_default[index4]=min(miRNA_ranking$ranking_default,na.rm =TRUE)
#  ###Clustering
#  ranking1=list(gene_ranking$ranking_default ,miRNA_ranking$ranking_default)
#  GBM=list(GeneExp,miRNAExp)
#  result=ExecuteWSNF(datasets=GBM, feature_ranking=ranking1, beta = 0.8, clusterNum=3,
#                     K = 20,alpha = 0.5, t = 20, plot = TRUE)

## -----------------------------------------------------------------------------
# Load both example datasets and bundle them into a named list.
data(GeneExp, miRNAExp)
GBM <- list(GeneExp = GeneExp, miRNAExp = miRNAExp)
# Cluster into 3 groups with ExecuteSNF; its own plotting is switched off.
result <- ExecuteSNF(
  GBM,
  clusterNum = 3, K = 20, alpha = 0.5, t = 20,
  plot = FALSE
)

### Silhouette computed from the sample similarity matrix
sil <- silhouette_SimilarityMatrix(result$group, result$distanceMatrix)
plot(sil)

## -----------------------------------------------------------------------------
## For comparison only: silhouette() is handed the same group labels and
## matrix as the silhouette_SimilarityMatrix() call in the previous chunk.
## The vignette author flags the resulting plot as a wrong result.
sil1 <- silhouette(result$group, result$distanceMatrix)
plot(sil1)  ## wrong result

## -----------------------------------------------------------------------------
# Load the example datasets, then keep only the features selected by FSbyCox
# (cutoff 0.05) from each of the two expression datasets.
data(GeneExp, miRNAExp, time, status)
data1 <- FSbyCox(GeneExp, time, status, cutoff = 0.05)
data2 <- FSbyCox(miRNAExp, time, status, cutoff = 0.05)
GBM <- list(GeneExp = data1, miRNAExp = data2)

#### 1. ExecuteSNF
result1 <- ExecuteSNF(
  GBM,
  clusterNum = 3, K = 20, alpha = 0.5, t = 20,
  plot = FALSE
)
group1 <- result1$group
distanceMatrix1 <- result1$distanceMatrix
# `similarity = TRUE` is passed along with the matrix returned by ExecuteSNF.
p_value <- survAnalysis(
  mainTitle = "GBM1", time, status, group1,
  distanceMatrix1, similarity = TRUE
)

## ----results="hide",message=FALSE---------------------------------------------
#### 2. ExecuteSNF.CC
# Same `GBM` list and SNF settings as the ExecuteSNF run, plus the
# consensus-clustering arguments (maxK, pItem, reps, finalLinkage).
result2 <- ExecuteSNF.CC(
  GBM,
  clusterNum = 3, K = 20, alpha = 0.5, t = 20,
  maxK = 5, pItem = 0.8, reps = 500,
  title = "GBM2", plot = "png",
  finalLinkage = "average"
)

## -----------------------------------------------------------------------------
# Pull the group labels and the matrix out of the ExecuteSNF.CC result and
# run the survival analysis on them (overwrites the earlier `p_value`).
group2 <- result2$group
distanceMatrix2 <- result2$distanceMatrix
p_value <- survAnalysis(
  mainTitle = "GBM2", time, status, group2,
  distanceMatrix2, similarity = TRUE
)


## ----warning=FALSE------------------------------------------------------------
# Reload the example datasets and cluster the unfiltered data with ExecuteSNF.
data(GeneExp, miRNAExp, time, status)
GBM <- list(GeneExp = GeneExp, miRNAExp = miRNAExp)
result <- ExecuteSNF(
  GBM,
  clusterNum = 3, K = 20, alpha = 0.5, t = 20,
  plot = FALSE
)
group <- result$group
# Run sigclustTest on the miRNA data with the SNF group labels, then print
# the returned object.
sigclust <- sigclustTest(miRNAExp, group, nsim = 1000, nrep = 1, icovest = 1)
sigclust

## ----results="hide",message=FALSE,fig.show="hide",warning=FALSE---------------
library("RTCGA.mRNA")
#require(TCGAbiolinks)
# NOTE: the former `rm(list = ls())` call was dropped on purpose. It wiped
# every object in the calling environment when this script was sourced, and
# every object used below is (re)assigned in this chunk anyway.
data(BRCA.mRNA)
# Drop the first column of BRCA.mRNA, transpose the rest, and use the dropped
# column as the column labels of the resulting matrix.
mRNA <- t(as.matrix(BRCA.mRNA[, -1]))
colnames(mRNA) <- BRCA.mRNA[, 1]
mRNA1 <- data.imputation(mRNA, fun = "microarray")
mRNA1 <- FSbyMAD(mRNA1, cut.type = "topk", value = 5000)

### Split the normal and tumor samples
# Characters 14-15 of each column name are read as a numeric code; columns
# whose code is greater than 9 go to the normal set.
index <- which(as.numeric(substr(colnames(mRNA1), 14, 15)) > 9)
mRNA_normal <- mRNA1[, index, drop = FALSE]
# Take the complement with positive indices: `mRNA1[, -index]` would return
# ZERO columns when `index` is empty. `drop = FALSE` keeps a matrix even if a
# single column remains.
mRNA_tumor <- mRNA1[, setdiff(seq_len(ncol(mRNA1)), index), drop = FALSE]

### Remove the duplicate samples
# Columns of the tumor set whose code is greater than 1 are dropped, again via
# a positive complement so that an empty `index1` keeps every column.
index1 <- which(as.numeric(substr(colnames(mRNA_tumor), 14, 15)) > 1)
mRNA_tumor <- mRNA_tumor[
  , setdiff(seq_len(ncol(mRNA_tumor)), index1),
  drop = FALSE
]

##### Identify cancer subtypes
result <- ExecuteCC(
  clusterNum = 5, d = mRNA_tumor, maxK = 5,
  clusterAlg = "hc", distance = "pearson", title = "BRCA"
)
group <- result$group
# Differential expression between the tumor groups and the normal samples.
res <- DiffExp.limma(
  Tumor_Data = mRNA_tumor, Normal_Data = mRNA_normal,
  group = group, topk = NULL, RNAseq = FALSE
)

## -----------------------------------------------------------------------------
## Differentially expressed genes in subtype 1
head(res[[1L]])

## Differentially expressed genes in subtype 2
head(res[[2L]])

# Try the CancerSubtypes package in your browser

# Any scripts or data that you put into this service are public.

# CancerSubtypes documentation built on Nov. 8, 2020, 8:24 p.m.