R/DEGsTCGA.R

Defines functions DEGsTCGA

Documented in DEGsTCGA

#' Calculate differentially expressed genes between groups
#'
#' Use the DEGsTCGA() function to get the DEGs based on DESeq2.
#'
#' Loads `output/step03.RData` (which must provide the count table
#' `x_reduce`) from `dataPath`, derives a normal/cancer grouping from
#' characters 14-15 of the column names, runs DESeq2 on the filtered counts
#' and writes `output/step05_TCGA_analyis_results.csv` and
#' `output/step05.RData`. The caller's working directory is restored on exit.
#'
#' @import DESeq2
#' @param dataPath the directory holding the downloaded data; it must contain
#'   an `output` sub-directory with `step03.RData`.
#' @param minCount count threshold for the low-expression filter: a gene is
#'   kept only when its count exceeds `minCount` in more than half of the
#'   samples. Defaults to 100.
#' @return The DESeq2 results for the contrast cancer vs. normal, invisibly.
#' @export
DEGsTCGA <- function(dataPath, minCount = 100) {
  # Work inside dataPath so the relative paths below resolve, but put the
  # caller's working directory back when the function exits (even on error).
  old_wd <- setwd(dataPath)
  on.exit(setwd(old_wd), add = TRUE)

  message("正在进行差异表达分析...", appendLF = TRUE)
  # Brings `x_reduce` into this function's environment.
  load(file = ".//output//step03.RData")

  # Drop columns whose name repeats an earlier column name; drop = FALSE keeps
  # the two-dimensional shape even if a single column remains.
  x_reduce <- x_reduce[, !duplicated(colnames(x_reduce)), drop = FALSE]

  # Strip the version suffix (the dot and the digits after it) from gene ids.
  rownames(x_reduce) <- gsub("\\.(\\.?\\d*)", "", rownames(x_reduce))

  # Derive the group from characters 14-15 of each column name:
  # codes below 10 are labelled 'cancer', everything else 'normal'.
  group_name <- substr(colnames(x_reduce), 14, 15)
  group <- ifelse(as.numeric(group_name) < 10, 1, 0)
  group <- factor(group, levels = c(0, 1), labels = c("normal", "cancer"))

  # Sample annotation table expected by DESeq2 (row names = sample names).
  cData <- data.frame(group = group)
  rownames(cData) <- colnames(x_reduce)

  # Keep only genes whose count exceeds the threshold in more than half of
  # the samples.
  keep <- rowSums(x_reduce > minCount) > ncol(x_reduce) / 2
  x_d <- x_reduce[keep, , drop = FALSE]

  d.des <- DESeq2::DESeqDataSetFromMatrix(x_d, colData = cData, design = ~group)
  res <- DESeq2::DESeq(d.des)

  # contrast = c(<factor name>, <experimental group>, <control group>)
  result <- DESeq2::results(res, contrast = c("group", "cancer", "normal"))

  write.csv(result, ".//output//step05_TCGA_analyis_results.csv")
  save(x_reduce, result, x_d, cData, group, file = ".//output//step05.RData")
  message("Completed!")
  invisible(result)
}
dming1024/TCGApackages0226 documentation built on April 9, 2021, 7:48 a.m.