#' Combine the interesting genes with the corresponed clinical traits to analysis
#'
#' use the tcgaEnrichment() fuction to get the enrichment results of DEGs.
#' @import survival progress survminer
#' @param dataPath the direction of download Data
#' @param clinical_data the downloaded clinical.tsv Data
#' @param transformed default is Ture,log2 transformed the gene expression,else not transform applied
#' @param groupby the quantitle threshold to determine the gene is relatively high expression or low expression in samples,and the range is 0.1~0.9.
#' @export
tcgaClinic<-function(dataPath,clinical_data,transfromed=T,groupby=0.5){
message("读入临床数据,进行初步生存分析...",appendLF =T)
setwd(dataPath)
load(file=".//output//step05.RData")
#带有symbold的TCGA差异表达结果:TCGA_result_genesymbol
load(file=".//output//step07.RData")
clinical_traits_cancer<-read.table(clinical_data,
stringsAsFactors = F,sep = "\t",header = T,quote ="")
#去重
clinical_traits_cancer<-clinical_traits_cancer[!duplicated(clinical_traits_cancer$case_id),]
clinical_traits_cancer_selected<-clinical_traits_cancer[,c("days_to_death",
"tumor_stage",
"days_to_last_follow_up",
"gender",
colnames(clinical_traits_cancer)[grepl("submitter",
colnames(clinical_traits_cancer))])]
#followup+death
clinical_traits_cancer_selected$days_to_death=as.numeric(as.character(clinical_traits_cancer_selected$days_to_death))
clinical_traits_cancer_selected$days_to_last_follow_up=as.numeric(as.character(clinical_traits_cancer_selected$days_to_last_follow_up))
day0=ifelse(is.na(clinical_traits_cancer_selected$days_to_last_follow_up),0,clinical_traits_cancer_selected$days_to_last_follow_up)
day1=ifelse(is.na(clinical_traits_cancer_selected$days_to_death),0,clinical_traits_cancer_selected$days_to_death)
clinical_traits_cancer_selected$final_days=as.numeric(day0)+as.numeric(day1)
clinical_traits_cancer_selected$survive_group<-ifelse(is.na(clinical_traits_cancer_selected$days_to_death),0,1)
#表达矩阵在x_d中
#差异分析结果在result中
#TCGA_result 为过滤之后的结果
x_td=as.data.frame(x_d)
#d=TCGA_result_genesymbol
#获取表达矩阵
interesting_genes_matrix=x_td[match(TCGA_result_genesymbol$ensembl_gene_id,rownames(x_td)),]
#选取肿瘤样本
interesting_genes_matrix_df=interesting_genes_matrix[,group=="cancer"]
#获取注释结果,同时对没有注释结果的ID进行预处理
interesting_genes_df=data.frame(ensembl_gene_id=TCGA_result_genesymbol$ensembl_gene_id)
ipo=data.frame(ensembl_gene_id=TCGA_result_genesymbol$ensembl_gene_id,
hgnc_symbol=TCGA_result_genesymbol$hgnc_symbol)
c=merge(interesting_genes_df,ipo,all.x=T)
c$ensembl_gene_id=as.character(c$ensembl_gene_id)
c$hgnc_symbol=as.character(c$hgnc_symbol)
c$hgnc_symbol=ifelse(is.na(c$hgnc_symbol),c$ensembl_gene_id,
ifelse(c$hgnc_symbol=="",c$ensembl_gene_id,c$hgnc_symbol)
)
rownames(interesting_genes_matrix_df)=c$hgnc_symbol
rm(interesting_genes_df)
#替换表达矩阵中的列名,需要和clinic_traits中的submitter_id保持一致
colnames(interesting_genes_matrix_df)=substr(colnames(interesting_genes_matrix_df),1,12)
#表达矩阵转置
interesting_genes_df=as.data.frame(t(interesting_genes_matrix_df))
clinical_traits_cancer_selected[,5]=gsub("\\-","\\.",clinical_traits_cancer_selected[,5])
rownames(interesting_genes_df)=gsub("\\-","\\.",rownames(interesting_genes_df))
#有“-”或“ ”的出现,不能进行下游的生存分析
colnames(interesting_genes_df)=gsub("[[:punct:]]","\\_",colnames(interesting_genes_df))
#选出对应样本的病理信息
clinical_traits_cancer_selected_x<-clinical_traits_cancer_selected[match(rownames(interesting_genes_df),
clinical_traits_cancer_selected[,5]),]
#将两组数据结果:表达矩阵+病理信息
#先转换
#1. log2转换与否
#2. median分组或者分位数分组
if(transfromed){
clinical_traits_cancer_selected_group<-apply(interesting_genes_df,2,
function(x){
ifelse(log2(x)<quantile(log2(x),groupby),"low expression","high expression")
}
)
message(paste0("采用Log对基因表达值进行转换,使用",round(groupby*100,2),"%分位数,对基因表达高低进行分组"),appendLF=T)
}else{
clinical_traits_cancer_selected_group<-apply(interesting_genes_df,2,
function(x){
ifelse(x<quantile(x,groupby),"low expression","high expression")
}
)
message(paste0("未对基因表达值进行转换,使用",round(groupby*100,2),"%分位数,对基因表达高低进行分组"),appendLF=T)
}
clinical_traits_cancer_selected_group_df=as.data.frame(clinical_traits_cancer_selected_group)
clinical_traits_cancer_selected_merge<-cbind(clinical_traits_cancer_selected_x,
clinical_traits_cancer_selected_group_df)
geneList<<-colnames(interesting_genes_df)
#y <- survival::Surv(as.numeric(clinical_traits_cancer_selected_merge$final_days),
# clinical_traits_cancer_selected_merge$survive_group)
Cairo::CairoPDF(file = ".//output//step09_survive_analysis.pdf",
width = 8,height = 5,
family="Arial")
#pvalues=rep(0,length(geneList))
#p_genes=rep(0,length(geneList))
pvalues<<-rep(0,length(geneList))
for(i in 1:length(geneList)){
geneName<<-geneList[i]
clinical_traits_cancer_selected_mergex<<-clinical_traits_cancer_selected_merge
fit<-survival::survfit(
#始终还是不行
#那就把“-”,替换为"_"把
eval(parse(text = sprintf("survival::Surv(final_days,survive_group)~`%s`",geneName))
),
#survival::Surv(final_days,survive_group)~eval(parse(text=geneName)),
data=clinical_traits_cancer_selected_mergex
)
p<<-survminer::ggsurvplot(fit,data=clinical_traits_cancer_selected_merge,
legend = c(0.8,0.8),
legend.title = geneName,
legend.labs = c("High Expression", "Low Expression"),
pval = TRUE,
conf.int = TRUE
)+ggplot2::labs(x="Time (Days)")
print(p)
#获取logRank检验的P值
logRanks=survminer::surv_pvalue(fit)
pvalues[i]=round(logRanks$pval,3)
}
dev.off()
pgenes=data.frame(pvalues,GeneSymbol=geneList)
write.csv(pgenes,".//output//step09_survivalOfGenes.csv")
write.csv(clinical_traits_cancer_selected,".//output//step09_followUpInformation.csv")
message("Complete!")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.