#' @title FeatureAnalysis
#' @description a function can generate mz vs rt plot and QC distribution plot,
#' also can filter isotope,rsd and zero value.
#' @author Shine Shen
#' \email{qq951633542@@163.com}
#' @param zero.filter default is FALSE,if the zero value exsit in your data,
#' make FALSE to TRUE.
#' @param RSD.filter default is FALSE,if the percentage of qc rsd larger than 0.3
#' more than 0.7,almost after normalization,make FALSE to TRUE.
#' @param zero.check default is TRUE.
#' @param mzrt default is FALSE.
#' @param FilterIsotope default is TRUE.
#' @return All the results can be got form other functions and instruction.
#' @export
FeatureAnalysis <- function(zero.filter = FALSE,RSD.filter = FALSE,
zero.check = TRUE,mzrt = FALSE,FilterIsotope=TRUE) {
cat("Import data...\n")
data <- data.table::fread("data.csv")
data <- data.table::setDF(data)
sample.info <-read.csv("sample.info.csv")
cat("Analyzing data...\n")
##create a folder for analysis
dir.create("FeatureAnalysis")
setwd("FeatureAnalysis")
if(FilterIsotope){
cat("Isotope filtering...\n")
###remove [M+n],\\ make [] lose the ability of function,
isotope_filter<-function(data){
temp<- data[c(grep("\\[M\\]",data$isotope),
which(data$isotope == "")),]
}
data <-isotope_filter(data)
write.csv(data,"filter.isotope.csv",row.names = FALSE)
}
###data preparation
sample.name<-sample.info$sample.name[sample.info$class=="Subject"]#get sample index
qc.name<-sample.info$sample.name[sample.info$class=="QC"]#get qc index
sample<-data[,match(sample.name,colnames(data))]#get sample matrix
qc<-data[,match(qc.name,colnames(data))]#get qc matrix
sample.qc<-cbind(sample,qc)
if(zero.check){
cat("Zero checking...\n")
###zero check
zero_check <- function(data){
check_zero <- sample.qc
numb.zero <- sapply(seq(nrow(check_zero)), function(i){
check.num.zero <- sum(check_zero[i,]== 0)#get zero value columns
})
numb.sample <- ncol(check_zero)
idx.check <- which(numb.zero >= numb.sample/2)
check_zero <- check_zero[idx.check,]
}
zero.data <- zero_check(data)
write.csv(zero.data,"zero.rows.csv",row.names = FALSE)
}
if(zero.filter){
cat("Zero filtering...\n")
###zero filter
zero_filter <- function(data){
temp_zero <- data[,-c(1:4)]
num.zero <- sapply(seq(nrow(temp_zero)), function(i){
temp.num.zero <- sum(temp_zero[i,]== 0)
})
num.sample <- ncol(temp_zero)
idx.filter <- which(num.zero >= num.sample/2)
temp_zero <- temp_zero[-idx.filter,]
temp <- data.frame(data[-idx.filter,c(1:4)], temp_zero)
}
data_zero_filter <- zero_filter(data)
data <- data_zero_filter
write.csv(data,"filter.zero.csv",row.names = FALSE)
}
cat("Calculate RSD...\n")
###calculate RSD
RSD <- function(qc){
qc_rsd <- qc
rsd_qc <- sapply(seq(nrow(qc_rsd)), function(i){
SD <- sd(qc_rsd[i,],na.rm = TRUE)
MEAN <- sum(qc_rsd[i,],na.rm = TRUE)/ncol(qc)
rsd_qc <- SD/MEAN
})
}
rsd.data <- RSD(qc)##run function
write.csv(rsd.data,"rsd.csv",row.names = FALSE)
cat("Draw QC distribution plot...\n")
### QC distribution plot
#png(file="QC distribution.png", width = 900, height = 800,res = 56*2)
d <- read.csv("rsd.csv")
percent <- round(sum(d$x<0.3)/nrow(d),3)
txt <- paste(percent*100,"%")
scatter.data<-as.data.frame(rsd.data[order(rsd.data)])
id<-c(1:nrow(qc))
data_qc<- cbind(scatter.data,id)
qc_dis<- ggplot2::ggplot(data_qc,ggplot2::aes(x=id,y=rsd.data[order(rsd.data)]))+
ggplot2::xlab("Feature Index")+
ggplot2::ylab("Relative Standard Deviation(RSD)")+
ggplot2::geom_point(ggplot2::aes(colour=scatter.data<0.3))+
ggplot2::geom_text(data = d,aes(x= 400,y= 1.2,label= txt))
ggplot2::geom_hline(ggplot2::aes(yintercept=0.3,linetype="dashed"))
#plot(qc_dis)
save(qc_dis,file = 'qc_dis.Rda')
export::graph2ppt(x=qc_dis,file='qc_dis.pptx',height=7,width=9)
#dev.off()
if(RSD.filter){
cat("RSD filtering...\n")
###RSD filter
data_rsd <- data
RSD_filter <- function(data_rsd){
qc.rsd.name<-sample.info$sample.name[sample.info$class=="QC"]
qc_rsd<-data_rsd[,match(qc.rsd.name,colnames(data_rsd))]
temp_rsd <- qc_rsd
rsd <- sapply(seq(nrow(temp_rsd)), function(i){
SD <- sd(temp_rsd[i,],na.rm = TRUE)
MEAN<-sum(temp_rsd[i,],na.rm = TRUE)/ncol(qc)
rsd<-SD/MEAN
})
idx.filter <- which(rsd >= 0.3)
rsd_data <- data_rsd[-idx.filter,]
}
filter.rsd.data <- RSD_filter(data_rsd)
write.csv(filter.rsd.data,"data for sta.csv",row.names = FALSE)
}
cat("Draw mz VS RT plot...\n")
if(mzrt){
#### mz VS RT plot
#png(file="mz.rt.png", width = 900, height = 800,res = 56*2)
col <- apply(sample,1,median)
mr <- ggplot2::ggplot(data,ggplot2::aes(x=rt,y=mz,colour=log10(col)))+
geom_point()+
scale_color_gradient(low = 'lightgreen', high = 'darkred')+
ggplot2::xlab("Retention time(s)")+
ggplot2::ylab("Mass to charge ratio(m/z)")+
ggplot2::labs(colour="log10(intensity)")+
ggplot2::theme(legend.position = c(0.95,0.9))
#plot(mr)
#dev.off()
save(mr,file = 'mr.Rda')
export::graph2ppt(x=mr,file='data.pptx',height=7,width=9)
}
##back origin work directory
setwd("..//")
cat("FeatureAnalysis is done\n")
}
.onAttach <- function(libname, pkgname){
packageStartupMessage("Shine 0.3.7
Maintainer: Xia Shen.
\n2023-01-18
News: Update PathwayEnrich
Notes: sample name in pos and neg mode must be identical
Version 0.3.7
--------------
"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.