#' @title Select genotype variables by Annovar
#' @description The genotype data has high dimension, so it is difficult to analysis by many methods.
#' Thus We can have a preliminary screening by ANNOVAR. ANNOVAR is a good way to handle with the SNP
#' data, so we can apply the ideas to R. Firstly, import the exonic variant function by gene based
#' annotation and Score alignment library, PolyPhen2, cadd, sift. The three scores can assess the
#' importance of genes well. When there are two scores show that the gene is important, then keep it.
#' @param df the exonic variant function by gene based annotation
#' @param df2 the other exonic variant function by gene based annotation
#' @param pp2 the database of PolyPhen2 scores
#' @param cadd the database of cadd scores
#' @param sift the database of sift scores
#' @return a vector of important gene variables \code{Allrs}
#' @examples
#' \dontrun{
#' index<-myqc(df,df2,pp2,cadd,sift)
#' }
#' @export
myqc<-function(df,df2,pp2,cadd,sift){
#one
#select nonsynonymous
nonsyno <- df[which(df$V2 == "nonsynonymous SNV" ),3:8] #2891 6
id_syno <-paste(paste(paste(paste(nonsyno$V4,nonsyno$V5,sep = ":"),nonsyno$V6,sep = "-"), nonsyno$V7,sep = "_"), nonsyno$V8, sep = "_")
nonsyno[,7] <- id_syno
colnames(nonsyno)[7] <- "V9"
#two
colnames(pp2)[1] <- "chr"
selected_nonsyno_1 <-pp2[which(pp2$Start %in% nonsyno$V5),] # 8852 7
id_sn <-paste(paste(paste(paste(selected_nonsyno_1$chr,selected_nonsyno_1$Start,sep = ":"),
selected_nonsyno_1$End,sep = "-"), selected_nonsyno_1$Ref,sep = "_"), selected_nonsyno_1$Alt, sep = "_")
selected_nonsyno_1[,8] <- id_sn
colnames(selected_nonsyno_1)<-c("V1","V2","V3","V4","V5","V6","V7","V8")
selected_nonsyno_2 <- selected_nonsyno_1[which(selected_nonsyno_1$V8 %in% nonsyno$V9),] #810 8
#three
#CADD
colnames(cadd)[1] <- "chr"
selected_nonsyno_3 <-cadd[which(cadd$Start %in% nonsyno$V5),] # 9540 7
id_cadd <-paste(paste(paste(paste(selected_nonsyno_3$chr,selected_nonsyno_3$Start,sep = ":"),
selected_nonsyno_3$End,sep = "-"), selected_nonsyno_3$Ref,sep = "_"), selected_nonsyno_3$Alt, sep = "_")
selected_nonsyno_3[,8] <- id_cadd
colnames(selected_nonsyno_3)<-c("V1","V2","V3","V4","V5","V6","V7","V8")
selected_nonsyno_4 <- selected_nonsyno_3[which(selected_nonsyno_3$V8 %in% nonsyno$V9),] #[1] 827 8
#four
#sift
colnames(sift)[1] <- "chr"
selected_nonsyno_5 <-sift[which(sift$Start %in% nonsyno$V5),] # 9059 7
id_sift <-paste(paste(paste(paste(selected_nonsyno_5$chr,selected_nonsyno_5$Start,sep = ":"),
selected_nonsyno_5$End,sep = "-"), selected_nonsyno_5$Ref,sep = "_"), selected_nonsyno_5$Alt, sep = "_")
selected_nonsyno_5[,8] <- id_sift
colnames(selected_nonsyno_5)<-c("V1","V2","V3","V4","V5","V6","V7","V8")
selected_nonsyno_6 <- selected_nonsyno_5[which(selected_nonsyno_5$V8 %in% nonsyno$V9),] # 790 8
S2<-selected_nonsyno_2[,6:8] #810 3
S4<-selected_nonsyno_4[,6:8] #827 3
S6<-selected_nonsyno_6[,6:8] #790 3
zhname0<-rbind(as.matrix(S2[,3]),as.matrix(S4[,3]),as.matrix(S6[,3])) #810+827+790=2427
zhname<-unique(zhname0)
kkk<-length(zhname)#827
Table<-matrix(0,kkk,5)
Table[,1]<-zhname
#Table[,2]=pp2
for(i in 1:kkk){
n<-Table[i,1]
k<-which(S2[,3]==n)
if(length(k)!=0){
Table[i,2]=S2[k,2]
}else{
Table[i,2]=0
}
}
#Table[,3]=cadd
for(i in 1:kkk){
n<-Table[i,1]
k<-which(S4[,3]==n)
if(length(k)!=0){
Table[i,3]=S4[k,2]
}else{
Table[i,3]=0
}
}
#Table[,4]=sift
for(i in 1:kkk){
n<-Table[i,1]
k<-which(S6[,3]==n)
if(length(k)!=0){
Table[i,4]=S6[k,2]
}else{
Table[i,4]=0
}
}
#score to 0,1
for(i in 1:kkk){
if(Table[i,2]=="D"){
Table[i,2]=1
}else{
Table[i,2]=0
}
}
for(i in 1:kkk){
if(as.numeric(Table[i,3])>40){
Table[i,3]=1
}else{
Table[i,3]=0
}
}
for(i in 1:kkk){
if(Table[i,4]=="D"){
Table[i,4]=1
}else{
Table[i,4]=0
}
}
#
for(i in 1:kkk){
Table[i,5]=as.numeric(Table[i,2])+as.numeric(Table[i,3])+as.numeric(Table[i,4])
}
kk<-which(as.numeric(Table[,5])>=2)
nonsyno_Final<-Table[kk,1] #223
#six
nonsyno2<-df2[which(df$V2 == "nonsynonymous SNV" ),c(3:8,11)] #V11:rs 2891 7
id_syno2 <-paste(paste(paste(paste(nonsyno2$V4,nonsyno2$V5,sep = ":"),nonsyno2$V6,sep = "-"), nonsyno2$V7,sep = "_"), nonsyno2$V8, sep = "_")
nonsyno2[,8] <- id_syno2
colnames(nonsyno2)[8] <- "V9"
kk<-matrix(0,length(nonsyno_Final),1)
for(i in 1:length(nonsyno_Final)){
t<-nonsyno_Final[i]
k<-which(nonsyno2$V9==t)
kk[i]<-k
}
Allrs<-nonsyno2[kk,7]
return(Allrs)
}
#' @title Record the order and frequency of samples in phenotype data
#' @description For longitudinal data analysis, we need transform the SNP data into longitudinal data. So it is necessary to record the information of phenotype data, for example, the order and measurement number of sample. Then we can construct the longitudinal genotype data. There is a document about the relationship of genotype data and phenotype data.
#' @param pheno the phenotype data
#' @param dygx a document about the sample information
#' @return \code{v3}
#' @examples
#' \dontrun{
#' v<-myfl(pheno,dygx)
#' }
#' @export
myfl<-function(pheno,dygx){
dygx1<-dygx[,1:2]
nn<-dim(pheno)[1]
for(i in 1:nn)
{
k1<-pheno$names[i]
k2<-which(dygx1[,2]==k1)
pheno$names[i]<-dygx1[k2,1]
}
id.pheno <- pheno[,"names"]
v0<-as.factor(id.pheno)
K<-length (levels (v0) )
v1<- vector(mode="character", K)
v1[1]<-id.pheno[1]
k=1
for (i in 2:length(v0))
{
if(id.pheno[i]!=id.pheno[i-1])
{
k=k+1
v1[k]=id.pheno[i]
}
}
v2<-table(v0)
v3<-v2[v1]
return(v3)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.