R/RNASEQ_Analysis_solo_SVMerge_SE.r

Defines functions SVexpression_solo nonOverlapRNAseq_solo OverlapRNAseq_solo RNAseqcombine_solo

Documented in nonOverlapRNAseq_solo OverlapRNAseq_solo RNAseqcombine_solo SVexpression_solo

#' Combining the RNAseq reads of family members in a 
#' single file.
#'
#' @param RNASeqDir  character. Directory containing RNAseq reads.
#' @param returnMethod  character. Method of returning Data.
#' @param outpath  character. Contains file path if Method of return is chosen as 
#' Text.
#' @param outFileName  character. Output file name. 
#' @return Text or Dataframe containing TPM read counts of genes in the family.
#' @examples
#' RNASeqDir = system.file("extdata", package="nanotatoR")
#' returnMethod="dataFrame"
#' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
#' returnMethod = returnMethod)
#' @importFrom AnnotationDbi mapIds
#' @importFrom stats na.omit 
#' @import org.Hs.eg.db
#' @export
RNAseqcombine_solo<-function(RNASeqDir,returnMethod=c("Text","dataFrame"),
                        outpath="",outFileName=""){ 
    #library()
    #setwd(RNASeqDir)
    l <- list.files(path = RNASeqDir, 
       pattern="*.genes.results", full.names = TRUE)
    len<-length(l)
    #dat<-listDatasets(ensembl)
    #g1<-grep("sscrofa",listDatasets(ensembl)$dataset)
    'grch37 = useMart(="ENSEMBL_MART_ENSEMBL", host="www.ensembl.org", 
                    dataset="hsapiens_gene_ensembl")'
    gen<-c(); 
    ##Need to make this function dynamic
    dat<-data.frame(matrix(ncol=len,nrow=nrow(r<-read.table(l[1],sep="\t",header=TRUE))))
    cnam<-c()
    for (ii in 1:length(l)){
        r<-read.table(l[ii],sep="\t",header=TRUE)
        gen<-c(gen,as.character(r$gene_id))
        dat[,ii]<-as.numeric(r$TPM)
        str<-strsplit(l[ii],split=".genes.results")
        #print(str[[1]][1])
        cnam<-c(cnam,str[[1]][1])
    }
    #datf<-data.frame(dat)
    gen<-unique(as.character(gen))
    st<-strsplit(gen,split="[.]")
    genes<-c()
    for(k in 1:length(gen)){
        genes<-c(genes,as.character(st[[k]][1]))
    }
    data1 <- data.frame(dat[,1])
    names(data1) <- cnam
    genesym <- c()
    ensemblid <- c()
    'gene1 = getBM(attributes = c("external_gene_name", "ensembl_gene_id"), 
                filters = "ensembl_gene_id", values = genes, mart = grch37)'
    gn1 <- mapIds(org.Hs.eg.db, genes, "SYMBOL", "ENSEMBL")
    'rn <- row.names(data.frame(gn1))
    rn1 <- row.names(data.frame(gn2))'
    gene1<-data.frame(
        ensembl_gene_id = as.character(names((gn1))),
        external_gene_name = as.character(data.frame(gn1)[,1])
        )
    genesym<-as.character(gene1$external_gene_name)
    ensemblid<-as.character(gene1$ensembl_gene_id)
    gene3<-c()
    ens<-c()
    ensemblid<-paste("^",ensemblid,"$",sep="")
    for(kk in 1:length(genes)){
        pag<-paste("^",genes[kk],"$",sep="")
        val<-grep(pag,ensemblid,fixed=TRUE)
        if(length(val)>0){
            gene3<-c(gene3,as.character(unique(genesym[val])))
            ens<-c(ens,as.character(genes[kk]))
        }
        else{
            gene3<-c(gene3,as.character("-"))
            ens<-c(ens,as.character(genes[kk]))
        }
    }
    RNASeqDat<-data.frame(GeneName = as.character(gene3),
            GeneID = as.character(ens),data1)
    if(returnMethod=="Text"){
        fname = file.path(outFileName,".csv",sep = "")
        write.csv(RNASeqDat,file.path(outpath,fname), row.names = FALSE )
    }
    else if (returnMethod=="dataFrame"){
        return (RNASeqDat)
    }
    else{
        stop("Invalid ReturnMethod")
    }
}
#' Annotating the Overlapping genes with RNAseq expression
#'
#' @param gnsOverlap  character. Vector containing overlapping genes.
#' @param SVID  character. SV Index ID.
#' @param RNASeqData  dataFrame. RNAseq data with gene names. 
#' @param pattern_Proband  character. Pattern for proband. 
#' @return Dataframe containing TPM read counts of overlapping genes.
#' @examples
#' RNASeqDir = system.file("extdata", package="nanotatoR")
#' returnMethod="dataFrame"
#' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
#' returnMethod = returnMethod)
#' gnsOverlap <- c("AGL")
#' SVID = 397
#' datgnovrlap <- OverlapRNAseq_solo(gnsOverlap = gnsOverlap, 
#' SVID = SVID, RNASeqData = datRNASeq,
#' pattern_Proband = "*_P_*")
#' @importFrom stats na.omit  
#' @export
OverlapRNAseq_solo<-function(gnsOverlap, SVID, RNASeqData,
                pattern_Proband = NA){
  
  ###Finding the column names
   #print(gnsOverlap);print(length(gnsOverlap))
  'if(is.na(pattern_Father)==FALSE){
    fatherInd<-grep(pattern_Father,names(RNASeqData))
    } else{fatherInd<- NA}
    if(is.na(pattern_Mother)==FALSE){
    motherInd<-grep(pattern_Mother,names(RNASeqData))
    } else{motherInd<- NA}'
    if(is.na(pattern_Proband)==FALSE){
        probandInd <- grep(pattern_Proband,names(RNASeqData))
    } else{probandInd<- NA}
    'if(is.na(pattern_Sibling)==FALSE){
      siblingInd<-grep(pattern_Sibling,names(RNASeqData))
    } 
    else{
      siblingInd<-NA
    }'
    sv<-c()
    gene<-c()
    gnsname<-as.character(RNASeqData$GeneName)
    pasgnsname<-pasgnovlap<-paste("^",gnsname,"$",sep="")
    'overlap_ensemblgenes = select(EnsDb.Hsapiens.v79, gnsOverlap, 
                c("GENEID","GENENAME"), "SYMBOL")
    gnsOverlapID<-as.character(overlap_ensemblgenes$GENEID)'
    #print(paste("gnsOverlap:",gnsOverlap))
    #print(paste("overlap_ensemblgenes:",overlap_ensemblgenes))
    #print(paste("gnsOverlapID:",gnsOverlapID))
    #genes<-as.character(overlap_ensemblgenes$SYMBOL)
    ###Extracting Reads
    ###
    ###Genes Names Extraction
    #print(paste("fatherInd :",fatherInd))
    #print(paste("motherInd :",motherInd))
    #print(paste("probandInd :",probandInd))
    #print(paste("siblingInd :",siblingInd))
    gnsOverlapID <- as.character(gnsOverlap)
    #print(gnsOverlapID)
    if(length(gnsOverlapID)>1){
    
        datGeneInfoTemp<-data.frame()
        fatherReads<-c()
        motherReads<-c()
        probandReads<-c()
        siblingReads<-c()
        for (ki in 1:length(gnsOverlapID)){
            pasgnovlap <- paste("^", gnsOverlapID[ki],"$", sep = "")
            #print(ki)
            gg<-grep(pasgnovlap, pasgnsname, fixed = TRUE)
            dat_temp<-RNASeqData[gg, ]
        
            if(nrow(dat_temp)>1){
                #print("FALSE")
                #print(ki)
                dat_temp_1 <- mean(dat_temp[,probandInd])
                #dat_temp_1<-data.frame(dat_temp_1)
                dat_temp1 <- dat_temp[1,]
                dat_temp1[,probandInd] <- dat_temp_1
                #print(dim(dat_temp1))
            
                if(is.na(probandInd[1])==FALSE){
                    if(length(probandInd)>1){
                        for(j in probandInd){
                            probandcount<-c(probandcount,dat_temp1[,j])
                        }
                        probandReads<-c(
                            probandReads,paste(probandcount,collapse = ":"))
                    }
                    else if(length(probandInd)==1){
                        probandReads<-c(probandReads,dat_temp1[,probandInd])
                    }
                    else{
                        probandReads<-c(probandReads,0)
                    }
                }
                else{
                    probandReads<-c(probandReads,"-")
                }
            }
            else if (nrow(dat_temp)==1) {
                #print("TRUE")
                #print(dim(dat_temp1))
                probandcount<-c()
                if(is.na(probandInd [1])==FALSE){ 
                    if(length(probandInd)>1){
                        for(j in probandInd){
                            probandcount<-c(probandcount,dat_temp[,j])
                        }
                        probandReads<-c(
                        probandReads, paste(probandcount,collapse = ":"))
                    }
                    else if(length(probandInd)==1){
                        probandReads<-c(
                            probandReads, mean(dat_temp[,probandInd]))
                    }
                    else{
                        probandReads<-c(probandReads,0)
                    }
                }
                else{
                    probandReads<-c(probandReads, "-")
                }
            }
            else{
                probandReads<-c(probandReads,"-")
            }   
            gene<-c(gene,as.character(gnsOverlapID[ki]))
        }
        if(is.na(probandInd[1])==FALSE){
            ProbandGenes<-c()
            for(ii in 1:length(gene)){
                pasgene<-paste(gene[ii],"(", probandReads[ii],")", sep = "")
                ProbandGenes<-c(ProbandGenes,pasgene)
            }
            ProbandTPM<-paste(ProbandGenes,collapse=";")
            } else{
                ProbandTPM <- "-"
            }
        datGeneInfo<-data.frame(SVID=SVID, ProbandTPM=ProbandTPM)     
                
    }
    else if(length(gnsOverlapID)==1){
        pasgnovlap<-paste("^",as.character(gnsOverlapID),"$",sep="")
        #print(ki)
        gg<-grep(pasgnovlap,pasgnsname,fixed=TRUE)
      
        dat_temp<-RNASeqData[gg,]
        if(nrow(dat_temp)>1){
            dat_temp_1 <- mean(dat_temp[,probandInd])
            #dat_temp_1<-data.frame(dat_temp_1)
            dat_temp1<-dat_temp[1,]
            dat_temp1[,probandInd]<-dat_temp_1
            #dat_temp1<-cbind(dat_temp[1,1:3],dat_temp1)
            #print(dim(dat_temp1))
            probandcount<-c()
            if(is.na(probandInd[1])==FALSE){ 
                if(length(probandInd)>1){
                    for(j in probandInd){
                        probandcount <- c(probandcount,mean(dat_temp1[,j]))
                    }
                probandReads <- paste(probandcount,collapse = ":")
                }
                else if(length(probandInd)==1){
                    probandReads<-mean(dat_temp1[,probandInd])
                }
                else{
                    probandReads <- 0
                }
            } 
            else{
                probandReads <- "-"
            }
          
          
        #motherReads<-dat_temp1[,motherInd]
        #probandReads<-dat_temp1[,probandInd]
        #if(is.na(siblingInd[1])==TRUE){
        #siblingReads<-"-"
        #}
        #else{
        #siblingReads<-dat_temp1[,siblingInd]
        #}
        }
        else if (nrow(dat_temp)==1){
       
            #print(dim(dat_temp1))
            if(is.na(probandInd[1])==FALSE){ 
                if(length(probandInd)>1){
                    for(j in probandInd){
                        probandcount <- c(probandcount,mean(dat_temp[,j]))
                    }
                    probandReads<-paste(probandcount,collapse=":")
                }
                else if(length(probandInd)==1){
                    probandReads<-mean(dat_temp[,probandInd])
                }
                else{
                    probandReads<-0
                }
            } else{
                probandReads<-"-"
            }
        }
        else{
            probandReads<-"-"
        }
        #gene<-overlap_ensemblgenes$SYMBOL
        if(is.na(probandInd[1])==FALSE){
            gene<- as.character(gnsOverlapID)
        
            ProbandGenes<-paste(gene,"(",probandReads,")",sep="")
            #ProbandGenes<-c(ProbandGenes,pasgene)
            ProbandTPM<-as.character(ProbandGenes)
        }else{
            ProbandTPM <- "-"
        }
        datGeneInfo<-data.frame(SVID = SVID,ProbandTPM = ProbandTPM)
            
    }
    else{
        datGeneInfo <- data.frame(SVID = SVID,ProbandTPM = "-")
    }
  
  #print(warnings())
  
    return(datGeneInfo)
}
#' Annotating the Non-Overlapping genes with RNAseq expression
#'
#' @param gnsNonOverlap  character. Vector containing non-overlapping genes.
#' @param SVID  character. SV Index ID.
#' @param RNASeqData  dataFrame. RNAseq data with gene names. 
#' @param pattern_Proband  character. Pattern for proband. 
#' @return Dataframe containing TPM read counts of overlapping genes.
#' @examples
#' RNASeqDir = system.file("extdata", package="nanotatoR")
#' returnMethod="dataFrame"
#' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
#' returnMethod = returnMethod)
#' gnsNonOverlap <- c("DDX11L1", "MIR1302-2HG", "OR4G4P")
#' SVID = 397
#' datgnnonovrlap <- nonOverlapRNAseq_solo(gnsNonOverlap = gnsNonOverlap, 
#' SVID = SVID, RNASeqData = datRNASeq,
#' pattern_Proband = "*_P_*")
#' @importFrom stats na.omit  
#' @export

nonOverlapRNAseq_solo<-function(gnsNonOverlap,SVID,RNASeqData,
                pattern_Proband = NA){
  ## annotation
  ###Checking if the input is empty; else if not empty add 
  ###expression values for each genes
    datGeneInfo<-data.frame()
    SVID=SVID
    ###Extracting the index for the the parents
    'if(is.na(pattern_Father)==FALSE){
    fatherInd<-grep(pattern_Father,names(RNASeqData))
    } else{fatherInd <- NA}
    if(is.na(pattern_Mother)==FALSE){
    motherInd<-grep(pattern_Mother,names(RNASeqData))
    } else{motherInd <- NA}'
    if(is.na(pattern_Proband)==FALSE){
         probandInd<-grep(pattern_Proband,names(RNASeqData))
    } else{probandInd <- NA}
    ##Checking for sibling
    'if(is.na(pattern_Sibling)==FALSE){
      siblingInd<-grep(pattern_Sibling,names(RNASeqData))
    } 
    else{
      siblingInd<-NA
    }'
    
    gene<-c()
    gnsname<-as.character(RNASeqData$GeneName)
    pasgnsname<-pasgnovlap<-paste("^",as.character(gnsname),"$",sep="")
    'nonoverlap_ensemblgenes = select(EnsDb.Hsapiens.v79, gnsNonOverlap, 
                c("GENEID","GENENAME"), "SYMBOL")
    gnsnonOverlapID<-as.character(nonoverlap_ensemblgenes$GENEID)'    
    ###Extracting Reads
    ###
    ###Genes Names Extraction
    gnsnonOverlapID<- as.character(gnsNonOverlap)
    if(length(gnsnonOverlapID)>1){
        #datGeneInfoTemp<-data.frame()
        probandReads<-c()
      
      
        for (ki in 1:length(gnsnonOverlapID)){
            
            pasgnnonovlap <- paste("^",as.character(
                gnsnonOverlapID[ki]),"$",sep = "")
            gg<-grep(pasgnnonovlap,pasgnsname,fixed=TRUE)
            dat_temp<-RNASeqData[gg,]
            if(nrow(dat_temp)>1){
                dat_temp_1 <- mean(dat_temp[,probandInd])
                #dat_temp_1<-data.frame(dat_temp_1)
                dat_temp1<-dat_temp[1,]
                dat_temp1[,probandInd]<-dat_temp_1
                #print(dim(dat_temp1))
                probandcount<-c()
                if(is.na(probandInd[1]) == FALSE){
                    if(length(probandInd)>1){
                        for(j in probandInd){
                            probandcount <- c(probandcount,dat_temp1[,j])
                        }
                    probandReads<-c(
                        probandReads,paste(probandcount,collapse=":")
                        )
                    }
                    else if(length(probandInd)==1){
                        probandReads<-c(probandReads,dat_temp1[,probandInd])
                    }
                    else{
                        probandReads<-c(probandReads,0)
                    }
                } else{
                    probandReads<-c(probandReads, "-")
                }
            }
            else if (nrow(dat_temp)==1) {
                #print(dim(dat_temp1))
                probandcount<-c()
                if(is.na(probandInd[1]) == FALSE){ 
                    if(length(probandInd)>1){
                        for(j in probandInd){
                            probandcount <- c(probandcount, dat_temp[,j])
                        }
                    probandReads<-c(probandReads,paste(probandcount,collapse=":"))
                }
                else if(length(probandInd)==1){
                    probandReads<-c(probandReads,dat_temp[,probandInd])
                }
                else{
                    probandReads<-c(probandReads,0)
                }
            }
            else{
                probandReads <- c(probandReads,"-")
            }
        }
        else{
            probandReads<-c(probandReads,"-")
        }
        gene<-c(gene,as.character(gnsnonOverlapID[ki]))
    }
        if(is.na(probandInd[1])==FALSE){
            ProbandGenes<-c()
        
            for(ii in 1:length(gene)){
                pasgene<-paste(gene[ii],"(",probandReads[ii],")",sep="")
                ProbandGenes<-c(ProbandGenes,pasgene)
            }
            ProbandTPM<-paste(ProbandGenes,collapse=";")
            } else{
                ProbandTPM<-"-"
            }
        datGeneInfo<-data.frame(SVID=SVID,ProbandTPM=ProbandTPM)
    } 
    else if(length(gnsnonOverlapID)==1){
        pasgnnonovlap<-paste("^",as.character(gnsnonOverlapID),"$",sep="")
        gg<-grep(pasgnnonovlap,pasgnsname,fixed=TRUE)
        #gg<-grep(pasgnnonovlap,gnsname,fixed=TRUE)
        dat_temp<-RNASeqData[gg,]
      
        if(nrow(dat_temp)>1){
            dat_temp_1 <- mean(dat_temp[,probandInd])
            #dat_temp_1<-data.frame(dat_temp_1)
            dat_temp1<-dat_temp[1,]
            dat_temp1[, probandInd]<-dat_temp_1
           #print(dim(dat_temp1))
            probandcount<-c()
            if(is.na(probandInd[1])==FALSE){
                if(length(probandInd)>1){
                    for(j in probandInd){
                        probandcount<-c(probandcount,dat_temp1[,j])
                    }
                probandReads<-paste(probandcount,collapse=":")
            }
            else if(length(probandInd)==1){
                probandReads<-dat_temp1[,probandInd]
            }
            else{
                probandReads<-0
            }
        }
        else{
            probandReads<- "-"
        }
          
          
        }
        else if (nrow(dat_temp)==1){
       
         #print(dim(dat_temp1))
            probandcount<-c()
            if(is.na(probandInd)==FALSE){
                if(length(probandInd)>1){
                    for(j in probandInd){
                        probandcount<-c(probandcount,dat_temp[,j])
                    }
                probandReads <- paste(probandcount,collapse=":")
            }
            else if(length(probandInd)==1){
                probandReads <- dat_temp[,probandInd]
            }
            else{
                probandReads<-0
            }
        }
        else{
            probandReads<- "-"
        }
        }
        else{
            probandReads<-"-"
        }
        gene<-c(gene,as.character(gnsnonOverlapID))
        if(is.na(probandInd[1])==FALSE){
            ProbandGenes <- c()
            ProbandGenes <- paste(gene,"(",probandReads,")",sep="")
            #ProbandGenes<-c(ProbandGenes,pasgene)
        #}
            ProbandTPM<-as.character(ProbandGenes)
        }else{
            ProbandTPM<-"-"
        }
        datGeneInfo <- data.frame(SVID = SVID,ProbandTPM = ProbandTPM)
    }
    else{
        datGeneInfo < -data.frame(SVID = SVID,ProbandTPM = "-")
    
    }
    return(datGeneInfo)
}
#' Annotating the Overlapping and Non-Overlapping genes with RNAseq expression
#'
#' @param input_fmt_SV  character. Input format of the SV data.Options 
#' "Text" or "DataFrame".
#' @param smapdata  dataframe. SV data dataframe.
#' @param smappath  character. smap path.
#' @param input_fmt_RNASeq  character. Input format of  
#' the RNASeq data. Options "Text" or "DataFrame"..
#' @param RNASeqData  dataFrame. RNAseq data with gene names.
#' @param RNASeqPATH  character. RNAseq dataset path . 
#' @param outputfmt  character. Output format of  
#' the result. Options "Text" or "DataFrame"..
#' @param pattern_Proband  character. Pattern for proband. 
#' @param EnzymeType  character. Enzyme used. option "SVMerge" or "SE".
#' @return Dataframe Annotated datafreme with RNASeq data.
#' @examples
#' RNASeqDir = system.file("extdata", package="nanotatoR")
#' returnMethod="dataFrame"
#' datRNASeq <- RNAseqcombine_solo(RNASeqDir = RNASeqDir,
#' returnMethod = returnMethod)
#' smapName="NA12878_DLE1_VAP_solo5.smap"
#' smap = system.file("extdata", smapName, package="nanotatoR")
#' smap = system.file("extdata", smapName, package="nanotatoR")
#' bedFile <- system.file("extdata", "HomoSapienGRCH19_lift37.bed", package="nanotatoR")
#' outpath <- system.file("extdata", package="nanotatoR")
#' datcomp<-overlapnearestgeneSearch(smap = smap, 
#'     bed=bedFile, inputfmtBed = "bed", outpath, 
#'     n = 3, returnMethod_bedcomp = c("dataFrame"), 
#'     input_fmt_SV = "Text",
#'     EnzymeType = "SE", 
#'     bperrorindel = 3000, 
#'     bperrorinvtrans = 50000)
#' datRNASeq1 <- SVexpression_solo (input_fmt_SV=c("dataFrame"),
#'     smapdata = datcomp,
#'     input_fmt_RNASeq=c("dataFrame"),
#'     RNASeqData = datRNASeq,
#'     outputfmt=c("datFrame"),
#'     pattern_Proband = "*_P_*", EnzymeType = c("SE"))
#' datRNASeq1[1,]
#' @importFrom stats na.omit  
#' @export
SVexpression_solo <- function(input_fmt_SV=c("Text","dataFrame"),
        smapdata,smappath, input_fmt_RNASeq=c("Text","dataFrame"),
        RNASeqData,RNASeqPATH,outputfmt=c("Text","datFrame"),
        pattern_Proband = NA, EnzymeType = c("SVMerge", "SE")){
  ###RNASEQ Analysis data
    if(input_fmt_RNASeq=="dataFrame"){
        RNASeqData = RNASeqData
    }
    else if(input_fmt_RNASeq=="Text"){
        RNASeqData=read.csv(RNASeqPATH)
    }
    else{
        stop("Input format for RNASeq Data Incorrect")
    }
  
    if(input_fmt_SV=="dataFrame"){
        smapdata = smapdata
        if(EnzymeType == "SVMerge"){
            #smapdata <- readSMap(smap, input_fmt_smap = "Text")
            SVID<-smapdata$SVIndex
        }
        else{
            #smapdata <- readSMap_DLE(smap, input_fmt_smap)
            SVID<-smapdata$SmapEntryID
        }
    }
    else if(input_fmt_SV=="Text"){
        if(EnzymeType == "SVMerge"){
            smapdata <- readSMap(smappath, input_fmt_smap = "Text")
            SVID<-smapdata$SVIndex
        }
        else{
            smapdata <- readSMap_DLE(smappath, input_fmt_smap = "Text")
            SVID<-smapdata$SmapEntryID
        }
    }
    else{
        stop("Input format for SMAP Incorrect")
    }
    ##Extracting Data
    overlapgenes <- stringr::str_trim(smapdata$OverlapGenes_strand_perc)
  
    
    dataOverLap <- data.frame(matrix(nrow = nrow(smapdata), ncol = 2))
    ##Extracting Overlapped Genes
    #dataOverLap<-data.frame(matrix(nrow=10,ncol=5))
    names(dataOverLap)<-c("SVID", "OverlapProbandTPM")
    print("###OverlapGenes###")
    for(kk in 1:length(overlapgenes)){
        #print(paste("kk:",kk))
        #for(kk in 1:10){
        #print(kk)
        datOverLap<-data.frame()
        #print(paste("kk:",kk,sep=""))
        svID<-as.character(SVID[kk])
        if(length(grep(";",overlapgenes[kk]))>=1){
            st1<-strsplit(as.character(overlapgenes[kk]), split = ";")
            sttemp<-as.character(st1[[1]])
            #print("1")
            gns_overlap<-c()
            for (tt in 1:length(sttemp)){
                gn_temp<-strsplit(sttemp[tt], split = "\\(")
                gns_overlap<-c(gns_overlap,as.character(gn_temp[[1]][1]))
            }
            
                datOverLap<-OverlapRNAseq_solo(
                        gnsOverlap = as.character(gns_overlap),
                        SVID = svID, 
                        RNASeqData = RNASeqData,
                        pattern_Proband = pattern_Proband)
                
        }
        else if (length(grep("\\(", as.character(overlapgenes[kk]))) >= 1){
            #print("2")
            gnsOverlap<-strsplit(as.character(overlapgenes[kk]),split="\\(")[[1]][1]
                datOverLap<-OverlapRNAseq_solo(gnsOverlap = as.character(gnsOverlap),
                    SVID = svID,
                    RNASeqData = RNASeqData,
                    pattern_Proband = pattern_Proband)
            }
            else{
                #print(paste("OverLapDNSVID:",svID))
                datOverLap<-data.frame(
                    SVID = svID, ProbandTPM = "-")
                }
                dataOverLap[kk,]<-c(
                    as.character(datOverLap$SVID),
                    Proband_OverlapGeneExpression_TPM = as.character(datOverLap$ProbandTPM))
        }
    
            ##Extracting NonOverlapped Genes
            nearestUPGenes<-smapdata$Upstream_nonOverlapGenes_dist_kb
            #datanonOverLapUP<-data.frame(matrix(nrow=nrow(smapdata),ncol=5))
            datanonOverLapUP<-data.frame(matrix(nrow = nrow(smapdata),ncol = 2))
            names(datanonOverLapUP) <- c("SVID", "NonOverlapUPProbandTPM")
            print("###NonOverlapUPStreamGenes###") 
            for(ll in 1:length(nearestUPGenes))
            {
                #for(ll in 1:10){
                #print(ll)
                datNonOverLapUP <- data.frame()
                #print(paste("llUP:",ll,sep=""))
                svID<-as.character(SVID[ll])
                if(length(grep(";",nearestUPGenes[ll]))>=1){
                    st1<-strsplit(
                        as.character(nearestUPGenes[ll]),
                        split = ";")
                        sttemp<-as.character(st1[[1]])
                        #print("1")
                        gns_nonoverlap_up<-c()
                    for (mm in 1:length(sttemp)){
                        gn_temp<-strsplit(sttemp[mm],split="\\(")
                        gns_nonoverlap_up<-c(gns_nonoverlap_up, 
                            as.character(gn_temp[[1]][1]))
                    }
                
                    datNonOverLapUP<-nonOverlapRNAseq_solo(
                        gnsNonOverlap = as.character(gns_nonoverlap_up),
                        SVID = svID,
                        RNASeqData = RNASeqData,
                        pattern_Proband = pattern_Proband)
                    
                }
                else if (length(grep(
                "\\(",as.character(nearestUPGenes[ll]))) >=1
                ){
            #print("2")
                        gnsNonOverlapUP<-strsplit(as.character(nearestUPGenes[ll]),split="\\(")[[1]][1]
                        
                        datNonOverLapUP<-nonOverlapRNAseq_solo(
                            gnsNonOverlap = as.character(gnsNonOverlapUP),
                            SVID = svID,RNASeqData = RNASeqData,
                            pattern_Proband=pattern_Proband)
                        
                    }
                else{
                    #print(paste("NonOverLapUPSVID:",svID))
                        datNonOverLapUP<-data.frame(
                            SVID = svID, ProbandTPM="-"
                        )
        }
        datanonOverLapUP[ll,]<-c(
            as.character(datNonOverLapUP$SVID), 
            Proband_Upstream_nonOverlapGeneExpression_TPM = as.character(datNonOverLapUP$ProbandTPM))
                
    }
    
  ##Extracting NonOverlapped Down Stream Genes
    nearestDNGenes<-smapdata$Downstream_nonOverlapGenes_dist_kb
    datanonOverLapDN<-data.frame(matrix(nrow = nrow(smapdata), ncol = 2))
    names(datanonOverLapDN)<-c("SVID","NonOverlapDNProbandTPM")
    print("###NonOverlapDNStreamGenes###") 
    for(nn in 1:length(nearestDNGenes))
    {
        #for(nn in 1:10){
        datNonOverLapDN<-data.frame()
        # print(paste("llDN:",ll,sep=""))
        svID<-as.character(SVID[nn])
        if(length(grep(";",nearestDNGenes[nn]))>=1){
            st1 <- strsplit(as.character(nearestDNGenes[nn]), split = ";")
            sttemp<-as.character(st1[[1]])
            #print("1")
            gns_nonoverlap_dn<-c()
            for (mm in 1:length(sttemp)){
                gn_temp <- strsplit(sttemp[mm],split="\\(")
                gns_nonoverlap_dn <- c(
                        gns_nonoverlap_dn,as.character(gn_temp[[1]][1])
                        )
            }
            datNonOverLapDN<-nonOverlapRNAseq_solo(
                    gnsNonOverlap = as.character(gns_nonoverlap_dn),
                    SVID = svID,RNASeqData = RNASeqData,
                    pattern_Proband = pattern_Proband)
            
        }
        else if (length(grep("\\(",as.character(nearestDNGenes[nn]))) >= 1){
            # print("2")
            gnsNonOverlapDN<-strsplit(as.character(
                nearestDNGenes[nn]),split="\\(")[[1]][1]
                datNonOverLapDN<-nonOverlapRNAseq_solo(
                        gnsNonOverlap = as.character(gnsNonOverlapDN),
                        SVID = svID,
                        RNASeqData = RNASeqData,
                        pattern_Proband = pattern_Proband)
                
        }
        else{
            #print(paste("NonOverLapDNSVID:",svID))
            #print ("SVID")
            datNonOverLapDN<-data.frame(
            SVID = svID,ProbandTPM="-")
        }
            datanonOverLapDN[nn,]<-c(as.character(datNonOverLapDN$SVID),
                Proband_Downstream_nonOverlapGeneExpression_TPM = as.character(datNonOverLapDN$ProbandTPM))
            }
  
            dataFinal<-data.frame(smapdata, 
                OverlapProbandEXP = dataOverLap[,2],
                NonOverlapUPprobandEXP = datanonOverLapUP[,2],
                NonOverlapDNprobandEXP = datanonOverLapDN[,2])
    return(dataFinal)

}
VilainLab/Nanotator documentation built on Oct. 9, 2021, 8:59 p.m.