R/geteSet.R

Defines functions geteSet

Documented in geteSet

#' Build an Expression Set (eSet) from profile data.
#'
#' For every checked study, fetches the profile data and the clinical data of
#' each selected case / genetic-profile pair, keeps the samples present in
#' both tables and assembles a Biobase ExpressionSet from them. Both tables
#' are also displayed through \code{getInTable()}.
#'
#' @usage geteSet()
#' @export
#' @return Called for its side effects; returns \code{NULL} invisibly. The
#'   ExpressionSet is stored in \code{myGlobalEnv$eSet}, and the merged
#'   clinical, profile and assay tables in \code{myGlobalEnv$ClinicalData},
#'   \code{myGlobalEnv$ProfData} and \code{myGlobalEnv$AssayData}.
#' @examples 
#'  f <- 9
#'  \dontrun{
#'  readRDS(paste(path.package("canceR"),"/extdata/rdata/prad_michPhenoTest1021.rds", sep=""))
#'  geteSet()
#'  }
#' 
geteSet <- function(){

    # Replace blank / whitespace-only entries of a vector with NA.
    blank2na <- function(x){
        z <- gsub("\\s+", "", x)  # make sure it's "" and not " " etc
        x[z==""] <- NA
        return(x)
    }

    # Normalise one clinical column: blanks, "[Not Available]" and a literal
    # "NA" become real NA; the column is returned as character, or as numeric
    # when at least one value matches the decimal heuristic.
    clean_clinical_column <- function(x){
        x <- as.character(blank2na(x))
        x[grepl("\\[Not Available\\]", x, ignore.case=TRUE)] <- NA
        # Match the WHOLE value only: a plain substring match would also wipe
        # out values that merely contain "na" (e.g. "Renal", "Final").
        x[grepl("^NA$", x, ignore.case=TRUE)] <- NA
        # NOTE(review): this pattern matches any value containing a ".", so a
        # text column with a dot is coerced to numeric (non-numbers -> NA).
        # Kept as in the original heuristic; confirm whether this is intended.
        if(length(grep("[0-9]*\\.[0-9]*", x))!=0){
            x <- as.numeric(x)
        }
        x
    }

    # TRUE when any value of the column contains a letter, apostrophe, "+",
    # "/" or a space, i.e. the column must not be converted to numeric.
    has_text <- function(x){
        length(grep("[a-z'-'+A-Z'/'' ']", as.character(x)))!=0
    }

    ## Test checked Cases and Genetic Profiles
    testCheckedCaseGenProf()

    n_studies <- myGlobalEnv$lchecked_Studies_forCases
    n_gen_prof <- length(myGlobalEnv$curselectGenProfs)

    # Running upper bound of the genetic-profile indices belonging to the
    # studies processed so far (each study contributes LGenProfs[s] + 1 slots).
    prof_upper <- 0

    for (s in seq_len(n_studies)){
        study_idx <- myGlobalEnv$checked_StudyIndex[s]
        progressBar_ProfilesData <- tkProgressBar(title = myGlobalEnv$Studies[study_idx], min = 0,
                                                  max = n_gen_prof, width = 400)

        prof_lower <- prof_upper
        prof_upper <- prof_upper + myGlobalEnv$LGenProfs[s]+1

        # The progress bar is closed in `finally` so it does not stay on
        # screen when one of the fetch / merge steps fails.
        tryCatch({
            for (k in seq_along(myGlobalEnv$curselectCases)){

                Sys.sleep(0.1)
                setTkProgressBar(progressBar_ProfilesData, k, label=paste( round(k/n_gen_prof*100, 0),
                                                                           "% of Expression Set"))

                # Only handle selections whose genetic profile belongs to the
                # current study.
                sel_prof <- myGlobalEnv$curselectGenProfs[k]
                if (!(sel_prof <= prof_upper && sel_prof > prof_lower)){
                    next
                }

                GenProf <- myGlobalEnv$GenProfsRefStudies[sel_prof]
                Case <- myGlobalEnv$CasesRefStudies[myGlobalEnv$curselectCases[k]]

                if(length(myGlobalEnv$GeneList)>500){
                    ProfData <- getMegaProfData(myGlobalEnv$GeneList,k )
                } else{
                    ProfData <- getProfileData(myGlobalEnv$cgds,myGlobalEnv$GeneList, GenProf,Case)
                    print(ncol(ProfData))
                }

                print("getting Profile Data and removing all NAs rows...")
                ## remove rows (samples) in which every value is NA; drop = FALSE
                ## keeps a data frame even when a single gene was requested
                ProfData <- ProfData[rowSums(!is.na(ProfData)) > 0, , drop = FALSE]

                ## Display profile data with Tcl Table
                title <- paste(myGlobalEnv$StudyRefGenProf[k],":",myGlobalEnv$CasesStudies[myGlobalEnv$curselectCases[k]+1])
                getInTable(ProfData, title)

                ClinicalData <- getClinicalData(myGlobalEnv$cgds,Case)

                ## Stop this study as soon as a case has no clinical data. The
                ## check must come before the column-wise cleaning, which cannot
                ## run on a table without columns.
                if(ncol(ClinicalData)==0){
                    msgNoClinData <- paste("No Clinical Data are Available for\n", myGlobalEnv$CasesStudies[myGlobalEnv$curselectCases[k]+1])
                    tkmessageBox(message=msgNoClinData, title= paste("Study: ",myGlobalEnv$StudyRefCase[k]))
                    break
                }

                ## Raw clinical table (header row + data) shown to the user
                clinical_table <- rbind.na(colnames(ClinicalData), ClinicalData)

                ## getClinicalData generates CHARACTER columns when a column holds
                ## an "NA" string: clean every column, keeping row / column names
                ClinicalData[] <- lapply(ClinicalData, clean_clinical_column)

                title <- paste(myGlobalEnv$StudyRefCase[k],myGlobalEnv$GenProfChoice[k], sep=": ")
                getInTable(clinical_table,title)

                ## Select only Cases (rownames) that exist in ClinicalData and ProfData
                n_clin <- ncol(ClinicalData)
                merged <- merge(ClinicalData, ProfData, by="row.names")
                print("merge Clinical and Profile Data")

                ## merged = Row.names | clinical columns | profile columns.
                ## Split by position: comparing column contents could drop a gene
                ## column that happens to equal a clinical column (e.g. all NA).
                ClinicalData <- merged[,seq_len(n_clin+1)]
                rownames(ClinicalData) <- ClinicalData[,1]
                ClinicalData <- ClinicalData[-1]
                ProfData <- merged[, -(seq_len(n_clin)+1L), drop = FALSE]

                ## Genes in rows, samples in columns; the first row of the
                ## transposed table carries the sample names.
                AssayData <- t(ProfData)
                colnames(AssayData) <- AssayData[1,]
                AssayData <- AssayData[-1, , drop = FALSE]
                ## convert in place so dimensions and dimnames survive even with
                ## a single gene or a single sample
                storage.mode(AssayData) <- "double"

                ## Convert columns holding only digital values to numeric
                for(j in seq_along(ClinicalData)){
                    if(!has_text(ClinicalData[[j]])){
                        ClinicalData[[j]] <- as.numeric(as.character(ClinicalData[[j]]))
                    }
                }

                myGlobalEnv$ClinicalData <- ClinicalData
                myGlobalEnv$ProfData <- ProfData
                myGlobalEnv$AssayData <- AssayData

                # Test if phenoData and AssayData describe the same samples
                if (all(rownames(ClinicalData)==colnames(AssayData))){

                    ## labelDescription is used by the Biobase package; here it
                    ## is simply equal to the column names of the phenoData.
                    metaData <- data.frame(labelDescription= colnames(ClinicalData), row.names=colnames(ClinicalData))
                    ## AnnotatedDataFrame stores the phenotypic data together
                    ## with its metadata.
                    phenoData <- new("AnnotatedDataFrame", data=ClinicalData, varMetadata=metaData)

                    ## Assembling an ExpressionSet
                    myGlobalEnv$eSet <- Biobase::ExpressionSet(assayData=AssayData, phenoData=phenoData, annotation="GO")
                    print("End of building eSet...")
                }else {
                    tkmessageBox( message= "The expression Gene Set and the Clinical Data do not have the same samples", icon="warning")
                }
            }
        }, finally = close(progressBar_ProfilesData))
    }

    invisible(NULL)
}

Try the canceR package in your browser

Any scripts or data that you put into this service are public.

canceR documentation built on Nov. 8, 2020, 7:21 p.m.