R/createOrLoadAnnotations.R

Defines functions loadFromFile old2db creaAnotFromPDPackage createOrLoadAnnotations

Documented in createOrLoadAnnotations

####################################################################

loadFromFile <- function(fileName, pos = 1) {
  tempEnv <- new("environment")
  load(fileName, tempEnv)
  varNames <-ls(tempEnv)
  myVarName <- varNames[pos]
  load(fileName)
  myVar <- eval(parse(text = myVarName))
  return(myVar)
}

#####################################################################
old2db <- function(anot){paste0(anot, ".db")}
#####################################################################
creaAnotFromChipPackage <- function (chipPackage, field = "ENTREZ", cleanNAs = T,
                                     isControl = FALSE, ctlCode = NA, removeControls = FALSE)
{
  # if(!require(old2db(chipPackage), character.only = T)) {                   ### ModAlba
  #   stop(paste("Required annotation package", chipPackage, " is missing"))  ### ModAlba
  # } else {                                                                  ### ModAlba
  #   require(old2db(chipPackage), character.only = T) # No deu caldre        ### ModAlba
  # }                                                                         ### ModAlba
  if(old2db(chipPackage) %in% rownames(installed.packages())) {               ### ModAlba
    BiocManager::install(old2db(chipPackage)) # No deu caldre                 ### ModAlba
  }                                                                           ### ModAlba

  if(isControl) {
    cleanNAs <- FALSE
    field <- "ACCNUM"
  }

  name2extract <- paste0(old2db(chipPackage), "::", chipPackage, field)
  x <- eval(parse(text = name2extract))
  myAnot <- toTable(x) # Aqui hauria de venir un control d'error
  # per si se li ha donat el nom malament
  myAnotTable <- myAnot[, 2]
  names(myAnotTable) <- myAnot[, 1]
  if(!cleanNAs) {
    name2extract <- paste0(chipPackage, "ACCNUM")
    x<- eval(parse(text = name2extract))
    myAccTable <- toTable(x)
    myNAsNames <- setdiff(myAccTable[, 1], myAnot[, 1])
    # Compte! : Aquesta diferencia es assimetrica
    myNAs <- rep(NA, length(myNAsNames))
    names(myNAs) <- myNAsNames
    myAnotTable<- c(myAnotTable, myNAs)
  }

  if(isControl) {
    if(is.na(ctlCode))
      stop("Control codes must be different from NA")

    controls <- sapply(myAnotTable, function (x) ifelse(regexpr(ctlCode, x) > 0, TRUE, FALSE))
    if(!removeControls) {
      myAnotTable <- myAnotTable[controls]
    } else {
      myAnotTable <- myAnotTable[!controls]
    }
  }
  return(myAnotTable)
}
###############################################################
creaAnotFromPDPackage <- function(dbPackage, field, fieldName = NULL, cleanNAs = T,
                                  multipleIDsSymbol = " /// ", removeMultipleIDs = T,
                                  removeControls = TRUE)
{
  # if (!require(dbPackage, character.only=T)){                                     ### ModAlba
  #   stop(paste("Required Platfform design package", dbPackage," is missing"))     ### ModAlba
  # }else{                                                                          ### ModAlba
  #   require(dbPackage, character.only=T) # No deu caldre                          ### ModAlba
  # }                                                                               ### ModAlba
  if(!(old2db(chipPackage) %in% rownames(installed.packages()))) {                  ### ModAlba
    BiocManager::install(old2db(chipPackage)) # No deu caldre                       ### ModAlba
  }                                                                                 ### ModAlba

  conn <- db(eval(parse(text = dbPackage)))
  fSetType <- dbGetQuery(conn,
                         paste("SELECT DISTINCT meta_fsetid as transcript_id, type_id",
                               "FROM featureSet, core_mps, type_dict",
                               "WHERE featureSet.fsetid=core_mps.fsetid",
                               "AND featureSet.type=type_dict.type"))
  allExceptControls <- as.character(fSetType$transcript_id[fSetType$type_id == "main"])
  allControls <- as.character(fSetType$transcript_id[fSetType$type_id != "main"])

  # Un cop fet aixo podem mirar de recuperar els ids fent servir getNetAffx
  # getNetAffx necessita un ExpressionSet (suposa que ja ha normalitzat)
  # El fem a ma posant-li els noms dels transcripts en el camp assayData

  if(removeControls) {
    transcriptIds <- allExceptControls
  } else {
    transcriptIds <- allControls
    field <- 1 # Es l'ACCNUM que tambe existeix pels controls
    removeMultipleIDs <- FALSE
  }

  numTranscripts <- length(transcriptIds)
  exp <-matrix(NA, nrow = numTranscripts, ncol = 1,
               dimnames = list(transcriptIds, "Empty"))
  rownames(exp) <- transcriptIds
  aD <- assayDataNew(exprs = exp)
  nulEset <- new("ExpressionSet",  assayData = aD)
  featureNames(nulEset) <- transcriptIds
  annotation(nulEset) <-dbPackage

  # Ara invoco getNetAffx per recuperar les anotacions

  featureData(nulEset) <- getNetAffx(nulEset, "transcript")
  geneNames <-pData(featureData(nulEset))$geneassignment

  # Creem la taula d'anotacions

  anotTable <- data.frame(transcriptIds, rep(NA, length(transcriptIds)))
  if(is.null(fieldName))
    fieldName<- switch(field, "Accession", "GeneSymbol", "Gene Title", "Cytoband", "Entrez")

  colnames(anotTable) <- c("transcriptIds", fieldName)

  myAnotTable <- geneNames
  names(myAnotTable) <- transcriptIds

  # stopifnot(require(gdata))                             ### ModAlba

  for(i in 1:length(transcriptIds)) {
    if(!is.na(geneNames[i])) {
      l1 <- strsplit(geneNames[i], "///")
      if(runMulticore == 1 || runMulticore == 3) {
        l2 <- mclapply(l1, function(l) strsplit(l, "//"))
        myIds <- unique(unlist(mclapply(l2[[1]], function (x) try(trim(x[[field]])))))
      } else {
        l2 <- lapply(l1, function(l) strsplit(l, "//"))
        myIds <- unique(unlist(lapply(l2[[1]], function (x) try(trim(x[[field]])))))
      }

      anotTable[i, 2] <- paste(myIds, collapse = "//")
    }
  }

  if(removeMultipleIDs) {
    s1 <- sapply(anotTable[, 2], function(s) strsplit(s, "//"))
    if(runMulticore == 1 || runMulticore == 3) {
      s2 <- mclapply(s1, function(s) return(s[1]))
    } else {
      s2 <- lapply(s1, function(s) return(s[1]))
    }

    myAnotTable <- unlist(s2)
    names(myAnotTable) <- transcriptIds
  } else {
    myAnotTable <-anotTable[, 2]
    names(myAnotTable) <- transcriptIds
  }

  return(myAnotTable)
}

#################################################################

#' createOrLoadAnnotations
#'
#' Function that creates the annotation needed, or if it is already done, loads the file created.
#' The annotation packages used need to be installed previously.
#' @param loadAnnotations FALSE by default. If TRUE the function loads the annotations files.
#' @param chipPackAvailable TRUE if there is a chip package available.
#' @param platformDesignPackAvailable  TRUE if there is a platform design package available.
#' @param chipPackage Name of the chip package. Only if chipPackAvailable is TRUE.
#' @param platformDesignPackage Name of the platform design. Only if platformDesignPackAvailable is TRUE.
#' @param outputDir Path where the annotation will be stored.
#' @param annotationsFileName Name of the file for annotations.
#' @param entrezTableFileName Name of the file for Entrez genes.
#' @param symbolsTableFileName Name of the file for gene symbols ID.
#' @param controlsTableFileName Name of the file for controls.
#' @return A list "anotacions" that contains the annotations, the Entrez genes and the gene symbols ID.
#' @examples
#' \dontrun{
#' loadAnnotations <- FALSE
#' chipPackAvailable <- TRUE
#' platformDesignPackAvailable <- FALSE
#' chipPackage <- "hgu133a2"
#' platformDesignPackage <- NULL
#' outputDir <- "./ResultsDir"
#' annotationsFileName <- "Annotations"
#' entrezTableFileName <-"Entrezs.Rda"
#' symbolsTableFileName <-"Symbols.Rda"
#' controlsTableFileName <- "controls.Rda"
#'
#'
#' anotacions <- createOrLoadAnnotations (loadAnnotations= loadAnnotations,
#' chipPackAvailable = chipPackAvailable, platformDesignPackAvailable = platformDesignPackAvailable,
#' chipPackage = chipPackage, platformDesignPackage = platformDesignPackage,
#' outputDir = outputDir,annotationsFileName = annotationsFileName,
#' entrezTableFileName = entrezTableFileName, symbolsTableFileName = symbolsTableFileName,
#'  controlsTableFileName = controlsTableFileName)}
#' @export

createOrLoadAnnotations <-function(loadAnnotations = FALSE,
                                   chipPackAvailable,
                                   platformDesignPackAvailable,
                                   chipPackage,
                                   platformDesignPackage,
                                   outputDir,
                                   annotationsFileName,
                                   entrezTableFileName,
                                   symbolsTableFileName,
                                   controlsTableFileName) {
  if(!loadAnnotations) {
    if(chipPackAvailable) {
      entrezTable <- creaAnotFromChipPackage(chipPackage = chipPackage, field = 'ENTREZID')
      symbolsTable <- creaAnotFromChipPackage(chipPackage = chipPackage, field = 'SYMBOL')
      controlsTable <- creaAnotFromChipPackage(chipPackage = chipPackage,
                                               field = 'ACCNUM',
                                               isControl = TRUE,
                                               ctlCode = 'AFFX',
                                               removeControls = FALSE)
    } else {
      if(platformDesignPackAvailable) {
        entrezTable <- creaAnotFromPDPackage(dbPackage = platformDesignPackage, field = 5)
        symbolsTable <- creaAnotFromPDPackage(dbPackage = platformDesignPackage, field = 2, removeMultipleIDs = F)
        controlsTable <- creaAnotFromPDPackage(dbPackage = platformDesignPackage, field = 1, removeControls = F)
      } else {
        symbolsTable <- NULL          # Si hi ha chipPackage ha de ser NULL
        entrezTable <- NULL           # Si hi ha chipPackage ha de ser NULL
        controlsTable <- NULL
      }
    }
  } else {
    # entrezTable <-loadFromFile(file=file.path(outputDir, entrezTableFileName))                  ### ModAlba
    # symbolsTable <-loadFromFile(file=file.path(outputDir, symbolsTableFileName))                ### ModAlba
    # controlsTable <- loadFromFile(file=file.path(outputDir, controlsTableFileName))             ### ModAlba
    entrezTable <- loadFromFile(fileName = file.path(outputDir, entrezTableFileName))             ### ModAlba
    symbolsTable <- loadFromFile(fileName = file.path(outputDir, symbolsTableFileName))           ### ModAlba
    controlsTable <- loadFromFile(fileName = file.path(outputDir, controlsTableFileName))         ### ModAlba
  }

  if(!is.null(entrezTable)) {save(entrezTable, file = file.path(outputDir, entrezTableFileName))}
  if(!is.null(symbolsTable)) {save(symbolsTable, file = file.path(outputDir, symbolsTableFileName))}
  if(!is.null(controlsTable)) {save(controlsTable, file = file.path(outputDir, controlsTableFileName))}

  if(is.null(annotationsFileName)) {
    annotationsFileName <- paste("Annotations", "txt", sep = ".")
    write.table(data.frame(Entrez = entrezTable, Symbols = symbolsTable),
                sep = "\t", quote = FALSE, file = file.path(outputDir, annotationsFileName))
  }

  anotacions <- list(Entrez = entrezTable, Symbols = symbolsTable, controls = controlsTable)
  return(anotacions)
}
uebvhir/BasicP4microArrays documentation built on Nov. 5, 2019, 11:03 a.m.