Nothing
#'
#' Extract genotype calls from aroma files. It requires to have executed the normalization process suggested by aroma packages, by using
#' \link{signalPreProcess} for example.
#'
#' @title Extract genotype calls from aroma files
#' @param dataSetName The name of the data-set folder (it must correspond to a folder name in rawData folder.)
#' @param chromosome A vector containing the chromosomes for which the genotype call will be extracted.
#' @param listOfFiles A vector containing the names of the files in dataSetName folder for which the genotype signal will be extracted (default is all the files).
#' @param verbose If TRUE print some information (default=TRUE)
#'
#' @return a list of length the number of chromosomes containing a data.frame with columns:
#' \describe{
#' \item{chromosome}{Chromosome of the signal.}
#' \item{position}{Positions associated with the genotype.}
#' \item{genotype}{Genotype calls corresponding to selected files; the name of each column is the name of the associated data file name.}
#' \item{featureNames}{Names of the probes.}
#' }
#'
#' @details The aroma architecture must be respected. The working directory must contain rawData folder and totalAndFracBData folder.
#' To easily access the names of the files available in a dataset, one can use the \link{getListOfFiles} function.
#'
#'
#' @examples
#' \dontrun{
#' #DO NOT EXECUTE before reading the vignette
#' fracB=getGenotypeCalls("data1",5)
#' }
#'
#' @author Quentin Grimonprez
#'
#' @export
getGenotypeCalls=function(dataSetName,chromosome,listOfFiles=NULL,verbose=TRUE)
{
allpkg=TRUE
if(!suppressPackageStartupMessages(requireNamespace("aroma.affymetrix", quietly=TRUE) ) )
{
message("Package not found: aroma.affymetrix. For download it:\n")
message("source(\"http://callr.org/install#HenrikBengtsson/sfit\")\n")
message("if (!requireNamespace(\"BiocManager\", quietly = TRUE))\n")
message("install.packages(\"BiocManager\")\n")
message("BiocManager::install(\"affxparser\")\n")
message("BiocManager::install(\"DNAcopy\")\n")
message("BiocManager::install(\"aroma.light\")\n")
message("install.packages(\"aroma.affymetrix\")\n")
allpkg=FALSE
}
# else
# message("Package aroma.affymetrix loaded.\n")
if(!suppressPackageStartupMessages(requireNamespace("aroma.cn", quietly=TRUE) ) )
{
message("Package not found: aroma.cn. For download it:\n")
message("install.packages(\"aroma.cn\")\n")
allpkg=FALSE
}
# else
# message("Package aroma.cn loaded.\n")
if(!allpkg)
stop("You have to install some packages : Follow the printed informations.")
requireNamespace("aroma.core")
requireNamespace("R.filesets")
requireNamespace("R.methodsS3")
if(!("callData"%in%list.files()))
stop("There is no \"totalAndFracBData\", check if you are in the good working directory or if you have run the signalPreProcess function before.")
################### check
#chromosome
if(missing(chromosome))
stop("chromosome is missing.")
if(!is.numeric(chromosome))
stop("chromosome must contain integers between 1 and 25.")
if(sum(unlist(lapply(chromosome,is.wholenumber)))!=length(chromosome))
stop("chromosome must contain integers between 1 and 25.")
chromosome=unique(chromosome)
if(length(chromosome)>25 || length(chromosome)==0)
stop("chromosome contains too many elements.")
chromosome=sort(chromosome)
for(i in 1:length(chromosome))
if( (chromosome[i]<1) || (chromosome[i]>25) )
stop("chromosome must contain integers between 1 and 25.")
#dataSetName
if(missing(dataSetName))
stop("dataSetName is missing.")
if(!is.character(dataSetName))
stop("dataSetName must be the name of an existing folder in rawData.")
#check if we are in a normal-tumor study or in a single array study
# singleStudy=TRUE
# if(missing(normalTumorArray))
# {
# if(verbose)
# message("No normalTumorArray specified.\n The allele B fraction signal will be extracted for all the specified data.\n")
# }
# else
# {
# if(verbose)
# message("The allele B fraction signal will be extracted for normal and tumor signals. The normalized tumorboost allele B fraction signal will be extracted for tumor signal.")
# singleStudy=FALSE
# }
###import dataset to check listOfiles and normalTumorArray
#path where find the genotype calls data
rootPath <- "callData";
rootPath <- Arguments$getReadablePath(rootPath);
genotypeTag <- "NGC";
dataSet <- paste0(dataSetName,",ACC,ra,-XY,BPN,-XY,AVG,FLN,-XY");
gsN <- aroma.core::AromaUnitGenotypeCallSet$byName(dataSet, tags=genotypeTag, chipType="*");
#check listOfFiles
pos=c()
if(is.null(listOfFiles) || missing(listOfFiles))
{
listOfFiles=R.filesets::getNames(gsN)
pos=1:length(gsN)
}
else
{
#check the format
if(!is.vector(listOfFiles) || !is.character(listOfFiles))
stop("listOfFiles must be a vector of string.")
listOfFiles=unique(listOfFiles)
#check if all the files of listOfFiles are in the folder
pos=sapply(listOfFiles,match,R.filesets::getNames(gsN))#position of the files of listOfFiles in the folder
if(length(which(pos>0))!=length(pos))
{
phrase="Wrong name of files in listOfFiles.\n Available names are : "
for(i in R.filesets::getNames(gsN))
phrase=paste0(phrase,"\n",i)
stop(phrase)
}
}
########### END CHECK ARGUMENT
####################
#get names and psoition of the probes
ugp <- aroma.core::getAromaUgpFile(gsN);
unf <- aroma.core::getUnitNamesFile(ugp);
platform <- aroma.core::getPlatform(ugp);
if (platform == "Affymetrix")
{
requireNamespace("aroma.affymetrix") || R.methodsS3::throw("Package not loaded: aroma.affymetrix");
snpPattern <- "^SNP|^S-";
}
else if (platform == "Illumina")
{
snpPattern <- "^rs[0-9]";
}
else {
R.methodsS3::throw("Unknown platform: ", platform);
}
gsN=extract(gsN,pos)
allGen=list()
for(chr in chromosome)
{
units <- aroma.core::getUnitsOnChromosome(ugp, chromosome=chr);
#get the prefix of SNP probes
unitNames <- aroma.core::getUnitNames(unf,units=units);##names of the probes
#keep the SNP units
units=units[grep(snpPattern,unitNames)]
unitNames=unitNames[grep(snpPattern,unitNames)]
posChr <- aroma.core::getPositions(ugp, units=units);#positions of the probes
#sort signal by position
indSort=order(posChr)
units=units[indSort]
unitNames=unitNames[indSort]
posChr=posChr[indSort]
calls=matrix(nrow=length(posChr),ncol=length(listOfFiles))
colnames(calls)=R.filesets::getNames(gsN)
for(i in 1:length(listOfFiles))
{
gsNtemp=R.filesets::getFile(gsN,i)
calls[,i]=gsNtemp[units,1,drop=TRUE]
}
calls[calls==0]="BB"
calls[calls==1]="AB"
calls[calls==2]="AA"
allGen[[paste0("chr",chr)]]=data.frame(chromosome=rep(chr,length(posChr)),position=posChr,calls,featureNames=unitNames)
}
return(allGen)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.