R/accessoryFunctions.R
In HLAtools: Toolkit for HLA Immunogenomics

Documented in addCodonLine checkAlignType checkSource multiQueryRelease numFields parseAlignmentHead posSort queryRelease typeToSource validateAllele verifyAllele

##Accessory Functions v5.1.1 30APR2025

################
##posSort
#'Numerical Sort of Alignment Positions that Contain Indels
#'
#'@description
#'Sorts sequence alignment positions that contain indels in numerical order; e.g., three indels following position X are are identified as X.1, X.2 and X.3.
#'
#'Positions are validated against a specified locus for a specified alignment type. Positions that do not exist for that locus-alignment combination are not returned.
#'
#'@param posVec A character vector of variant positions.
#'@param alignType A character string describing the type of alignment the positions are found in. The values 'prot', 'codon', 'nuc' and 'gen' are valid options. Only one 'alignType' value is allowed.
#'@param locus A character string describing a locus in the HLAalignments object for the specified alignType.
#'
#'@return A character string of the correctly sorted sequence.
#'
#'@note Indel positions must be text-formatted (e.g. "607.12"). C.f., posSort(c(2,4,3,1,5), "nuc","DRB1") vs posSort(c("607.23","607.10","607.3","607.4"),"nuc","DRB1").
#'
#'@export
#'
posSort <- function(posVec,alignType, locus){
  
    alignType <- checkAlignType(alignType)  
    
    if(length(alignType)!=1) {stop(paste("Please specify only one 'alignType'."))}
    
    if(!locus %in% names(HLAalignments[[alignType]])) {stop(paste(locus,"is not included among the",alignType,"alignments.\n",sep=" "))}
  
  tab <- as.data.frame(cbind(match(posVec,names(HLAalignments[[alignType]][[locus]])),posVec))
  tab$V1 <- as.numeric(tab$V1)
  tab <- tab[order(tab$V1),]
  tab$posVec[!is.na(tab$V1)]
}

################
##numFields
#'Identify the Number of Fields in a Colon-Delimited Allele Name
#'
#'Returns the number of fields in a colon-delimited HLA allele name. A value of 1 is returned for digit-delimited HLA allele names.
#'
#'@param allele A character string of a colon-delimited HLA allele name.
#'
#'@return A numeric value describing the number of fields in the allele name.
#'
#'@export
#'
#'@examples
#'numFields("HLA-A*01:01")
#'numFields("DRB1*04:03:01")
#'numFields("13:02:01:01")
numFields <- function(allele) {
  length(strsplit(allele,":",fixed=TRUE)[[1]])
}

#################
##validateAllele
#'Validate Allele-Name Format and Presence in HLAalignments
#'
#'Returns TRUE if an allele name is found in HLAalignments in either the 'allele_name' column of full-length allele names or the 'trimmed_allele' column of two-field allele names in the pertinent genomic alignment. Returns FALSE if the allele name is not properly formed, or if the allele name is not found in HLAalignments.
#'
#'@param allele A character string of the colon-delimited HLA allele name.
#'
#'@return A logical identifying if the allele name is present in the alignments (TRUE) or, if it is not in the alignments or is not valid (FALSE).
#'
#'@note Messages will be returned to the console if the allele name is malformed, or the locus is invalid; e.g., 'validateAllele("C\*01:01:01:01")' or 'validateAllele("A\*01:01:01:01:01")'.
#'@note The locus being evaluated must be included in HLAalignments.
#'
#'@export
#'
validateAllele <- function(allele) {
  alleleParts <- strsplit(allele,"*",fixed=TRUE)[[1]]
  if(length(alleleParts) != 2) {
    message(paste("No asterisk ('*') is present in ",allele,".",sep=""))
    return(FALSE)
    }
  if(length(strsplit(alleleParts[2],":",fixed=TRUE)[[1]]) == 1) {
    message(paste("No colon (':') is present in ",allele,".",sep=""))
    return(FALSE)
  }
  locus <- alleleParts[1]
  if(!suppressMessages(validateLocus(alleleParts[1],"gDNA")) && !suppressMessages(validateLocus(alleleParts[1],"cDNA")) && !suppressMessages(validateLocus(alleleParts[1],"prot"))) {
    message(paste(locus, "is not a valid locus.", sep=" "))
    return(FALSE)
  } 
  if((allele %in% HLAalignments$prot[[locus]]$trimmed_allele) || (allele %in% HLAalignments$nuc[[locus]]$trimmed_allele) || (allele %in% HLAalignments$gen[[locus]]$trimmed_allele) || (allele %in% HLAalignments$prot[[locus]]$allele_name) || (allele %in% HLAalignments$nuc[[locus]]$allele_name) || (allele %in% HLAalignments$gen[[locus]]$allele_name)) {
    return(TRUE) } else { message(paste(allele, "is not found in version", HLAalignments$version,"alignments.",sep=" "))
    return(FALSE)
   }
}


#################
##verifyAllele
#'Determine if an Allele Name Ever Existed, and (if so) in Which IPD-IMGT/HLA Database Releases
#'
#'Returns TRUE if an allele name is present in AlleleListHistory or FALSE it is absent, c(TRUE,version), where 'version' can be the most recent, original, or all IPD-IMGT/HLA Database releases in which that name appears, and c(TRUE,version,acc), where 'acc' is the IPD-IMGT/HLA accession number for the allele.
#'
#'@param allele A character string of an HLA allele name. Colon-delimited and field-delimited names are both accepted.
#'@param version A logical that indicates if the most recent nomenclature release version in which that name was valid should be returned. The default value is FALSE.
#'@param all A logical that indicates if all of the nomenclature release versions in which that name was valid should be returned. The default value is FALSE.
#'@param first A logical that indicates if only the original nomenclature release version in which that name was valid should be returned. The default value is FALSE.
#'@param acc A logical that indicates if the IPD-IMGT/HLA accession number for an allele name should be returned. The default value is FALSE.  
#'
#'@return A logical identifying if the allele name is found in AlleleListHistory (TRUE) or not (FALSE), c(TRUE,version) if version = TRUE, and c(TRUE,acc) or c(TRUE,version,acc) if acc = TRUE.
#'
#'@export
#'
#'@examples
#'verifyAllele("A*01:01:01:01")
#'verifyAllele("A*01:01:01:01",TRUE)
#'verifyAllele("A*010101",TRUE,TRUE)
#'verifyAllele("A*0101",TRUE)
#'verifyAllele("A*02:01:01:01",FALSE,FALSE,FALSE,TRUE)
#'verifyAllele("A*02:01:01:01",TRUE,TRUE,FALSE,TRUE)
#'
verifyAllele <- function(allele, version=FALSE,all=FALSE,first=FALSE,acc=FALSE){
  
  if(first && !all) {all = TRUE}
  
  resArray <- which(alleleListHistory$AlleleListHistory == allele,arr.ind = TRUE)
  if(length(resArray)==0) {return(FALSE)}
  
  accession <- alleleListHistory$AlleleListHistory[as.data.frame(resArray)[1,1],1]
  
  if(!version) {ifelse(acc,return(c(TRUE,accession)),return(TRUE))
      } else {
        if(!all) {
          rawVersion <- colnames(alleleListHistory$AlleleListHistory)[resArray[1,2]]
          {ifelse(acc,return(c(TRUE,expandVersion(substr(rawVersion,start=2,stop = nchar(rawVersion))),accession)),return(c(TRUE,expandVersion(substr(rawVersion,start=2,stop = nchar(rawVersion))))))}
        } else {
            allVersion <- unlist(lapply(unlist(lapply(colnames(alleleListHistory$AlleleListHistory)[resArray[,2]],FUN = substring,2,5)),FUN=expandVersion))
            if(!first) {
              {ifelse(acc,return(c(TRUE,allVersion,accession)),return(c(TRUE, allVersion)))}
            } else {
              {ifelse(acc,return(c(TRUE,allVersion[length(allVersion)],accession)),return(c(TRUE,allVersion[length(allVersion)])))}
            }
          }
        } 
      }

#################
##parseAlignmentHead
#'Guides for Parsing the Header Blocks of Alignment Files
#'
#'Returns a vector describing the location of key information in the header blocks of alignment files.
#'
#'@param version A character string of a validated IPD-IMGT/HLA Database release version (e.g., '3.25.0' or '3250').
#'
#'@return Either FALSE if the version is not valid, or two-value numerical vector describing (1) the header line in which alignment version data appears and (2) the length of the character string in that line preceding version data.
#'
#'@export
#'
#'@examples
#'parseAlignmentHead("3.25.0")
#'
parseAlignmentHead <- function(version){
  
  if(validateVersion(version) ) { version <- squashVersion(version)
      } else {
          return(FALSE) }
  
    field1 <- as.numeric(substr(version,1,1))
        if(field1 != 3) { 
                  message(paste("Alignments are not available for version",field1,"releases.",sep=" "))
                     return(FALSE) 
              }
    field2 <- as.numeric(substr(version,2,3))
    
    if(field2 >= 32) {return(c(3,24))} # on line three, skip the first 24 characters 
    
    if(field2 %in% c(25,27:31)) {return(c(2,22))} # on line 2, skip the first 22 characters
    
    if(field2 %in% c(0:24,26)) {return(c(2,18))} # on line 2, skip the first 18 characters
    
    return(FALSE) ## In case something else makes it through.
    
#    field3 <- substr(version,4,4) ## not needed.
  
}

##################
##queryRelease
#'Search Allele Names Across Release Versions 
#'
#'Searches specific release versions in the AlleleListHistory object for user-defined allele variants. 
#'
#'@param rel An IPD-IMGT/HLA Database release version, represented as either a character (e.g., "3.56.0") or a numeric (e.g., 3560) value.
#'
#'@param variant A character string. The value of 'variant' can be any part of a locus or allele name (e.g., "DR", "02:01", "DRB1*08:07"). The default ("") specifies all alleles in 'rel'.
#'
#'@param all A logical. When 'all' = TRUE, a vector of all instances of 'variant' in 'rel' is returned.  When 'all' = FALSE, the number of instances of 'var' in 'rel' is returned. 
#
#'@return A character vector of all matches to 'variant' in 'rel' or the number of all matches to 'variant' in 'rel'.
#'
#'@export
#'
#'@examples
#' # Identify the number of DRB9 alleles in releases 3.30.0 and 3.31.0.
#'queryRelease("3.30.0","DRB9",FALSE) 
#'queryRelease("3.31.0","DRB9",FALSE)
#'
#' # Identify the total number of alleles in release 3.56.0.
#' queryRelease(3560)
#'
queryRelease <- function(rel, variant="", all= FALSE){
  
  if(validateVersion(rel)) {rel <- paste("X",squashVersion(rel),sep="")} else {return(paste("Release version",ifelse(is.numeric(rel),expandVersion(rel),rel),"is not in the local copy of 'AlleleListHisory'.",sep=" "))}
  
      alleles <- alleleListHistory$AlleleListHistory[,rel][!is.na(alleleListHistory$AlleleListHistory[,rel][])]
  
      if(variant == "") {
          
        matchAllele <- alleles 
          
            } else { 
            
            matchAllele <- grep(variant,alleles,fixed=TRUE)
            
              }
  
      numAllele <- length(matchAllele)
  
          if(all) { 
            
                  if(variant == "") {
                          return(matchAllele) 
                    } else {
                   return(alleles[matchAllele])
                    }
    
                 } else { return(numAllele)
    
             }
  }

##################
##multiQueryRelease
#'Search Multiple Elements of Allele Names Across Release Versions 
#'
#'Searches specific release versions in the AlleleListHistory object for a set of user-defined allele-name elements. 
#'
#'@param rel An IPD-IMGT/HLA Database release version, represented as either a character (e.g., "3.58.0") or a numeric (e.g., 3580) value.
#'
#'@param variants A vector of character strings. The values in 'variants' can be any part of a locus or allele name (e.g., "DR", "02:01", "DRB1*08:07"). The default ("") specifies all alleles in 'rel'.
#'
#'@param all A logical. When 'all' = TRUE, a vector of all alleles in 'rel' that share all elements of 'variants' is returned.  When 'all' = FALSE, the number of alleles that share all elements of 'variants' in 'rel' is returned. 
#
#'@return A character vector of all alleles that share all 'variants' in 'rel' or the number of alleles that share all 'variants' in 'rel'.
#'
#'@export
#'
#'@examples
#' # Identify DRB1 allele names that include a 'Q' character in release 3.58.0.
#' multiQueryRelease("3.58.0",c("DRB1","Q"),TRUE) 
#' # Identify the number of null (N) DRB1 alleles.
#' multiQueryRelease("3.58.0",c("DRB1","N"),FALSE)
#'
multiQueryRelease <- function(rel,variants = "", all = TRUE) {
  
  if(length(variants) == 1) {return(queryRelease(rel,variants,all))}
  
  result <- NA
  
  for(i in 1:length(variants)) {
    if(i == 1) { result <- queryRelease(rel,variants[i],TRUE) 
    } else {
      result <- result[grep(variants[i],result)]
      }
   }
  ifelse(all,return(result),return(length(result))) 
}


##################
##checkAlignType
#'Ensure that AlignType Values are Valid
#'
#'Evaluates 'alignType' values to ensure that only "prot", "nuc", "codon" and "gen" are passed to downstream functions. If any other values are entered, a message describing excluded values is generated. If no valid 'alignType' values are are present, an error is generated, and any calling function is ended. 
#'
#'@param alignType A vector of character values specifying sequence alignment types to be used for a function.
#'
#'@return A character vector that includes only allowed 'alignType' values.
#'
#'@export
#'
#'@examples
#'checkAlignType(c("nuc","prot","gDNA")) 
#'
checkAlignType <- function(alignType) {
  
  if(!all(alignType %in% c("codon","nuc","prot","gen"))) {
    
    fixedType <- alignType[alignType %in% c("codon","nuc","prot","gen")]
    
    if(length(fixedType) == 0 ){stop(paste("None of the values",paste(alignType,collapse=", "),"are valid 'alignType' values."))}
    
    message(paste("The value '",alignType[!alignType %in% fixedType],"' was removed from 'alignType', as it is not a valid 'alignType' value.\n",sep=""))
    
    return(fixedType)
    }
  
  alignType
}

##################
##checkSource
#'Ensure that Source Values are Valid
#'
#'Evaluates 'source' values to ensure that only "AA", "cDNA", and "gDNA" are passed to downstream functions. If any other values are entered, a message describing excluded values is generated. 
#'
#'@param source A vector of character values specifying sequence alignment file sources to be used for a function.
#'
#'@return A character vector that includes only allowed 'source' values.
#'
#'@export
#'
#'@examples
#'checkSource(c("AA","cDNA","codon")) 
#'
checkSource <- function(source) {
  
  if(!all(source %in% c("AA","cDNA","gDNA"))) {
    
    fixedSource <- source[source %in% c("AA","cDNA","gDNA")]
    
    if(length(fixedSource) == 0 ){stop(paste("None of the values",paste(source,collapse=", "),"are valid 'source' values."))}
    
    message(paste("The value '",source[!source %in% fixedSource],"' was removed from 'source', as it is not a valid 'source' value.\n",sep=""))
    
    return(fixedSource)
  }
  
  source
}

##################
##TypeToSource
#'Convert AlignType Values to Source Values
#'
#'Converts between 'alignType' values, identifying four types of sequence alignments and 'source' values, identifying three kinds of alignment files.
#'
#'@param alignVector A vector of character values specifying kinds of four sequence alignment types ("prot","nuc","codon" and "gen"), or three source alignment files ("AA","cDNA", and "gDNA").
#'
#'@param toSource A logical. If 'toSource' is true, 'alignType' values are converted to 'source' values. If 'toSource' is false, 'source' values are concerted to 'alignType' values
#'
#'@return A character string of the converted 'align' or 'source' value.  
#'
#'@export
#'
#'@examples
#'typeToSource(c("nuc","prot","gen"),TRUE) 
#'
typeToSource <- function(alignVector,toSource=TRUE){
  
  if(toSource) {
    
    alignVector <- checkAlignType(alignVector)
    
    alignVector[alignVector == "prot"] <- "AA"
    alignVector[alignVector == "nuc"] <- "cDNA"
    alignVector[alignVector == "codon"] <- "cDNA"
    alignVector[alignVector == "gen"] <- "gDNA"
    
    unique(alignVector)
  } else {
    
      alignVector <- checkSource(alignVector)
    
      alignVector[alignVector == "AA"] <- "prot"
      alignVector[alignVector == "cDNA"] <- "nuc"
      alignVector[alignVector == "gDNA"] <- "gen"
    
      if("nuc" %in% alignVector) {alignVector <- append(alignVector,"codon",length(alignVector))} ## cDNA is both nuc and codon
    
      unique(alignVector)
      
      }  
}

#################
##AddCodonLine
#'Add an 'AA codon' Line to Alignments When Missing.
#'
#'Modifies cDNA alignment objects that are missing "AA codon" lines to include these lines in the correct location with the correct codon position information.
#'
#'@param cDNAalign A matrix of cDNA alignment lines, generated from an alignment file that is missing "AA codon" lines.
#'@param firstPos A numeric value identifying the position number of the transcript's N-terminal codon, based on a complete cDNA alignment in another release.
#'@param afterLine A numeric value identifying the number of the line below the first "cDNA" line in the alignment. The default value is 8.
#'@param codons A numeric value identifying the number of codons in each line of the nucleotide alignment. The default value is 25.
#'
#'@return A complete cDNA alignment data frame that includes "AA codon" rows.
#'
#'@export
#'
#'@note For internal HLAtools use.
#'
addCodonLine <- function(cDNAalign,firstPos,afterLine = 8, codons = 25){
  
  # Assigns the AA codon positions using the first position in the codon row of another release, and adds the missing lines.
  afterRow <- rev(which(cDNAalign == cDNAalign[afterLine])-1)
  numRow <- length(afterRow)
  startPos <- rep(firstPos,numRow) ## this needs to be set based on a correct alignment 
  for(k in 2:length(startPos)) {startPos[k] <- startPos[k-1]+codons}
  if(any(startPos < 1)) {startPos[startPos > 0] = startPos[startPos > 0 ]+1 }
  startPos <- rev(startPos)
  
  for(f in 1:length(startPos)) {
    cDNAalign <- append(cDNAalign,paste(" AA codon          ",startPos[f],sep=""),after=afterRow[f])
  }      
  
  cDNAalign
}
Any scripts or data that you put into this service are public.
HLAtools documentation built on June 8, 2025, 12:24 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
HLAtools
Toolkit for HLA Immunogenomics

R/accessoryFunctions.R
In HLAtools: Toolkit for HLA Immunogenomics

Defines functions addCodonLine typeToSource checkSource checkAlignType multiQueryRelease queryRelease parseAlignmentHead verifyAllele validateAllele numFields posSort

Documented in addCodonLine checkAlignType checkSource multiQueryRelease numFields parseAlignmentHead posSort queryRelease typeToSource validateAllele verifyAllele

Try the HLAtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HLAtools Toolkit for HLA Immunogenomics

R/accessoryFunctions.R In HLAtools: Toolkit for HLA Immunogenomics

Defines functions addCodonLine typeToSource checkSource checkAlignType multiQueryRelease queryRelease parseAlignmentHead verifyAllele validateAllele numFields posSort

Documented in addCodonLine checkAlignType checkSource multiQueryRelease numFields parseAlignmentHead posSort queryRelease typeToSource validateAllele verifyAllele

Try the HLAtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HLAtools
Toolkit for HLA Immunogenomics

R/accessoryFunctions.R
In HLAtools: Toolkit for HLA Immunogenomics