R/ms_iso.R

Defines functions ms_iso

Documented in ms_iso

#' Obtain the isotope distribution for an amino acid sequence with
#' post-translation modifications (deamitation and hydroxylation)
#' @param seqeunce the amino acid sequence
#' @param ndeamidations the number of deamidations, default is zero
#' @param nhydroxylations the number of hydroxylations, default is zero
#' @keywords isotopes mass sequence deamidation hyroxylation
#' @export
#' @examples
#' ms_iso("IGQPGAVGPAGIR")
#' ms_iso("IGQPGAVGPAGIR",0,1)
ms_iso <- function(sequence,ndeamidations=0,nhydroxylations=0,verbose=F){

  if(verbose){
    message(sprintf("Calculating isotope distributions for the peptide %s",sequence))
  }

  #CHECK THE STRING VERY CAREFULLY - THE C CODE IS FRAGILE!
  if(grepl('^[A-Z]+$', sequence)){

    #result<-cppIso(sequence)
    atoms <- aa_seq_to_atoms(sequence)


    #REMEMBER: Order of atoms is C H N O S
    #                            1 2 3 4 5
    # From Kristine Korzow Richter:
    # hydroxylation or oxidation - gain one oxygen (O)
    # deamidation - gain one oxygen (O), loose one nitrogen (N) and one hydrogen (H)




    #Add the deamidations
    if(ndeamidations>0){
      max_deam <- stringr::str_count(sequence,"Q") + str_count(sequence,"N")
      if(ndeamidations>max_deam){
        message(sprintf("WARNING: %d deamidations are not possible for sequence %s",
                        ndeamidations,sequence))
        message(sprintf("       max number of deamidations is %d (count of 'Q's and 'N's in sequence)",max_deam))
        return (NA)
      }
      #else{
          #gain an oxygen
          atoms[4] <- atoms[4] + ndeamidations
          #lose a nitrogen
          atoms[3] <- atoms[3] - ndeamidations
          #lose a hydrogen
          atoms[2] <- atoms[2] - ndeamidations

          if(atoms[3]<0 || atoms[2]<0){
            message(sprintf("ERROR - can't have a negative number of atoms! deamidation at level %d can't happen\n nNitrogen = %d, nHydrogen =%d",
                            ndeamidations,
                            atoms[3],
                            atoms[2]))
            return(NA)
          }
      #}
    }

    if(nhydroxylations>0){
      max_hyd <- stringr::str_count(sequence,"P")
      if(nhydroxylations>max_hyd){
        message(sprintf("ERROR: %d hydroxylations are not possible for sequence %s",
                        nhydroxylations,sequence))
        message(sprintf("       max number of hydroxylations is %d (count of 'P' in sequence)",max_hyd))

        return (NA)
      }
      else{
        #gain an oxygen
        atoms[4] <- atoms[4] + nhydroxylations
      }
    }

    #Now we've done the mods, we can calculate the isotopes based on the atoms
    result <- cppIsoAtom_p(atoms)


    return (result)

  }
  else{
    message(sprintf("ms_iso: error processing string %s\n Input string must contain ONLY uppercase alphabeticial characters",sequence))
    return(NA)
  }
}
bioarch-sjh/bacollite documentation built on Oct. 7, 2022, 3:34 p.m.