R/ptm.R

Defines functions ptm

Documented in ptm

#' Link UniProt PTM annotations to NCBI identifiers
#'
#' Takes a PTM data frame and a data frame of UIDs and UNIPROT IDs and
#' creates a PTM data frame that links the gene ID from NCBI to the UNIPROT
#' PTM data. This allows experimentally verified PTM data from UNIPROT to be
#' linked to protein data from NCBI.
#'
#' Only features whose \code{type} matches one of "Non-standard residue",
#' "Modified residue", "Lipidation", "Glycosylation", "Disulfide bond" or
#' "Cross-link" are joined. For "Modified residue" features the \code{type}
#' is replaced by the note taken from \code{attributes}. After the join,
#' rows containing any \code{NA} and rows whose type contains "amide",
#' "Lipidation" or "Glycosylation" are removed.
#'
#' @param ptmdf Data frame of PTM data generated by the readPTM function.
#'   Its first column is renamed to \code{UNIPROTKB}; it must also contain
#'   \code{type} and \code{attributes} columns. The \code{score},
#'   \code{source}, \code{strand} and \code{phase} columns are dropped.
#' @param res Data frame of UNIPROT IDs and UIDs produced by the unip
#'   function. Must contain a \code{UNIPROTKB} column.
#' @return A data frame with one row per retained PTM feature, holding the
#'   columns of \code{res} together with the remaining PTM columns (without
#'   \code{attributes}).
#' @keywords PTMS, UID, Uniprot
#' @export
#' @examples
#' \dontrun{
#' ptmdf <- ptm(ptmdf, res)
#' }
ptm <- function(ptmdf, res) {
  stopifnot(
    is.data.frame(ptmdf), ncol(ptmdf) >= 1L,
    is.data.frame(res), "UNIPROTKB" %in% names(res)
  )

  colnames(ptmdf)[1] <- "UNIPROTKB"
  stopifnot(all(c("type", "attributes") %in% names(ptmdf)))

  # Drop the feature-table columns that are not used downstream.
  unused <- c("score", "source", "strand", "phase")
  ptmdf <- ptmdf[, !(names(ptmdf) %in% unused), drop = FALSE]

  # Keep only the text before the first ';' of the attributes field.
  ptmdf$attributes <- gsub(
    pattern = ";.*$", replacement = "", ptmdf$attributes
  )

  ptm_types <- c(
    "Non-standard residue", "Modified residue", "Lipidation",
    "Glycosylation", "Disulfide bond", "Cross-link"
  )
  # Plain logical indexing instead of subset(): subset() would look the mask
  # name up inside the data frame first and could pick up a column instead.
  is_ptm <- grepl(paste(ptm_types, collapse = "|"), ptmdf$type)
  ptm_rows <- ptmdf[is_ptm, , drop = FALSE]

  ptm_rows$attributes <- gsub(
    pattern = "Note=", replacement = "", ptm_rows$attributes
  )
  ptm_rows$type <- gsub(pattern = "%.*", replacement = "", ptm_rows$type)

  # For modified residues the note names the actual modification, so it
  # replaces the generic type label. Vectorised, so a table without any PTM
  # rows no longer fails the way the old `1:length()` loop did. A missing
  # note stays NA and the row is dropped with the other incomplete rows.
  is_modified <- ptm_rows$type == "Modified residue"
  ptm_rows$type[is_modified] <- ptm_rows$attributes[is_modified]
  ptm_rows$attributes <- NULL

  # `join` is resolved from the package namespace (presumably plyr::join --
  # confirm against the package imports).
  linked <- join(res, ptm_rows, type = "left", by = "UNIPROTKB")
  linked <- na.omit(linked)

  # One mask computed on the table it is applied to. The previous code
  # reused masks built for an earlier, longer table, which removed the wrong
  # rows once any Lipidation row had been dropped.
  excluded <- grepl("amide|Lipidation|Glycosylation", linked$type)
  linked[!excluded, , drop = FALSE]
}
hawkdidy/prodata documentation built on May 17, 2019, 3:06 p.m.