# R/ppi.matchPDB2.R
#
# Defines functions ppi.matchPDB2
#
# Documented in ppi.matchPDB2

# ppi.alignIDs
# xldf3_row <- xlink.df.filtered3[1,]
#
# pro_pos <- as.character(xldf3_row$pro_pos1)
# pro_name <- as.character(xldf3_row$pro_name1)
#
# pdbID <- as.character(ppi.alignIDs[ppi.alignIDs$protID==pro_name,'pdbID'])
#
# pdb_split <- strsplit(pdbID,'_')[[1]]
# pdb.id <- pdb_split[1]
# chain <- pdb_split[2]
#
# pdb_resno <- uniprot.PDBmap(pdb.id,chain,pro_pos,output='pdb')

# TODO: optionally create a new PDB file for any NAs that are generated.

# Once the UniProt mapping has been found, the mapped rows can be added to
# the xlink data frame (see ppi.matchUniprot()).

# TODO: also display the distance histogram generated by this function.


#'PPI Match PDB 2
#'
#'This function is the newer version of ppi.matchPDB(). It optionally passes
#'the cross-link table through ppi.matchUniprot(), builds a PDB match vector
#'with ppi.alignPDB(), and then calls ppi.matchPDB() with
#'\code{pdb_numbering = TRUE}.
#'
#'@param xlink.df A data.frame made by ppi.combineData()
#'@param fasta_file Name of fasta file or loaded fasta file by seqinr::read.fasta().
#'@param alignIDs A data.frame containing the columns "ProteinName", "UniProtID", and "PDB"
#'@param uniprot2pdb If TRUE, will align to the UniProt sequence (via
#'  ppi.matchUniprot()) before aligning to the PDB. This parameter should be
#'  selected if the sequences used are not exactly UniProt (such as a slightly
#'  different N-terminal), but are relatively similar. The value is also
#'  forwarded to ppi.alignPDB().
#'@param dist.histogram Intended to toggle a histogram of the cross-link
#'  distances. Currently a placeholder: no plot is produced for either value.
#'@return The value returned by ppi.matchPDB() for the (optionally
#'  UniProt-matched) \code{xlink.df}.
#'@export
ppi.matchPDB2 <- function(xlink.df, fasta_file, alignIDs, uniprot2pdb = TRUE,
                          dist.histogram = TRUE) {

  # Optional pre-step: renumber against the canonical UniProt sequence so that
  # the subsequent PDB alignment starts from UniProt positions.
  # NOTE(review): the original author suspected this step may be redundant
  # because ppi.alignPDB() already aligns to UniProt when building the match
  # vector -- confirm before removing.
  if (uniprot2pdb == TRUE) {
    xlink.df <- ppi.matchUniprot(
      xlink.df,
      fasta_file,
      protein_to_uniprot_id = alignIDs,
      canonical = TRUE
    )
  }

  # The PDB alignment and matching are the same for both settings of
  # uniprot2pdb; only the pre-step above differs.
  pdb_alignment <- ppi.alignPDB(
    fasta_file,
    alignIDs = alignIDs,
    uniprot2pdb = uniprot2pdb
  )
  matched_df <- ppi.matchPDB(
    xlink.df,
    fasta_file = fasta_file,
    pdb_numbering = TRUE,
    pdb_match_vector = pdb_alignment
  )

  # TODO: possible extensions noted by the original author:
  #   - map residues per row with uniprot.PDBmap(..., output = 'pdb') using the
  #     PDB id and chain from alignIDs;
  #   - store the two matched structure positions and the distance between
  #     them as new columns (dist, pdb1, pdb2);
  #   - replace pro_pos1/pro_pos2 with the PDB numbering so that ppi.pymol
  #     needs no further adjustment.

  if (dist.histogram == TRUE) {
    # TODO: ggplot the distances (and allow customizing the output name).
    # Intentionally empty for now; the condition is still evaluated.
  }

  matched_df
} #end function ppi.matchPDB2



#pnv <- ppi.alignPDB(fasta_file)

#xldf3_row$pro_pos2

#need to have the matchPDB function work with just the PDB IDs

#should make use of this function

#need a function that has the capacity of uniprot2pdb




#generate_pdb_lists_from_pdb_csv
# egmg726/crisscrosslinker documentation built on Jan. 23, 2021, 1:50 a.m.