# ppi.alignIDs
# xldf3_row <- xlink.df.filtered3[1,]
#
# pro_pos <- as.character(xldf3_row$pro_pos1)
# pro_name <- as.character(xldf3_row$pro_name1)
#
# pdbID <- as.character(ppi.alignIDs[ppi.alignIDs$protID==pro_name,'pdbID'])
#
# pdb_split <- strsplit(pdbID,'_')[[1]]
# pdb.id <- pdb_split[1]
# chain <- pdb_split[2]
#
# pdb_resno <- uniprot.PDBmap(pdb.id,chain,pro_pos,output='pdb')
#can also have an option that will create a new PDB file for any NAs that are generated
#once the uniprot mapping has been found
#can add the rows to the xlinkdf
#ppi.matchUniprot()
#should also display the distance histogram that has been generated by this function
#' PPI Match PDB 2
#'
#' Newer version of ppi.matchPDB(). Optionally remaps the cross-link table
#' through ppi.matchUniprot() first, then builds a PDB match vector with
#' ppi.alignPDB() and hands both to ppi.matchPDB() with
#' \code{pdb_numbering = TRUE}.
#'
#' @param xlink.df A data.frame made by ppi.combineData()
#' @param fasta_file Name of fasta file or loaded fasta file by seqinr::read.fasta().
#' @param alignIDs A data.frame containing the columns "ProteinName", "UniProtID", and "PDB"
#' @param uniprot2pdb If TRUE, will align to the UniProt sequence before aligning to the PDB. This parameter should be selected if the sequences used are not exactly UniProt (such as a slightly different N-terminal), but are relatively similar.
#' @param dist.histogram Intended to toggle a distance histogram. The
#'   corresponding branch is currently an empty placeholder, so the flag has
#'   no effect on the result.
#' @return The value returned by ppi.matchPDB() for the (optionally
#'   UniProt-remapped) \code{xlink.df}.
#' @export
ppi.matchPDB2 <- function(xlink.df, fasta_file, alignIDs, uniprot2pdb = TRUE,
                          dist.histogram = TRUE) {

  # Optional first stage: remap the cross-link table onto the canonical
  # UniProt sequence. Only this step differs between the two modes; the PDB
  # alignment and matching below are shared.
  if (uniprot2pdb == TRUE) {
    xlink.df <- ppi.matchUniprot(
      xlink.df,
      fasta_file,
      protein_to_uniprot_id = alignIDs,
      canonical = TRUE
    )
  }

  # Build the PDB match vector; ppi.alignPDB() receives the same flag so it
  # can pick the matching alignment mode.
  pdb_match_vector <- ppi.alignPDB(
    fasta_file,
    alignIDs = alignIDs,
    uniprot2pdb = uniprot2pdb
  )

  # Match the cross-links against the PDB using PDB numbering.
  # TODO (from original notes): confirm ppi.pymol needs no further adjustment
  # once positions use PDB numbering.
  xlink.df <- ppi.matchPDB(
    xlink.df,
    fasta_file = fasta_file,
    pdb_numbering = TRUE,
    pdb_match_vector = pdb_match_vector
  )

  if (dist.histogram == TRUE) {
    # TODO: ggplot the distances; consider a customizable output name.
  }

  xlink.df
} #end function ppi.matchPDB2
#pnv <- ppi.alignPDB(fasta_file)
#xldf3_row$pro_pos2
#need to have the matchPDB function work with just the PDB IDs
#should make use of this function
#need a function that has the capacity of uniprot2pdb
#generate_pdb_lists_from_pdb_csv
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.