#'Write PyMol file for RBDmap Data
#'
#'This function makes a PyMOL file (.pml) that can be used to visualize data obtained from rdb.getBindingSeq()
#'
#'@param bs_output Output from rbd.getBindingSeq()
#'@param color_by The variable that will be used to determine color. Must a column in bs_output or use "freq" to color by frequency of amino acid position in binding sequences. Defaults to "binding_sequence".
#'@param colors Colors to be used for color.pymol() function.
#'@param file.name If write.file = T, name of the output file. Defaults to 'rbd_pymol.pml'
#'@param write.file Boolean. If TRUE, will output file with name of file.name. If FALSE, will return file as list.
#'@param experiment.dir Directory of experiment, where PDB files are. If left as NULL, will use current directory.
#'@param gray0 Boolean. If TRUE, will shift color scheme for frequency analysis and label 0s as "gray" in PyMOL.
#'@param assembly Assembly number for PyMOL output that corresponds to RCSB database. Works for PyMOL 1.8 and above.
#'@export
rbd.pymol <- function(bs_output, color_by = 'binding_sequence',
                      colors = NULL, file.name = 'rbd_pymol.pml',
                      write.file = TRUE, experiment.dir = NULL,
                      gray0 = FALSE, heatmap = TRUE, assembly = 0,
                      fetch = TRUE){
  #what is color_by is frequency?
  if(is.null(experiment.dir)){
    experiment.dir <- getwd()
  }
  #freq_vector
  pymol_lines <- c()
  if(is.null(colnames(bs_output))){
    bs_output <- data.frame(bs_output)
  }
  bs_output <- bs_output[bs_output$db == 'PDB',]
  #load the PDB files up here?
  split_pdbs <- unlist(strsplit(as.character(bs_output$db_id),'_'))
  unique_pdbs <- unique(split_pdbs[nchar(split_pdbs) == 4])
  py_line <- paste0('set assembly, ', assembly)
  pymol_lines <- c(pymol_lines,py_line)
  for(u_pdb in unique_pdbs){
    #py_line <- paste('fetch',u_pdb)
    if(fetch == TRUE){
      py_line <- paste0('fetch ',u_pdb,', async=0')
    } else {
      py_line <- paste0('load ',experiment.dir,'/',u_pdb,'.pdb')
    }
    pymol_lines <- c(pymol_lines,py_line)
  }
  if(color_by == 'freq'){
    #rename the names give to the bs_freqVector --> name_by?
    #bs_output$source_sequence <- paste0(resno_and_resid$resid,collapse='')
    # bs_output <- rbd.getBindingSeq(ms_sequence = 'ERTEILNQEWK',
    #                   protease = 'ArgC',
    #                   sequence_for_alignment =paste0(resno_and_resid$resid,collapse=''),
    #                   protein_name = 'EZH2',
    #                   database_name = 'PDB',
    #                   database_id = '6C23_C')
    # bs_output <- data.frame(bs_output)
    #need to add in colors for freqVector
    bs_freqVector <- rbd.freqVector(bs_output = bs_output, name_by = 'db_id',heatmap = FALSE, db_selection = 'PDB')
    #return(bs_freqVector)
    bsfv_vars <- min(unlist(bs_freqVector)):max(unlist(bs_freqVector))
    #explicitly make the bar??
    #return(bsfv_vars)
    #return(list(vars=bsfv_vars,colors = colors,gray0 = gray0))
    
    viridis_colors <- c("magma","inferno","plasma","viridis","cividis")
    if((length(colors) != length(bsfv_vars)) && !((colors %in% rownames(brewer.pal.info)) || (colors %in% viridis_colors))){
      colors <- colorRampPalette(colors = colors)(length(bsfv_vars))
    }
    
    pymol_cc <- color.pymol(bsfv_vars, colors = colors, png.name = 'freqvector_legend_test.svg', gray0 = gray0, print.legend = write.file)
    #return(pymol_cc)
    #return(pymol_cc)
    bs_freqVector2 <- rbd.freqVector(bs_output = bs_output, name_by = 'db_id',heatmap = heatmap, db_selection = 'PDB', colors = pymol_cc$hexcodes)
    #return(bs_freqVector2)
    #pymol_lines <- c()
    pymol_lines <- c(pymol_lines,pymol_cc$set_colors)
    #can alter this so the first color set that is equal to 0 will be the 0
    if(gray0 == TRUE){
      pymol_lines <- c(pymol_lines,'color gray')
    } else {#if gray0 == FALSE
      if(length(paste0('color ',pymol_cc$color_names[pymol_cc$vars == 0])) > 0){
        pymol_lines <- c(pymol_lines,paste0('color ',pymol_cc$color_names[pymol_cc$vars == 0]))
      } else {
        cat('No var with value 0 detected --> coloring gray')
        pymol_lines <- c(pymol_lines,'color gray')
      }
    } #end else to if(gray0 == TRUE){
    #should have this changed
    pymol_lines <- c(pymol_lines,'show surface')
    #pymol_cc$color_names[pymol_cc$vars == 0]
    #return(pymol_lines)
    for(pdb_name in names(bs_freqVector)){
      freq_vector <- bs_freqVector[[pdb_name]]
      #freq_vector
      pdb_id <- strsplit(pdb_name,'_')[[1]][1]
      chain <- strsplit(pdb_name,'_')[[1]][2]
      pdb_read <- check_download_read_pdb(pdb_id) #add ID to list of IDs that need to be loaded?
      #or can just remove repeating lines from final PyMOL file
      resno_and_resid <- quick_resno_and_resid(pdb_read = pdb_read, chain = chain)
      if(length(resno_and_resid$resno) == length(freq_vector)){
        #the lengths match --> probably the right sequence
        #get the resno for each
        #make sure that the color matches to the pymol_cc
        #
        #what about making grays 0s?
        #for loop to go through vars and make them different colors?
        #then go through the entire list?
        #pymol_cc$vars
        for(index_num in 1:length(freq_vector)){
          aa_freq <- freq_vector[index_num]
          match_index <- match(aa_freq,pymol_cc$vars)
          #pymol_cc$vars[match_index]
          color_name <- pymol_cc$color_names[match_index]
          resno_index <- resno_and_resid$resno[index_num]
          py_line <- paste0('color ',color_name,', /',pdb_id,'//',chain,'/',as.character(resno_index))
          pymol_lines <- c(pymol_lines,py_line)
          #construct the string here and make into a line
          #use the match index to get the right colors
          #match the aa_freq
          #use the index_num to get the right numbering from resno
        } #end for(index_num in 1:length(freq_vector))
      } else { #end if(length(resno_and_resid$resno) == length(freq_vector))
        warning("The lengths don't match --> try a different sequence")
        #add specifics about what sequence it is?
        return(list(resno=resno_and_resid$resno,freq_vector=freq_vector))
      } #end else to if(length(resno_and_resid$resno) == length(freq_vector))
    }  #end  for(pdb_name in names(bs_freqVector))
    #keeping track of the top frequency number
  } else { #end if(color_by == 'freq')
    #do what's below
    #assume that it's the name of a column in bs_output
    pymol_lines <- c(pymol_lines,'show surface')
    # if((is.numeric(bs_output[[color_by]]) == FALSE) && (gray0 == FALSE)){
    #   cat('color_by not numeric --> gray0 is now true\n')
    #   gray0 <- TRUE
    # }
    pymol_cc <- color.pymol(sort(unique(bs_output[[color_by]])), colors = colors, gray0 = gray0)
    pymol_lines <- c(pymol_lines,pymol_cc$set_colors)
    if(gray0 == TRUE){
      pymol_lines <- c(pymol_lines,'color gray')
    } else {#if gray0 == FALSE
      if(length(pymol_cc$color_names[pymol_cc$vars == 0]) > 0){
        pymol_lines <- c(pymol_lines,paste0('color ',pymol_cc$color_names[pymol_cc$vars == 0]))
      } else {
        cat('No var with value 0 detected --> coloring gray')
        pymol_lines <- c(pymol_lines,'color gray')
      }
    } #end else to if(gray0 == TRUE){
    bs_output$color <- rep(NA,nrow(bs_output))
    levels(bs_output$color) <- c(levels(bs_output$color),pymol_cc$color_names)
    for(index_num in 1:length(pymol_cc$vars)){
      var_name <- pymol_cc$vars[index_num]
      bs_output[bs_output[[color_by]] == var_name, 'color'] <- pymol_cc$color_names[index_num]
    } #end for(index_num in 1:length(pymol_cc$vars))
    #need to then go through each of the rows of the bs_output
    for(row_num in 1:nrow(bs_output)){
      bso_row <- bs_output[row_num,]
      #write each of the lines and add to pymol_lines
      pdb_id <- strsplit(as.character(bso_row$db_id),'_')[[1]][1]
      chain <- strsplit(as.character(bso_row$db_id),'_')[[1]][2]
      py_line <- paste0('color ',bso_row$color,', /',pdb_id,'//',chain,'/',bso_row$binding_site_start,'-',bso_row$binding_site_end)
      pymol_lines <- c(pymol_lines,py_line)
    } #end for(row_num in 1:nrow(bs_output))
  } #end else { #end if(color_by == 'freq')
  if(write.file == TRUE){
    write(paste(unique(pymol_lines),collapse = '\n'),file.name)
  } else {
    return(pymol_lines)
  }
} #end function rbd.pymol
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.