R/test_function.R

Defines functions my_name_function process_fasta assess_conservation call_plot draw_MSA plotMSA

Documented in my_name_function plotMSA

#' A Name Function
#'
#' This is simply a way to demonstrate someone's name
#' @param first what is your first name?
#' @param middle
#' @param last
#' @export
#' @examples
#' my_name_function("John", "Quotient","Public)

my_name_function <- function(first,middle,last) {
  full_name <- paste(first, middle, last, sep=' ')
  print(full_name)
}


#' @export
'%notin%' <- Negate(`%in%`)


#' @export
process_fasta <- function(args) {
  inp <- read.fasta(args$fasta_file)

  read_names <- names(inp)
  read_names <- read_names[read_names %notin% args$to_exclude]

  seq_list <- list()

  for (name in read_names) {
    seq <- toupper(inp[[name]])

    if (args$window[1] != -1) {
      seq <- seq[args$window[1]:args$window[2]]
    }

    seq_list[[name]] <- seq
  }
  return(seq_list)
}
#' @export
assess_conservation <- function(seq_list) {
  length_of_seq <- length(seq_list[[1]])
  num_of_seqs <- length(names(seq_list))

  rank_vec <- character(length_of_seq * num_of_seqs)
  pident_vec <- character(length_of_seq * num_of_seqs)

  for (j in 1:length_of_seq) {
    pos_list <- lapply(seq_list, function(seq) return(seq[j]))

    tab <- as.data.frame(table(unlist(pos_list)))
    # tab$Total <- sum(tab$Freq[tab$Var1 != "-"])
    tab$Total <- sum(tab$Freq)
    tab$Pident <- tab$Freq / tab$Total

    tab$Hierarchy[tab$Pident < 0.4] <- 'D'
    tab$Hierarchy[tab$Pident >= 0.4] <- 'C'
    tab$Hierarchy[tab$Pident >= 0.6] <- 'B'
    tab$Hierarchy[tab$Pident >= 0.8] <- 'A'
    tab$Hierarchy[tab$Var1  == "-"] <- 'F'

    for (i in 1:length(pos_list)) {
      rank = tab$Hierarchy[tab$Var1 == pos_list[i]]
      rank_vec[(j-1) * num_of_seqs + i] <- rank

      # pident = round(tab$Pident[tab$Var1 == pos_list[i]],3)
      # pident_vec[(j-1) * num_of_seqs + i] <- pident
    }
  }
  rank_mat <- matrix(rank_vec, nrow=num_of_seqs, ncol=length_of_seq, byrow=FALSE)
  rank_df <- as.data.frame(rank_mat)
  rank_list <- split(rank_df, seq(nrow(rank_df)))
  names(rank_list) <- names(seq_list)

  return(rank_list)
  # rank_list <- as.list(t(rank_df))
  # names(rank_list) <- names(seq_list)


  # pident_mat <- matrix(pident_vec, nrow=num_of_seqs, ncol=length_of_seq, byrow=FALSE)
  # pident_df <- as.data.frame(pident_mat)
}
#' @export
call_plot <- function(args) {
  par(mar=c(0,0,0,0))
  plot(0,0, axes= F, type='n',xlim=c(0,args$plot.size),ylim=c(0,args$plot.size),xlab='',ylab='')
}

#' @export
draw_MSA <- function(seq_list, rank_list, args) {

  # offset_x = 12
  # text_cex = 0.8

  window_size = args$window[2] - args$window[1]
  if (window_size > 20) {
    tick_interval = ceiling(window_size / 10/ 5)*5
    tick_interval
  } else {
    tick_interval = 5
  }


  if (args$auto_scale_x == F)  { scale_x = 1
  } else {
    scale_x = args$auto_scale_x / window_size
    print(args$auto_scale_x)
  }
  print(scale_x)

  length_of_seq <- length(seq_list[[1]])
  num_of_seqs <- length(names(seq_list))

  for (j in 1:num_of_seqs) {

    name <- names(seq_list)[j]
    tot_seq  <- seq_list[[name]]
    tot_rank <- as.vector(unlist(rank_list[[name]]))

    text(.5, args$plot.size - 2-j+0.5, name, adj=c(0,0.5), cex=args$text.cex)

    for (i in 1:length_of_seq) {

      letter <- tot_seq[i]
      rank <- tot_rank[i]

      x1 = args$label.width + (i-1) * scale_x
      x2 = x1 + 1 * scale_x
      y1 = args$plot.size-2 - (j-1)
      y2 = y1 - 1

      rect(x1,y1,x2,y2,col=args$color_dict[rank], border=NA)

      if (scale_x == 1) {
        text(x1 + 0.5, y1 - 0.5, letter, adj=c(.5,.5), cex=args$text.cex)
      }

      if (j == 1) {



        if ((i-1 + args$window[1]) %% tick_interval == 0) {
          tx1 = args$label.width + (i -.5) * scale_x
          tx2 = tx1
          ty1 = args$plot.size - 2
          ty2 = args$plot.size - 2 + 0.8

          segments(tx1,ty1,tx2,ty2)
          text(tx1, ty2 + .5, args$window[1]-1+i, adj=c(0.5,0), las=1, cex=args$text.cex)


        }
      }

    }
  }

  if (args$bounding_box == T) {
    rect(args$label.width, args$plot.size-2, x2, y2)
  }

}

#' plotMSA Function
#'
#' This is the wrapper backbone script of this project
#' @export

plotMSA <- function(fasta_file, window, draw_sequence=T, auto_scale_x=F, to_exclude=c(), text.cex=0.8, bounding_box=T, label.width=8) {
  args <- list(
    fasta_file=fasta_file,
    draw_sequence=draw_sequence,
    auto_scale_x=auto_scale_x,
    to_exclude=to_exclude,
    text.cex=text.cex,
    bounding_box=bounding_box,
    label.width=label.width,
    window=window
  )


  if (args$auto_scale_x != F) {
    if (args$auto_scale_x < 20 || args$auto_scale_x > 200) {
      error_message = paste("User defined auto_scale_x = ", auto_scale_x, "\nMust be >20 and <200!", sep='')
      stop(error_message)
    }
    args$full.width <- args$auto_scale_x + args$label.width
  } else {
    args$full.width <- args$window[2] - args$window[1] + args$label.width
  }




  require(seqinr)

  greens = c("#64c952","#dbffbf","#ffe9d3","white","white",'red')
  color_dict = greens
  names(color_dict) = c('A','B','C','D','F')

  args$color_dict <- color_dict

  seq_list <- process_fasta(args)
  if (args$window[1] == -1) {
    args$window = c(1,length(seq_list[[1]]))
    auto_scale_x = T
  }

  args$full.height = length(seq_list)

  args$plot.size <- max(args$full.height + 3, args$full.width + 2)


  rank_list <- assess_conservation(seq_list)

  # display_size = 12
  # plot_size = 60
  # svglite::svglite("test_output.svg", height=display_size, width=display_size)
  call_plot(args)
  draw_MSA(seq_list, rank_list, args)
  # dev.off()
}

# plotMSA('test.aln.fa', c(2425,2450), auto_scale_x=F, bounding_box=T)
NateyJay/MSAplotter documentation built on Oct. 30, 2019, 10:09 p.m.