R/namesToColumns.R

Defines functions namesToColumns

Documented in namesToColumns

#' FUNCTION that modifies marks' names into columns
#'
#' @description Reads a data.frame with marks' of styles
#' \code{downArrow,upArrow,cM,cMLeft}
#' positions. It separates names in columns, avoiding overlap when multiple
#' close names
#' @description Exceptionally this function requires the column style in the
#' data.frame of marks'
#' positions.
#' @description Returns a data.frame
#'
#' @keywords data.frame marks
#'
#' @param marksDf data.frame with columns: \code{markName,style,markPos}
#' @param dfChrSize data.frame, size of chr. Same of plot.
#' @param markType character, use
#' \code{c("downArrow","upArrow","cM","cMLeft")} or a subset
#' @param amountofSpaces numeric, number of spaces for each column
#' @param colNumber numeric, number of columns
#' @param protruding numeric, same as plot, minimal protruding for arrow
#' marks, equivalent to cM
#' protruding
#' @param protrudingInt numeric, spacing of columns in terms of width of chr.
#' percent 1 = 100%.
#' Defaults to \code{0.5}
#' @param circularPlot boolean, use \code{TRUE} for circular plots. Use
#' \code{FALSE} otherwise
#' @param rotation numeric, same as plot, anti-clockwise rotation, defaults to
#' \code{0.5} which
#' rotates chr. from top to -90 degrees. (-0.5*\eqn{\pi} )
#' @param defaultStyleMark character, if some data in column style missing
#' fill with this one.
#' Defaults to \code{"square"}
#' @param orderChr character, replaces \code{orderBySize - deprecated} when \code{"size"}, sorts chromosomes by total
#'   length from the largest to the smallest. \code{"original"}: preserves d.f. order. \code{"name"}:
#'   sorts alphabetically; \code{"group"}: sorts by group name
#' @param halfModUp numeric, for circ. plots, when plotting several
#' chromosomes in a circular
#' plot, using a small value \code{0.05} corrects for alignment problems of
#' \code{upArrows, cM}
#' labels. Defaults to \code{NA}
#' @param halfModDown numeric, for circ. plots, when plotting several
#' chromosomes in a circular
#' plot, using a small value \code{0.05} corrects for alignment problems of
#' \code{downArrows,
#' cMLeft} labels. Defaults to \code{NA}
#' @param rotatMod numeric, for circ. plots, when rotation != 0 (diff.),
#' corrects alignment of
#' labels. Defaults to \code{0}
#' @export
#'
#' @return data.frame

namesToColumns <- function(marksDf, dfChrSize, markType = c("downArrow", "upArrow", "cMLeft", "cM"), # nolint: cyclocomp_linter
                           amountofSpaces = 13, colNumber = 2,
                           protruding = 0.2,
                           protrudingInt = 0.5,
                           circularPlot = TRUE,
                           rotation = 0.5,
                           defaultStyleMark = "square", orderChr = "size",
                           halfModDown = NA, halfModUp = NA, rotatMod = 0) {


  # if style column does not exist stop
  {
    if ("style" %in% colnames(marksDf)) {
      if (length(marksDf[which(is.na(marksDf$style)), ]$style) > 0) {
        marksDf[which(is.na(marksDf$style)), ]$style <- defaultStyleMark
      }
    } else {
      message(crayon::red("exceptionally, style column is mandatory, needs some downArrow, upArrow, cMLeft or cM, stopping!"))
      return(marksDf)
    }
  }
  if (!"chrName" %in% colnames(marksDf)) {
    message(crayon::red("chrName column is mandatory in marks d.f., stopping!"))
    return(marksDf)
  }
  ## d.f. chr size to list
  {
    dfChrSize <- makeNumCols(dfChrSize)

    listOfdfChromSize <- dfToListColumn(dfChrSize)

    # add attr. monocen or holocen based on columns
    listOfdfChromSize <- addAttributesDfChrSizeSimple(listOfdfChromSize) # this makes char. cols. adds cen.

    # add chrSize col. for monocen.
    listOfdfChromSize <- addChrSizeColumn(listOfdfChromSize) # now makes numeric 1st

    # define ordering of chr. length or name
    totalLength <- lapply(listOfdfChromSize, function(x) tryCatch(x$chrSize, error = function(e) NA))
    ifelse(
      inherits(totalLength, "matrix"),
      totalLength <- base::split(totalLength, col(totalLength)),
      NA
    )

    if (orderChr == "size") {
      orderlist <- lapply(totalLength, function(x) order(x, decreasing = TRUE))
    } else if (orderChr == "name") {
      orderlist <- lapply(listOfdfChromSize, function(x) tryCatch(order(x$chrName), error = function(e) NA))
    } else if (orderChr == "original" || orderChr == "group") {
      orderlist <- lapply(listOfdfChromSize, function(x) tryCatch(1:max(order(x$chrName)), error = function(e) NA))
    }

    #   add column of new chro index to data.frames and order !!
    listOfdfChromSize <- addNeworderColumn(listOfdfChromSize, orderlist)

    grouporderlist <- lapply(listOfdfChromSize, function(x) tryCatch(order(x$group), error = function(e) NA))

    if (orderChr == "group") {
      for (s in seq_along(listOfdfChromSize)) {
        if ("group" %in% colnames(listOfdfChromSize[[s]])) {
          message(crayon::blue("group column present - remove column if not using"))

          if (inherits(listOfdfChromSize[[s]], "data.frame")) {
            listOfdfChromSize[[s]] <- listOfdfChromSize[[s]][grouporderlist[[s]], ]
            listOfdfChromSize[[s]]$neworder <- seq_len(nrow(listOfdfChromSize[[s]]))
          }
        }
      }
    }

    # markPos d.f. to List
    listOfdfMarks <- dfToListColumn(marksDf)

    # transfers neworder column to markposlist # requires column chrName
    listOfdfMarks <- newOrderColumn(listOfdfChromSize, listOfdfMarks)

    proVec <- seq(protruding, (protrudingInt * (colNumber - 1)) + protruding, length.out = colNumber)

    if (exists("listOfdfMarks")) {
      listOfChecksChr <- checkNameChrDfMarks(listOfdfChromSize, listOfdfMarks)
      listOfdfChromSize <- listOfChecksChr[[1]]
      listOfdfMarks <- listOfChecksChr[[2]]

      if (length(listOfdfMarks) == 0) {
        remove(listOfdfMarks)
        message(crayon::red("stopping!"))
        return(marksDf)
      }
    }
  }
  # begin loop by SPS (OTU)
  for (k in seq_along(listOfdfChromSize)) {
    spindex <- which(names(listOfdfMarks) %in% names(listOfdfChromSize)[[k]])
    if (length(spindex) > 0) {

      # sp index for marks
      if (circularPlot) {
        if (attr(listOfdfChromSize[[k]], "cenType") == "monocen") {
          chrsize <- sum(as.numeric(as.character(listOfdfChromSize[[k]]$chrSize)))
        } else if (attr(listOfdfChromSize[[k]], "cenType") == "holocen") {
          chrsize <- sum(as.numeric(as.character(listOfdfChromSize[[k]]$chrSize)))
        }

        accuChrSize <- 0
        half <- chrsize / 2
      }

      # split marks based on chr.
      listOfdfMarks[[spindex]] <- dfToListColumn(listOfdfMarks[[spindex]], "chrName")

      # split chrDF based on chr.
      listOfdfChromSize[[k]] <- dfToListColumn(listOfdfChromSize[[k]], "chrName")
      # for each CHR. in dfchr
      for (l in seq_along(listOfdfChromSize[[k]])) { # important

        # get chr index for dfMarks
        chrIndex <- which(names(listOfdfMarks[[spindex]]) %in% names(listOfdfChromSize[[k]])[[l]])

        if (length(chrIndex) > 0) {

          # if column pro does not exist, create
          if (any(!colnames(listOfdfMarks[[spindex]][[chrIndex]]) %in% "protruding")) {
            listOfdfMarks[[spindex]][[chrIndex]]$protruding <- NA
          }

          # create markPos2 disregarding arm -  and sort
          if (attr(listOfdfChromSize[[k]][[l]], "cenType") == "monocen") {
            listOfdfMarks[[spindex]][[chrIndex]]$markPos2 <- listOfdfMarks[[spindex]][[chrIndex]]$markDistCen
            listOfdfMarks[[spindex]][[chrIndex]] <- listOfdfMarks[[spindex]][[chrIndex]][order(
              listOfdfMarks[[spindex]][[chrIndex]]$chrRegion,
              listOfdfMarks[[spindex]][[chrIndex]]$markPos2
            ), ]
          } else if (attr(listOfdfChromSize[[k]][[l]], "cenType") == "holocen") {
            listOfdfMarks[[spindex]][[chrIndex]]$markPos2 <- listOfdfMarks[[spindex]][[chrIndex]]$markPos

            listOfdfMarks[[spindex]][[chrIndex]] <-
              listOfdfMarks[[spindex]][[chrIndex]][order(listOfdfMarks[[spindex]][[chrIndex]]$markPos2), ]
          }

          if (circularPlot) {
            listOfdfMarks[[spindex]][[chrIndex]]$newPos <- as.numeric(NA)

            if (attr(listOfdfChromSize[[k]][[l]], "cenType") == "monocen") {
              las <- listOfdfChromSize[[k]][[l]]$longArmSize

              tryCatch(listOfdfMarks[[spindex]][[chrIndex]][which(listOfdfMarks[[spindex]][[chrIndex]]$chrRegion == "q"), ]$markPos2 <-
                las -
                listOfdfMarks[[spindex]][[chrIndex]][which(listOfdfMarks[[spindex]][[chrIndex]]$chrRegion == "q"), ]$markPos2,
              error = function(e) {
                ""
              }
              )
              tryCatch(listOfdfMarks[[spindex]][[chrIndex]][which(listOfdfMarks[[spindex]][[chrIndex]]$chrRegion == "p"), ]$markPos2 <-
                las +
                listOfdfMarks[[spindex]][[chrIndex]][which(listOfdfMarks[[spindex]][[chrIndex]]$chrRegion == "p"), ]$markPos2,
              error = function(e) {
                ""
              }
              )
            } # monocen

            # As downArrow marks labels are at the end, account for that
            if ("downArrow" %in% markType) {
              subsetA <- which(listOfdfMarks[[spindex]][[chrIndex]]$style %in% "downArrow")

              markSizeA <- tryCatch(listOfdfMarks[[spindex]][[chrIndex]][subsetA, ]$markSize, error = function(e) {
                0
              })

              tryCatch(listOfdfMarks[[spindex]][[chrIndex]][subsetA, ]$markPos2 <-
                psum(listOfdfMarks[[spindex]][[chrIndex]][subsetA, ]$markPos, markSizeA, na.rm = TRUE),
              error = function(e) {
                0
              }
              )
            }

            # markPos2 might have change - sort again
            if (attr(listOfdfChromSize[[k]][[l]], "cenType") == "monocen") {
              listOfdfMarks[[spindex]][[chrIndex]] <- listOfdfMarks[[spindex]][[chrIndex]][order(
                listOfdfMarks[[spindex]][[chrIndex]]$chrRegion,
                listOfdfMarks[[spindex]][[chrIndex]]$markPos2
              ), ]
            } else if (attr(listOfdfChromSize[[k]][[l]], "cenType") == "holocen") {
              listOfdfMarks[[spindex]][[chrIndex]] <-
                listOfdfMarks[[spindex]][[chrIndex]][order(listOfdfMarks[[spindex]][[chrIndex]]$markPos2), ]
            }

            if (rotation != 0) {
              halfRot <- rotation / 2
              halfRot <- halfRot + (rotatMod * halfRot)

              listOfdfMarks[[spindex]][[chrIndex]]$markPos2 <- listOfdfMarks[[spindex]][[chrIndex]]$markPos2 + accuChrSize

              st <- chrsize * halfRot

              listOfdfMarks[[spindex]][[chrIndex]][which(listOfdfMarks[[spindex]][[chrIndex]]$markPos2 > st), ]$newPos <-
                listOfdfMarks[[spindex]][[chrIndex]][which(listOfdfMarks[[spindex]][[chrIndex]]$markPos2 > st), ]$markPos2 - st

              newst <- chrsize - st + accuChrSize

              listOfdfMarks[[spindex]][[chrIndex]][which(listOfdfMarks[[spindex]][[chrIndex]]$markPos2 < st), ]$newPos <-
                listOfdfMarks[[spindex]][[chrIndex]][
                  which(listOfdfMarks[[spindex]][[chrIndex]]$markPos2 < st), ]$markPos2 + newst + accuChrSize
            } else {
              listOfdfMarks[[spindex]][[chrIndex]]$newPos <- listOfdfMarks[[spindex]][[chrIndex]]$markPos2 + accuChrSize
            }
          } else {
            listOfdfMarks[[spindex]][[chrIndex]]$newPos <- listOfdfMarks[[spindex]][[chrIndex]]$markPos2
          }

          if ("downArrow" %in% markType || "cMLeft" %in% markType) {
            subset1 <- which(listOfdfMarks[[spindex]][[chrIndex]]$style %in% "downArrow" |
              listOfdfMarks[[spindex]][[chrIndex]]$style %in% "cMLeft")

            # column size
            # Separate names in columns
            if (length(subset1) > 0) {
              names <- listOfdfMarks[[spindex]][[chrIndex]][subset1, ]$markName
              counterCol <- 1:colNumber
              i <- 0
              for (j in seq_along(names)) {
                i <- i + 1
                if (i > length(counterCol)) {
                  i <- 1
                }
                listOfdfMarks[[spindex]][[chrIndex]][subset1, ]$markName[j] <-
                  gsub("(.*)", paste0(
                    paste0(rep(" ", amountofSpaces * (colNumber - counterCol[i])), collapse = ""),
                    "\\1",
                    paste0(rep(" ", amountofSpaces * (counterCol[i] - 1)), collapse = "")
                  ), names[j]) # gsub
                listOfdfMarks[[spindex]][[chrIndex]][subset1, ]$protruding[j] <- proVec[i]
              }

              # # fix first half ( 0h to  6h) i.e. invert short & long
              if (circularPlot) {
                if (!is.na(halfModDown)) {
                  half2 <- half + (halfModDown * half)
                } else {
                  half2 <- half
                }
                if (nrow(listOfdfMarks[[spindex]][[chrIndex]][subset1, ]) >= colNumber) {
                  toMod <- which(listOfdfMarks[[spindex]][[chrIndex]][subset1, ]$newPos > half2)
                  if (length(toMod) > 0) {
                    listOfdfMarks[[spindex]][[chrIndex]][subset1, ][toMod, ]$markName <-
                      sub("(\\s+)?([[:alnum:]_]+)(\\s+)?", "\\3\\2\\1", listOfdfMarks[[spindex]][[chrIndex]][subset1, ][toMod, ]$markName)
                  }
                }
              }
            }
          }

          # add spaces before and after name for outer names (upArrow)

          if ("upArrow" %in% markType || "cM" %in% markType) {
            subset2 <- which(listOfdfMarks[[spindex]][[chrIndex]]$style %in% "upArrow" |
              listOfdfMarks[[spindex]][[chrIndex]]$style %in% "cM")

            if (length(subset2) > 0) {
              # Separate names in  columns
              names <- listOfdfMarks[[spindex]][[chrIndex]][subset2, ]$markName
              counterCol <- 1:colNumber
              i <- 0
              for (j in seq_along(names)) {
                i <- i + 1
                if (i > length(counterCol)) {
                  i <- 1
                }
                listOfdfMarks[[spindex]][[chrIndex]][subset2, ]$markName[j] <-
                  gsub("(.*)", paste0(
                    paste0(rep(" ", amountofSpaces * (counterCol[i] - 1)), collapse = ""),
                    "\\1",
                    paste0(rep(" ", amountofSpaces * (colNumber - counterCol[i])), collapse = "")
                  ), names[j]) # gsub
                listOfdfMarks[[spindex]][[chrIndex]][subset2, ]$protruding[j] <- proVec[i]
              }

              # fix first half ( 0h to  6h) i.e. invert short & long
              if (circularPlot) {
                if (!is.na(halfModUp)) {
                  half2 <- half + (halfModUp * half)
                } else {
                  half2 <- half
                }
                if (nrow(listOfdfMarks[[spindex]][[chrIndex]][subset2, ]) >= colNumber) {
                  toMod <- which(listOfdfMarks[[spindex]][[chrIndex]][subset2, ]$newPos > half2)
                  if (length(toMod) > 0) {
                    listOfdfMarks[[spindex]][[chrIndex]][subset2, ][toMod, ]$markName <-
                      sub("(\\s+)?([[:alnum:]_]+)(\\s+)?", "\\3\\2\\1", listOfdfMarks[[spindex]][[chrIndex]][subset2, ][toMod, ]$markName)
                  }
                }
              }
            }
          }

        }
        if (circularPlot) {
          accuChrSize <- accuChrSize + as.numeric(listOfdfChromSize[[k]][[l]]$chrSize)
        }
      }

      listOfdfMarks[[k]] <- dplyr::bind_rows(listOfdfMarks[[k]], .id = NULL)
    }
  }

  if ("OTU" %in% colnames(marksDf)) {
    marksDf <- dplyr::bind_rows(listOfdfMarks, .id = NULL)
  } else {
    marksDf <- dplyr::bind_rows(listOfdfMarks, .id = "OTU")
  }
  marksDf$newPos <- NULL
  marksDf$markPos2 <- NULL
  marksDf$neworder <- NULL

  return(marksDf)
}

Try the idiogramFISH package in your browser

Any scripts or data that you put into this service are public.

idiogramFISH documentation built on Aug. 22, 2023, 5:08 p.m.