R/perMark.R

Defines functions perMark

Documented in perMark

#' @name perMark
#' @title FUNCTION perMark
#' @description calculates fraction (%) of chromosome for each mark
#'
#' @param dfMarkPos data.frame, of marks' position
#' @param listOfdfChromSize list of data.frames or data.frame of chr. sizes.
#' @param bToRemove character vector, bands to ignore
#' @param result character, type of return, \code{"data.frame"} or \code{"list"}
#'
#' @keywords percentage span mark chromosome
#' @examples
#' load(system.file("shinyApps", "iBoard/www/rda/monoholoCS.rda", package = "idiogramFISH"))
#' load(system.file("shinyApps", "iBoard/www/rda/monoholoMarks.rda", package = "idiogramFISH"))
#' monoholoMarks2 <- fillMarkInfo(monoholoMarks, monoholoCS)
#' perMark(monoholoMarks2, monoholoCS, result = "data.frame")
#'
#' @return list
#' @rdname perMark
#' @importFrom stats setNames
#' @export
#'
perMark <- function(dfMarkPos, listOfdfChromSize, result = "list", bToRemove = "") { #nolint: cyclocomp_linter
  if (inherits(listOfdfChromSize, "list") == FALSE) {
    listOfdfChromSize <- dfToListColumn(listOfdfChromSize)

    if (!"OTU" %in% colnames(dfMarkPos)) {
      message(crayon::blue("listOfdfChromSize not a list & dfMarkPos without OTU column, dfMarkPos OTU will be 1"))
      dfMarkPos$OTU <- 1
    }
  }

  for (s in seq_along(listOfdfChromSize)) {
    dfChromSize <- fixChrNameDupDF(listOfdfChromSize[s], TRUE)
    listOfdfChromSize[[s]] <- dfChromSize[[1]]

    # remove empty columns

    listOfdfChromSize[[s]][sapply(listOfdfChromSize[[s]], function(x) all(is.na(x)))] <- NULL

    # add chrSize column

    if (!"chrSize" %in% colnames(listOfdfChromSize[[s]])) {
      listOfdfChromSize[[s]]$chrSize <- listOfdfChromSize[[s]]$shortArmSize + listOfdfChromSize[[s]]$longArmSize
    }
  }

  message(crayon::blue("\nCalculating position of each mark in terms of % of chromosome"))

  perList <- list()

  for (i in seq_along(listOfdfChromSize)) {

    spname <- names(listOfdfChromSize)[i]

    dup_chr <- any(duplicated(listOfdfChromSize[[i]]$chrName))

    if (dup_chr == FALSE) {
      perList[[i]] <- setNames(
        data.frame(matrix(ncol = nrow(listOfdfChromSize[[i]]), nrow = 0)),
        listOfdfChromSize[[i]]$chrName
      )

      names(perList)[i] <- names(listOfdfChromSize)[i]

      bandList <- unique(dfMarkPos[which(dfMarkPos$OTU %in% spname), ]$markName)

      if ("chrRegion" %in% colnames(dfMarkPos)) {
        bandList <- unique(dfMarkPos[which(
          dfMarkPos$OTU %in% spname &
            !dfMarkPos$chrRegion %in% "cen"
        ), ]$markName)
      }

      bandList <- setdiff(bandList, bToRemove)

      for (band in bandList) {
        chrName <- listOfdfChromSize[[i]]$chrName[1]

        for (chrName in listOfdfChromSize[[i]]$chrName) {
          chrSize <- listOfdfChromSize[[i]][which(listOfdfChromSize[[i]]$chrName %in% chrName), ]$chrSize

          perList[[i]]["chrSize", as.character(chrName)] <- chrSize

          markSize <- NA
          allMarks <- NA

          allMarks <- dfMarkPos[which(dfMarkPos$OTU %in% spname &
            dfMarkPos$chrName %in% chrName &
            dfMarkPos$markName %in% band), ]$markSize

          markSize <- sum(dfMarkPos[which(dfMarkPos$OTU %in% spname &
            dfMarkPos$chrName %in% chrName &
            dfMarkPos$markName %in% band), ]$markSize, na.rm = TRUE)
          chrRegion <- NA

          if ("chrRegion" %in% colnames(dfMarkPos)) {
            chrRegion <- dfMarkPos[which(dfMarkPos$OTU %in% spname &
              dfMarkPos$chrName %in% chrName &
              dfMarkPos$markName %in% band), ]$chrRegion
          }

          if (length(markSize)) {
            if (any(is.na(allMarks))) {
              if ("chrRegion" %in% colnames(dfMarkPos)) {
                if (!is.na(chrRegion[is.na(allMarks)] == "cen")) {
                  if (chrRegion[is.na(allMarks)] == "cen") {
                    message(crayon::blue("mark's style 'cen' does not have size, no % calculated, see ruler"))
                  }
                } else {
                  message(crayon::blue(paste(
                    spname, "- No data of size, for mark",
                    band,
                    "chr", chrName,
                    "region", chrRegion[is.na(allMarks)]
                  )))
                }
              }
            }
            perList[[i]][as.character(band), as.character(chrName)] <- markSize
          }
        }

        perList[[i]][paste0(band, "_per"), ] <- perList[[i]][as.character(band), ] / perList[[i]]["chrSize", ]
        perList[[i]] <- perList[[i]][rowSums(perList[[i]]) != 0, ]
      }
    } else {
      message(crayon::red(paste0("chrNames duplicated in: ", spname)))
    }
  }
  if (result == "data.frame") {
    dflist2 <- lapply(perList, function(x) {
      if (nrow(x)) {
        a <- cbind(markName = row.names(x), x)
        row.names(a) <- seq_len(nrow(a))
        a
      }
    })
    dflist2 <- dflist2[which(!sapply(dflist2, is.null))] # remove no-marks sps.
    df <- plyr::rbind.fill(mapply(function(x, y) cbind(OTU = x, y), x = names(dflist2), y = (dflist2), SIMPLIFY = FALSE))
    df[df == 0] <- NA

    otherc <- sort(setdiff(colnames(df), c("OTU", "markName")))
    numeric_c <- as.character(sort(as.numeric(otherc[which(!is.na(suppressWarnings(as.numeric(otherc))))])))
    not_num <- sort(otherc[which(is.na(suppressWarnings(as.numeric(otherc))))])
    df <- df[, c("OTU", "markName", numeric_c, not_num)]
    listOfMarks <- base::split(df, factor(df[, "OTU"], levels = unique(df[, "OTU"])))
    nam <- names(listOfMarks)
    i <- 0
    for (df in listOfMarks) {
      i <- i + 1
      listOfMarks[[i]] <- plyr::rbind.fill(
        df[which(df$markName == "chrSize"), ],
        df[which(df$markName != "chrSize"), ][order(
          df$OTU[which(df$markName != "chrSize")],
          df$markName[which(df$markName != "chrSize")]
        ), ]
      )
    }
    names(listOfMarks) <- nam
    plist <- rbind.fill(listOfMarks[sort(names(listOfMarks))])

    df <- makeNumCols(plist)

    return(df)
  } else {
    return(perList)
  }
}

Try the idiogramFISH package in your browser

Any scripts or data that you put into this service are public.

idiogramFISH documentation built on Aug. 22, 2023, 5:08 p.m.