R/base_package_functions.R

Defines functions makefactorgroup myColorRamp5 subsetSamples subsetGenes assessScale

Documented in assessScale makefactorgroup myColorRamp5 subsetGenes subsetSamples

####Standard Functions, Functions Used within the Package, Data Assessment and Manipulation####

#' @export
'%notin%' <- function(x, table){ !(match(x, table, nomatch = 0) > 0)}




#' @export
assessScale <- function(     ###Returns percent of data above, below, and within range of scale for heatmap
  data ##data to be assessed,
){
  above <- sum(data > params$scale.range[2], na.rm=T)/sum(!is.na(data))
  within <- sum(data <= params$scale.range[2] & data >= params$scale.range[1], na.rm=T)/sum(!is.na(data))
  below <- sum(data < params$scale.range[1], na.rm=T)/sum(!is.na(data))
  report <- c(below, within, above); names(report) <- c("Percent Below Range", "Percent Within Range","Percent Above Range")
  return(report)
}




#' @export
subsetGenes <- function(  ##subset samples out of matrix based on metadata group
  data, ##matrix of values, with genes in the rows and samples in the columns
  group, ##group from which to subset, vector that is same length as columns
  take.out, ##which part of the group to extract, one or more of the factors in the "group"
  list,
  order.by = NULL, ##sample to order by
  exact = F,
  subset.by = "annotation" #other option is by "name"
){

  if (subset.by == "annotation") {
    temp.annotations.genes <- params$annotations.genes

    if (group %in% colnames(temp.annotations.genes)) {

      if (any(rownames(data) %notin% rownames(temp.annotations.genes))) {
        stop('rownames of input data do not match rownames of annotations, cannot link annotations to data')
      }
      temp.annotations.genes <- temp.annotations.genes[match(rownames(data), rownames(temp.annotations.genes)),, drop = FALSE]

      groupings <- as.factor(temp.annotations.genes[,group] )

      if (sum(take.out %in% groupings) != length(take.out)) {stop('provided arguments to take.out not found in indicated group')}
      subset <- data[which(groupings %in% take.out),]
    }else{
      if (sum(take.out %in% group) != length(take.out)) {stop('provided arguments to take.out not found in indicated group')}
      subset <- data[which(group %in% take.out),]
    }
  }

  if (subset.by == "name") {
    if(exact==TRUE){
      subset <- data[which(rownames(data) %in% list),];
      if (nrow(subset) == 0 ) {stop('exact matches for list not found in rownames of data')}
    }

    if (exact==FALSE){
      subset <- data[grep(paste(list, collapse = "|"),rownames(data)),]
      if (nrow(subset) == 0 ) {stop('inexact matches for list not found in rownames of data')}
    }

    if (is.null(order.by)==FALSE){
      subset <- subset[,order(data[which(rownames(data) %in% order.by),],na.last = F)]
    }
  }

  return(subset)
}



#' @export
subsetSamples <- function(  ##subset samples out of matrix based on metadata group
  data, ##matrix of values, with genes in the rows and samples in the columns
  group, ##group from which to subset, vector that is same length as columns
  take.out, ##which part of the group to extract, one or more of the factors in the "group"
  list,
  order.by = NULL, ##sample to order by
  exact = F,
  subset.by = "annotation" #other option is by "name"
){

  if (subset.by == "annotation") {
    temp.annotations <- params$annotations

    if (group %in% colnames(temp.annotations)) {

      if (any(colnames(data) %notin% rownames(temp.annotations))) {
        stop('colnames of input data do not match rownames of annotations, cannot link annotations to data')
      }
      temp.annotations <- temp.annotations[match(colnames(data), rownames(temp.annotations)),, drop = FALSE]

      groupings <- as.factor(temp.annotations[,group] )

      if (sum(take.out %in% groupings) != length(take.out)) {stop('provided arguments to take.out not found in indicated group')}
      subset <- data[,which(groupings %in% take.out)]
    }else{
      if (sum(take.out %in% group) != length(take.out)) {stop('provided arguments to take.out not found in indicated group')}
      subset <- data[,which(group %in% take.out)]
    }
  }

  if (subset.by == "name") {
    if(exact==TRUE){
      subset <- data[,which(colnames(data) %in% list)];
      if (ncol(subset) == 0 ) {stop('exact matches for list not found in colnames of data')}
    }

    if (exact==FALSE){
      subset <- data[,grep(paste(list, collapse = "|"),colnames(data))]
      if (ncol(subset) == 0 ) {stop('inexact matches for list not found in colnames of data')}
    }

    if (is.null(order.by)==FALSE){
      subset <- subset[order(subset[,which(colnames(data) %in% order.by)],na.last = F),]
    }
  }

  return(subset)
}




#' @importFrom grDevices col2rgb colorRamp colorRampPalette rgb
#' @export
myColorRamp5 <- function(colors, values, percent.mad = 0.5) {  ###color data over a range, assumes 5 colors, sets in quadrants according to median +/- mad
  out <- rep(rgb(0,0,0),length(values))
  for(i in 1:length(values)){
    if(is.na(values[i])){
    } else{
      if (values[i] <= (median(values,na.rm=T)-percent.mad*mad(values,na.rm=T))){
        v <- (values[i] - min(values,na.rm=T))/( (median(values,na.rm=T)-percent.mad*mad(values,na.rm=T)) - min(values, na.rm=T) )
        x <- colorRamp(colors[1:2])(v)
        out[i] <- rgb(x[,1], x[,2], x[,3], maxColorValue = 255)}

      if (values[i]>median(values,na.rm=T)-percent.mad*mad(values,na.rm=T) & values[i]<=median(values, na.rm=T)){
        v <- (values[i] - (median(values,na.rm=T)-percent.mad*mad(values,na.rm=T)) )/ (median(values, na.rm=T) -(median(values,na.rm=T)-percent.mad*mad(values,na.rm=T)))
        x <- colorRamp(colors[2:3])(v)
        out[i] <- rgb(x[,1], x[,2], x[,3], maxColorValue = 255)}

      if (values[i]<=median(values,na.rm=T)+percent.mad*mad(values,na.rm=T) & values[i]>median(values, na.rm=T)){
        v <- (values[i] - median(values, na.rm=T))/ ( (median(values,na.rm=T)+percent.mad*mad(values,na.rm=T))- median(values, na.rm=T))
        x <- colorRamp(colors[3:4])(v)
        out[i] <- rgb(x[,1], x[,2], x[,3], maxColorValue = 255)}

      if (values[i]>median(values,na.rm=T)+percent.mad*mad(values,na.rm=T)){
        v <- (values[i] - (median(values,na.rm=T)+percent.mad*mad(values,na.rm=T)))/(max(values, na.rm=T)  - (median(values,na.rm=T)+percent.mad*mad(values,na.rm=T)))
        x <- colorRamp(colors[4:5])(v)
        out[i] <- rgb(x[,1], x[,2], x[,3], maxColorValue = 255)}
    }
  }
  return(out)
}


#' @export
makefactorgroup <- function(
  annots,
  levels,
  specify.gaps = NULL,
  return.gaps = FALSE
){

  if (length(levels) == 1) {
    Level.1 <- annots[,which(colnames(annots)==levels[1])]
    factor.group <- factor(Level.1); names(factor.group) <- rownames(annots)


    if (is.null(specify.gaps) == FALSE) {
      if (length(specify.gaps)  != length(levels)) {stop('length of gap specifications not equal to number of levels provided')}

      gaps <- rep(cumsum(rev(rev(rle(as.vector(sort(factor.group)))$lengths)[-1])), specify.gaps[1])

    }else{gaps <- cumsum(rev(rev(rle(as.vector(sort(factor.group)))$lengths)[-1]))}

  }

  if (length(levels) == 2) {
    Level.1 <- annots[,which(colnames(annots)==levels[1])]
    Level.2 <- annots[,which(colnames(annots)==levels[2])]
    combo <- data.frame((Level.1), (Level.2), paste(Level.1, Level.2)); colnames(combo) <- c("Level1","Level2","Combo"); rownames(combo) <- rownames(annots)
    bylevel1 <- combo[order(combo[,1]),]
    bylevel2 <- bylevel1[order(bylevel1[,2]),]

    factor.group <- factor(combo[,which(colnames(combo)=="Combo")], levels=c(unique(as.character(bylevel2[,which(colnames(bylevel2)=="Combo")])))); names(factor.group) <- rownames(annots)

    if (is.null(specify.gaps) == FALSE) {
      if (length(specify.gaps)  != length(levels)) {stop('length of gap specifications not equal to number of levels provided')}

      gaps <- sort(c( rep(cumsum(rev(rev(rle(as.vector(bylevel2$Level1))$lengths)[-1])), specify.gaps[1]),
                      rep(cumsum(rev(rev(rle(as.vector(bylevel2$Level2))$lengths)[-1])), specify.gaps[2])))

    }else{gaps <- cumsum(rev(rev(rle(as.vector(bylevel2$Combo))$lengths)[-1]))}
  }

  if (length(levels) == 3 ) {
    Level.1 <- annots[,which(colnames(annots)==levels[1])]
    Level.2 <- annots[,which(colnames(annots)==levels[2])]
    Level.3 <- annots[,which(colnames(annots)==levels[3])]

    combo <- data.frame((Level.1), (Level.2),(Level.3), paste(Level.1, Level.2,Level.3)); colnames(combo) <- c("Level1","Level2","Level3","Combo"); rownames(combo) <- rownames(annots)
    bylevel1 <- combo[order(combo[,1]),]
    bylevel2 <- bylevel1[order(bylevel1[,2]),]
    bylevel3 <- bylevel2[order(bylevel2[,3]),]

    factor.group <- factor(combo[,which(colnames(combo)=="Combo")], levels=c(unique(as.character(bylevel3[,which(colnames(bylevel3)=="Combo")])))); names(factor.group) <- rownames(annots)

    if (is.null(specify.gaps) == FALSE) {
      if (length(specify.gaps)  != length(levels)) {stop('length of gap specifications not equal to number of levels provided')}

      gaps <- sort(c( rep(cumsum(rev(rev(rle(as.vector(bylevel3$Level1))$lengths)[-1])), specify.gaps[1]),
                      rep(cumsum(rev(rev(rle(as.vector(bylevel3$Level2))$lengths)[-1])), specify.gaps[2]),
                      rep(cumsum(rev(rev(rle(as.vector(bylevel3$Level3))$lengths)[-1])), specify.gaps[3]) ) )

    }else{gaps <- cumsum(rev(rev(rle(as.vector(bylevel3$Combo))$lengths)[-1]))}

  }

  if (return.gaps == FALSE) {return(factor.group)}

  if (return.gaps == TRUE) {return(list(factor.group = factor.group, gaps = gaps))}

}
axm323/dataVisEasy documentation built on Feb. 1, 2024, 11:53 p.m.