R/rgb.R

Defines functions scale.rows scale.cols dim.sum norm rep.row rep.col rep.mat auto.mar auto.legend auto.layout reorder.cols reorder.rows indicators.factor cut.quantile scale.range gradient rcut auto.format auto.signif interval2range dimOrdered is.dimOrdered rsplit accumulate zip which.min2 which.max2 which2 merge.names sort_levels not ls.sizes format.bytes inch.symbol inchx as.named.vector named.numeric named.list shuffle rank.stable na.dummy clean rand.partition round.partition cone2hsv hsv2cone test.col2hsv rgb2hsv col2hsv rgb2name

Documented in sort_levels

# functions to convert colour representations

rgb2name <- function(r,g,b) {
  if(missing(g)) {
    r <- as.matrix(r)
    if(ncol(r) != 3) {
      if(nrow(r) == 3) r <- t(r)
      else stop("matrix must have 3 rows or 3 columns")
    }
  } else {
    r <- array(c(r,g,b),c(length(r),3))
  }
  if(max(r) > 1) r <- r/255
  cc <- colors()
  # remove duplicates
  #cc <- setdiff(cc, c("gray100","grey100"))
  crgb <- col2rgb(cc)/255
  # distance matrix
  d <- sqdist(r,t(crgb))
  cc[apply(d, 1, which.min)]
}

col2hsv <- function(col) {
  rgb2hsv(col2rgb(col))
}

rgb2hsv <- function(r,g,b) {
  matrix.out = F
  if(missing(g)) {
    if(is.list(r)) {
      b = r[[3]]
      g = r[[2]]
      r = r[[1]]
    } else {
      matrix.out = T
      if(nrow(r) != 3) {
        if(ncol(r) == 3) r <- t(r)
        else stop("matrix must have 3 rows or 3 columns")
      }
      b <- r[3,]
      g <- r[2,]
      r <- r[1,]
    }
  }
  n <- length(r)
  lim <- array(0,c(2,n))
  lim[1,] <- pmin(r,g,b)
  lim[2,] <- pmax(r,g,b)
  rang <- lim[2,]-lim[1,]
  v <- lim[2,]
  s <- v
  i <- (lim[2,] > 0)
  s[!i] <- 0
  s[i] <- rang[i]/lim[2,i]
  h <- v
  h[s == 0] <- 0
  i <- (s > 0) & (r == lim[2,])
  h[i] <- (g[i] - b[i])/rang[i]/6
  i <- (s > 0) & (g == lim[2,])
  h[i] <- (2 + (b[i]-r[i])/rang[i])/6
  i <- (s > 0) & (b == lim[2,])
  h[i] <- (4 + (r[i]-g[i])/rang[i])/6
  i <- (h < 0)
  h[i] <- h[i] + 1
  if(matrix.out)
    t(array(c(h,s,v),c(n,3),list(NULL,c("hue","saturation","value"))))
  else
    data.frame(hue=h,saturation=s,value=v)
}

test.col2hsv <- function() {
  h <- col2hsv(palette())
  h[3,] = h[3,]/255
  print(palette())
  print(sapply(1:ncol(h), function(i) hsv(h[1,i],h[2,i],h[3,i])))
}

hsv2cone <- function(h,s,v) {
  if(missing(s)) {
    if(nrow(h) != 3) {
      if(ncol(h) == 3) h <- t(h)
      else stop("matrix must have 3 rows or 3 columns")
    }
    v <- h[3,]
    s <- h[2,]
    h <- h[1,]
  }
  x <- s*v*cos(h*2*pi)
  y <- s*v*sin(h*2*pi)
  z <- v
  n <- length(x)
  t(array(c(x,y,z),c(n,3),list(NULL,c("x","y","z"))))
}
cone2hsv <- function(x,y,z) {
  if(missing(y)) return(cone2hsv(x[1,],x[2,],x[3,]))
  v <- z
  h <- (atan2(y,x)/2/pi + 1) %% 1
  s <- sqrt(x^2 + y^2)/v
  i <- which(s > 1 | is.na(s))
  if(length(i) > 0) s[i] <- 0
  r <- rbind(h,s,v)
  rownames(r) <- c("hue","saturation","value")
  r
}

# this is only for routines used by minka library

round.partition <- function(x) {
  # rounds the elements of x, while preserving the sum
  diff(c(0,round(cumsum(x))))
}
rand.partition <- function(n) {
  # sample from hypergeometric
  # n is vector of desired table counts
  sample(rep(1:length(n),n))
}

clean <- function(x,threshold=0.5) {
  mostly.na = function(x) (mean(is.na(x))>threshold)
  bad = sapply(x,mostly.na)
  # remove variables with many NAs
  x = x[!bad]
  # remove cases with any remaining NAs
  x = na.omit(x)
  # recompute factor levels
  for(v in colnames(x)) {
    if(is.factor(x[,v])) x[,v] = factor(x[,v])
  }
  x
}

na.dummy <- function(x,value=666) {
  # replace NAs with a dummy value
  if(is.data.frame(x)) return(apply.df(x,na.dummy))
  x[which(is.na(x))] = value
  x
}

rank.stable <- function(x) {
  # handles ties better than "rank"
  n <- length(x)
  r <- rep(0,n)
  r[order(x)] <- 1:n
  r
}

shuffle <- function(x) {
  if(length(dim(x)) == 2) {
    x[order(runif(nrow(x))),order(runif(ncol(x)))]
  } else {
    x[order(runif(length(x)))]
  }
}

named.list <- function(...,names.) {
  lst <- list(...)
  names(lst) = names.
  lst
}
named.numeric <- function(names.) {
  x <- numeric(length(names.))
  names(x) = names.
  x
}
as.named.vector <- function(a) {
  if(is.matrix(a) || is.data.frame(a)) {
    if(ncol(a) == 1) {
      x = a[,1]
      names(x) = rownames(a)
    } else {
      x = as.vector(as.matrix(a))
      #names(x) = outer(rownames(a),colnames(a),FUN=function(a,b) paste(a,b,sep="."))
      names(x) = outer(rownames(a),colnames(a),FUN=paste)
    }
    x
  } else if(is.list(a)) {
    unlist(a)
  } else stop("don't know what to do")
}

# companions to xinch,yinch
inchx = function(x,warn.log=T) {
  if (warn.log && par("xlog"))
    warning("x log scale:  inchx() is non-sense")
  x / diff(par("usr")[1:2]) * par("pin")[1]
}
inchy = function (y, warn.log = TRUE)
{
  if (warn.log && par("ylog"))
    warning("y log scale:  inchy() is non-sense")
  y / diff(par("usr")[3:4]) * par("pin")[2]
}

inch.symbol <- function(pch=par("pch"),cex=par("cex")) {
  # returns the height in inches of the given plotting symbol
  # with current graphics settings.
  # this is for pch="H"
  inch = par("csi")*cex/1.5
  if(pch <= 20) inch = inch/2
  inch
}

format.bytes = function(x) {
  # returns byte sizes in pretty form
  # example: format.bytes(c(2,1030,2e6))
  # based on code by Fang Chen
  round.to.half = function(x) round(x*2)/2
  s = as.character(x)
  names(s) = names(x)
  kilo = 2^10; mega = 2^20
  i = (x >= mega)
  s[i] = paste(round.to.half(x[i]/mega),"M",sep="")
  i = (x >= kilo) & (x < mega)
  s[i] = paste(round(x[i]/kilo),"k",sep="")
  s
}
ls.sizes <- function(name=parent.frame(), pretty = T, top = 10, ...) {
  # lists the `top' biggest objects in the given environment
  # suggested by Fang Chen
  x <- sapply(ls(name,...),function(x) object.size(get(x,name)))
  if(top > length(x)) top = length(x);
  x = rev(rev(sort(x))[1:top])
  if(pretty) x = format.bytes(x)
  x <- as.data.frame(x)
  colnames(x) <- "bytes"
  x
}

# smallest number such that 1+eps > 1
eps <- 1e-15

# returns the elements of x not indexed by i
not <- function(x,i) {
  if(is.numeric(i)) return(x[-i])
  if(is.character(i)) return(x[setdiff(names(x),i)])
  if(is.logical(i)) return(x[!i])
}


#' Sort levels of a factor
#'
#' The levels of a factor are sorted according to a summary statistic.
#' @param f a factor
#' @param x a vector, same length as \code{f}
#' @param fun function to use to summarize groups of \code{x}
#' @return
#'   A copy of \code{f} with re-ordered levels.
#' @author Tom Minka
#' @examples
#' data(OrchardSprays)
#' sort_levels(OrchardSprays$treatment, OrchardSprays$decrease)
#' sort_levels(OrchardSprays$treatment, OrchardSprays$decrease, fun=mean)
#' @export
sort_levels <- function(f,x,fun=median) {
  if(length(x) == length(f)) x <- tapply(x,f,fun)
  if(length(x) != length(levels(f))) stop("wrong number of values to sort by")
  factor(f,levels=levels(f)[order(x)])
}

# n is a vector of names
# i is a vector of indices
# merges names n[i] into a composite name
# returns a shorter vector of names
merge.names <- function(n,i=1:length(n)) {
  if(is.dimOrdered(n)) {
    first <- n[i[1]]
    last <- n[i[length(i)]]
    # what kind of range notation should we use?
    if(F) {
      split <- "\\.\\."; sep <- ".."
    } else {
      split <- "-"; sep <- "-"
    }
    s1 <- unlist(strsplit(first,split))
    s2 <- unlist(strsplit(last,split))
    # work around a bug in strsplit
    if(length(s2) == 1) s2 <- c("",s2)
    n[i] <- paste(s1[[1]], s2[[2]], sep=sep)
  } else {
    n[i] <- paste(n[i],collapse=",")
  }
  a <- attributes(n)
  n <- n[-i[-1]]
  attributes(n) <- a
  n
}

which2 <- function(x) {
  i <- which(x)-1
  j <- floor(i/nrow(x))
  i <- i - j*nrow(x)
  cbind(i+1,j+1)
}

which.max2 <- function(x) {
  i <- which.max(x)-1
  j <- floor(i/nrow(x))
  i <- i - j*nrow(x)
  c(i+1,j+1)
}

which.min2 <- function(x) {
  i <- which.min(x)-1
  j <- floor(i/nrow(x))
  i <- i - j*nrow(x)
  c(i+1,j+1)
}


zip <- function(a,b,fun="*") {
  fun <- match.fun(fun)
  r <- lapply(1:length(a), function(i) fun(a[[i]],b[[i]]))
  names(r) <- names(a)
  r
}

accumulate <- function(lst,fun="+") {
  fun <- match.fun(fun)
  r <- lst[[1]]
  for(i in 2:length(lst)) {
    #r <- r + lst[[i]]
    r <- fun(r,lst[[i]])
  }
  r
}

# returns split(x,f) where f is a random factor
# if p is a single number then f has two factors in the proportion (p,1-p)
rsplit <- function(x,p) {
  if(is.data.frame(x)) n <- nrow(x)
  else n <- length(x)
  n1 <- ceiling(p*n)
  i <- c(rep(1,n1),rep(2,n-n1))
  split(x,sample(i))
}

#' @export
is.dimOrdered <- function(d) {
  # dimensions are ordered by default
  a <- attr(d,"ordered")
  is.null(a) || a
}
#' @export
dimOrdered <- function(x) {
  dn = dimnames(x)
  # never return NULL
  if(is.null(dn)) dn = dim(x)
  sapply(dn,is.dimOrdered)
}
#' @export
"dimOrdered<-" <- function(x,value) {
  d = 1:length(dim(x))
  if(length(value) == 1) value = rep(value,length(d))
  for(i in d) {
    attr(dimnames(x)[[i]],"ordered") <- value[i]
  }
  x
}

# convert from interval to range notation
interval2range <- function(x) {
  x <- unlist(strsplit(x,","))
  x[1] <- substr(x[1],2,nchar(x[1]))
  x[1] <- format(as.numeric(x[1]))
  if(x[1] == "-Inf") x[1] <- "."
  x[2] <- substr(x[2],1,nchar(x[2])-1)
  x[2] <- format(as.numeric(x[2]))
  if(x[2] == "Inf") x[2] <- "."
  paste(x[1],x[2],sep="-")
}

auto.signif <- function(x,tol=0.1) {
  # rounds x to minimum number of digits to achieve specified tolerance.
  # i.e. if rounded value is y then abs(y-x)/abs(x) <= tol.
  # also see break.quantile(pretty=T)
  for(i in 1:length(x)) {
    if(x[i] != 0) {
      for(d in 1:10) {
        y = signif(x[i],d)
        if(abs(y - x[i])/abs(x[i]) <= tol) break
      }
      x[i] = y
    }
  }
  x
}

auto.format <- function(x) {
  # use the minimal number of digits to make x's unique
  # similar to abbrev
  for(digits in 0:6) {
    s = formatC(x,digits=digits,format="f")
    if(all(duplicated(s) == duplicated(x))) break
  }
  s
}

# used by color.plot and predict.plot given
rcut <- function(x,b) {
  f <- cut(x,b,include.lowest=T)
  levels(f) <- sapply(levels(f),interval2range)
  f
}

gradient <- function(x) {
  n <- length(x)
  g <- (diff(x[1:(n-1)])+diff(x[2:n]))/2
  g = c(x[2]-x[1],g,x[n]-x[n-1])
  names(g) = names(x)
  g
}

scale.range <- function(x) {
  (x-min(x,na.rm=T))/(diff(range(x,na.rm=T))+eps)
}

cut.quantile <- function(x,n=3,suffix="") {
  #if(missing(suffix)) suffix = deparse(substitute(x))
  #x[is.na(x)] <- 0
  b <- quantile(unique(x),probs=seq(0,1,len=n+1),na.rm=T)
  if(length(suffix) == 1) {
    if(n == 5) labels <- c("low","med.low","med","med.high","high")
    else if(n == 4) labels <- c("low","med.low","med.high","high")
    else if(n == 3) labels <- c("low","med","high")
    else if(n == 2) labels <- c("low","high")
    else labels <- paste("cut",1:n,sep="")
    labels = paste(labels,suffix)
  } else labels = suffix
  cut(x,b,include=T,labels=labels)
}

indicators.factor <- function(y) {
  # convert a factor into a matrix of indicators
  # result is level by case
  # works if y contains NAs
  r <- array(FALSE,c(length(levels(y)),length(y)),list(levels(y),names(y)))
  for(lev in levels(y)) r[lev,y==lev] <- TRUE
  r
}

reorder.rows <- function(y,i) {
  # indexes a matrix while preserving attributes
  if(is.na(nrow(y))) {
    y[i]
  } else {
    a <- attributes(y); y <- y[i,]
    if(!is.null(a$dimnames[[1]])) a$dimnames[[1]] <- a$dimnames[[1]][i]
    attributes(y) <- a
    y
  }
}

reorder.cols <- function(y,i) {
  # indexes a matrix while preserving attributes
  if(is.na(ncol(y))) {
    y[i]
  } else {
    a <- attributes(y); y <- y[,i]
    if(!is.null(a$dimnames[[2]])) a$dimnames[[2]] <- a$dimnames[[2]][i]
    attributes(y) <- a
    y
  }
}

# Graphics tools

auto.layout <- function(len,asp=1) {
  # asp is the aspect ratio of one cell
  layout <- c(1,len)
  din <- par("din")
  asp <- din[2]/din[1]/asp
  if(asp > 1) {
    layout[2] <- round(sqrt(len/asp))
    layout[1] <- ceiling(len/layout[2])
  } else {
    layout[1] = round(sqrt(len*asp))
    layout[2] = ceiling(len/layout[1])
  }
  layout
}

auto.legend <- function(labels, col=1, lty, pch, text.col, ...) {
  # barplot could use an auto.legend
  usr <- par("usr")
  top <- usr[4]
  left <- usr[2]-max(strwidth(labels))-xinch(0.04)
  y <- cumsum(strheight(labels)+yinch(0.04))
  if(missing(lty) && missing(pch)) {
    text.only <- T
    if(missing(text.col)) text.col <- col
  }
  for(i in 1:length(labels)) {
    text(left,top-y[i],labels[i],col=text.col[i],adj=0,...)
  }
}

auto.mar <- function(main="",sub="",xlab="x",ylab="y",axes=T,
                     xaxt=if(axes) par("xaxt") else "n",
                     yaxt=if(axes) par("yaxt") else "n",
                     las=par("las"),xtick=NULL,ytick=NULL,cex.axis=1,
                     mar.scale=NULL,key=F,...) {
  # note: cex=1 actually means cex=par("cex")
  # mar is c(bottom,left,top,right)
  mar = c(4.5,4,0,0.1)
  if(!is.null(main) && main != "") mar[3] = mar[3] + 1
  if(key) mar[3] = mar[3] + 1
  if(is.null(xlab) || xlab == "") {
    if(xaxt == "n") mar[1] = 0.05
    else mar[1] = 2.5
  }
  if(is.null(ylab) || ylab == "") {
    if(yaxt == "n") mar[2] = 0
    else mar[2] = 2
  }
  if(is.null(mar.scale)) mar.scale = 2
  if(xaxt != "n" && (las == 3 || las == 2)) {
    if(!is.null(xtick)) {
      # this requires a figure to already be set up
      mar[1] = max(strwidth(xtick,units="inches",cex=cex.axis))
      # convert to "lines of text"
      #mar[1] = mar[1]/min(strheight(ytick,units="inches",cex=cex))
      mar[1] = mar[1]/0.1875 + 1
    } else {
      mar[1] = mar[1]*mar.scale
    }
  }
  if(yaxt != "n" && (las == 1 || las == 2)) {
    if(!is.null(ytick)) {
      # this requires a figure to already be set up
      mar[2] = max(strwidth(ytick,units="inches",cex=cex.axis))
      # convert to "lines of text"
      #mar[2] = mar[2]/min(strheight(ytick,units="inches",cex=cex))
      mar[2] = mar[2]/0.1875 + 1
    } else {
      mar[2] = mar[2]*mar.scale
    }
  }
  if(!is.null(sub) && sub != "") {
    mar[1] = mar[1] + 2
  }
  mar
}

# matlab compatibility

rep.mat <- function(x,n,m) {
  array(rep(x,n*m),c(length(x)*n,m))
}
rep.col <- function(x,n) {
  rep.mat(x,1,n)
}
rep.row <- function(x,n) {
  t(rep.col(x,n))
}

# eye <- function(n) {
#   diag(n)
# }
norm <- function(x) {
  sqrt(sum(x^2))
}
# behaves like matlab "sum"
dim.sum <- function(x,dim) {
  apply(x,setdiff(1:length(dim(x)),dim),sum)
}
scale.cols <- function(x,s) {
  x * rep.row(s,nrow(x))
}
scale.rows <- function(x,s) {
  x * rep.col(s,ncol(x))
}
paulemms/datamining documentation built on March 1, 2023, 4:01 p.m.