R/helper.R

Defines functions error.bars cv.folds cleanzero which.min.pos squelch importance.liso constrain repeatplot objsort seq.log removeZero ave.sorted plot.multistep modrow combmulti multistep as.array.multistep as.vector.multistep dim.multistep abs.multistep min.multistep max.multistep summary.multistep predict.multistep cdiag

Documented in abs.multistep dim.multistep max.multistep min.multistep multistep plot.multistep predict.multistep seq.log summary.multistep

#some helper functions
cdiag = function(x = 1,nrow,ncol){
	diag(as.vector(x) -> y,nrow = length(y))
}

##NOTES: defining a new sparse format for step functions in many dims
#
#class <- "multistep"
#$params 
#index   number of breaks    intercept
#$values 
#coefchaincount breakpoint delta previous
#$intercept 
#$ranges
#min max


# * -> used to get Y's out of it
"*.multistep" <- function(e1, e2){
  if (!inherits(e2,"multistep")){
    return ("*.multistep"(e2,e1))
  }
  pp <- nrow(e2$params)
  if (is.null(dim(e1))){
    e1 <- matrix(e1,ncol = pp)
  }
  nn <- nrow(e1)
  addup <- function(vect){
    i = vect[1]
    if (e2$params[i,2] == 0){
      return(rep(0,length(vect)-1))
    } else {
      matchtable = e2$values[e2$params[i,1]:(e2$params[i,1]+e2$params[i,2]-1),2:3,drop = FALSE]
      res = sapply(vect[-1], function(x)  sum(matchtable[matchtable[,1] <= x,2])) + e2$params[i,3]
    }
    res
  }
  results = rowSums(matrix(apply(rbind(1:pp,e1),2,function(x) addup(x)),nrow=nn)) + e2$intercept
  results
}

predict.multistep<- function(object, newx, ...){
  object * newx
}

#the same as *, but does linear interpolation
"&.multistep" <- function(x, obj){
  if (!inherits(obj,"multistep")){
    return ("&.multistep"(obj,x))
  }
  pp <- nrow(obj$params)
  if (is.null(dim(x))){
    x <- matrix(x,ncol = pp)
  }
  nn <- nrow(x)
  addup <- function(vect){
    i = vect[1]
    if (obj$params[i,2] == 0){
      return(rep(0,length(vect)-1))
    } else {
      matchtable = rbind(c(obj$range[i,1],0),obj$values[obj$params[i,1]:(obj$params[i,1]+obj$params[i,2]-1),2:3,drop = FALSE])
      matchtable[,2] = cumsum(matchtable[,2])
      res = approx(matchtable, xout=vect[-1],rule=2)$y + obj$params[i,3]
    }
    res
  }
  results = rowSums(matrix(apply(rbind(1:pp,x),2,function(a) addup(a)),nrow=nn)) + obj$intercept
  results
}

summary.multistep <- function(object, ...){
list(max = max(object), min = min(object), totalvar=abs(object), dim=dim(object))
}

max.multistep <- function(x,..., na.rm){
  individualmax <- function(vect){
    if (vect[2] == 0){
      return(0)
    } else {
      return( max(cumsum(c(vect[3],x$values[vect[1]:(vect[1]+vect[2]-1), 3]))))
    }
  }
  res <- drop(apply(x$params,1,individualmax))
  res
}
min.multistep <- function(x, ...,na.rm){
  individualmin <- function(vect){
    if (vect[2] == 0){
      return(0)
    } else {
      return( min(cumsum(c(vect[3],x$values[vect[1]:(vect[1]+vect[2]-1), 3]))))
    }
  }
 
  res <- drop(apply(x$params,1,individualmin))
  res
}
abs.multistep <- function(x){
  individualtv <- function(vect){
    if (vect[2] == 0){
      return(0)
    } else {
      return( sum(abs(x$values[vect[1]:(vect[1]+vect[2]-1), 3])) )
    }
  }
  res <- drop(apply(x$params,1,individualtv))
}

dim.multistep <- function(x){
  res <- drop(x$params[,2])
}

#pull out a coefchain
as.vector.multistep <- function(x, mode){
  coefchain = replace(rep(0, (x$n - 1)*nrow(x$params)), x$values[,1], x$values[,3])
  coefchain
}

#turns multistep into 'fits'
as.array.multistep <- function( x, dims = 1:nrow(x$param), explode = FALSE, signs = NULL, ...){
  coefchain = replace(rep(0, (x$n - 1)*nrow(x$params)), x$values[,1], x$values[,3])
  if (!explode){
  result = apply(rbind(x$params[dims,3],matrix(coefchain, x$n - 1))[,dims], 2, cumsum)
  } else {
    if (is.null(signs)){
      result = scale(apply(rbind(0, cbind(matrix(pmax(coefchain,0), x$n-1)[,dims],apply(-matrix(pmin(coefchain, 0), x$n-1)[,dims],2,rev))),2,cumsum), center=TRUE,scale=FALSE)
    } else {
      if (length(signs) == 1) signs = rep(signs, length(dims))
      #okay, we should explode or flip the appropriate signs.
      result1 = scale(apply(rbind(0, cbind(matrix(pmax(coefchain,0), x$n-1)[,dims],apply(-matrix(pmin(coefchain, 0), x$n-1)[,dims],2,rev))),2,cumsum), center=TRUE,scale=FALSE)
      result = apply(rbind(x$params[dims,3],matrix(coefchain, x$n - 1))[,dims], 2, cumsum)
      for (i in dims){
        if (signs[i] == -1){
          result[,i] = rev(result[,i])
        } else if (signs[i] == 0){
          result[,i] = result1[,i]
          result = cbind(result, result1[,i + length(dims)])
        }
      }
    }
  }
  result
}


multistep <- function(coefchain,x=NULL,intercept=0,sortedx = apply(x,2,sort),names = NULL, pinters=NULL,...){
  pp <- ncol(sortedx)
  nn <- nrow(sortedx)
  coefchainm <- matrix(coefchain, ncol=pp)
  sparsitycounts <- drop(apply(coefchainm, 2, function(a) sum(a !=0)))
  if (is.null(pinters)) {pinters <- drop(apply(coefchainm, 2, function(a) -sum(cumsum(a))/nn))}
  params <- cbind(c(1,cumsum(sparsitycounts)+1)[1:pp], sparsitycounts, pinters)
  coefchaincounts <- which(coefchain != 0)
  breakpoints <- as.vector(sortedx[-1,])[coefchaincounts]
  previous <- as.vector(sortedx[-nn,])[coefchaincounts]
  delta <- coefchain[coefchaincounts]
  values <- cbind(coefchaincounts, breakpoints, delta, previous)
  if (!is.null(names)){
    dimnames(params)[[1]] = names
  }
  obj <- list(params = params, values = values, intercept = intercept, range = cbind(sortedx[1,] , sortedx[nn,]),n=nn, ...)
  class(obj) <- "multistep"
  obj
}

#combine two or more multistep fits, or scale
#default is average
combmulti <- function(multisteplist, scales = rep(1/length(multisteplist), length(multisteplist)), x = NULL, sortedx=apply(x,2,sort)){
  newcoefchain = apply(sapply(multisteplist,as.vector) ,1,function(a) sum(a * scales))
  newintercept = sum(scales * sapply(multisteplist, function(a) a$intercept))
  newpinters = apply(sapply(multisteplist, function(a) min(a)), 1, function(b) sum(b*scales) )
  tempmultistep = multistep(newcoefchain, intercept=newintercept, sortedx = sortedx, pinters = newpinters, names = dimnames(multisteplist[[1]]$params)[[1]])
#re-intercept to give correct weighted means against the first set of weights

#copy over to get the remaining fields in the original
  result = multisteplist[[1]]
  result$params = tempmultistep$params
  result$values = tempmultistep$values
  result$intercept = tempmultistep$intercept
  result

}
#gets 'row and column' for a value
modrow <- function(n, rows){
  c( (n -1)%/% rows + 1, (n-1) %% rows +1 )
}
  
#add um adding, allow obj = NULL
  #needs a bit smarter code using plot.window. Too busy to do this
plot.multistep <- function(x = NULL, xpoints=NULL, ypoints = NULL, dims = 1:max(nrow(x$param), ncol(xpoints)) , ylimit = cbind(min(min(x),max(x)), max(max(x), min(x))), grid = TRUE, add = FALSE, titles = !add,...){
  numplots = length(dims)
  if (!add){
  if (grid) op <- par(mar = c(2,2,1,0) + 0.1,mfrow = c((ceiling(sqrt(numplots))) -> width,ceiling(numplots/width)))
  } else {
    numberrows = par()$mfrow[2]
    op = par()
  }
  for (idim in 1:length(dims)){
    if (add &grid) par(mfg = modrow(idim, numberrows))
    if (!is.null(x)){
      if (dim(x)[dims[idim]] == 0){
        plotdata = NULL
      } else {
        plotdata = x$values[x$params[dims[idim],1]:(x$params[dims[idim],1] + x$params[dims[idim],2] - 1),2:3, drop=FALSE]
        plotdata[,2] = cumsum(plotdata[,2]) + x$params[dims[idim],3]
      }
      plotdata = rbind(c(x$range[dims[idim],1], x$params[dims[idim], 3]), plotdata)
      plotdata = rbind(plotdata, c(x$range[dims[idim],2], tail(plotdata[,2], n=1)))
      if (!add){
        plot(plotdata,type="s", ann=F,frame.plot=F,axes=T, ylim = ylimit, ...)#,ann = F, axes = F)
      } else {
        lines(plotdata, type="s",...)
      }
    }
    if (titles){
      if (is.null(dimnames(x$params)[[1]][dims[idim]])){
        title(main = dims[idim])
      } else {
        title(main = dimnames(x$params)[[1]][dims[idim]])
      }
    }
    if (!(is.null(xpoints))){
      if (!is.null(x)){
        points(xpoints[,dims[idim]],ypoints,pch=".")
      } else {
        points(xpoints[,dims[idim]],ypoints,pch=".",...)
      }
    }
  }
  if (grid) invisible(op) # return the old pars, don't reset!
}

#ave for sorted factors by Charles Berry
#plus weights
ave.sorted <- function(y, x, weights = NULL){
  reps <- rle(x)$lengths
  lens <- rep(reps,reps)
  uniqLens <- unique(lens)
  for (i in uniqLens[ uniqLens != 1]){
    if (is.null(weights)){
      y[ lens == i] <- rep( colMeans(matrix(y[ lens == i], nr=i)), each=i)
    } else {
      w = matrix(weights[lens == i], nr = i)
      y[ lens == i] <- rep( colSums(matrix(y[ lens == i], nr=i) * w)/colSums(w), each=i)
    }
  }
  y
}

#changes step function to a new set of steps
#rejig <- (stepobj, newx, newY=NULL){
removeZero <- function(obj){
  obj[which(obj!=0)]
}

#create log scale grids
seq.log <- function(from = 1, to = 1, length.out = 50, add.zero = FALSE, shifting = 0, ...){
  res = exp(seq(from = log(from + shifting), to = log(to + shifting), length=length.out - add.zero)) - shifting
  if (add.zero) {
    if (from > to) {
      res = c(res,0)
    } else {
      res = c(0,res)
    }
  }
  res
}

objsort <- function(){
  sort(sapply(ls(1), function(x) object.size(get(x))))
}

#plots a matrix as a set of lines or points with legend
repeatplot <- function(x, y, margin = 1, col = 1, type = "p", form = 1, new = TRUE, lims = TRUE, ...){
  if(missing(y)) {
    y <- x
    x <- 1:dim(x)[3 - margin]
  }
  if (margin != 1){
    x = t(x)
    y = t(y)
  }
  dd = nrow(y)
  nn = ncol(y)
  if (length(col) < dd) col = rep(col,dd)
  if (length(type) < dd) type = rep(type,dd)
  if (length(form) < dd) form = rep(form,dd)
  if (!is.matrix(drop(x))){
    x <- matrix(rep(x, dd), dd, byrow=T)
  }
  if (new){
    if (lims){
      plot(x[1,], y[1,], col = col[1], type= type[1], xlim = range(x), ylim = range(y), ...)
    } else {
      plot(x[1,], y[1,], col = col[1], type= type[1], ...)
    }
  }
  for (i in (new + 1):dd){
    if (type[i] == "l") lines(x[i,], y[i,], col= col[i], lty = form[i])
    if (type[i] == "p") points(x[i,], y[i,], col= col[i], pch = form[i])
  }
}

#constrain x to a given range
constrain <- function(x, minimum, maximum=-minimum){
  if (minimum > maximum) {
    temp <- minimum
    minimum <- maximum
    maximum <- temp
  }
  x = replace(x, which(x > maximum), maximum)
  x = replace(x, which(x < minimum), minimum)
  x
}

importance.liso = function(obj, weights = rep(1/obj$n, obj$n)){
  sqrt(colSums(as.array.multistep(obj)^2 * apply(obj$sorter, 2, function(x) weights[x])))
}

# USAGE: 
# squelch((maxnumofcols), cols, F)[oldindex] will give the new index with cols
# removed
# squelch((oldnumofcols), cols, T)[oldindex] will give the new index with cols
# added (cols are original positions)
squelch <- function(n, cols=NULL, add = F){
  if (add){
    setdiff(1:n, cols)
  } else {
    cumsum(c(1,replace(rep(1, n), cols, 0)))
  }
}

#which min amongst x > eps
which.min.pos <- function(x, eps = 1e-16){
  x[x <= eps] = NA
  which.min(x)
}

#remove all but one zero column or row, for plotting
cleanzero <- function(x, mar=1, eps=1e-15){
  if (mar == 2) return(t(cleanzero(t(x))))

  zerorows = which(rowSums(abs(x)) < eps)
  if (length(zerorows) > 1){
    newx <- x[-zerorows[-1], ]
  }
  attr(newx, "indices") = (1:nrow(x))[-zerorows[-1]]
  newx
}


cv.folds = function(n, folds = 10) 
{
    split(sample(1:n), rep(1:folds, length = n))
}

error.bars = function(x, upper, lower, width = 0.02, ...) 
{
    xlim <- range(x)
    barw <- diff(xlim) * width
    segments(x, upper, x, lower, ...)
    segments(x - barw, upper, x + barw, upper, ...)
    segments(x - barw, lower, x + barw, lower, ...)
    range(upper, lower)
}

Try the liso package in your browser

Any scripts or data that you put into this service are public.

liso documentation built on May 29, 2017, 6:47 p.m.