R/genericfns.R

Defines functions stat_mode check_md5 Findalphabeta_invgamma Findalphabeta_gamma Findalphabeta_beta md5_fun md5_cache last odd which.odd even which.even is.odd is.even repmat basename_no_ext basename_ext remove_col

Documented in check_md5 Findalphabeta_beta Findalphabeta_gamma Findalphabeta_invgamma md5_cache md5_fun stat_mode

#' Find the mode from a set of samples
#'
#' @description This function finds the mode from a set of samples on a discrete space, i.e. it returns the value which is sampled the most often.
#' @param x vector
#' @return a numeric
#' @export
#' @examples
#' x <- rpois(lambda = 10,n=1000)
#' stat_mode(x)
stat_mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

#' Check md5 sum in cache directory
#'
#' @description This function takes an md5 and checks whether it is in the default cache directory ~/cache/.
#' @param md5 the md5 checksum
#' @param path the directory where the cache is stored
#' @return True or False, depending on whether the file exists in the cache.
#' @export
#' @examples
#' require(digest)
#' md5 <- digest(2)
#' check_md5(md5,".")
check_md5 <- function(md5,path) {
  return(file.exists(file.path(path,paste0(md5,".rda"))))
}


#' @title Quantile difference when tuning inverse Gamma distribution hyper-parameters

#' @description This function returns the normalised difference between specified and required 5 and 95 percentiles of the inverse gamma distribution over the variance. The percentiles represent those of the required error, i.e. the root of the variance. For example, if the error of an observation is believed to be between 2 and 10, with 90% confidence, then we can see how distant an inverse gamma distribution with shape and scale parameters in \code{pars} differs from this belief by calling \code{Findalphabeta_invgamma(pars,2,10)}. This function can then be passed to an optimisation routine to find better values for \code{pars}.
#'
#' @param pars the shape and scale parameters (in that order) of the inverse-Gamma distribution
#' @param p5 the 5 percentile of the desired error distribution
#' @param p95 the 95 percentils of the desired error distribution
#' @return A numeric which is a measure of the discrepancy between the inverse-Gamma distribution and the 5/95 percentiles.
#' @keywords Inverse Gamma distribution, prior elicitation
#' @export
#' @examples
#'
#' require(actuar)
#' # Find the an inverse-gamma distribution of the variance parameter corresponding to my prior belief of the error (sqrt(variance)) lying between p5=2, p95=10
#' initpars <- c(5,0.1)
#' hyp_pars <- optim(par=initpars,Findalphabeta_invgamma, p5=2, p95=10)
#'
#' # Now simulate from an inverse Gamma with these parameters and verify quantiles
#' X <- rinvgamma(shape = hyp_pars$par[1], scale = hyp_pars$par[2],n=10000)
#' print( quantile( sqrt(X),c(0.05,0.95)))
Findalphabeta_invgamma <- function(pars,p5,p95) {
  if(any(pars<0)) {
      return(Inf)
  } else {
     return(sum((qinvgamma(c(0.05,0.95),shape=pars[1],scale=pars[2]) - c(p5^2,p95^2))^2/c(p5^2,p95^2)))
 }
}

#' @title Quantile difference when tuning Gamma distribution hyper-parameters

#' @description This function returns the normalised difference between specified and required 5 and 95 percentiles of the Gamma distribution. The percentiles represent those of the required error, i.e. the root of the variance. For example, if the error of an observation is believed to be between 1 and 5, with 90% confidence, then we can see how distant a Gamma distribution over the precision with shape and rate parameters in \code{pars} differs from this belief by calling \code{Findalphabeta_gamma(pars,1,5)}. This function can then be passed to an optimisation routine to find better values for \code{pars}.
#'
#' @param pars the shape and rate parameters (in that order) of the Gamma distribution
#' @param p5 the 5 percentile of the desired error distribution
#' @param p95 the 95 percentils of the desired error distribution
#' @return A numeric which is a measure of the discrepancy between the Gamma distribution over the precision and the 5/95 percentiles of the error.
#' @keywords Gamma distribution, prior elicitation
#' @export
#' @examples
#'
#' require(actuar)
#' # Find the Gamma distribution over the precision corresponding to my prior belief of the error (1/sqrt(precision)) lying between p5=2, p95=10
#' initpars <- c(5,0.1)
#' hyp_pars <- optim(par=initpars,Findalphabeta_gamma, p5=1, p95=5)
#'
#' # Now simulate from a Gamma with these parameters and verify quantiles
#' X <- rgamma(shape = hyp_pars$par[1], rate = hyp_pars$par[2],n=10000)
#' print( quantile(1/sqrt(X),c(0.05,0.95)))
Findalphabeta_gamma <- function(pars,p5,p95) {
  if(any(pars<0)) {
      return(Inf)
  } else {
    return( sum((qgamma(c(0.05,0.95),shape=pars[1],rate=pars[2]) - c(1/p95^2,1/p5^2))^2/c(1/p95^2,1/p5^2)))
  }
}

#' @title Quantile difference when tuning Beta distribution hyper-parameters
#' @description This function returns the normalised difference between specified and required 95 percentiles of the Beta distribution. This function can then be passed to an optimisation routine to find required parameter values.
#'
#' @param pars the two shape parameters of the Beta distribution
#' @param p95 the desired 95 percentile
#' @return A numeric which is a measure of the discrepancy between the desired and real 95 percentile.
#' @keywords Beta distribution, prior elicitation
#' @export
#' @examples
#' require(actuar)
#' initpars <- c(2,6)
#' hyp  = optim(par=c(2,6),Findalphabeta_beta, p95=0.12)
#' hyp_shape1 <- hyp$par[1]
#' hyp_shape2 <- hyp$par[2]
#' # Now simulate from a Beta with these parameters and verify quantiles
#' X <- rbeta(shape1 = hyp_shape1, shape2 = hyp_shape2,n=10000)
#' print(quantile(X,0.95))
Findalphabeta_beta <- function(pars,p95) {
  if(any(pars<=0)) {
    return(Inf)
  } else {
    return( sum((qbeta(0.95,shape1=pars[1],shape2=pars[2]) - p95))^2/p95)
  }
}


#' Generate md5 checksum from function
#'
#' Creates an md5 checksum of a function by dumping it and then digesting the file.
#' @param fn the function to digest
#' @param path the directory where dumping will be carried out
#' @details Note that a ~/cache folder needs to be present for this to work.
#' @keywords md5, digest
#' @export
#' @examples
#' myfun1 <- function(x) x^2
#' x <- md5_fun(myfun1,".")
md5_fun <- function(fn,path) {
  path <- file.path(path,"fun_check.R")
  dump(ls(pattern = 'fn'),path)
  #save(fn,file=path)
  fun_digest <- digest::digest(file=path)
  unlink(path)
  return(fun_digest)
}

#' md5 checksum function
#'
#' Creates a wrapper for generating the md5 checksum for the calling function and its arguments. If a file with the md5 as its filename exists in the specified folder then this is loaded. Otherwise the function is evaluated and the results are stored in the specified folder.
#' @param path the path to where the files will be cached. 
#' @return An md5 wrapper for intensive computations. This has arguments
#' \itemize{
#' \item \code{fn}: the function being called
#' \item \code{...}: the arguments to be passed to the function
#' \item \code{print_output}: if T, details of md5 digest are outputted to screen.
#' }
#' @details md5 checksums on the called function and any function-arguments are generated by creating a text file in the specified folder and digesting 
#' the text file before deleting it. md5 checksums on character arguments are carried out on the file (if the file exists) or on the character as appropriate.
#' @keywords md5, digest
#' @export
#' @examples
#' myfun1 <- function(x) x^2
#' myfun2 <- function(y,f) y^3 + f(y)
#' md5_wrapper <- md5_cache(".")
#' x <- md5_wrapper(myfun2,2,myfun1)
md5_cache <- function(path) {
 stopifnot(is.character(path))
 dir.create(file.path(path), showWarnings = FALSE)
 md5 <- NA

 function(fn,...,print_output=F) {
     args <- list(...)
     if (length(args)==0) stop("Nothing to check md5 against")
     md5 <<- md5_fun(fn,path)
     lapply(args,function(x) {  
         if(is.character(x) & length(x) == 1) {
             if(file.exists(x)) {
                 md5_temp <- digest(file=x)
             } else {
                 md5_temp <- digest(x)
             }
         } else {
             if(class(x) == "function") {
                 md5_temp <- md5_fun(x,path)
             } else {
                 md5_temp <- digest(x)
             }
         }
         md5 <<- digest(c(md5,md5_temp))
     })
     
     if(check_md5(md5,path)) {
         if(print_output) cat("Found existing MD5 for data. Loading...",sep="\n")
         load(file=file.path(path,paste(md5,".rda",sep=""))) 
     } else {
         cat("Data not found. Evaluating expression...",sep="\n")
         flush.console()
         X <- fn(...)
         save(X,file=file.path(path,paste0(md5,".rda")))
     }
     if(print_output) cat(paste("md5 for this result is ",md5,sep=""),sep="\n")
     return(X)  
 }
}


last <- function(x) {
  if(class(x) == "numeric") {
    return (tail(x, n=1)) 
  } else {
    return(NULL)
}}

odd <- function(x) {
 return(x[x %% 2 == 1]) 
}
which.odd <- function(x) {
  return(which(x %% 2 == 1)) 
}
even <- function(x) {
  return(x[x %% 2 == 0]) 
}
which.even <- function(x) {
  return(which(x %% 2 == 0)) 
}
is.odd <- function(x) {
  return(x %% 2 == 1) }

is.even <- function(x) {
  return(x %% 2 == 0) }
  
repmat <-function(X,m,n){
mx = dim(X)[1]
nx = dim(X)[2]
return(matrix(t(matrix(X,mx,nx*n)),mx*m,nx*n,byrow=T))
}
basename_no_ext <- function(path) {
  return(substr(basename(path),1,nchar(basename(path)) - 4))
}
basename_ext <- function(path) {
  return(substr(basename(path),nchar(basename(path)) - 2,nchar(basename(path))))
}

# Remove column from df
remove_col <- function(df,colnames) {
  return(df[,!(names(df) %in% colnames)])  
}
shazhe/mvst0 documentation built on May 29, 2019, 9:20 p.m.