R/SNPgvf.R
In FunctanSNP: Functional Analysis (with Interactions) for Dense SNP Data

Documented in SNPgvf

##' @title Transform the sequence (genotypes) data into the genetic variant function
##'
##' @description This function conducts the ordinary linear square smoothing analysis and models the genotypes of an individual (such as the sequence data generated by simX containing only 0, 1, 2) as the genetic variant function X(t).
##'
##' @param location a numeric vector defining the sampling sites of the sequence data.
##' @param X a matrix specifyinging the sequence data, with the number of rows equal to the number of samples.
##' @param type a character specifying the type of the basis functions. The options are "Bspline", "Exponential", "Fourier", "Monomial", and "Power".
##' @param nbasis an integer specifying the number of basis functions.
##' @param params in addition to rangeval (a vector of length 2 giving the lower and upper limits of the range of permissible values for the function) and nbasis, all bases have one or two parameters unique to that basis type or shared with one other:
##' \itemize{
##' \item{bspline:}{ Argument norder = the order of the spline, which is one more than the degree of the polynomials used. This defaults to 4, which gives cubic splines.}
##' \item{exponential:}{ Argument ratevec. In fda_2.0.2, this defaulted to 1. In fda_2.0.3, it will default to 0:1.}
##' \item{fourier:}{ Argument period defaults to diff(rangeval).}
##' \item{monomial/power:}{ Argument exponents. Default = 0:(nbasis-1). For monomial bases, exponents must be distinct nonnegative integers. For power bases, they must be distinct real numbers.}
##' }
##' @param Plot should the estimated genetic variant function X(t) be plotted (TRUE) or not (default = FALSE).
##'
##' @return An 'fd' object that contains the estimated genetic variant function.
##'
##' @seealso See Also as \code{\link{simX}}, \code{\link{plotRawdata}}.
##'
##' @import fda
##' @export
##' @examples
##' library(FunctanSNP)
##' n <- 20
##' m <- 50
##' simdata <- simX(n, m, seed = 1, d.ratio = 0)
##' X <- simdata$X
##' location <- simdata$location
##' SNPgvfres <- SNPgvf(location, X, type = "Bspline", nbasis = 5, params = 4, Plot = FALSE)
##' plotRawdata(location, X)
##' plotGVF(SNPgvfres)
##'

SNPgvf <- function(location, X, type, nbasis, params, Plot = FALSE){
  if (!inherits(location, "numeric")) { stop("x should be of vector type.") }
  if (!inherits(X, "matrix")) { stop("X should be of matrix type.") }

  if(type == "Bspline"){
    fbasis <- create.bspline.basis(rangeval = c(min(location), max(location)), nbasis = nbasis, norder = params)
  }

  if(type == "Exponential"){
    fbasis <- create.exponential.basis(rangeval = c(min(location), max(location)), nbasis = nbasis, ratevec = params)
  }

  if(type == "Fourier"){
    fbasis <- create.fourier.basis(rangeval = c(min(location), max(location)), nbasis = nbasis, period = params)
  }

  if(type == "Monomial"){
    fbasis <- create.monomial.basis(rangeval = c(min(location), max(location)), nbasis = nbasis, exponents = params)
  }

  if(type == "Power"){
    fbasis <- create.power.basis(rangeval = c(min(location), max(location)), nbasis = nbasis, exponents = params)
  }

  n <- nrow(X)
  m <- ncol(X)
  truelengths <- length( which( is.na(as.vector(X)) == FALSE ) )
  if(truelengths == n*m){#Regular
    basisMatrix <- getbasismatrix(evalarg = location, basisobj = fbasis, nderiv = 0, returnMatrix = FALSE)
    basisCoef <- solve(t(basisMatrix) %*% basisMatrix) %*% t(basisMatrix) %*% t(X)
  }

  if(truelengths != n*m){#Irregular
    location_list <- lapply(1:n, function(i) location[which(is.na(X[i, ]) == FALSE)])
    X_list <- lapply(1:n, function(i) X[i, which(is.na(X[i, ]) == FALSE)])
    coefFunc <- function(i){
      basisMatrix <- getbasismatrix(evalarg = location_list[[i]], basisobj = fbasis, nderiv = 0, returnMatrix = FALSE)
      basiscoef <- solve(t(basisMatrix) %*% basisMatrix) %*% t(basisMatrix) %*% X_list[[i]]
      return(basiscoef)
    }
    basisCoef <- mapply(coefFunc, 1:n)
  }
  gvf <- fd(coef = basisCoef, basisobj = fbasis)
  if(Plot == TRUE){ plot(gvf, xlab = "Location", ylab = "X") }
  return(gvf)
}


## @param dropind a vector of integers specifiying the basis functions to be dropped, if any.
## @param quadvals a matrix with two columns and a number of rows equal to the number of quadrature points for numerical evaluation of the penalty integral. The first column of quadvals contains the quadrature points, and the second column the quadrature weights. A minimum of 5 values are required for each inter-knot interval, and that is often enough. For Simpson's rule, these points are equally spaced, and the weights are proportional to 1, 4, 2, 4, ..., 2, 4, 1.
## @param values a list of matrices with one row for each row of quadvals and one column for each basis function. The elements of the list correspond to the basis functions and their derivatives evaluated at the quadrature points contained in the first column of quadvals.
## @param basisvalues A list of lists, allocated by code such as vector("list",1). This field is designed to avoid evaluation of a basis system repeatedly at a set of argument values. Each list within the vector corresponds to a specific set of argument values, and must have at least two components, which may be tagged as you wish. 'The first component in an element of the list vector contains the argument values. The second component in an element of the list vector contains a matrix of values of the basis functions evaluated at the arguments in the first component. The third and subsequent components, if present, contain matrices of values their derivatives up to a maximum derivative order. Whenever function getbasismatrix is called, it checks the first list in each row to see, first, if the number of argument values corresponds to the size of the first dimension, and if this test succeeds, checks that all of the argument values match. This takes time, of course, but is much faster than re-evaluation of the basis system. Even this time can be avoided by direct retrieval of the desired array. For example, you might set up a vector of argument values called "evalargs" along with a matrix of basis function values for these argument values called "basismat". You might want too use tags like "args" and "values", respectively for these. You would then assign them to basisvalues with code such as the following:
## \itemize{
## \item{basisobj\$basisvalues <- vector("list",1)}
## \item{basisobj\$basisvalues[[1]] <- list(args=evalargs, values=basismat)}
## }
#
#, dropind = dropind, quadvals = quadvals, values = values, basisvalues = basisvalues