R/from_features.R
In shinyWGD: 'Shiny' Application for Whole Genome Duplication Analysis

Documented in dfltBWrange dfltCounts drvkde SignifFeatureRegion symconv2D.ks symconv3D.ks symconv4D.ks

########## R function: SignifFeatureRegion ##########

# For determining the region of significant
# gradient for a particular bandwidth and
# significance level.

# Last changed: 18 JAN 2006

#' SignifFeatureRegion
#'
#' This function computes the significance of features based on gradient and curvature analysis.
#'
#' @param n The sample size.
#' @param d The dimensionality of the data.
#' @param gcounts A numeric vector representing data counts.
#' @param gridsize A numeric vector specifying the grid size.
#' @param dest A kernel density estimate.
#' @param bandwidth The bandwidth parameter.
#' @param signifLevel The significance level.
#' @param range.x The range of x values.
#' @param grad A logical value indicating whether to compute the gradient significance.
#' @param curv A logical value indicating whether to compute the curvature significance.
#' @param neg.curv.only A logical value indicating whether to consider negative curvature only.
#'
#' @importFrom stats pchisq
#'
#' @return A list containing the significance results for gradient and curvature.
#'
SignifFeatureRegion <- function(n, d, gcounts, gridsize, dest, bandwidth, signifLevel, range.x, grad=TRUE, curv=TRUE, neg.curv.only=TRUE)
{
  h <- bandwidth

  ESS <- n*dest$est*prod(h)*(sqrt(2*pi)^d)
  SigESS <- ESS >= 5

  Sig.scalar <- array(NA, dim=gridsize)
  Sig2.scalar <- array(NA, dim=gridsize)

  dest$est[dest$est<0] <- 0
  ## constant for variance of gradient estimate
  Sig.scalar <- 1/2*(2*sqrt(pi))^(-d)*n^(-1)*prod(h)^(-1)*dest$est

  ##  constants for variance of curvature estimate
  if (d==1)
    Sig2.scalar <- (8*sqrt(pi)*n*prod(h))^(-1)*dest$est
  else if (d==2)
    Sig2.scalar <- (16*pi*n*prod(h))^(-1)*dest$est
  else if (d==3)
    Sig2.scalar <- (32*pi^(3/2)*n*prod(h))^(-1)*dest$est
  else if (d==4)
    Sig2.scalar <- (64*pi^2*n*prod(h))^(-1)*dest$est

  ## Matrix square root - taken from Stephen Lake
  ## http://www5.biostat.wustl.edu/s-news/s-news-archive/200109/msg00067.html

  matrix.sqrt <- function(A)
  {
    sva <- svd(A)
    if (min(sva$d)>=0)
      Asqrt <- sva$u %*% diag(sqrt(sva$d)) %*% t(sva$v)
    else
      stop("Matrix square root is not defined")
    return(Asqrt)
  }


  if (d>1)
  {
    WaldGrad <- array(NA, dim=gridsize)
    WaldCurv <- array(NA, dim=gridsize)
    local.mode <- array(FALSE, dim=gridsize)
  }

  if (d==1)
  {
    if (grad)
    {
      obj1 <- drvkde(gcounts, drv=1, bandwidth=h, binned=TRUE, range.x=range.x, se=FALSE)
      fhat1 <- obj1$est

      Sig.inv12 <- 1/sqrt(Sig.scalar * h^(-2))
      WaldGrad <- (Sig.inv12 * fhat1)^2
    }

    if (curv)
    {
      obj2 <- drvkde(gcounts,drv=2,bandwidth=h,binned=TRUE,range.x=range.x, se=FALSE)
      fhat2 <- obj2$est

      Sig2.inv12 <- 1/sqrt(Sig2.scalar * 3*h^(-4))
      lambda1 <- Sig2.inv12 * fhat2
      WaldCurv <- lambda1^2
      local.mode <- (lambda1 < 0)
    }
  }

  if (d==2)
  {
    if (grad)
    {
      obj10 <- drvkde(gcounts,drv=c(1,0),bandwidth=h,binned=TRUE,
                      range.x=range.x,se=FALSE)
      obj01 <- drvkde(gcounts,drv=c(0,1),bandwidth=h,binned=TRUE,
                      range.x=range.x,se=FALSE)
      fhat10 <- obj10$est
      fhat01 <- obj01$est

      for (i1 in 1:gridsize[1])
        for (i2 in 1:gridsize[2])
          if (SigESS[i1,i2])
          {
            Sig.inv12 <- 1/sqrt(Sig.scalar[i1,i2] * h^(-2))
            WaldGrad[i1,i2] <- sum((Sig.inv12 * c(fhat10[i1,i2], fhat01[i1,i2]))^2)
          }
    }


    if (curv)
    {
      Sig2.mat <-
        matrix(c(3/h[1]^4, 0, 1/(h[1]^2*h[2]^2),
                 0, 1/(h[1]^2*h[2]^2), 0,
                 1/(h[1]^2*h[2]^2), 0, 3/h[2]^4),
               nrow=3, ncol=3)

      Sig2.mat.inv <- chol2inv(chol(Sig2.mat))

      obj20 <- drvkde(gcounts,drv=c(2,0),bandwidth=h,
                      binned=TRUE,range.x=range.x, se=FALSE)
      obj11 <- drvkde(gcounts,drv=c(1,1),bandwidth=h,
                      binned=TRUE,range.x=range.x, se=FALSE)
      obj02 <- drvkde(gcounts,drv=c(0,2),bandwidth=h,
                      binned=TRUE,range.x=range.x, se=FALSE)
      fhat20 <- obj20$est
      fhat11 <- obj11$est
      fhat02 <- obj02$est

      for (i1 in 1:gridsize[1])
        for (i2 in 1:gridsize[2])
          if (SigESS[i1,i2])
          {
            Sig2.inv12 <- sqrt(1/Sig2.scalar[i1,i2])*matrix.sqrt(Sig2.mat.inv)
            fhat.temp <- Sig2.inv12 %*%
              c(fhat20[i1,i2], fhat11[i1,i2], fhat02[i1,i2])

            WaldCurv[i1,i2] <- sum(fhat.temp^2)
          }

      lambda1 <- ((fhat20 + fhat02) - sqrt((fhat20-fhat02)^2 + 4*fhat11^2))/2
      lambda2 <- ((fhat20 + fhat02) + sqrt((fhat20-fhat02)^2 + 4*fhat11^2))/2
      local.mode <- (lambda1 < 0) & (lambda2 < 0)
    }
  }


  if (d==3)
  {
    if (grad)
    {
      obj100 <- drvkde(gcounts,drv=c(1,0,0),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      obj010 <- drvkde(gcounts,drv=c(0,1,0),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      obj001 <- drvkde(gcounts,drv=c(0,0,1),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)

      fhat100 <- obj100$est
      fhat010 <- obj010$est
      fhat001 <- obj001$est

      for (i1 in 1:gridsize[1])
        for (i2 in 1:gridsize[2])
          for (i3 in 1:gridsize[3])
            if (SigESS[i1,i2,i3])
            {
              Sig.inv12 <- 1/sqrt(Sig.scalar[i1,i2,i3] * h^(-2))
              WaldGrad[i1,i2,i3] <-
                sum((Sig.inv12 * c(fhat100[i1,i2,i3], fhat010[i1,i2,i3],
                                   fhat001[i1,i2,i3]))^2)
            }
    }


    if (curv)
    {
      obj200 <- drvkde(gcounts,drv=c(2,0,0),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      obj110 <- drvkde(gcounts,drv=c(1,1,0),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      obj101 <- drvkde(gcounts,drv=c(1,0,1),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      obj020 <- drvkde(gcounts,drv=c(0,2,0),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      obj011 <- drvkde(gcounts,drv=c(0,1,1),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      obj002 <- drvkde(gcounts,drv=c(0,0,2),bandwidth=h,
                       binned=TRUE,range.x=range.x, se=FALSE)
      fhat200 <- obj200$est
      fhat110 <- obj110$est
      fhat101 <- obj101$est
      fhat020 <- obj020$est
      fhat011 <- obj011$est
      fhat002 <- obj002$est

      Sig2.mat <-
        matrix(c(3/h[1]^4, 0, 0, 1/(h[1]*h[2])^2, 0, 1/(h[1]*h[3])^2,
                 0, 1/(h[1]*h[2])^2, 0, 0, 0, 0,
                 0, 0, 1/(h[1]*h[3])^2, 0, 0, 0,
                 1/(h[1]*h[2])^2, 0, 0, 3/h[2]^4, 0, 1/(h[2]*h[3])^2,
                 0, 0, 0, 0, 1/(h[2]*h[3])^2, 0,
                 1/(h[1]*h[3])^2, 0, 0, 1/(h[2]*h[3])^2, 0, 3/h[3]^4),
               nrow=6, ncol=6)

      Sig2.mat.inv <- chol2inv(chol(Sig2.mat))

      ## at each grid point, find eigenvalues of vech'ed curvature matrix
      for (i1 in 1:gridsize[1])
        for (i2 in 1:gridsize[2])
          for (i3 in 1:gridsize[3])
            if (SigESS[i1,i2,i3])
            {
              Sig2.inv12 <- sqrt(1/Sig2.scalar[i1,i2,i3]) *
                matrix.sqrt(Sig2.mat.inv)
              fhat.temp <- Sig2.inv12 %*%
                c(fhat200[i1,i2,i3], fhat110[i1,i2,i3], fhat101[i1,i2,i3],
                  fhat020[i1,i2,i3], fhat011[i1,i2,i3], fhat002[i1,i2,i3])

              D2.mat <-
                matrix(c(fhat200[i1,i2,i3], fhat110[i1,i2,i3], fhat101[i1,i2,i3],
                         fhat110[i1,i2,i3], fhat020[i1,i2,i3], fhat011[i1,i2,i3],
                         fhat101[i1,i2,i3], fhat011[i1,i2,i3], fhat002[i1,i2,i3]),
                       nrow=3)
              lambda <- eigen(D2.mat, symmetric=TRUE, only.values=TRUE)$values

              WaldCurv[i1,i2,i3] <- sum(fhat.temp^2)

              local.mode[i1,i2,i3] <- all(lambda < 0)
          }
    }

  }

  if (d==4)
  {
    if (grad)
    {
      obj1000 <- drvkde(gcounts,drv=c(1,0,0,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0100 <- drvkde(gcounts,drv=c(0,1,0,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0010 <- drvkde(gcounts,drv=c(0,0,1,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0001 <- drvkde(gcounts,drv=c(0,0,0,1),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)

      fhat1000 <- obj1000$est
      fhat0100 <- obj0100$est
      fhat0010 <- obj0010$est
      fhat0001 <- obj0001$est

      for (i1 in 1:gridsize[1])
        for (i2 in 1:gridsize[2])
          for (i3 in 1:gridsize[3])
            for (i4 in 1:gridsize[4])
              if (SigESS[i1,i2,i3,i4])
              {
                Sig.inv12 <- 1/sqrt(Sig.scalar[i1,i2,i3,i4] * h^(-2))
                WaldGrad[i1,i2,i3,i4] <-
                  sum((Sig.inv12*c(fhat1000[i1,i2,i3,i4],fhat0100[i1,i2,i3,i4],
                                   fhat0010[i1,i2,i3,i4],fhat0001[i1,i2,i3,i4]))^2)
              }
    }

    if (curv)
    {
      obj2000 <- drvkde(gcounts,drv=c(2,0,0,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj1100 <- drvkde(gcounts,drv=c(1,1,0,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj1010 <- drvkde(gcounts,drv=c(1,0,1,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj1001 <- drvkde(gcounts,drv=c(1,0,0,1),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0200 <- drvkde(gcounts,drv=c(0,2,0,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0110 <- drvkde(gcounts,drv=c(0,1,1,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0101 <- drvkde(gcounts,drv=c(0,1,0,1),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0020 <- drvkde(gcounts,drv=c(0,0,2,0),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0011 <- drvkde(gcounts,drv=c(0,0,1,1),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)
      obj0002 <- drvkde(gcounts,drv=c(0,0,0,2),bandwidth=h,
                        binned=TRUE,range.x=range.x, se=FALSE)

      fhat2000 <- obj2000$est
      fhat1100 <- obj1100$est
      fhat1010 <- obj1010$est
      fhat1001 <- obj1001$est
      fhat0200 <- obj0200$est
      fhat0110 <- obj0110$est
      fhat0101 <- obj0101$est
      fhat0020 <- obj0020$est
      fhat0011 <- obj0011$est
      fhat0002 <- obj0002$est

      Sig2.mat <-
        matrix(c(3/h[1]^4, 0, 0, 0, 1/(h[1]*h[2])^2, 0, 0, 1/(h[1]*h[3])^2, 0, 1/(h[1]*h[4])^2,
                 0, 1/(h[1]*h[2])^2, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 1/(h[1]*h[3])^2, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 1/(h[1]*h[4])^2, 0, 0, 0, 0, 0, 0,
                 1/(h[1]*h[2])^2, 0, 0, 0, 3/h[2]^4, 0, 0, 1/(h[2]*h[3])^2, 0, 1/(h[2]*h[4])^2,
                 0, 0, 0, 0, 0, 1/(h[2]*h[3])^2, 0, 0, 0, 0,
                 0, 0, 0, 0 ,0, 0, 1/(h[2]*h[4])^2, 0, 0, 0,
                 1/(h[1]*h[3])^2, 0, 0, 0, 1/(h[2]*h[3])^2, 0, 0, 3/h[3]^4, 0, 1/(h[3]*h[4])^2,
                 0, 0, 0, 0, 0, 0, 0, 0, 1/(h[3]*h[4])^2, 0,
                 1/(h[1]*h[4])^2, 0, 0, 0, 1/(h[2]*h[4])^2, 0, 0, 1/(h[3]*h[4])^2, 0, 3/h[4]^4),
               nrow=10, ncol=10)

      Sig2.mat.inv <- chol2inv(chol(Sig2.mat))

      for (i1 in 1:gridsize[1])
        for (i2 in 1:gridsize[2])
          for (i3 in 1:gridsize[3])
            for (i4 in 1:gridsize[4])
              if (SigESS[i1,i2,i3,i4])
              {
                Sig2.inv12 <- sqrt(1/Sig2.scalar[i1,i2,i3,i4]) *
                  matrix.sqrt(Sig2.mat.inv)
                fhat.temp <- Sig2.inv12 %*%
                  c(fhat2000[i1,i2,i3,i4], fhat1100[i1,i2,i3,i4],
                    fhat1010[i1,i2,i3,i4], fhat1001[i1,i2,i3,i4],
                    fhat0200[i1,i2,i3,i4], fhat0110[i1,i2,i3,i4],
                    fhat0101[i1,i2,i3,i4], fhat0020[i1,i2,i3,i4],
                    fhat0011[i1,i2,i3,i4], fhat0002[i1,i2,i3,i4])

                D2.mat <-
                  matrix(c(fhat2000[i1,i2,i3,i4], fhat1100[i1,i2,i3,i4], fhat1010[i1,i2,i3,i4], fhat1001[i1,i2,i3,i4],
                           fhat1100[i1,i2,i3,i4], fhat0200[i1,i2,i3,i4], fhat0110[i1,i2,i3,i4], fhat0101[i1,i2,i3,i4],
                           fhat1010[i1,i2,i3,i4], fhat0110[i1,i2,i3,i4], fhat0020[i1,i2,i3,i4], fhat0011[i1,i2,i3,i4],
                           fhat1001[i1,i2,i3,i4], fhat0101[i1,i2,i3,i4], fhat0011[i1,i2,i3,i4], fhat0002[i1,i2,i3,i4]),
                         nrow=4)
                WaldCurv[i1,i2,i3,i4] <- sum(fhat.temp^2)
                lambda <- eigen(D2.mat, symmetric=TRUE, only.values=TRUE)$values

                local.mode[i1,i2,i3,i4] <- all(lambda < 0)
              }
    }
  }


  ## multiple hypothesis testing - based on Hochberg's method
  ## - modified Bonferroni method using ordered p-values

  ## test statistic for gradient
  if (grad)
  {
    pval.Grad <- 1 - pchisq(WaldGrad, d)
    pval.Grad.ord <- pval.Grad[order(pval.Grad)]
    num.test <- sum(!is.na(pval.Grad.ord))

    if (num.test>=1)
      num.test.seq <- c(1:num.test, rep(NA, prod(gridsize) - num.test))
    else
      num.test.seq <- rep(NA, prod(gridsize))

    reject.nonzero <- ((pval.Grad.ord <= signifLevel/(num.test + 1 - num.test.seq)) &
                       (pval.Grad.ord > 0))
    reject.nonzero.ind <- which(reject.nonzero)

    ## p-value == 0 => reject null hypotheses automatically
    SignifGrad <- array(FALSE, dim=gridsize)
    SignifGrad[which(pval.Grad==0, arr.ind=TRUE)] <- TRUE

    ## p-value > 0 then reject null hypotheses indicated in reject.nonzero.ind
    for (i in reject.nonzero.ind)
      SignifGrad[which(pval.Grad==pval.Grad.ord[i], arr.ind=TRUE)] <- TRUE
  }

  ## test statistic for curvature
  if (curv)
  {
    pval.Curv <- 1 - pchisq(WaldCurv, d*(d+1)/2)
    pval.Curv.ord <- pval.Curv[order(pval.Curv)]
    num.test <- sum(!is.na(pval.Curv.ord))

    if (num.test>=1)
      num.test.seq <- c(1:num.test, rep(NA, prod(gridsize) - num.test))
    else
      num.test.seq <- rep(NA, prod(gridsize))
    reject.nonzero <- ((pval.Curv.ord <= signifLevel/(num.test + 1 - num.test.seq)) &(pval.Curv.ord > 0))
    reject.nonzero.ind <- which(reject.nonzero)

    SignifCurv <- array(FALSE, dim=gridsize)

    ## p-value == 0 => reject null hypotheses automatically
    SignifCurv[which(pval.Curv==0, arr.ind=TRUE)] <- TRUE

    ## p-value > 0 then reject null hypotheses indicated in reject.nonzero.ind
    for (i in reject.nonzero.ind)
      SignifCurv[which(pval.Curv==pval.Curv.ord[i], arr.ind=TRUE)] <- TRUE

    if (neg.curv.only) SignifCurv <- SignifCurv & local.mode
  }

  if (grad & !curv)
    return(list(grad=SignifGrad))
  else if (!grad & curv)
    return(list(curv=SignifCurv))
  else if (grad & curv)
    return(list(grad=SignifGrad, curv=SignifCurv))
}


########## End of SignifFeatureRegion ##########

# These are from package feature. They not exprted in the package.
######### R-function dfltBWrange  #########

# Obtain default set of grid counts from a
# multivariate point cloud 'x'.

# Last changed: 22 JUL 2005


#' dfltBWrange
#'
#' This function computes the default bandwidth range for kernel density estimation.
#'
#' @param x The input data, which can be a numeric vector or matrix.
#' @param tau A parameter used in bandwidth calculation.
#'
#' @return A list of bandwidth ranges for each dimension of the input data.
#'
dfltBWrange <- function(x,tau) {
  d <- ncol(x)
  if (d==1) x <- as.matrix(x)

  r <- 2
  cmb.fac.upp <- (4/((d+2*r+2)*nrow(x)))^(1/(d+2*r+4))
  r <- 0
  cmb.fac.low <- (4/((d+2*r+2)*nrow(x)))^(1/(d+2*r+4))

  sd <- IQR <- NULL
  ## Compute the scale in each direction
  st.devs <- apply(x,2,sd)
  IQR.vals <- apply(x, 2, IQR)/(stats::qnorm(3/4) - stats::qnorm(1/4))
  sig.hats <- apply(cbind(st.devs,IQR.vals),1,min)
  ##range.vals <- apply(x,2,max) - apply(x,2,min)

  range.h <- list()
  for (id in 1:d)
  {
    h.upp <- cmb.fac.upp*sig.hats[id]
    h.low <- 0.1*cmb.fac.low*sig.hats[id] ##3*(range.vals[id] + 2*tau*h.upp)/((gridsize[id]-1)*tau)
    range.h[[id]] <- c(h.low,h.upp)
  }

  return(range.h)
}

######## End of dfltBWrange ########
######### R-function:dfltCounts  #########
# Obtain default set of grid counts from a
# multivariate point cloud 'x'.
# Last changed: 18 JUL 2005

#' dfltCounts
#'
#' This function bins the input data into a regular grid.
#'
#' @param x The input data, which should be a numeric matrix.
#' @param gridsize A vector specifying the number of bins along each dimension.
#' @param h A vector specifying the bandwidth (smoothing parameter) along each dimension.
#' @param supp A parameter for determining the range of the bins.
#' @param range.x A list specifying the range of values for each dimension.
#' @param w A vector of weights for the data points.
#'
#' @return A list containing the binned counts and the range of values for each dimension.
#'
dfltCounts <- function(x,gridsize=rep(64,NCOL(x)),h=rep(0,NCOL(x)), supp=3.7, range.x, w)
{
  x <- as.matrix(x)
  d <- ncol(x)
  n <- nrow(x)
  if (missing(w)) w <- rep(1,n)

  if (missing(range.x))
  {
    range.x <- list()
    for (id in 1:d)
      range.x[[id]] <- c(min(x[,id])-supp*h[id],max(x[,id])+supp*h[id])
  }

  a <- unlist(lapply(range.x,min))
  b <- unlist(lapply(range.x,max))

  gpoints <- list()
  for (id in 1:d)
    gpoints[[id]] <- seq(a[id],b[id],length=gridsize[id])

  if ((d!=1)&(d!=2)&(d!=3)&(d!=4)) stop("binning implemented only for d=1,2,3,4")

  if (d==1) gcounts <- ks::binning(x=x, bgridsize=gridsize, h=h, xmin=a, xmax=b, w=w)$counts
  else if (d>1) gcounts <- ks::binning(x=x, bgridsize=gridsize, H=diag(h^2), xmin=a, xmax=b, w=w)$counts

  ##if (d==1) gcounts <- linbin.ks(x,gpoints[[1]], w=w)
  ##if (d==2) gcounts <- linbin2D.ks(x,gpoints[[1]],gpoints[[2]], w=w)
  ##if (d==3) gcounts <- linbin3D.ks(x,gpoints[[1]],gpoints[[2]],gpoints[[3]], w=w)
  ##if (d==4) gcounts <- linbin4D.ks(x,gpoints[[1]],gpoints[[2]],gpoints[[3]],gpoints[[4]], w=w)

  return(list(counts=gcounts,range.x=range.x))
}

######## End of dfltCounts ########

#####################################################################
## Matt Wand's version of binned kernel density derivative estimation
##
## Computes the mth derivative of a binned
## d-variate kernel density estimate based
## on grid counts.
#############################################################

#' drvkde
#'
#' Compute the mth derivative of a binned d-variate kernel density estimate based on grid counts.
#'
#' @param x The input data.
#' @param drv The order of the derivative to compute.
#' @param bandwidth The bandwidth (smoothing parameter) along each dimension.
#' @param gridsize The size of the grid.
#' @param range.x A list specifying the range of values for each dimension.
#' @param binned A logical indicating whether the input data is already binned.
#' @param se A logical indicating whether to compute standard errors.
#' @param w A vector of weights for the data points.
#'
#' @return A list containing the estimated density or derivative, and optionally, standard errors.
#'
drvkde <- function(x,drv,bandwidth,gridsize,range.x,binned=FALSE,se=TRUE, w)
{
  d <- length(drv)
  if (d==1) x <- as.matrix(x)

  ## Rename common variables
  h <- bandwidth
  tau <- 4 + max(drv)
  if (length(h)==1) h <- rep(h,d)

  if (missing(gridsize))
    if (!binned)   ## changes 16/02/2009
    {
      if (d==1) gridsize <- 401
      else if (d==2) gridsize <- rep(151,d)
      else if (d==3) gridsize <- rep(51, d)
      else if (d==4) gridsize <- rep(21, d)
    }
  else
  {
    if (d==1) gridsize <- dim(x)[1]
    else gridsize <- dim(x)
  }

  if(missing(w)) w <- rep(1,nrow(x))
  ## Bin the data if not already binned

  if (missing(range.x))
  {
    range.x <- list()
    for (id in 1:d)
      range.x[[id]] <- c(min(x[,id])-tau*h[id],max(x[,id])+tau*h[id])
  }

  a <- unlist(lapply(range.x,min))
  b <- unlist(lapply(range.x,max))

  M <- gridsize
  gpoints <- list()

  for (id in 1:d)
    gpoints[[id]] <- seq(a[id],b[id],length=M[id])

  if (binned==FALSE)
  {
    if (d==1) gcounts <- ks::binning(x=x, bgridsize=gridsize, h=h, xmin=a, xmax=b, w=w)$counts
    else if (d>1) gcounts <- ks::binning(x=x, bgridsize=gridsize, H=diag(h^2), xmin=a, xmax=b, w=w)$counts
  }
  else
    gcounts <- x

  n <- sum(gcounts)

  kapmid <- list()
  for (id in (1:d))
  {
    ## changes to Lid 13/02/2009
    Lid <- max(min(floor(tau*h[id]*(M[id]-1)/(b[id]-a[id])),M[id]),d)
    lvecid <- (0:Lid)
    facid  <- (b[id]-a[id])/(h[id]*(M[id]-1))
    argid <- lvecid*facid
    kapmid[[id]] <- stats::dnorm(argid)/(h[id]^(drv[id]+1))
    hmold0 <- 1
    hmold1 <- argid
    if (drv[id]==0) hmnew <- 1
    if (drv[id]==1) hmnew <- argid
    if (drv[id] >= 2)
      for (ihm in (2:drv[id]))
      {
        hmnew <- argid*hmold1 - (ihm-1)*hmold0
        hmold0 <- hmold1   # Compute drv[id] degree Hermite polynomial
        hmold1 <- hmnew    # by recurrence.
      }
    kapmid[[id]] <- hmnew*kapmid[[id]]*(-1)^drv[id]
  }

  if (d==1) kappam <- kapmid[[1]]/n
  if (d==2) kappam <- outer(kapmid[[1]],kapmid[[2]])/n
  if (d==3) kappam <- outer(kapmid[[1]],outer(kapmid[[2]],kapmid[[3]]))/n
  if (d==4) kappam <- outer(kapmid[[1]],outer(kapmid[[2]],outer(kapmid[[3]],kapmid[[4]])))/n

  if (!any(c(d==1,d==2,d==3,d==4))) stop("only for d=1,2,3,4")

  if (d==1)
  {
    est <- symconv.ks(kappam,gcounts,skewflag=(-1)^drv)
    if (se) est.var <- ((symconv.ks((n*kappam)^2,gcounts)/n) - est^2)/(n-1)
  }

  if (d==2)
  {
    ##est <- ks:::symconv.nd(kappam,gcounts,d=d)
    ##if (se) est.var <- ((ks:::symconv.nd((n*kappam)^2,gcounts,d=d)/n) - est^2)/(n-1)
    est <- symconv2D.ks(kappam,gcounts,skewflag=(-1)^drv)
    if (se) est.var <- ((symconv2D.ks((n*kappam)^2,gcounts)/n) - est^2)/(n-1)
  }

  if (d==3)
  {
    est <- symconv3D.ks(kappam,gcounts,skewflag=(-1)^drv)
    if (se) est.var <- ((symconv3D.ks((n*kappam)^2,gcounts)/n) - est^2)/(n-1)
  }

  if (d==4)
  {
    est <- symconv4D.ks(kappam,gcounts,skewflag=(-1)^drv)
    if (se) est.var <- ((symconv4D.ks((n*kappam)^2,gcounts)/n) - est^2)/(n-1)
  }

  if (se)
  {
    est.var[est.var<0] <- 0
    return(list(x.grid=gpoints,est=est,se=sqrt(est.var)))
  }
  else if (!se)
    return(list(x.grid=gpoints,est=est))
}


########################################################################
## Discrete convolution
########################################################################


## Computes the discrete convolution of
## a symmetric or skew-symmetric response
## vector r and a data vector s.
## If r is symmetric then "skewflag"=1.
## If r is skew-symmetric then "skewflag"=-1.


#' symconv.ks
#'
#' Perform symmetric convolution using FFT.
#'
#' @param rr The first input vector.
#' @param ss The second input vector.
#' @param skewflag A scalar value to apply skew correction.
#'
#' @importFrom stats fft
#'
#' @return A vector representing the result of the symmetric convolution.
#'
symconv.ks <- function (rr,ss,skewflag)
{
  L <- length(rr) - 1
  M <- length(ss)
  P <- 2^(ceiling(log(M + L)/log(2)))
  rp <- rep(0,P)
  rp[1:(L+1)] <- rr
  if (L>0) rp[(P-L+1):P] <- skewflag*rr[(L+1):2]
  sp <- rep(0,P)
  sp[1:M] <- ss
  R <- fft(rp)
  S <- fft(sp)
  t <- fft(R * S, TRUE)
  return((Re(t)/P)[1:M])
}


#' symconv2D.ks
#'
#' Perform symmetric 2D convolution using FFT.
#'
#' @param rr The first input matrix.
#' @param ss The second input matrix.
#' @param skewflag A vector of two scalar values for skew correction along each dimension.
#'
#' @importFrom stats fft
#'
#' @return A matrix representing the result of the symmetric 2D convolution.
#'
symconv2D.ks <- function(rr, ss, skewflag=rep(1,2))
{
  L <- dim(rr)-1
  M <- dim(ss)
  L1 <- L[1]
  L2 <- L[2]               # find dimensions of r,s
  M1 <- M[1]
  M2 <- M[2]
  P1 <- 2^(ceiling(log(M1+L1)/log(2))) # smallest power of 2 >= M1+L1
  P2 <- 2^(ceiling(log(M2+L2)/log(2))) # smallest power of 2 >= M2+L2

  rp <- matrix(0,P1,P2)
  rp[1:(L1+1),1:(L2+1)] <- rr
  if (L1>0)
    rp[(P1-L1+1):P1,1:(L2+1)] <- skewflag[1]*rr[(L1+1):2,]
  if (L2>0)
    rp[1:(L1+1),(P2-L2+1):P2] <- skewflag[2]*rr[,(L2+1):2]
  if (L1 > 0 & L2 > 0)
    rp[(P1-L1+1):P1,(P2-L2+1):P2] <- prod(skewflag)*rr[(L1+1):2,(L2+1):2]
  # wrap around version of rr
  sp <- matrix(0,P1,P2)
  sp[1:M1,1:M2] <- ss                 # zero-padded version of ss

  RR <- fft(rp)        # Obtain FFT's of rr and ss
  SS <- fft(sp)
  tt <- fft(RR*SS,TRUE)               # invert element-wise product of FFT's
  return((Re(tt)/(P1*P2))[1:M1,1:M2]) # return normalized truncated tt
}

#' symconv3D.ks
#'
#' Perform symmetric 3D convolution using FFT.
#'
#' @param rr The first input 3D array.
#' @param ss The second input 3D array.
#' @param skewflag A vector of three scalar values for skew correction along each dimension.
#'
#' @importFrom stats fft
#'
#' @return A 3D array representing the result of the symmetric 3D convolution.
#'
symconv3D.ks <- function(rr, ss, skewflag=rep(1,3))
{
  L <- dim(rr) - 1
  M <- dim(ss)
  P <- 2^(ceiling(log(M+L)/log(2))) # smallest powers of 2 >= M+L
  L1 <- L[1] ; L2 <- L[2] ; L3 <- L[3]
  M1 <- M[1] ; M2 <- M[2] ; M3 <- M[3]
  P1 <- P[1] ; P2 <- P[2] ; P3 <- P[3]
  sf <- skewflag

  rp <- array(0,P)
  rp[1:(L1+1),1:(L2+1),1:(L3+1)] <- rr
  if (L1>0)
    rp[(P1-L1+1):P1,1:(L2+1),1:(L3+1)] <- sf[1]*rr[(L1+1):2,1:(L2+1),1:(L3+1)]
  if (L2>0)
    rp[1:(L1+1),(P2-L2+1):P2,1:(L3+1)] <- sf[2]*rr[1:(L1+1),(L2+1):2,1:(L3+1)]
  if (L3>0)
    rp[1:(L1+1),1:(L2+1),(P3-L3+1):P3] <- sf[3]*rr[1:(L1+1),1:(L2+1),(L3+1):2]
  if (L1>0 & L2>0)
    rp[(P1-L1+1):P1,(P2-L2+1):P2,1:(L3+1)] <- sf[1]*sf[2]*rr[(L1+1):2,(L2+1):2,1:(L3+1)]
  if (L2>0 & L3>0)
    rp[1:(L1+1),(P2-L2+1):P2,(P3-L3+1):P3] <- sf[2]*sf[3]*rr[1:(L1+1),(L2+1):2,(L3+1):2]
  if (L1>0 & L3>0)
    rp[(P1-L1+1):P1,1:(L2+1),(P3-L3+1):P3] <- sf[1]*sf[3]*rr[(L1+1):2,1:(L2+1),(L3+1):2]
  if (L1>0 & L2>0 & L3>0)
    rp[(P1-L1+1):P1,(P2-L2+1):P2,(P3-L3+1):P3] <- sf[1]*sf[2]*sf[3]*rr[(L1+1):2,(L2+1):2,(L3+1):2]

  sp <- array(0,P)
  sp[1:M1,1:M2,1:M3] <- ss            # zero-padded version of ss

  RR <- fft(rp)                       # Obtain FFT's of rr and ss
  SS <- fft(sp)
  tt <- fft(RR*SS,TRUE)               # invert element-wise product of FFT's
  return((Re(tt)/(P1*P2*P3))[1:M1,1:M2,1:M3]) # return normalized truncated tt
}

#' symconv4D.ks
#'
#' Perform symmetric 4D convolution using FFT.
#'
#' @param rr The first input 4D array.
#' @param ss The second input 4D array.
#' @param skewflag A vector of four scalar values for skew correction along each dimension.
#' @param fftflag A vector of two Boolean values for FFT flag.
#'
#' @importFrom stats fft
#'
#' @return A 4D array representing the result of the symmetric 4D convolution.
#'
symconv4D.ks <- function(rr, ss, skewflag=rep(1,4) , fftflag=rep(TRUE,2))
{
  L <- dim(rr) - 1
  M <- dim(ss)
  P <- 2^(ceiling(log(M+L)/log(2))) # smallest powers of 2 >= M+L
  L1 <- L[1] ; L2 <- L[2] ; L3 <- L[3] ; L4 <- L[4]
  M1 <- M[1] ; M2 <- M[2] ; M3 <- M[3] ; M4 <- M[4]
  P1 <- P[1] ; P2 <- P[2] ; P3 <- P[3] ; P4 <- P[4]
  sf <- skewflag

  rp <- array(0,P)
  rp[1:(L1+1),1:(L2+1),1:(L3+1),1:(L4+1)] <- rr

  if (L1>0)
    rp[(P1-L1+1):P1,1:(L2+1),1:(L3+1),1:(L4+1)] <- sf[1]*rr[(L1+1):2,1:(L2+1),1:(L3+1),1:(L4+1)]
  if (L2>0)
    rp[1:(L1+1),(P2-L2+1):P2,1:(L3+1),1:(L4+1)] <- sf[2]*rr[1:(L1+1),(L2+1):2,1:(L3+1),1:(L4+1)]
  if (L3>0)
    rp[1:(L1+1),1:(L2+1),(P3-L3+1):P3,1:(L4+1)] <- sf[3]*rr[1:(L1+1),1:(L2+1),(L3+1):2,1:(L4+1)]
  if (L4>0)
    rp[1:(L1+1),1:(L2+1),1:(L3+1),(P4-L4+1):P4] <- sf[4]*rr[1:(L1+1),1:(L2+1),1:(L3+1),(L4+1):2]

  if (L1>0 & L2 >0)
    rp[(P1-L1+1):P1,(P2-L2+1):P2,1:(L3+1),1:(L4+1)] <- sf[1]*sf[2]*rr[(L1+1):2,(L2+1):2,1:(L3+1),1:(L4+1)]
  if (L2>0 & L3>0)
    rp[1:(L1+1),(P2-L2+1):P2,(P3-L3+1):P3,1:(L4+1)] <- sf[2]*sf[3]*rr[1:(L1+1),(L2+1):2,(L3+1):2,1:(L4+1)]
  if (L3>0 & L4>0)
    rp[1:(L1+1),1:(L2+1),(P3-L3+1):P3,(P4-L4+1):P4] <- sf[3]*sf[4]*rr[1:(L1+1),1:(L2+1),(L3+1):2,(L4+1):2]
  if (L1>0 & L3>0)
    rp[(P1-L1+1):P1,1:(L2+1),(P3-L3+1):P3,1:(L4+1)] <- sf[1]*sf[3]*rr[(L1+1):2,1:(L2+1),(L3+1):2,1:(L4+1)]
  if (L2>0 & L4>0)
    rp[1:(L1+1),(P2-L2+1):P2,1:(L3+1),(P4-L4+1):P4] <- sf[2]*sf[4]*rr[1:(L1+1),(L2+1):2,1:(L3+1),(L4+1):2]
  if (L1>0 & L4>0)
    rp[(P1-L1+1):P1,1:(L2+1),1:(L3+1),(P4-L4+1):P4] <- sf[1]*sf[4]*rr[(L1+1):2,1:(L2+1),1:(L3+1),(L4+1):2]

  if (L1>0 & L2>0 & L3>0)
    rp[(P1-L1+1):P1,(P2-L2+1):P2,(P3-L3+1):P3,1:(L4+1)] <- sf[1]*sf[2]*sf[3]*rr[(L1+1):2,(L2+1):2,(L3+1):2,1:(L4+1)]
  if (L1>0 & L2>0 & L4>0)
    rp[(P1-L1+1):P1,(P2-L2+1):P2,1:(L3+1),(P4-L4+1):P4] <- sf[1]*sf[2]*sf[4]*rr[(L1+1):2,(L2+1):2,1:(L3+1),(L4+1):2]
  if (L2>0 & L3>0 & L4>0)
    rp[1:(L1+1),(P2-L2+1):P2,(P3-L3+1):P3,(P4-L4+1):P4] <- sf[2]*sf[3]*sf[4]*rr[1:(L1+1),(L2+1):2,(L3+1):2,(L4+1):2]

  if (L1>0 & L2>0 & L3>0 & L4>0)
    rp[(P1-L1+1):P1,(P2-L2+1):P2,(P3-L3+1):P3,(P4-L4+1):P4] <- sf[1]*sf[2]*sf[3]*sf[4]*rr[(L1+1):2,(L2+1):2,(L3+1):2,(L4+1):2]

  sp <- array(0,P)
  sp[1:M1,1:M2,1:M3,1:M4] <- ss            # zero-padded version of ss

  RR <- fft(rp)                       # Obtain FFT's of rr and ss
  SS <- fft(sp)
  tt <- fft(RR*SS,TRUE)               # invert element-wise product of FFT's
  return((Re(tt)/(P1*P2*P3*P4))[1:M1,1:M2,1:M3,1:M4]) # return normalized truncated tt
}
Any scripts or data that you put into this service are public.
shinyWGD documentation built on April 4, 2025, 2:28 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
shinyWGD
'Shiny' Application for Whole Genome Duplication Analysis

R/from_features.R
In shinyWGD: 'Shiny' Application for Whole Genome Duplication Analysis

Defines functions symconv4D.ks symconv3D.ks symconv2D.ks drvkde dfltCounts dfltBWrange SignifFeatureRegion

Documented in dfltBWrange dfltCounts drvkde SignifFeatureRegion symconv2D.ks symconv3D.ks symconv4D.ks

Try the shinyWGD package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

shinyWGD 'Shiny' Application for Whole Genome Duplication Analysis

R/from_features.R In shinyWGD: 'Shiny' Application for Whole Genome Duplication Analysis

Defines functions symconv4D.ks symconv3D.ks symconv2D.ks drvkde dfltCounts dfltBWrange SignifFeatureRegion

Documented in dfltBWrange dfltCounts drvkde SignifFeatureRegion symconv2D.ks symconv3D.ks symconv4D.ks

Try the shinyWGD package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

shinyWGD
'Shiny' Application for Whole Genome Duplication Analysis

R/from_features.R
In shinyWGD: 'Shiny' Application for Whole Genome Duplication Analysis