R/Internal_functions.R

# Internal functions
# Written by Kevin Potter
# email: kevin.w.potter@gmail.com
# Please email me directly if you
# have any questions or comments
# Last updated 2018-10-26

# Table of contents
# 1) area_trap
# 2) find_na
# 3) ert

# 1)
area_trap = function( a, b, h ) {
  # Purpose:
  # Computes the area of a trapezoid.
  # Arguments:
  # a - The first width
  # b - The second width
  # h - The height
  # Returns:
  # The area for the trapezoid

  out = h * (a + b)/2

  return( out )
}

# 2)
find_na = function( df, type = 1, col_index = NULL ) {
  # Purpose:
  # Given a data frame, either indicates whether
  # there are any missing values present in a
  # row, or indicates the frequency of missing
  # values per column.
  # Arguments:
  # df        - A data frame of observations
  # type      - If type == 1, returns a logical
  #             vector indicating which rows
  #             have any missing values. If
  #             type == 2, returns the number
  #             of missing values in each column
  # col_index - An optional index of which columns
  #             to examine
  # Returns:
  # Either returns a logical vector indicating
  # which rows have any missing values, or
  # returns the number of missing values in each
  # column.

  # Initialize output
  out = NULL

  # Select columns to check
  if ( !is.null( col_index ) ) {
    df = df[,col_index]
  }

  if ( type == 1 ) {
    out = apply( df, 1, function(x) any( is.na(x) ) )
  }
  if ( type == 2 ) {
    out = apply( df, 2, function(x) sum( is.na(x)) )
  }

  return( out )
}

# 3)
ert = function( start = NULL ) {
  # Purpose:
  # Function to estimate the run time of
  # a function using 'Sys.time()'.
  # Arguments:
  # start - Optional parameter indicating the
  #         start time at which the function
  #         began
  # Returns:
  # If start is NULL, returns the current time,
  # otherwise returns the difference between the
  # current time and the start time.

  if ( is.null( start ) ) {
    # Set start time
    start = Sys.time()
    return( start )
  } else {
    # Compute duration
    end = Sys.time()
    out = end - start
    return( out )
  }

}

# 4)
find_non_zero_glmnet = function( fit, one_se = T ) {
  # Purpose:
  # Extracts non-zero coefficients from a 'cv.glmnet'
  # object.
  # Arguments:
  # fit    - Output from 'cv.glmnet'
  # one_se - Logical; if TRUE, usings 'lambda.1se'
  # Returns:
  # A list with the non-zero coefficients and the
  # intercept.

  if ( one_se ) {
    bl = fit$lambda.1se
  } else {
    bl = fit$lambda.min
  }

  cf = coef( fit, s = bl )
  b0 = cf[1,1]
  nms = rownames( cf )
  nms[1] = 'Intercept'
  cf = cf[-1,1]
  sel = cf != 0
  cf = cf[sel]

  return( list( b = cf, b0 = b0 ) )
}
rettopnivek/binclass documentation built on May 13, 2019, 4:46 p.m.