R/mosaic_utils.R

Defines functions reeval_best_mod get_free_comb_mod multi_pipeline choose_best_mod get_mod_fits calc_param_exp calc_param_lin calc_param_echo_lin calc_param_echo calc_param_echo_lin_joint calc_param_echo_joint calc_param_echo_lin_joint_constr calc_param_echo_joint_constr normalize_all smoothing_all_untied smoothing_all_tied genes_unexpressed_all get_hs get_type_gam calc_start_echo_lin calc_start_echo calc_peaks smooth_y_val calc_weights_boot calc_var_boot calc_weights calc_var avg_all_rep fcn_single_one_rep fcn_single fcn_one_rep fcn echo_lin_joint_res_constr echo_joint_res_constr echo_joint echo_lin_joint exp_mod lin_mod echo_lin_mod alt_form echo_exp echo_lin echo_lin_exp echo_lin_lin

# MOSAIC Function Source
# By Hannah De los Santos
# Code description: Contains all the funcitons for extended harmonic oscillator work, in order to have less confusion between scripts.

# combined models ----

#' Function to represent the free model of ECHO linear and linear models
#'
#' @param a1 Amplitude, ECHO linear
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), ECHO linear
#' @param omega1 Radial frequency, ECHO linear
#' @param phi1 Phase Shift (radians), ECHO linear
#' @param slo1 Slope, ECHO linear
#' @param y_shift1 Equilibrium shift, ECHO linear
#' @param slo2 Slope, linear
#' @param y_shift2 Equilibrium shift, linear
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_lin_lin <- function(a1,gam1,omega1,phi1,slo1,y_shift1,slo2,y_shift2,t, s1, s2){
  return(((a1*exp(-1*gam1*(t)/2)*cos((omega1*t)+phi1)+slo1*t+y_shift1)*s1)+((slo2*t+y_shift2)*s2))
}

#' Function to represent the free model of ECHO linear and exponential models
#'
#' @param a1 Amplitude, ECHO linear
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), ECHO linear
#' @param omega1 Radial frequency, ECHO linear
#' @param phi1 Phase Shift (radians), ECHO linear
#' @param slo1 Slope, ECHO linear
#' @param y_shift1 Equilibrium shift, ECHO linear
#' @param a2 Amplitude, exponential
#' @param gamma2 Growth Rate, exponential
#' @param y_shift2 Equilibrium shift, exponential
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_lin_exp <- function(a1,gam1,omega1,phi1,slo1,y_shift1,a2,gamma2,y_shift2,t, s1, s2){
  return(((a1*exp(-1*gam1*(t)/2)*cos((omega1*t)+phi1)+slo1*t+y_shift1)*s1)+((a2*exp(gamma2*t)+y_shift2)*s2))
}

#' Function to represent the free model of ECHO and linear models
#'
#' @param a1 Amplitude, ECHO
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), ECHO
#' @param omega1 Radial frequency, ECHO
#' @param phi1 Phase Shift (radians), ECHO
#' @param y_shift1 Equilibrium shift, ECHO
#' @param slo2 Slope, linear
#' @param y_shift2 Equilibrium shift, linear
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_lin <- function(a1,gam1,omega1,phi1,y_shift1,slo2,y_shift2,t, s1, s2){
  return(((a1*exp(-1*gam1*(t)/2)*cos((omega1*t)+phi1)+y_shift1)*s1)+((slo2*t+y_shift2)*s2))
}

#' Function to represent the free model of ECHO and exponential models
#'
#' @param a1 Amplitude, ECHO
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), ECHO
#' @param omega1 Radial frequency, ECHO
#' @param phi1 Phase Shift (radians), ECHO
#' @param y_shift1 Equilibrium shift, ECHO
#' @param a2 Amplitude, exponential
#' @param gamma2 Growth Rate, exponential
#' @param y_shift2 Equilibrium shift, exponential
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_exp <- function(a1,gam1,omega1,phi1,y_shift1,a2,gamma2,y_shift2,t, s1, s2){
  return(((a1*exp(-1*gam1*(t)/2)*cos((omega1*t)+phi1)+y_shift1)*s1)+((a2*exp(gamma2*t)+y_shift2)*s2))
}

# models ----

#' Function to represent ECHO model
#'
#' @param a Amplitude
#' @param gam Amplitude.Change.Coefficient (amount of damping/forcing)
#' @param omega Radial frequency
#' @param phi Phase Shift (radians)
#' @param y_shift Equilibrium shift
#' @param t time
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
alt_form <- function(a,gam,omega,phi,y_shift,t){
  return (a*exp(-1*gam*(t)/2)*cos((omega*t)+phi)+y_shift)
}

#' Function to represent ECHO Linear model
#'
#' @param a Amplitude
#' @param gam Amplitude.Change.Coefficient (amount of damping/forcing)
#' @param omega Radial frequency
#' @param phi Phase Shift (radians)
#' @param slo Slope
#' @param y_shift Equilibrium shift
#' @param t time
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_lin_mod <- function(a,gam,omega,phi,slo,y_shift,t){
  return (a*exp(-1*gam*(t)/2)*cos((omega*t)+phi)+slo*t+y_shift)
}

#' Function to represent linear model
#'
#' @param slo Slope
#' @param y_shift Equilibrium shift
#' @param t time
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
lin_mod <- function(slo,y_shift,t){
  return(slo*t+y_shift)
}

#' Function to represent exponential model
#'
#' @param a Amplitude
#' @param gamma Growth Rate
#' @param y_shift Equilibrium shift
#' @param t time
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
exp_mod <- function(a,gamma,y_shift,t){
  return(a*exp(gamma*t)+y_shift)
}

#' Function to represent the ECHO linear completely joint model (no slack for protein)
#'
#' @param a1 Amplitude, rna
#' @param a2 Amplitude, protein
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), rna
#' @param gam2 Amplitude.Change.Coefficient (amount of damping/forcing), protein
#' @param omega1 Radial frequency, both
#' @param phi1 Phase Shift (radians), rna
#' @param phi2 Phase Shift (radians), protein
#' @param slo1 Slope, rna
#' @param slo2 Slope, protein
#' @param y_shift1 Equilibrium shift, rna
#' @param y_shift2 Equilibrium shift, protein
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_lin_joint <- function(a1,a2, gam1,gam2, omega1, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2){
  return ((a1*s1+a2*s2)*exp(-1*(gam1*s1+gam2*s1)*(t)/2)*cos((omega1*t)+(phi1*s1+phi2*s2))+((slo1*s1 + slo2*s2)*t)+(y_shift1*s1+y_shift2*s2))
}

#' Function to represent the ECHO completely joint model (no slack for protein)
#'
#' @param a1 Amplitude, rna
#' @param a2 Amplitude, protein
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), rna
#' @param gam2 Amplitude.Change.Coefficient (amount of damping/forcing), protein
#' @param omega1 Radial frequency, both
#' @param phi1 Phase Shift (radians), rna
#' @param phi2 Phase Shift (radians), protein
#' @param y_shift1 Equilibrium shift, rna
#' @param y_shift2 Equilibrium shift, protein
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_joint <- function(a1,a2, gam1,gam2, omega1, phi1,phi2, y_shift1,y_shift2, t, s1, s2){
  return ((a1*s1+a2*s2)*exp(-1*(gam1*s1+gam2*s2)*(t)/2)*cos((omega1*t)+(phi1*s1+phi2*s2))+(y_shift1*s1+y_shift2*s2))
}

#' Function to represent the ECHO joint model (with slack for protein)
#'
#' @param a1 Amplitude, rna
#' @param a2 Amplitude, protein
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), rna
#' @param gam2 Amplitude.Change.Coefficient (amount of damping/forcing), protein
#' @param omega1 Period (hours)
#' @param change Change in period for protein (hours)
#' @param phi1 Phase Shift (radians), rna
#' @param phi2 Phase Shift (radians), protein
#' @param y_shift1 Equilibrium shift, rna
#' @param y_shift2 Equilibrium shift, protein
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#' @param resol resolution
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_joint_res_constr <- function(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, y_shift1,y_shift2, t, s1, s2, resol){
  return ((a1*s1+a2*s2)*exp(-1*(gam1*s1+gam2*s2)*(t)/2)*cos(((2*pi/(omega1*s1+((omega1+chang)*s2)))*t)+(phi1*s1+phi2*s2))+(y_shift1*s1+y_shift2*s2))
}

#' Function to represent the ECHO linear joint model (with slack for protein)
#'
#' @param a1 Amplitude, rna
#' @param a2 Amplitude, protein
#' @param gam1 Amplitude.Change.Coefficient (amount of damping/forcing), rna
#' @param gam2 Amplitude.Change.Coefficient (amount of damping/forcing), protein
#' @param omega1 Period (hours)
#' @param change Change in period for protein (hours)
#' @param phi1 Phase Shift (radians), rna
#' @param phi2 Phase Shift (radians), protein
#' @param slo1 Slope, rna
#' @param slo2 Slope, protein
#' @param y_shift1 Equilibrium shift, rna
#' @param y_shift2 Equilibrium shift, protein
#' @param t time
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#' @param resol resolution
#'
#' @return numerical vector of model outputs
#' @keywords internal
#' @noRd
echo_lin_joint_res_constr <- function(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2, resol){
  return ((a1*s1+a2*s2)*exp(-1*(gam1*s1+gam2*s2)*(t)/2)*cos(((2*pi/(omega1*s1+((omega1+chang)*s2)))*t)+(phi1*s1+phi2*s2))+((slo1*s1 + slo2*s2)*t)+(y_shift1*s1+y_shift2*s2))
}

# optimization support functions ----

#' Function to calculate optimization objective for weighted nls (with replicates)
#'
#' @param p paramters
#' @param t time points
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#' @param w weights
#' @param y experimental data
#' @param fcall function call (model)
#' @param jcall jacobian call
#'
#' @return numeric vector of objective function
#' @keywords internal
#' @noRd
fcn <- function(p, t, s1, s2, w, y, fcall, jcall){
  sqrt(w)*(y - do.call("fcall", c(list(t = t, s1 =s1, s2 = s2), as.list(p))))
}

#' Function to calculate optimization objective for non-weighted nls (with no replicates)
#'
#' @param p paramters
#' @param t time points
#' @param s1 binary vector, with 1's corresponding to rna data
#' @param s2 binary vector, with 1's corresponding to protein data
#' @param y experimental data
#' @param fcall function call (model)
#' @param jcall jacobian call
#'
#' @return numeric vector of objective function
#' @keywords internal
#' @noRd
fcn_one_rep <- function(p, t, s1, s2, y, fcall, jcall){
  (y - do.call("fcall", c(list(t = t, s1 =s1, s2 = s2), as.list(p))))
}

#' Function to calculate optimization objective for weighted nls (with replicates) for one omics type
#'
#' @param p paramters
#' @param t time points
#' @param w weights
#' @param y experimental data
#' @param fcall function call (model)
#' @param jcall jacobian call
#'
#' @return numeric vector of objective function
#' @keywords internal
#' @noRd
fcn_single <- function(p, t, w, y, fcall, jcall){
  sqrt(w)*(y - do.call("fcall", c(list(t = t), as.list(p))))
}

#' Function to calculate optimization objective for non-weighted nls (with no replicates) for one omics type
#'
#' @param p paramters
#' @param t time points
#' @param y experimental data
#' @param fcall function call (model)
#' @param jcall jacobian call
#'
#' @return numeric vector of objective function
#' @keywords internal
#' @noRd
fcn_single_one_rep <- function(p, t, y, fcall, jcall){
  (y - do.call("fcall", c(list(t = t), as.list(p))))
}

# supports ----

#' Function to calculate the average of all replicates. Used primarily for cases with multiple replicates.
#'
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#' @param timen numeric vector of time points
#'
#' @return a matrix of means of gene expressions for replicates
#' @keywords internal
#' @noRd
avg_all_rep <- function(num_reps, genes, timen){
  # originally based on heat map code, but it will work fine here

  #get matrix of just the relative expression over time
  hm_mat <- as.matrix(genes[,2:ncol(genes)])

  #if there are replicates, average the relative expression for each replicate
  mtx_reps <- list() # to store actual replicate matrix
  mtx_count <- list() # to store how many are NA
  for (i in 1:num_reps){
    mtx_reps[[i]] <- hm_mat[, seq(i,ncol(hm_mat), by=num_reps)]
    mtx_count[[i]] <- is.na(mtx_reps[[i]])
    mtx_reps[[i]][is.na(mtx_reps[[i]])] <- 0
  }
  repmtx <- matrix(0L,ncol = length(timen),nrow = nrow(hm_mat))+num_reps # to store how many we should divide by
  hm_mat <- matrix(0L,ncol = length(timen),nrow = nrow(hm_mat)) # to store the final result
  for (i in 1:num_reps){
    hm_mat <- hm_mat + mtx_reps[[i]] # sum the replicates
    repmtx <- repmtx - mtx_count[[i]] # how many replicates are available for each time point
  }
  repmtx[repmtx==0] <- NA # to avoid division by 0 and induce NAs if there are no time points available
  hm_mat <- hm_mat/repmtx

  return(hm_mat)
}

#' Function to calculate the variance of replicates at a certain time point
#'
#' @param x time point
#' @param current_gene row number of gene being examined
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#'
#' @return the variance of replicates at a certain time point
#' @importFrom stats var
#' @keywords internal
#' @noRd
calc_var <- function(x, current_gene, num_reps, genes){
  eps <- 1e-7 # slight adjustment for 0 variance case
  std2 <- var(unlist(genes[current_gene,c(x:(num_reps-1+x))]), na.rm = TRUE) # calc variance
  if (is.na(std2)){ # possible bug for NA
    std2 <- 1
  }
  return(1/(std2+eps))
}

#' Function to calculate the weights for replicate fitting (these weights are the inverse of the variance at each time point)
#'
#' @param current_gene row number of gene being examined
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#'
#' @return the variance of replicates at a certain time point
#' @keywords internal
#' @noRd
calc_weights <- function(current_gene, num_reps, genes){
  return(sapply(seq(2,ncol(genes), by = num_reps), function(x) calc_var(x,current_gene,num_reps, genes)))
}

#' Function to calculate the variance of replicates at a certain time point for one gene
#'
#' @param x time point
#' @param boot_gene gene time course being examined
#' @param num_reps number of replicates
#'
#' @return the variance of replicates at a certain time point
#' @importFrom stats var
#' @keywords internal
#' @noRd
calc_var_boot <- function(x, boot_gene, num_reps){
  eps <- 1e-7 # slight adjustment for 0 variance case
  std2 <- var(unlist(boot_gene[c(x:(num_reps+x))]), na.rm = TRUE) # calc variance
  if (is.na(std2)){ # possible bug for NA
    std2 <- 1
  }
  return(1/(std2+eps))
}

#' Function to calculate the weights for replicate fitting for one gene (these weights are the inverse of the variance at each time point)
#'
#' @param boot_gene gene time course being examined
#' @param num_reps number of replicates
#'
#' @return the variance of replicates at a certain time point
#' @keywords internal
#' @noRd
calc_weights_boot <- function(boot_gene, num_reps){
  return(sapply(seq(1,length(boot_gene), by = num_reps), function(x) calc_var_boot(x,boot_gene,num_reps)))
}

#' Function to smooth, with weighting, a given averaged expression
#'
#' @param y_val vector of averaged expressions
#' @param timen numeric vector of time points
#'
#' @return smoothed y_val
#' @keywords internal
#' @noRd
smooth_y_val <- function(y_val, timen){
  # smoothing the starting averages - weighted or unweighted?
  #get matrix of just the relative expression over time
  all_reps <- matrix(c(y_val), nrow = 1, ncol = length(y_val))

  # weighted averaging

  #if there are replicates, average the relative expression for each replicate
  center_reps <- list() # to store actual replicate matrix
  mtx_count <- list() # to store how many are NA
  for (i in 1:1){
    center_reps[[i]] <- all_reps[, seq(i,ncol(all_reps), by=1)]*2
    mtx_count[[i]] <- is.na(center_reps[[i]])*2
    center_reps[[i]][is.na(center_reps[[i]])] <- 0
  }

  repmtx_l <- list() # store amount to divide by for each rep
  repmtx <- matrix(0L,ncol = length(timen),nrow = nrow(all_reps))+4 # to store how many we should divide by
  repmtx[,c(1,ncol(repmtx))] <- repmtx[,c(1,ncol(repmtx))] - 1 # we only divide by 3 at edges

  # sum the replicates
  left <- c(rep(0,1),center_reps[[i]][-length(center_reps[[i]])]/2) # left shifted matrix
  right <- c(center_reps[[i]][-1]/2,rep(0,1)) # right shifted matrix
  center_reps[[i]] <- left + center_reps[[i]] + right

  # figure out how many replicates are actually available for each time point
  left_na <- c(rep(0,1),mtx_count[[i]][-length(mtx_count[[i]])]/2) # left shifted matrix
  right_na <- c(mtx_count[[i]][-1]/2,rep(0,1)) # right shifted matrix
  repmtx_l[[i]] <- repmtx - left_na - mtx_count[[i]] - right_na
  # to avoid division by 0 and induce NAs if there are no time points available
  repmtx_l[[i]][repmtx_l[[i]]==0] <- NA

  dat <- all_reps
  x <- 0
  # averaging to get smoothed replicates
  dat[,seq(1+x,ncol(all_reps),by=1)] <- center_reps[[x+1]]/repmtx_l[[x+1]]
  dat[is.na(all_reps)] <- NA # do not impute missing values

  return(dat[1,])
}

#' Function to calculate the "peaks" vector for starting points for echo-type models, "peaks" can be peaks or troughs
#'
#' @param y_val vector of averaged expressions
#' @param resol resolution
#' @param y0 initial equilibrium shift
#' @param timen numeric vector of time points
#'
#' @return list with numeric vector of peaks, and the time points for those peaks
#' @keywords internal
#' @noRd
calc_peaks <- function(y_val, resol, y0, timen){
  # figure out the resolution modifier (must have at least 24 hour data)
  mod <- 0
  if (resol <= ((1/12)+10^-8)){ # 17 hour surround
    mod <- 102
  } else if (resol <= ((1/6)+10^-8)){ # 15 hour surround
    mod <- 45
  } else if (resol <= ((1/4)+10^-8)){ # 13 hour surround
    mod <- 26
  } else if (resol <= ((1/2)+10^-8)){ # 11 hour surround
    mod <- 11
  } else if (resol <= 1){ # 8 hour surround
    mod <- 4
  } else if (resol <=2){ # 6 hour surround
    mod <- 3
  } else if (resol <= 4){ # 4 hour surround
    mod <- 2
  } else{
    mod <- 1
  }

  # calculate the amount of peaks
  peaks <- c(); # vector of peak values
  peaks_time <- c(); # vector of peak timen
  counting <- 1; # counter
  # go through all time points and calculate peaks (max values)
  for(i in (mod+1):(length(y_val)-mod)){
    # deal with complete missingness
    if (suppressWarnings(max(y_val[(i-mod):(i+mod)], na.rm = TRUE)) == -Inf | is.na(y_val[i])){
      next
    }
    # otherwise continue as normal
    if (y_val[i] == max(y_val[(i-mod):(i+mod)], na.rm = TRUE)){
      peaks[counting] <- y_val[i]
      peaks_time[counting] <- timen[i]
      counting <- counting+1
    }
  }

  # calculate amount of troughs
  troughs <- c() # vector of trough values
  troughs_time <- c() # vector of trough times
  counting <- 1 # counter
  # go through all time points and calculate peaks (max values)
  for(i in (mod+1):(length(y_val)-mod)){
    # deal with complete missingness
    if (suppressWarnings(min(y_val[(i-mod):(i+mod)], na.rm = TRUE)) == -Inf | is.na(y_val[i])){
      next
    }
    # otherwise continue as normal
    if (y_val[i] == min(y_val[(i-mod):(i+mod)], na.rm = TRUE)){
      troughs[counting] <- y_val[i]
      troughs_time[counting] <- timen[i]
      counting <- counting+1
    }
  }

  peaks_per_diff <- 0
  troughs_per_diff <- 0
  if (length(peaks)>1 & length(troughs)>1){
    # new criterion: which peaks are more evenly distributed?
    # what should the period be, based on the amount of peaks/troughs seen in the time course
    peaks_per <- length(timen)*resol/length(peaks)
    troughs_per <- length(timen)*resol/length(troughs)

    # get avg period, calculated as the difference in time between successive peaks
    peaks_avg_per <- mean(sapply(seq(length(peaks_time),2,-1),
                                 function(x){peaks_time[x]-peaks_time[x-1]}))
    troughs_avg_per <- mean(sapply(seq(length(troughs_time),2,-1),
                                   function(x){troughs_time[x]-troughs_time[x-1]}))

    # calculate the difference between the theoretical and estimated period
    peaks_per_diff <- abs(peaks_per-peaks_avg_per)
    troughs_per_diff <- abs(troughs_per-troughs_avg_per)
  }

  # if there is only one peak, or if our troughs are more consistent, we choose troughs
  if ((length(peaks)<=1 & length(troughs)>length(peaks)) |
      (troughs_per_diff < peaks_per_diff)){
    # flip the troughs for accurate calculations later
    # absolute value the difference from the midline
    peaks <- abs(y0 - troughs)
    peaks_time <- troughs_time
  } else {# else we choose peaks
    # absolute value the difference from the midline
    peaks <- abs(peaks - y0)
  }
  # it's possible for this not to work if you don't have more than one
  # oscillation over your time course
  # }

  return(list("peaks"=peaks, "peaks_time"=peaks_time))

}

#' Function to calculate the starting points for echo models
#'
#' @param y_val vector of averaged expressions
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param timen numeric vector of time points
#' @param resol resolution
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list with all starting points for parameters (amplitude, ac coeff, frequency, phase shift, equilibrium shift)
#' @noRd
calc_start_echo <- function(y_val, over_cut, timen, resol, low, high){
  # smooth the expression
  y_val <- smooth_y_val(y_val, timen)

  # calculate starting y_shift
  y0 <- mean(y_val,na.rm = TRUE) # intial value for the equilibrium shift
  if (y0 < 10^-10 && y0 > -10^-10){
    y0 <- 10^-8 # avoiding 0 mean, which makes gradient singular
  }

  # calculate peaks
  peaks_list <- calc_peaks(y_val, resol, y0, timen)
  peaks <- peaks_list$peaks
  peaks_time <- peaks_list$peaks_time

  # calc starting amplitude
  if (length(peaks) >0){
    a0 <- abs(peaks[1]) # y0 already removed
  } else {
    a0 <- max(y_val,na.rm = TRUE) - y0
  }

  # intial value for gamma
  if (length(peaks)==0){ # if there are no peaks, we account for that
    gam0 <- 0
  } else if (which.max(peaks)==1){ # if the highest peak is first, then damping is likely
    if (length(peaks)>1){
      # trying to figure out gamma based on logarithmic decrement
      n <- 1
      # n <- peaks_time[2]-peaks_time[1]
      log_dec <- (1/n)*log(abs(peaks[1]/peaks[2]))
      gam0 <- 1/(sqrt(1+((2*pi/log_dec)^2)))/2
    } else{ # use a guess
      gam0 <- .01
    }
  } else{ # otherwise forcing is likely
    if (length(peaks)>1){
      # trying to figure out gamma based on logarithmic decrement
      n <- 1
      # n <- peaks_time[2]-peaks_time[1]
      log_dec <- (1/n)*log(abs(peaks[2]/peaks[1]))
      gam0 <- -1*1/(sqrt(1+((2*pi/log_dec)^2)))/2
    } else{ # use a guess
      gam0 <- -.01
    }
  }

  # restricting ac coeff to not be overexpressed/repressed
  if (gam0 < -over_cut){
    gam0 <- -over_cut
  } else if (gam0 > over_cut){
    gam0 <- over_cut
  }

  # let frequency depend on amount of peaks = (length(timen)*resol/(no of peaks+1 [accounts for phase shift])
  if (length(peaks) == 0){
    if (high == -Inf || low == Inf){
      w0 <- 2*pi/(length(timen)*resol/2)
    } else{
      # want to get their actual integer period values
      highfix <- (high/2/pi)^-1
      lowfix <- (low/2/pi)^-1
      w0 <- 2*pi/(length(timen)*resol/((highfix+lowfix)/2))
    }
  } else if (length(peaks) == 1){ # phase shift causes only one peak to appear
    w0 <- 2*pi/(length(timen)*resol/(length(peaks)+1))
  } else{
    w0 <- 2*pi/(length(timen)*resol/(length(peaks)))
  }

  # can't be outside the specified parameters
  if (w0 > low){
    w0 <- low
  } else if (w0 < high){
    w0 <- high
  }


  # we estimate our phase shift on the second and third nonmissing sets of points for accuracy
  # if you have less than 3 points nonmissing, I have no hope for you

  # higher fidelity guess for higher amounts of points
  # this still works for 3 points,
  beg1 <- which(!is.na(y_val))[2]
  beg2 <- which(!is.na(y_val))[3]
  # middle
  mid1 <- which(!is.na(y_val))[floor(length(timen)/2)]
  mid2 <- which(!is.na(y_val))[floor(length(timen)/2)+1]
  # end
  en1 <- which(!is.na(y_val))[length(timen)-2]
  en2 <- which(!is.na(y_val))[length(timen)-1]

  min_vect <- rep(Inf, length(0:11))
  for (i in 0:11){
    # if the phase shift at the beginning, middle, and end are the smallest value available for the fitted value
    min_vect[i+1] <- sum(abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[beg1])-y_val[beg1]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[mid1])-y_val[mid1]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[en1])-y_val[en1]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[beg2])-y_val[beg2]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[mid2])-y_val[mid2]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[en2])-y_val[en2]))
  }
  phi0 <- (which.min(abs(min_vect))-1)*pi/6 # intial value for phase shift

  # form all starting parameters into a nice list
  start_param <- list(gam=gam0,a=a0,omega=w0,phi=phi0,y_shift=y0)

  return(start_param)
}

#' Function to calculate the starting points for echo linear models
#'
#' @param y_val vector of averaged expressions
#' @param slo0 initial slope
#' @param y_shift0 initial equilibrium value
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param timen numeric vector of time points
#' @param resol resolution
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list with all starting points for parameters (amplitude, ac coeff, frequency, phase shift, slope, equilibrium shift)
#' @keywords internal
#' @noRd
calc_start_echo_lin <- function(y_val, slo0, y_shift0, over_cut, timen, resol, low, high){
  # subtract sloped baseline
  y_val <- y_val - lin_mod(slo0,y_shift0,timen)

  # smooth y_val
  y_val <- smooth_y_val(y_val, timen)

  # calculate starting y_shift -- only used here for calculations
  y0 <- mean(y_val,na.rm = TRUE) # intial value for the equilibrium shift
  if (y0 < 10^-10 && y0 > -10^-10){
    y0 <- 10^-8 # avoiding 0 mean, which makes gradient singular
  }

  # calculate peaks
  peaks_list <- calc_peaks(y_val, resol, y0, timen)
  peaks <- peaks_list$peaks
  peaks_time <- peaks_list$peaks_time

  # calc starting amplitude
  if (length(peaks) >0){
    a0 <- abs(peaks[1]) # y0 already removed from peaks
  } else {
    a0 <- max(y_val,na.rm = TRUE) - y0
  }

  # intial value for gamma
  if (length(peaks)==0){ # if there are no peaks, we account for that
    gam0 <- 0
  } else if (which.max(peaks)==1){ # if the highest peak is first, then damping is likely
    if (length(peaks)>1){
      # trying to figure out gamma based on logarithmic decrement
      n <- 1
      # n <- peaks_time[2]-peaks_time[1]
      log_dec <- (1/n)*log(abs(peaks[1]/peaks[2]))
      gam0 <- 1/(sqrt(1+((2*pi/log_dec)^2)))/2
    } else{ # use a guess
      gam0 <- .01
    }
  } else{ # otherwise forcing is likely
    if (length(peaks)>1){
      # trying to figure out gamma based on logarithmic decrement
      n <- 1
      # n <- peaks_time[2]-peaks_time[1]
      log_dec <- (1/n)*log(abs(peaks[2]/peaks[1]))
      gam0 <- -1*1/(sqrt(1+((2*pi/log_dec)^2)))/2
    } else{ # use a guess
      gam0 <- -.01
    }
  }

  # restricting ac coeff to not be overexpressed/repressed
  if (gam0 < over_cut){
    gam0 <- -over_cut
  } else if (gam0 > over_cut){
    gam0 <- over_cut
  }

  # let frequency depend on amount of peaks = (length(timen)*resol/(no of peaks+1 [accounts for phase shift])
  if (length(peaks) == 0){
    if (high == -Inf || low == Inf){
      w0 <- 2*pi/(length(timen)*resol/2)
    } else{
      # want to get their actual integer period values
      highfix <- (high/2/pi)^-1
      lowfix <- (low/2/pi)^-1
      w0 <- 2*pi/(length(timen)*resol/((highfix+lowfix)/2))
    }
  } else if (length(peaks) == 1){ # phase shift causes only one peak to appear
    w0 <- 2*pi/(length(timen)*resol/(length(peaks)+1))
  } else{
    w0 <- 2*pi/(length(timen)*resol/(length(peaks)))
  }

  # can't be outside the specified parameters
  if (w0 > low){
    w0 <- low
  } else if (w0 < high){
    w0 <- high
  }


  # we estimate our phase shift on the second and third nonmissing points for accuracy
  # if you have less than 3 points nonmissing, I have no hope for you
  # higher fidelity guess for higher amounts of points
  # this still works for 3 points
  # beginning
  beg1 <- which(!is.na(y_val))[2]
  beg2 <- which(!is.na(y_val))[3]
  # middle
  mid1 <- which(!is.na(y_val))[floor(length(timen)/2)]
  mid2 <- which(!is.na(y_val))[floor(length(timen)/2)+1]
  # end
  en1 <- which(!is.na(y_val))[length(timen)-2]
  en2 <- which(!is.na(y_val))[length(timen)-1]

  min_vect <- rep(10000, length(0:11))
  for (i in 0:11){
    # if the phase shift at both the second and third gene values are the smallest value available for the fitted value
    min_vect[i+1] <- sum(abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[beg1])-y_val[beg1]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[mid1])-y_val[mid1]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[en1])-y_val[en1]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[beg2])-y_val[beg2]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[mid2])-y_val[mid2]),
                         abs(alt_form(a0,gam0,w0,(i*pi/6),y0,timen[en2])-y_val[en2]))
  }
  phi0 <- (which.min(abs(min_vect))-1)*pi/6 # intial value for phase shift

  # form all starting parameters into a nice list
  start_param <- list(gam=gam0,a=a0,omega=w0,phi=phi0,slo=slo0,y_shift=y_shift0)

  return(start_param)
}

#' Function to categorize ac coefficient into categories
#'
#' @param gam ac coefficient
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#'
#' @return string, ac coefficient categories
#' @keywords internal
#' @noRd
get_type_gam <- function(gam, harm_cut, over_cut){
  # categorize gamma in ac coeff categories
  if (gam < -over_cut){
    type_gam <- "Overexpressed"
  } else if (gam <= -harm_cut){
    type_gam <- "Forced"
  } else if (gam <= harm_cut){
    type_gam <- "Harmonic"
  } else if (gam <= over_cut){
    type_gam <- "Damped"
  } else{
    type_gam <- "Repressed"
  }
  return(type_gam)
}

#' Function to calculate hours shifted (phase shift, in hours) for ECHO-type rhythms
#'
#' @param a initial amplitude
#' @param phi phase shift, radians
#' @param omega radian frequency
#'
#' @return numeric, phase shift, in hours, relative to the 0 specified in the time course
#' @keywords internal
#' @noRd
get_hs <- function(a,phi,omega){
  # calculating the phase shift in terms of period (omega inverse of period)
  if (!is.na(a)){ # all param will either be na or not
    if (a >= 0){ # positive amplitudes
      if (phi > 0){ # shift to the left
        frac_part <- (phi/(2*pi)) - trunc(phi/(2*pi))
        dist_peak <- frac_part*(2*pi/omega) # distance from first peak
        phase_hours <- (2*pi/omega)-dist_peak
      } else { # shift to the right
        frac_part <- (phi/(2*pi)) - trunc(phi/(2*pi))
        dist_peak <- frac_part*(2*pi/omega) # distance from first peak
        phase_hours <- abs(dist_peak)
      }
    } else { # negative ampltitudes
      if (phi > 0){ # shift to the left
        frac_part <- (phi/(2*pi)) - trunc(phi/(2*pi))
        dist_peak <- frac_part*(2*pi/omega) # distance from first peak
        if (abs(frac_part) < .5){
          phase_hours <- (2*pi/omega)-dist_peak - (2*pi/omega/2)
        } else {
          phase_hours <- (2*pi/omega)-dist_peak + (2*pi/omega/2)
        }
      } else { # shift to the right
        frac_part <- (phi/(2*pi)) - trunc(phi/(2*pi))
        dist_peak <- frac_part*(2*pi/omega) # distance from first peak
        if (abs(frac_part) < .5){
          phase_hours <- abs(dist_peak) + (2*pi/omega/2)
        } else {
          phase_hours <- abs(dist_peak) - (2*pi/omega/2)
        }
      }
    }
  } else {
    phase_hours <- NA
  }

  return(phase_hours)
}

#' Function for determining whether gene values are unexpressed (less than 70% expressed (i.e., below specified cutoff)) for full matrix
#'
#' @param rem_unexpr_amt what percentage of the time course must be expressed
#' @param rem_unexpr_amt_below cutoff for expression
#'
#' @return boolean if there is 70% expression
#' @keywords internal
#' @noRd
genes_unexpressed_all <- function(genes, rem_unexpr_amt, rem_unexpr_amt_below){
  #get matrix of just the relative expression over time
  all_reps <- as.matrix(genes[,2:ncol(genes)])
  # get how many genes are expressed for each gene
  tot_expressed <- rowSums(abs(all_reps) > abs(rem_unexpr_amt_below),na.rm = TRUE)

  # return true if amount is less than threshold
  return(tot_expressed <= (ncol(all_reps)*rem_unexpr_amt))
}

#' Function to calculate a rolling average (3-wide window) for a set of data (smoothing) for paired
#' (tied) replicates
#'
#' @param is_weighted logical if there is smoothing, is it weighted (1,2,1) smoothing, or unweighted smoothing (1,1,1)
#' @param num_reps number of replicates
#'
#' @return smoothed data
#' @keywords internal
#' @noRd
smoothing_all_tied <- function(is_weighted, num_reps, genes, avg_genes, timen){
  # originally based on heat map code, but it will work fine here

  #get matrix of just the relative expression over time
  all_reps <- as.matrix(genes[,2:ncol(genes)])

  if (!is_weighted){

    #if there are replicates, average the relative expression for each replicate
    center_reps <- list() # to store actual replicate matrix
    mtx_count <- list() # to store how many are NA
    for (i in 1:num_reps){
      center_reps[[i]] <- all_reps[, seq(i,ncol(all_reps), by=num_reps)]
      mtx_count[[i]] <- is.na(center_reps[[i]])
      center_reps[[i]][is.na(center_reps[[i]])] <- 0
    }

    repmtx_l <- list() # store amount to divide by for each rep
    repmtx <- matrix(0L,ncol = length(timen),nrow = nrow(all_reps))+3 # to store how many we should divide by
    repmtx[,c(1,ncol(repmtx))] <- repmtx[,c(1,ncol(repmtx))] - 1 # we only divide by 2 at edges
  } else{ # weighted averaging

    #if there are replicates, average the relative expression for each replicate
    center_reps <- list() # to store actual replicate matrix
    mtx_count <- list() # to store how many are NA
    for (i in 1:num_reps){
      center_reps[[i]] <- all_reps[, seq(i,ncol(all_reps), by=num_reps)]*2
      mtx_count[[i]] <- is.na(center_reps[[i]])*2
      center_reps[[i]][is.na(center_reps[[i]])] <- 0
    }

    repmtx_l <- list() # store amount to divide by for each rep
    repmtx <- matrix(0L,ncol = length(timen),nrow = nrow(all_reps))+4 # to store how many we should divide by
    repmtx[,c(1,ncol(repmtx))] <- repmtx[,c(1,ncol(repmtx))] - 1 # we only divide by 3 at edges

  }
  for (i in 1:num_reps){
    # sum the replicates
    left <- cbind(matrix(0,nrow(all_reps),1),center_reps[[i]][,-ncol(center_reps[[i]])]/2) # left shifted matrix
    right <- cbind(center_reps[[i]][,-1]/2,matrix(0,nrow(genes),1)) # right shifted matrix
    center_reps[[i]] <- left + center_reps[[i]] + right

    # figure out how many replicates are actually available for each time point
    left_na <- cbind(matrix(0,nrow(all_reps),1),mtx_count[[i]][,-ncol(mtx_count[[i]])]/2) # left shifted matrix
    right_na <- cbind(mtx_count[[i]][,-1]/2,matrix(0,nrow(genes),1)) # right shifted matrix
    repmtx_l[[i]] <- repmtx - left_na - mtx_count[[i]] - right_na
    # to avoid division by 0 and induce NAs if there are no time points available
    repmtx_l[[i]][repmtx_l[[i]]==0] <- NA
  }

  dat <- genes
  for (x in 0:(num_reps-1)){
    dat[,seq(2+x,ncol(genes),by=num_reps)] <- center_reps[[x+1]]/repmtx_l[[x+1]]
  }
  dat[is.na(genes)] <- NA # do not impute missing values
  return(dat)
}

#' Function to calculate a rolling average (3-wide window) for a set of data (smoothing) for paired
#' (tied) replicates
#'
#' @param is_weighted logical if there is smoothing, is it weighted (1,2,1) smoothing, or unweighted smoothing (1,1,1)
#' @param num_reps number of replicates
#'
#' @return smoothed data
#' @keywords internal
#' @noRd
smoothing_all_untied <- function(is_weighted, num_reps, genes, avg_genes, timen){
  #get matrix of just the relative expression over time
  all_reps <- as.matrix(genes[,2:ncol(genes)])

  if (!is_weighted){

    #if there are replicates, average the relative expression for each replicate
    center_reps <- list() # to store actual replicate matrix
    mtx_count <- list() # to store how many are

    side_reps <- avg_genes
    mtx_side_count <- is.na(side_reps)
    side_reps[is.na(side_reps)] <- 0
    for (i in 1:num_reps){
      center_reps[[i]] <- all_reps[, seq(i,ncol(all_reps), by=num_reps)]
      mtx_count[[i]] <- is.na(center_reps[[i]])
      center_reps[[i]][is.na(center_reps[[i]])] <- 0
    }

    repmtx_l <- list() # store amount to divide by for each rep
    repmtx <- matrix(0L,ncol = length(timen),nrow = nrow(all_reps))+3 # to store how many we should divide by
    repmtx[,c(1,ncol(repmtx))] <- repmtx[,c(1,ncol(repmtx))] - 1 # we only divide by 3 at edges
  } else{ # weighted averaging

    #if there are replicates, average the relative expression for each replicate
    center_reps <- list() # to store actual replicate matrix
    mtx_count <- list() # to store how many are

    side_reps <- avg_genes
    mtx_side_count <- is.na(side_reps)
    side_reps[is.na(side_reps)] <- 0

    for (i in 1:num_reps){
      center_reps[[i]] <- all_reps[, seq(i,ncol(all_reps), by=num_reps)]*2
      mtx_count[[i]] <- is.na(center_reps[[i]])*2
      center_reps[[i]][is.na(center_reps[[i]])] <- 0
    }

    repmtx_l <- list() # store amount to divide by for each rep
    repmtx <- matrix(0L,ncol = length(timen),nrow = nrow(all_reps))+4 # to store how many we should divide by
    repmtx[,c(1,ncol(repmtx))] <- repmtx[,c(1,ncol(repmtx))] - 1 # we only divide by 3 at edges

  }
  for (i in 1:num_reps){
    # sum the replicates
    left <- cbind(matrix(0,nrow(genes),1),side_reps[,-ncol(side_reps)]) # left shifted matrix
    right <- cbind(side_reps[,-1],matrix(0,nrow(genes),1)) # right shifted matrix
    center_reps[[i]] <- left + center_reps[[i]] + right

    # figure out how many replicates are actually available for each time point
    left_na <- cbind(matrix(0,nrow(all_reps),1),mtx_side_count[,-ncol(mtx_side_count)]) # left shifted matrix
    right_na <- cbind(mtx_side_count[,-1],matrix(0,nrow(genes),1)) # right shifted matrix
    repmtx_l[[i]] <- repmtx - left_na - mtx_count[[i]] - right_na
    # to avoid division by 0 and induce NAs if there are no time points available
    repmtx_l[[i]][repmtx_l[[i]]==0] <- NA
  }

  dat <- genes # assigning to dataframe to return
  for (x in 0:(num_reps-1)){
    dat[,seq(2+x,ncol(genes),by=num_reps)] <- center_reps[[x+1]]/repmtx_l[[x+1]]
  }
  dat[is.na(genes)] <- NA # do not impute missing values
  return(dat)
}

#' Function to normalize expressions in a matricized manner, by row.
#'
#' @param genes data frame of genes with the following specifications: first row is column labels, first column has gene labels/names, and all other columns have expression data. This expression data must be ordered by time point then by replicate, and must have evenly spaced time points. Any missing data must have cells left blank.
#' @return normalized expression data
#' @keywords internal
#' @noRd
normalize_all <- function(genes){

  #get matrix of just the relative expression over time
  all_reps <- as.matrix(genes[,2:ncol(genes)])

  # vector of row means
  all_row_mean <- rowMeans(all_reps, na.rm = TRUE)
  # vector of standard deviations
  all_row_stdev <- sqrt(rowSums((all_reps - rowMeans(all_reps,na.rm = TRUE))^2, na.rm = TRUE)/(dim(all_reps)[2] - 1))

  # get the matrix of normalized expressions
  all_reps_normal <- (all_reps - all_row_mean)/all_row_stdev
  # if standard deviation is 0, imposes NA, so this expression shouldn't be considered anyway
  # and is now constant
  all_reps_normal[is.na(all_reps_normal)] <- 0

  # create dataframe with normalized expressions
  dat <- genes
  dat[,-1] <- all_reps_normal
  dat[is.na(genes)] <- NA # do not impute missing values

  return(list("dat"=dat, "means"=all_row_mean, "stdevs"=all_row_stdev))
}

#' Function for getting an object of class nlsModel, EDITED VERY SPECIFICALLY FOR MOSAIC PROBLEM
#'
#' @param form formula specifying model
#' @param data dataframe containing the data to be used
#' @param start named initial values for parameters
#' @param wts weights for data
#'
#' @return object of class nlsModel
#' @author R Development Core Team
#' @importFrom stats setNames
#' @keywords internal
#' @noRd
nlsModel_edit <- function (form, data, start, wts)
{
  thisEnv <- environment()
  env <- new.env(parent = environment(form))
  for (i in names(data)) {
    assign(i, data[[i]], envir = env)
  }
  ind <- as.list(start)
  parLength <- 0
  for (i in names(ind)) {
    temp <- start[[i]]
    storage.mode(temp) <- "double"
    assign(i, temp, envir = env)
    ind[[i]] <- parLength + seq(along = start[[i]])
    parLength <- parLength + length(start[[i]])
  }
  useParams <- rep(TRUE, parLength)
  lhs <- eval(form[[2]], envir = env)
  rhs <- eval(form[[3]], envir = env)
  resid <- lhs - rhs
  dev <- sum(resid^2)
  if (is.null(attr(rhs, "gradient"))) {
    # the gradient doesn't matter, we're not evaluating anything
    # getRHS.noVarying <- function() numericDeriv(form[[3]],
    # names(ind), env)
    # getRHS <- getRHS.noVarying
    # rhs <- getRHS()
    attr(rhs, "gradient") <- matrix(1, nrow = length(data$y), ncol = length(names(ind)))
  }
  else {
    getRHS.noVarying <- function() eval(form[[3]], envir = env)
    getRHS <- getRHS.noVarying
  }
  dimGrad <- dim(attr(rhs, "gradient"))
  marg <- length(dimGrad)
  if (marg > 0) {
    gradSetArgs <- vector("list", marg + 1)
    for (i in 2:marg) gradSetArgs[[i]] <- rep(TRUE, dimGrad[i -
                                                              1])
    useParams <- rep(TRUE, dimGrad[marg])
  }
  else {
    gradSetArgs <- vector("list", 2)
    useParams <- rep(TRUE, length(attr(rhs, "gradient")))
  }
  npar <- length(useParams)
  gradSetArgs[[1]] <- (~attr(ans, "gradient"))[[2]]
  gradCall <- switch(length(gradSetArgs) - 1, call("[", gradSetArgs[[1]],
                                                   gradSetArgs[[2]]), call("[", gradSetArgs[[1]], gradSetArgs[[2]],
                                                                           gradSetArgs[[2]]), call("[", gradSetArgs[[1]], gradSetArgs[[2]],
                                                                                                   gradSetArgs[[2]], gradSetArgs[[3]]), call("[", gradSetArgs[[1]],
                                                                                                                                             gradSetArgs[[2]], gradSetArgs[[2]], gradSetArgs[[3]],
                                                                                                                                             gradSetArgs[[4]]))
  getRHS.varying <- function() {
    ans <- getRHS.noVarying()
    attr(ans, "gradient") <- eval(gradCall)
    ans
  }
  QR <- qr(attr(rhs, "gradient"))
  qrDim <- min(dim(QR$qr))
  # if (QR$rank < qrDim)
  #   stop("singular gradient matrix at initial parameter estimates")
  getPars.noVarying <- function() unlist(setNames(lapply(names(ind),
                                                         get, envir = env), names(ind)))
  getPars.varying <- function() unlist(setNames(lapply(names(ind),
                                                       get, envir = env), names(ind)))[useParams]
  getPars <- getPars.noVarying
  internalPars <- getPars()
  setPars.noVarying <- function(newPars) {
    assign("internalPars", newPars, envir = thisEnv)
    for (i in names(ind)) {
      assign(i, unname(newPars[ind[[i]]]), envir = env)
    }
  }
  setPars.varying <- function(newPars) {
    internalPars[useParams] <- newPars
    for (i in names(ind)) {
      assign(i, unname(internalPars[ind[[i]]]), envir = env)
    }
  }
  setPars <- setPars.noVarying
  on.exit(remove(i, data, parLength, start, temp, m))
  m <- list(resid = function() resid, fitted = function() rhs,
            formula = function() form, deviance = function() dev,
            gradient = function() attr(rhs, "gradient"), conv = function() {
              rr <- qr.qty(QR, resid)
              sqrt(sum(rr[1:npar]^2)/sum(rr[-(1:npar)]^2))
            }, incr = function() qr.coef(QR, resid), setVarying = function(vary = rep(TRUE,
                                                                                      length(useParams))) {
              assign("useParams", if (is.character(vary)) {
                temp <- logical(length(useParams))
                temp[unlist(ind[vary])] <- TRUE
                temp
              } else if (is.logical(vary) && length(vary) != length(useParams)) stop("setVarying : vary length must match length of parameters") else {
                vary
              }, envir = thisEnv)
              gradCall[[length(gradCall)]] <- useParams
              if (all(useParams)) {
                assign("setPars", setPars.noVarying, envir = thisEnv)
                assign("getPars", getPars.noVarying, envir = thisEnv)
                assign("getRHS", getRHS.noVarying, envir = thisEnv)
                assign("npar", length(useParams), envir = thisEnv)
              } else {
                assign("setPars", setPars.varying, envir = thisEnv)
                assign("getPars", getPars.varying, envir = thisEnv)
                assign("getRHS", getRHS.varying, envir = thisEnv)
                assign("npar", length((1:length(useParams))[useParams]),
                       envir = thisEnv)
              }
            }, setPars = function(newPars) {
              setPars(newPars)
              assign("resid", lhs - assign("rhs", getRHS(), envir = thisEnv),
                     envir = thisEnv)
              assign("dev", sum(resid^2), envir = thisEnv)
              assign("QR", qr(attr(rhs, "gradient")), envir = thisEnv)
              return(QR$rank < min(dim(QR$qr)))
            }, getPars = function() getPars(), getAllPars = function() getPars(),
            getEnv = function() env, trace = function() cat(format(dev),
                                                            ": ", format(getPars()), "\n"), Rmat = function() qr.R(QR),
            predict = function(newdata = list(), qr = FALSE) {
              eval(form[[3]], as.list(newdata), env)
            })
  class(m) <- "nlsModel"
  m
}

#' Function for getting an object of class nls, EDITED VERY SPECIFICIALLY FOR MOSAIC PROBLEM.
#'
#' @param formula a nonlinear model formula including variables and parameters. Will be coerced to a formula if necessary.
#' @param data an optional data frame in which to evaluate the variables in formula and weights. Can also be a list or an environment, but not a matrix.
#' @param start a named list or named numeric vector of starting estimates. When start is missing (and formula is not a self-starting model, see selfStart), a very cheap guess for start is tried (if algorithm != "plinear").
#' @param control an optional list of control settings. See nls.control for the names of the settable control values and their effect.
#' @param algorithm character string specifying the algorithm to use. The default algorithm is a Gauss-Newton algorithm. "plinear" and "port" are not available and will change to default.
#' @param trace logical value indicating if a trace of the iteration progress should be printed. Default is FALSE. If TRUE the residual (weighted) sum-of-squares and the parameter values are printed at the conclusion of each iteration. When the "plinear" algorithm is used, the conditional estimates of the linear parameters are printed after the nonlinear parameters. When the "port" algorithm is used the objective function value printed is half the residual (weighted) sum-of-squares.
#' @param subset an optional vector specifying a subset of observations to be used in the fitting process.
#' @param weights an optional numeric vector of (fixed) weights. When present, the objective function is weighted least squares.
#' @param na.action a function which indicates what should happen when the data contain NAs. The default is set by the na.action setting of options, and is na.fail if that is unset. The 'factory-fresh' default is na.omit. Value na.exclude can be useful.
#' @param model logical. If true, the model frame is returned as part of the object. Default is FALSE.
#' @param lower, upper vectors of lower and upper bounds, replicated to be as long as start. If unspecified, all parameters are assumed to be unconstrained. Bounds can only be used with the "port" algorithm. They are ignored, with a warning, if given for other algorithms.
#' @param ... Additional optional arguments. None are used at present.
#'
#' @return a list containing:
#' \item{m}{an nlsModel object incorporating the model}
#' \item{data}{the expression that was passed to nls as the data argument The actual data values are present in the environment of the m component}
#' \item{call}{the matched call with several components, notably algorithm}
#' \item{naaction}{the "naaction" attribute (if any) of the model frame}
#' \item{dataClasses}{the "dataClasses" attribute (if any) of the "terms" attribute of the model frame}
#' \item{model}{if model = TRUE, the model frame}
#' \item{weights}{if weights is supplied, the weights}
#' \item{convInfo}{a list with convergence information}
#' \item{control}{the control list used, see the control argument}
#' \item{convergence}{message for an algorithm = "port" fit only, a convergence code (0 for convergence) and message To use these is deprecated, as they are available from convInfo now}
#' @author R Development Core Team
#' @importFrom stats as.formula
#' @importFrom stats getInitial
#' @importFrom stats model.weights
#' @keywords internal
#' @noRd
nls_edit <- function (formula, data = parent.frame(), start, control = nls.control(),
                      algorithm = c("default", "plinear", "port"), trace = FALSE,
                      subset, weights, na.action, model = FALSE, lower = -Inf,
                      upper = Inf, ...)
{
  formula <- as.formula(formula)
  algorithm <- match.arg(algorithm)
  if (!is.list(data) && !is.environment(data))
    stop("'data' must be a list or an environment")
  mf <- cl <- match.call()
  varNames <- all.vars(formula)
  if (length(formula) == 2L) {
    formula[[3L]] <- formula[[2L]]
    formula[[2L]] <- 0
  }
  form2 <- formula
  form2[[2L]] <- 0
  varNamesRHS <- all.vars(form2)
  mWeights <- missing(weights)
  pnames <- if (missing(start)) {
    if (!is.null(attr(data, "parameters"))) {
      names(attr(data, "parameters"))
    }
    else {
      cll <- formula[[length(formula)]]
      if (is.symbol(cll)) {
        cll <- substitute(S + 0, list(S = cll))
      }
      fn <- as.character(cll[[1L]])
      if (is.null(func <- tryCatch(get(fn), error = function(e) NULL)))
        func <- get(fn, envir = parent.frame())
      if (!is.null(pn <- attr(func, "pnames")))
        as.character(as.list(match.call(func, call = cll))[-1L][pn])
    }
  }
  else names(start)
  env <- environment(formula)
  if (is.null(env))
    env <- parent.frame()
  if (length(pnames))
    varNames <- varNames[is.na(match(varNames, pnames))]
  lenVar <- function(var) tryCatch(length(eval(as.name(var),
                                               data, env)), error = function(e) -1L)
  if (length(varNames)) {
    n <- vapply(varNames, lenVar, 0)
    if (any(not.there <- n == -1L)) {
      nnn <- names(n[not.there])
      if (missing(start)) {
        if (algorithm == "plinear")
          stop("no starting values specified")
        warning("No starting values specified for some parameters.\n",
                "Initializing ", paste(sQuote(nnn), collapse = ", "),
                " to '1.'.\n", "Consider specifying 'start' or using a selfStart model",
                domain = NA)
        start <- setNames(as.list(rep_len(1, length(nnn))),
                          nnn)
        varNames <- varNames[i <- is.na(match(varNames,
                                              nnn))]
        n <- n[i]
      }
      else stop(gettextf("parameters without starting value in 'data': %s",
                         paste(nnn, collapse = ", ")), domain = NA)
    }
  }
  else {
    if (length(pnames) && any((np <- sapply(pnames, lenVar)) ==
                              -1)) {
      message(sprintf(ngettext(sum(np == -1), "fitting parameter %s without any variables",
                               "fitting parameters %s without any variables"),
                      paste(sQuote(pnames[np == -1]), collapse = ", ")),
              domain = NA)
      n <- integer()
    }
    else stop("no parameters to fit")
  }
  respLength <- length(eval(formula[[2L]], data, env))
  if (length(n) > 0L) {
    varIndex <- n%%respLength == 0
    if (is.list(data) && diff(range(n[names(n) %in% names(data)])) >
        0) {
      mf <- data
      if (!missing(subset))
        warning("argument 'subset' will be ignored")
      if (!missing(na.action))
        warning("argument 'na.action' will be ignored")
      if (missing(start))
        start <- getInitial(formula, mf)
      startEnv <- new.env(hash = FALSE, parent = environment(formula))
      for (i in names(start)) assign(i, start[[i]], envir = startEnv)
      rhs <- eval(formula[[3L]], data, startEnv)
      n <- NROW(rhs)
      wts <- if (mWeights)
        rep_len(1, n)
      else eval(substitute(weights), data, environment(formula))
    }
    else {
      vNms <- varNames[varIndex]
      if (any(nEQ <- vNms != make.names(vNms)))
        vNms[nEQ] <- paste0("`", vNms[nEQ], "`")
      mf$formula <- as.formula(paste("~", paste(vNms,
                                                collapse = "+")), env = environment(formula))
      mf$start <- mf$control <- mf$algorithm <- mf$trace <- mf$model <- NULL
      mf$lower <- mf$upper <- NULL
      mf[[1L]] <- quote(stats::model.frame)
      mf <- eval.parent(mf)
      n <- nrow(mf)
      mf <- as.list(mf)
      wts <- if (!mWeights)
        model.weights(mf)
      else rep_len(1, n)
    }
    if (any(wts < 0 | is.na(wts)))
      stop("missing or negative weights not allowed")
  }
  else {
    varIndex <- logical()
    mf <- list(0)
    wts <- numeric()
  }
  if (missing(start))
    start <- getInitial(formula, mf)
  for (var in varNames[!varIndex]) mf[[var]] <- eval(as.name(var),
                                                     data, env)
  varNamesRHS <- varNamesRHS[varNamesRHS %in% varNames[varIndex]]
  m <- switch(algorithm, plinear = nlsModel_edit(formula,mf, start, wts), port = nlsModel_edit(formula, mf, start, wts), nlsModel_edit(formula, mf, start, wts))
  ctrl <- nls.control()
  if (!missing(control)) {
    control <- as.list(control)
    ctrl[names(control)] <- control
  }
  # if (algorithm != "port") { # port is not used
    if (!identical(lower, -Inf) || !identical(upper, +Inf)) {
      warning("upper and lower bounds ignored unless algorithm = \"port\"")
      cl$lower <- NULL
      cl$upper <- NULL
    }
    if (ctrl$maxiter > 0){
      # This is never reached for this use.
      # convInfo <- .Call(C_nls_iter, m, ctrl, trace)
    } else {
      convInfo <- list("isConv" = F, "finIter" = 0, "finTol" = NA, "stopCode" = 9,
                       "stopMessage"="The number of iterations has reached maxiter.")
    }
    nls.out <- list(m = m, convInfo = convInfo, data = substitute(data),
                    call = cl)
  # }
  # else {
  #   # PORT IS NOT USED
  #   pfit <- nls_port_fit(m, start, lower, upper, control,
  #                        trace, give.v = TRUE)
  #   iv <- pfit[["iv"]]
  #   msg.nls <- port_msg(iv[1L])
  #   conv <- (iv[1L] %in% 3:6)
  #   if (!conv) {
  #     msg <- paste("Convergence failure:", msg.nls)
  #     if (ctrl$warnOnly)
  #       warning(msg)
  #     else stop(msg)
  #   }
  #   v. <- port_get_named_v(pfit[["v"]])
  #   cInfo <- list(isConv = conv, finIter = iv[31L], finTol = v.[["NREDUC"]],
  #                 nEval = c(`function` = iv[6L], gradient = iv[30L]),
  #                 stopCode = iv[1L], stopMessage = msg.nls)
  #   cl$lower <- lower
  #   cl$upper <- upper
  #   nls.out <- list(m = m, data = substitute(data), call = cl,
  #                   convInfo = cInfo, convergence = as.integer(!conv),
  #                   message = msg.nls)
  # }
  nls.out$call$algorithm <- algorithm
  nls.out$call$control <- ctrl
  nls.out$call$trace <- trace
  nls.out$na.action <- attr(mf, "na.action")
  nls.out$dataClasses <- attr(attr(mf, "terms"), "dataClasses")[varNamesRHS]
  if (model)
    nls.out$model <- mf
  if (!mWeights)
    nls.out$weights <- wts
  nls.out$control <- control
  class(nls.out) <- "nls"
  nls.out
}

# calculate model parameters ----

#' Function to calculate echo joint model parameters with inequality constraints (slack for protein)
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param start_type "orig", only calculate starting points from scratch
#' @param res vector of parameter estimates from a completely joint echo model
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param genes_rna dataframe of rna gene expressions (first column is names, other columns are numeric)
#' @param genes_protein dataframe of protein gene expressions (first column is names, other columns are numeric)
#' @param avg_genes_rna matrix of averaged gene expressions, over replicates, for rna
#' @param avg_genes_pro matrix of averaged gene expressions, over replicates, for protein
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list which contains: model name, fit model object, parameters, joint p value, rna p value, protein p value, data frame of parameters for output, ac coefficient category for rna and protein
#' @import minpack.lm
#' @importFrom stats cor.test
#' @importFrom stats nls.control
#' @importFrom stats coef
#' @importFrom stats AIC
#' @keywords internal
#' @noRd
calc_param_echo_joint_constr <- function(current_gene, timen, resol, num_reps, start_type = "orig", res, harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high){
  gene_n <- as.character(genes_rna[current_gene,1]) # gene name

  # calculating averages for initial values
  if (num_reps == 1){
    y_val_rna <- as.numeric(genes_rna[current_gene,-1])
    y_val_pro <- as.numeric(genes_pro[current_gene,-1]) # all the gene values
  } else{
    y_val_pro <- avg_genes_pro[current_gene,] # starting values determined by average of replicates
    y_val_rna <- avg_genes_rna[current_gene,] # starting values determined by average of replicates
  }

  # calculate first starting points for rna and protein, using the original method ("orig")
  rna_start <- calc_start_echo(y_val_rna, over_cut, timen, resol, low, high)
  pro_start <- calc_start_echo(y_val_pro, over_cut, timen, resol, low, high)

  # we're going to use the starting value of rna for the joint period, respective for everything else
  # now we need to format it into a way that is nice (renaming)
  names(rna_start) <- paste0(names(rna_start),"1")
  names(pro_start) <- paste0(names(pro_start),"2")
  start_param <- c(rna_start,pro_start)
  names(start_param)[names(start_param)=="omega2"] <- "chang"
  # changing starting period to be in hours
  start_param$omega1 <- 2*pi/start_param$omega1
  start_param$chang <- 0

  # create second starting points -- based on the results of a completely joint model
  start_joint <- start_param
  # change the values
  start_joint[names(start_joint)!="chang"] <- as.list(res[,names(start_joint)[names(start_joint)!="chang"]])
  # changing starting joint period to be in hours
  start_joint$omega1 <- 2*pi/start_joint$omega1

  # putting together expression values and time points
  y_stack <- c(unlist(genes_rna[current_gene,-1]), unlist(genes_pro[current_gene,-1]))
  t_stack <- c(rep(timen, each=num_reps), (rep(timen, each = num_reps)))
  # calculate weights for time points, if there are replicates
  if (num_reps > 1){
    weights <- c(rep(calc_weights_boot(unlist(genes_rna[current_gene,-1]), num_reps), each = num_reps),
                 rep(calc_weights_boot(unlist(genes_pro[current_gene,-1]), num_reps), each = num_reps))
  }

  # time points, repeated for each replicate
  t_rep <- length(rep(timen, each=num_reps))
  # binary vectors to indicate rna and protein in concatenation
  s1 <- rep(c(1,0), c(t_rep,t_rep)) # rna
  s2 <- rep(c(0,1), c(t_rep,t_rep)) # protein

  # constraints are in the order of the start parameters
  # preallocate constraints
  lower_constr <- upper_constr <- c(rep(1, length(start_param)))
  # name the constraints
  names(lower_constr) <- names(upper_constr) <- names(start_param)
  # assign upper and lower constraints
  lower_constr <- c( -over_cut,-Inf, 2*pi/low, -Inf, min(y_stack[1:(t_rep)]),
                     -over_cut,-Inf, (-resol+1e-12), -Inf, min(y_stack[((t_rep)+1):length(y_stack)]))
  upper_constr <- c( over_cut, Inf, 2*pi/high, Inf, max(y_stack[1:t_rep]),
                     over_cut, Inf, resol-1e-12, Inf, max(y_stack[((t_rep)+1):length(y_stack)]))

  # preallocate
  temp <- data.frame() # df for data
  coeff <- c() # coefficient
  best <- "" # best model
  num_iter <- 1000 # max number of iterations
  if (num_reps == 1){ # one replicate
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, s1 = s1, s2 = s2, resol = rep(resol,length(y_stack)))
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # first: fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_one_rep,
                              fcall = echo_joint_res_constr,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.orig <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.orig <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.orig <- nls_edit(
      formula = y ~ echo_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], y_shift1=coeff[5],
                   gam2=coeff[6], a2=coeff[7], chang = coeff[8], phi2 = coeff[9], y_shift2=coeff[10]
      ),
      control = nls.control(maxiter = 0)
    )

    # second: fitting with starting points from a completely joint model
    oscillator_init <- nls.lm(par = start_joint,
                              fn = fcn_one_rep,
                              fcall = echo_joint_res_constr,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.joint <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.joint <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.joint <- nls_edit(
      formula = y ~ echo_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], y_shift1=coeff[5],
                   gam2=coeff[6], a2=coeff[7], chang = coeff[8], phi2 = coeff[9], y_shift2=coeff[10]
      ),
      control = nls.control(maxiter = 0)
    )

  } else{ # multiple replicates
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, w= weights, s1 = s1, s2 = s2, resol = rep(resol,length(y_stack)))
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # first: fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn,
                              fcall = echo_joint_res_constr,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.orig <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.orig <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.orig <- nls_edit(
      formula = y ~ echo_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], y_shift1=coeff[5],
                   gam2=coeff[6], a2=coeff[7], chang = coeff[8], phi2 = coeff[9], y_shift2=coeff[10]
      ),
      control = nls.control(maxiter = 0)
    )

    # second: fitting with starting points from a completely joint model
    oscillator_init <- nls.lm(par = start_joint,
                              fn = fcn,
                              fcall = echo_joint_res_constr,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.joint <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.joint <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.joint <- nls_edit(
      formula = y ~ echo_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], y_shift1=coeff[5],
                   gam2=coeff[6], a2=coeff[7], chang = coeff[8], phi2 = coeff[9], y_shift2=coeff[10]
      ),
      control = nls.control(maxiter = 0)
    )

  }

  # compare the results of the starting points using aic
  aic_res <- AIC(oscillator.fit.orig, oscillator.fit.joint)
  ord_start <- c("orig","joint") # order of starting point fits
  # choose fit based on which has the lowest aic
  oscillator.fit <- list(oscillator.fit.orig, oscillator.fit.joint)[[which.min(aic_res$AIC)]] # nls object
  best <- ord_start[which.min(aic_res$AIC)] # best starting points
  coeff <- list(coeff.orig, coeff.joint)[[which.min(aic_res$AIC)]] # coefficient
  num_iter <- c(iter.orig, iter.joint)[which.min(aic_res$AIC)] # number of iterations

  # coefficients and name, with some other options
  res <- data.frame("gene_n"=gene_n,
                    "gam1" =  coeff[1],
                    "a1" =  coeff[2],
                    "omega1" =  coeff[3],
                    "per1" =  coeff[3],
                    "phi1" =  coeff[4],
                    "y_shift1" = coeff[5],
                    "gam2" =  coeff[6],
                    "a2" =  coeff[7],
                    "chang" =  coeff[8],
                    "per2" =  (coeff[3]+coeff[8]),
                    "phi2" =  coeff[9],
                    "y_shift2" = coeff[10],
                    "iter" = num_iter,
                    "best" = best,
                    stringsAsFactors = F)

  # save data for the final dataframe
  # get oscillation type
  type_gam_rna <- get_type_gam(coeff[1], harm_cut, over_cut)
  type_gam_pro <- get_type_gam(coeff[6], harm_cut, over_cut)

  # get hours shifted
  phase_hours_rna <- get_hs(coeff[2],coeff[4],2*pi/coeff[3])
  phase_hours_pro <- get_hs(coeff[7],coeff[9],2*pi/(coeff[3]+coeff[8]))

  # final df row vector
  fin <- c("a1" =  coeff[2],
           "gam1" =  coeff[1],
           "osc1" = type_gam_rna,
           "omega1" =  2*pi/coeff[3],
           "per1" =  coeff[3],
           "phi1" =  coeff[4],
           "hs1" = phase_hours_rna,
           "y_shift1" = coeff[5],
           "a2" =  coeff[7],
           "gam2" =  coeff[6],
           "osc2" = type_gam_pro,
           "omega2" =  2*pi/(coeff[3]+coeff[8]),
           "per2" =  (coeff[3]+coeff[8]),
           "phi2" =  coeff[9],
           "hs2" = phase_hours_pro,
           "y_shift2" = coeff[10])

  # adjust names correctly
  echo_param_n <- c("Initial_Amplitude", "AC_Coefficient", "Oscillation_Type", "Radian_Frequency", "Period", "Phase_Shift", "Hours_Shifted", "Equilibrium_Value")
  echo_joint_param_n <- c(paste0(echo_param_n, "_RNA"), paste0(echo_param_n,"_Protein"))
  # add those name
  names(fin) <- echo_joint_param_n

  # generating p-values
  # joint pvalue
  all_pred <- oscillator.fit$m$predict() # fitted values
  testing <- suppressWarnings(cor.test(all_pred,temp$y, method = "kendall"))
  joint_pval <- testing$p.value

  # rna pvalue
  rna_val <- genes_rna[current_gene,-1]
  rna_val <- rna_val[!is.na(rna_val)]
  testing <- suppressWarnings(cor.test(all_pred[1:length(rna_val)],rna_val, method = "kendall"))
  rna_pval <- testing$p.value

  # pro pvalue
  pro_val <- genes_pro[current_gene,-1]
  pro_val <- pro_val[!is.na(pro_val)]
  testing <- suppressWarnings(cor.test(all_pred[(length(rna_val)+1):length(all_pred)],pro_val, method = "kendall"))
  pro_pval <- testing$p.value

  # results list
  res <- list("mod_name" = "echo_joint",
              "model" = oscillator.fit,
              "param" = res,
              "joint_pval" = joint_pval,
              "rna_pval" = rna_pval,
              "pro_pval" = pro_pval,
              "final" = fin,
              "type_gam_rna" = type_gam_rna,
              "type_gam_pro" = type_gam_pro)

  return(res)
}

#' Function to calculate echo linear joint model parameters with inequality constraints (slack for protein)
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param start_type "orig", only calculate starting points from scratch
#' @param res vector of parameter estimates from a completely joint echo linear model
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param genes_rna dataframe of rna gene expressions (first column is names, other columns are numeric)
#' @param genes_protein dataframe of protein gene expressions (first column is names, other columns are numeric)
#' @param avg_genes_rna matrix of averaged gene expressions, over replicates, for rna
#' @param avg_genes_pro matrix of averaged gene expressions, over replicates, for protein
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list which contains: model name, fit model object, parameters, joint p value, rna p value, protein p value, data frame of parameters for output, ac coefficient category for rna and protein
#' @import minpack.lm
#' @importFrom stats cor.test
#' @importFrom stats nls.control
#' @importFrom stats coef
#' @importFrom stats AIC
#' @keywords internal
#' @noRd
calc_param_echo_lin_joint_constr <- function(current_gene, timen, resol, num_reps, start_type = "orig", res, harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high){
  gene_n <- as.character(genes_rna[current_gene,1]) # gene name

  # calculating averages for initial values
  if (num_reps == 1){
    y_val_rna <- as.numeric(genes_rna[current_gene,-1])
    y_val_pro <- as.numeric(genes_pro[current_gene,-1]) # all the gene values
  } else{
    y_val_pro <- avg_genes_pro[current_gene,] # starting values determined by average of replicates
    y_val_rna <- avg_genes_rna[current_gene,] # starting values determined by average of replicates
  }

  # calculating linear model to remove for starting values for rna and protein
  # rna
  # calculate linear model parameters
  lin.fit <- lm(as.numeric(genes_rna[current_gene,-1])~(rep(timen, each = num_reps)))
  coeff <- as.numeric(lin.fit$coefficients)
  y_shift0_rna <- coeff[1]
  slo0_rna <- coeff[2]
  # protein
  # calculate linear model parameters
  lin.fit <- lm(as.numeric(genes_pro[current_gene,-1])~(rep(timen, each = num_reps)))
  coeff <- as.numeric(lin.fit$coefficients)
  y_shift0_pro <- coeff[1]
  slo0_pro <- coeff[2]

  # calculate starting points using the original method
  rna_start <- calc_start_echo_lin(y_val_rna, slo0_rna, y_shift0_rna, over_cut, timen, resol, low, high)
  pro_start <- calc_start_echo_lin(y_val_pro, slo0_pro, y_shift0_pro, over_cut, timen, resol, low, high)

  # we're going to use the starting value of rna for the joint period, respective for everything else
  # now we need to format it into a way that is nice
  names(rna_start) <- paste0(names(rna_start),"1")
  names(pro_start) <- paste0(names(pro_start),"2")
  start_param <- c(rna_start,pro_start)
  names(start_param)[names(start_param)=="omega2"] <- "chang"
  # changing starting period to be in hours
  start_param$omega1 <- 2*pi/start_param$omega1
  start_param$chang <- 0

  # create second starting points -- based on the results of a completely joint model
  start_joint <- start_param
  # change the values
  start_joint[names(start_joint)!="chang"] <- as.list(res[,names(start_joint)[names(start_joint)!="chang"]])
  # changing starting joint period to be in hours
  start_joint$omega1 <- 2*pi/start_joint$omega1

  # putting together expression values and time points
  y_stack <- c(unlist(genes_rna[current_gene,-1]), unlist(genes_pro[current_gene,-1]))
  t_stack <- c(rep(timen, each=num_reps), (rep(timen, each = num_reps)))
  # calculate weights for time points, if there are replicates
  if (num_reps > 1){
    weights <- c(rep(calc_weights_boot(unlist(genes_rna[current_gene,-1]), num_reps), each = num_reps),
                 rep(calc_weights_boot(unlist(genes_pro[current_gene,-1]), num_reps), each = num_reps))
  }

  # time points, repeated for each replicate
  t_rep <- length(rep(timen, each=num_reps))
  # binary vectors to indicate rna and protein in concatenation
  s1 <- rep(c(1,0), c(t_rep,t_rep)) # rna
  s2 <- rep(c(0,1), c(t_rep,t_rep)) # protein

  # constraints are in the order of the start parameters
  # preallocate constraints
  lower_constr <- upper_constr <- c(rep(1, length(start_param)))
  # name the constraints
  names(lower_constr) <- names(upper_constr) <- names(start_param)
  # assign upper and lower constraints
  lower_constr <- c( -over_cut,-Inf, 2*pi/low, -Inf, -Inf, min(y_stack[1:(t_rep)]),
                     -over_cut,-Inf, (-resol+1e-12), -Inf, -Inf, min(y_stack[((t_rep)+1):length(y_stack)]))
  upper_constr <- c( over_cut, Inf, 2*pi/high, Inf, Inf, max(y_stack[1:t_rep]),
                     over_cut, Inf, resol-1e-12, Inf, Inf, max(y_stack[((t_rep)+1):length(y_stack)]))

  # preallocate
  temp <- data.frame() # df for data
  coeff <- c() # coefficient
  best <- "" # best model
  num_iter <- 1000 # max number of iterations
  if (num_reps == 1){ # one replicate
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, s1 = s1, s2 = s2)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # first: fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_one_rep,
                              fcall = echo_lin_joint_res_constr,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.orig <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.orig <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.orig <- nls_edit(
      formula = y ~ echo_lin_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], slo1 = coeff[5], y_shift1=coeff[6],
                   gam2=coeff[7], a2=coeff[8], chang = coeff[9], phi2 = coeff[10],  slo2 = coeff[11], y_shift2=coeff[12]
      ),
      control = nls.control(maxiter = 0)
    )

    # second: fitting with starting points from a completely joint model
    oscillator_init <- nls.lm(par = start_joint,
                              fn = fcn_one_rep,
                              # jac = fcn.jac,
                              fcall = echo_lin_joint_res_constr,
                              # jcall = jac_echo_joint,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.joint <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.joint <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.joint <- nls_edit(
      formula = y ~ echo_lin_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], slo1 = coeff[5], y_shift1=coeff[6],
                   gam2=coeff[7], a2=coeff[8], chang = coeff[9], phi2 = coeff[10],  slo2 = coeff[11], y_shift2=coeff[12]
      ),
      control = nls.control(maxiter = 0)
    )

  } else{ # multiple replicates
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, w= weights, s1 = s1, s2 = s2)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # first: fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn,
                              # jac = fcn.jac,
                              fcall = echo_lin_joint_res_constr,
                              # jcall = jac_echo_joint,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.orig <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.orig <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.orig <- nls_edit(
      formula = y ~ echo_lin_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], slo1 = coeff[5], y_shift1=coeff[6],
                   gam2=coeff[7], a2=coeff[8], chang = coeff[9], phi2 = coeff[10],  slo2 = coeff[11], y_shift2=coeff[12]
      ),
      control = nls.control(maxiter = 0)
    )

    # second: fitting with starting points from a completely joint model
    oscillator_init <- nls.lm(par = start_joint,
                              fn = fcn,
                              # jac = fcn.jac,
                              fcall = echo_lin_joint_res_constr,
                              # jcall = jac_echo_joint,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff.joint <- coeff <- coef(oscillator_init) #extract parameter estimates
    iter.joint <- oscillator_init$niter # extract amount of iterations

    # put coefficient estimates into an nls object
    oscillator.fit.joint <- nls_edit(
      formula = y ~ echo_lin_joint_res_constr(a1,a2, gam1,gam2, omega1,chang, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2, resol),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], slo1 = coeff[5], y_shift1=coeff[6],
                   gam2=coeff[7], a2=coeff[8], chang = coeff[9], phi2 = coeff[10],  slo2 = coeff[11], y_shift2=coeff[12]
      ),
      control = nls.control(maxiter = 0)
    )
  }

  # compare the results of the starting points using aic
  aic_res <- AIC(oscillator.fit.orig, oscillator.fit.joint)
  ord_start <- c("orig","joint") # order of starting point fits
  # choose fit based on which has the lowest aic
  oscillator.fit <- list(oscillator.fit.orig, oscillator.fit.joint)[[which.min(aic_res$AIC)]] # nls object
  best <- ord_start[which.min(aic_res$AIC)] # best starting points
  coeff <- list(coeff.orig, coeff.joint)[[which.min(aic_res$AIC)]] # coefficient
  num_iter <- c(iter.orig, iter.joint)[which.min(aic_res$AIC)] # number of iterations

  # coefficients and name, with some other options
  res <- data.frame("gene_n"=gene_n,
                    "gam1" =  coeff[1],
                    "a1" =  coeff[2],
                    "omega1" =  coeff[3],
                    "per1" =  coeff[3],
                    "phi1" =  coeff[4],
                    "slo1" = coeff[5],
                    "y_shift1" = coeff[6],
                    "gam2" =  coeff[7],
                    "a2" =  coeff[8],
                    "chang" =  coeff[9],
                    "per2" =  (coeff[3]+coeff[9]),
                    "phi2" =  coeff[10],
                    "slo2" = coeff[11],
                    "y_shift2" = coeff[12],
                    "iter" = num_iter,
                    "best" = best,
                    stringsAsFactors = F)

  # save data for the final dataframe
  # get oscillation type
  type_gam_rna <- get_type_gam(coeff[1], harm_cut, over_cut)
  type_gam_pro <- get_type_gam(coeff[7], harm_cut, over_cut)

  # get hours shifted
  phase_hours_rna <- get_hs(coeff[2],coeff[4],2*pi/coeff[3])
  phase_hours_pro <- get_hs(coeff[8],coeff[10],2*pi/(coeff[3]+coeff[9]))

  # final df row vector
  fin <- c("a1" =  coeff[2],
           "gam1" =  coeff[1],
           "osc1" = type_gam_rna,
           "omega1" =  2*pi/coeff[3],
           "per1" =  coeff[3],
           "phi1" =  coeff[4],
           "hs1" = phase_hours_rna,
           "slo1" = coeff[5],
           "y_shift1" = coeff[6],
           "a2" =  coeff[8],
           "gam2" =  coeff[7],
           "osc2" = type_gam_pro,
           "omega2" =  2*pi/(coeff[3]+coeff[9]),
           "per2" =  (coeff[3]+coeff[9]),
           "phi2" =  coeff[10],
           "hs2" = phase_hours_pro,
           "slo2" = coeff[11],
           "y_shift2" = coeff[12])

  # adjust names correctly
  echo_lin_param_n <- c("Initial_Amplitude", "AC_Coefficient", "Oscillation_Type", "Radian_Frequency", "Period", "Phase_Shift", "Hours_Shifted", "Slope", "Equilibrium_Value")
  echo_lin_joint_param_n <- c(paste0(echo_lin_param_n, "_RNA"), paste0(echo_lin_param_n,"_Protein"))
  # add those names
  names(fin) <- echo_lin_joint_param_n

  # generating p-values
  # joint pvalue
  all_pred <- oscillator.fit$m$predict() # fitted values
  testing <- suppressWarnings(cor.test(all_pred,temp$y, method = "kendall"))
  joint_pval <- testing$p.value

  # rna pvalue
  rna_val <- genes_rna[current_gene,-1]
  rna_val <- rna_val[!is.na(rna_val)]
  testing <- suppressWarnings(cor.test(all_pred[1:length(rna_val)],rna_val, method = "kendall"))
  rna_pval <- testing$p.value

  # pro pvalue
  pro_val <- genes_pro[current_gene,-1]
  pro_val <- pro_val[!is.na(pro_val)]
  testing <- suppressWarnings(cor.test(all_pred[(length(rna_val)+1):length(all_pred)],pro_val, method = "kendall"))
  pro_pval <- testing$p.value

  # results list
  res <- list("mod_name" = "echo_lin_joint",
              "model" = oscillator.fit,
              "param" = res,
              "joint_pval" = joint_pval,
              "rna_pval" = rna_pval,
              "pro_pval" = pro_pval,
              "final" = fin,
              "type_gam_rna" = type_gam_rna,
              "type_gam_pro" = type_gam_pro)

  return(res)
}

#' Function to calculate echo joint model parameters for a completely joint model (no slack for protein), for the purpose of getting starting points for the slacked model
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param start_type "orig", only calculate starting points from scratch
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param genes_rna dataframe of rna gene expressions (first column is names, other columns are numeric)
#' @param genes_protein dataframe of protein gene expressions (first column is names, other columns are numeric)
#' @param avg_genes_rna matrix of averaged gene expressions, over replicates, for rna
#' @param avg_genes_pro matrix of averaged gene expressions, over replicates, for protein
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list which contains: model name, fit model object, and parameters
#' @import minpack.lm
#' @importFrom stats cor.test
#' @importFrom stats nls.control
#' @importFrom stats coef
#' @keywords internal
#' @noRd
calc_param_echo_joint <- function(current_gene, timen, resol, num_reps, start_type = "orig", harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high){
  gene_n <- as.character(genes_rna[current_gene,1]) # gene name

  # calculating averages for initial values
  if (num_reps == 1){
    y_val_rna <- as.numeric(genes_rna[current_gene,-1])
    y_val_pro <- as.numeric(genes_pro[current_gene,-1]) # all the gene values
  } else{
    y_val_pro <- avg_genes_pro[current_gene,] # starting values determined by average of replicates
    y_val_rna <- avg_genes_rna[current_gene,] # starting values determined by average of replicates
  }

  # calculate first starting points for rna and protein, using the original method ("orig")
  rna_start <- calc_start_echo(y_val_rna, over_cut, timen, resol, low, high)
  pro_start <- calc_start_echo(y_val_pro, over_cut, timen, resol, low, high)

  # we're going to use the starting value of rna for the joint period, respective for everything else
  # now we need to format it into a way that is nice (renaming)
  names(rna_start) <- paste0(names(rna_start),"1")
  names(pro_start) <- paste0(names(pro_start),"2")
  start_param <- c(rna_start,pro_start)
  start_param$omega2 <- NULL

  # putting together expression values and time points
  y_stack <- c(unlist(genes_rna[current_gene,-1]), unlist(genes_pro[current_gene,-1]))
  t_stack <- c(rep(timen, each=num_reps), (rep(timen, each = num_reps)))
  # calculate weights for time points, if there are replicates
  if (num_reps > 1){
    weights <- c(rep(calc_weights_boot(unlist(genes_rna[current_gene,-1]), num_reps), each = num_reps),
                 rep(calc_weights_boot(unlist(genes_pro[current_gene,-1]), num_reps), each = num_reps))
  }

  # time points, repeated for each replicate
  t_rep <- length(rep(timen, each=num_reps))
  # binary vectors to indicate rna and protein in concatenation
  s1 <- rep(c(1,0), c(t_rep,t_rep)) # rna
  s2 <- rep(c(0,1), c(t_rep,t_rep)) # protein

  # constraints are in the order of the start parameters
  # preallocate constraints
  lower_constr <- upper_constr <- c(rep(1, length(start_param)))
  # assign upper and lower constraints
  names(lower_constr) <- names(upper_constr) <- names(start_param)
  lower_constr <- c( -over_cut,-Inf, high, -Inf, min(y_stack[1:(t_rep)]),
                     -over_cut,-Inf, -Inf, min(y_stack[((t_rep)+1):length(y_stack)]))
  upper_constr <- c( over_cut, Inf, low, Inf, max(y_stack[1:t_rep]),
                     over_cut, Inf, Inf, max(y_stack[((t_rep)+1):length(y_stack)]))

  # preallocate
  temp <- data.frame() # df for data
  coeff <- c() # coefficient
  if (num_reps == 1){ # one replicate
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, s1 = s1, s2 = s2)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_one_rep,
                              fcall = echo_joint,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff <- coef(oscillator_init) #extract parameter estimates

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ echo_joint(a1,a2, gam1,gam2, omega1, phi1,phi2, y_shift1,y_shift2, t, s1, s2),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], y_shift1=coeff[5],
                   gam2=coeff[6], a2=coeff[7], phi2 = coeff[8], y_shift2=coeff[9]
      ),
      control = nls.control(maxiter = 0)
    )

  } else{ # multiple replicates
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, w= weights, s1 = s1, s2 = s2)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn,
                              fcall = echo_joint,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff <- coef(oscillator_init) #extract parameter estimates

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ echo_joint(a1,a2, gam1,gam2, omega1, phi1,phi2, y_shift1,y_shift2, t, s1, s2),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], y_shift1=coeff[5],
                   gam2=coeff[6], a2=coeff[7], phi2 = coeff[8], y_shift2=coeff[9]
      ),
      control = nls.control(maxiter = 0)
    )
  }

  # parameters
  res <- data.frame("gene_n"=gene_n,
                    "gam1" =  coeff[1],
                    "a1" =  coeff[2],
                    "omega1" =  coeff[3],
                    "phi1" =  coeff[4],
                    "y_shift1" = coeff[5],
                    "gam2" =  coeff[6],
                    "a2" =  coeff[7],
                    "phi2" =  coeff[8],
                    "y_shift2" = coeff[9],
                    stringsAsFactors = F)

  # results list
  res <- list("mod_name" = "echo_joint",
              "model" = oscillator.fit,
              "param" = res)
  return(res)
}

#' Function to calculate echo linear joint model parameters for a completely joint model (no slack for protein), for the purpose of getting starting points for the slacked model
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param start_type "orig", only calculate starting points from scratch
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param genes_rna dataframe of rna gene expressions (first column is names, other columns are numeric)
#' @param genes_protein dataframe of protein gene expressions (first column is names, other columns are numeric)
#' @param avg_genes_rna matrix of averaged gene expressions, over replicates, for rna
#' @param avg_genes_pro matrix of averaged gene expressions, over replicates, for protein
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list which contains: model name, fit model object, and parameters
#' @import minpack.lm
#' @importFrom stats cor.test
#' @importFrom stats nls.control
#' @importFrom stats coef
#' @keywords internal
#' @noRd
calc_param_echo_lin_joint <- function(current_gene, timen, resol, num_reps, start_type = "orig", harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high){
  gene_n <- as.character(genes_rna[current_gene,1]) # gene name

  # calculating averages for initial values
  if (num_reps == 1){
    y_val_rna <- as.numeric(genes_rna[current_gene,-1])
    y_val_pro <- as.numeric(genes_pro[current_gene,-1]) # all the gene values
  } else{
    y_val_pro <- avg_genes_pro[current_gene,] # starting values determined by average of replicates
    y_val_rna <- avg_genes_rna[current_gene,] # starting values determined by average of replicates
  }

  # calculating linear model to remove for starting values for rna and protein
  # rna
  # calculate linear model parameters
  lin.fit <- lm(as.numeric(genes_rna[current_gene,-1])~(rep(timen, each = num_reps)))
  coeff <- as.numeric(lin.fit$coefficients)
  y_shift0_rna <- coeff[1]
  slo0_rna <- coeff[2]
  # protein
  # calculate linear model parameters
  lin.fit <- lm(as.numeric(genes_pro[current_gene,-1])~(rep(timen, each = num_reps)))
  coeff <- as.numeric(lin.fit$coefficients)
  y_shift0_pro <- coeff[1]
  slo0_pro <- coeff[2]

  # calculate starting points using the original method
  rna_start <- calc_start_echo_lin(y_val_rna, slo0_rna, y_shift0_rna, over_cut, timen, resol, low, high)
  pro_start <- calc_start_echo_lin(y_val_pro, slo0_pro, y_shift0_pro, over_cut, timen, resol, low, high)

  # we're going to use the starting value of rna for the joint period, respective for everything else
  # now we need to format it into a way that is nice
  names(rna_start) <- paste0(names(rna_start),"1")
  names(pro_start) <- paste0(names(pro_start),"2")
  start_param <- c(rna_start,pro_start)
  start_param$omega2 <- NULL

  # putting together expression values and time points
  y_stack <- c(unlist(genes_rna[current_gene,-1]), unlist(genes_pro[current_gene,-1]))
  t_stack <- c(rep(timen, each=num_reps), (rep(timen, each = num_reps)))
  # calculate weights for time points, if there are replicates
  if (num_reps > 1){
    weights <- c(rep(calc_weights_boot(unlist(genes_rna[current_gene,-1]), num_reps), each = num_reps),
                 rep(calc_weights_boot(unlist(genes_pro[current_gene,-1]), num_reps), each = num_reps))
  }

  # time points, repeated for each replicate
  t_rep <- length(rep(timen, each=num_reps))
  # binary vectors to indicate rna and protein in concatenation
  s1 <- rep(c(1,0), c(t_rep,t_rep)) # rna
  s2 <- rep(c(0,1), c(t_rep,t_rep)) # protein

  # constraints are in the order of the start parameters
  # preallocate constraints
  lower_constr <- upper_constr <- c(rep(1, length(start_param)))
  # name the constraints
  names(lower_constr) <- names(upper_constr) <- names(start_param)
  # assign upper and lower constraints
  lower_constr <- c( -over_cut,-Inf, high, -Inf, -Inf,  min(y_stack[1:(t_rep)]),
                     -over_cut,-Inf, -Inf, -Inf, min(y_stack[((t_rep)+1):length(y_stack)]))
  upper_constr <- c( over_cut, Inf, low, Inf, Inf, max(y_stack[1:t_rep]),
                     over_cut, Inf, Inf, Inf, max(y_stack[((t_rep)+1):length(y_stack)]))

  # preallocate
  temp <- data.frame() # df for data
  coeff <- c() # coefficient
  if (num_reps == 1){ # one replicate
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, s1 = s1, s2 = s2)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_one_rep,
                              fcall = echo_lin_joint,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff <- coef(oscillator_init) #extract parameter estimates

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ echo_lin_joint(a1,a2, gam1,gam2, omega1, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], slo1 = coeff[5], y_shift1=coeff[6],
                   gam2=coeff[7], a2=coeff[8], phi2 = coeff[9], slo2 = coeff[10], y_shift2=coeff[11]
      ),
      control = nls.control(maxiter = 0)
    )
  } else{ # multiple replicates
    #put the timen and data point into a data frame
    temp <- data.frame(y = y_stack, t =t_stack, w= weights, s1 = s1, s2 = s2)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # fitting with original starting points
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn,
                              fcall = echo_lin_joint,
                              lower = lower_constr,
                              upper = upper_constr,
                              t = temp$t,
                              y = temp$y,
                              s1 = temp$s1,
                              s2 = temp$s2,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    coeff <- coef(oscillator_init) #extract parameter estimates

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ echo_lin_joint(a1,a2, gam1,gam2, omega1, phi1,phi2, slo1,slo2, y_shift1,y_shift2, t, s1, s2),
      data=temp,
      start = list(gam1=coeff[1], a1=coeff[2], omega1 = coeff[3], phi1 = coeff[4], slo1 = coeff[5], y_shift1=coeff[6],
                   gam2=coeff[7], a2=coeff[8], phi2 = coeff[9], slo2 = coeff[10], y_shift2=coeff[11]
      ),
      control = nls.control(maxiter = 0)
    )
  }

  # parameters
  res <- data.frame("gene_n"=gene_n,
                    "gam1" =  coeff[1],
                    "a1" =  coeff[2],
                    "omega1" =  coeff[3],
                    "phi1" =  coeff[4],
                    "slo1" = coeff[5],
                    "y_shift1" = coeff[6],
                    "gam2" =  coeff[7],
                    "a2" =  coeff[8],
                    "slo2" = coeff[9],
                    "phi2" =  coeff[10],
                    "y_shift2" = coeff[11],
                    stringsAsFactors = F)

  # results list
  res <- list("mod_name" = "echo_lin_joint",
              "model" = oscillator.fit,
              "param" = res)
  return(res)
}

#' Function to calculate echo model parameters
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#' @param avg_genes matrix of averaged gene expressions, over replicates
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list which contains: model name, fit model object, parameters, p value, data frame of parameters for output, ac coefficient category
#' @import minpack.lm
#' @importFrom stats cor.test
#' @importFrom stats nls.control
#' @importFrom stats coef
#' @keywords internal
#' @noRd
calc_param_echo <- function(current_gene, timen, resol, num_reps, genes, avg_genes, harm_cut, over_cut, low, high){
  gene_n <- as.character(genes[current_gene,1]) # gene name

  # calculating averages for initial values
  if (num_reps == 1){
    y_val <- as.numeric(genes[current_gene,-1]) # all the gene values
  } else{
    y_val <- avg_genes[current_gene,] # starting values determined by average of replicates
  }

  # calculate starting values
  start_param <- calc_start_echo(y_val, over_cut, timen, resol, low, high)

  # preallocate df for data
  temp <- data.frame()
  if (num_reps == 1){ # one replicate
    # put the timen and data points into a data frame
    temp <- data.frame(y=y_val,t=timen)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # fitting
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_single_one_rep,
                              fcall = alt_form,
                              lower=c(-over_cut, -Inf, high, -Inf, min(temp$y)),
                              upper=c(over_cut, Inf, low, Inf, max(temp$y)),
                              t = temp$t,
                              y = temp$y,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    # get coefficients
    coeff <- coef(oscillator_init)

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ alt_form(a, gam, omega, phi, y_shift, t),
      data=temp,
      start = list(gam=coeff[1], a=coeff[2], omega = coeff[3], phi = coeff[4], y_shift=coeff[5]),
      control = nls.control(maxiter = 0)
    )

  } else{ # multiple replicates
    #put the timen and data points into a data frame, and weights
    weights <- calc_weights(current_gene,num_reps, genes)
    temp <- data.frame(y=cbind(unlist(genes[current_gene,-1])),t=cbind(rep(timen,each = num_reps)),w=cbind(rep(weights,each = num_reps)))
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # fitting
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_single,
                              fcall = alt_form,
                              lower=c(-over_cut, -Inf, high, -Inf, min(temp$y)),
                              upper=c(over_cut, Inf, low, Inf, max(temp$y)),
                              t = temp$t,
                              y = temp$y,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    # get coefficients
    coeff <- coef(oscillator_init)

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ alt_form(a, gam, omega, phi, y_shift, t),
      data=temp,
      start = list(gam=coeff[1], a=coeff[2], omega = coeff[3], phi = coeff[4], y_shift=coeff[5]),
      control = nls.control(maxiter = 0)
    )
  }

  did_conv <- oscillator.fit$convInfo$isConv # did the fit converge
  num_iter <- oscillator.fit$convInfo$finIter # amount of iterations

  coeff <- coef(oscillator.fit) #extract parameter estimates

  # alt_form parameters:
  # the parameters go in the order of: gam,a,omega,phi,y_shift
  res <- data.frame("gene_n"=gene_n,
                    "gam" =  coeff[1],
                    "a" =  coeff[2],
                    "omega" =  coeff[3],
                    "phi" =  coeff[4],
                    "y_shift" = coeff[5],
                    stringsAsFactors = F)

  # save data for the final dataframe
  # get oscillation type
  type_gam <- get_type_gam(coeff[1], harm_cut, over_cut)

  # get hours shifted
  phase_hours <- get_hs(coeff[2],coeff[4],coeff[3])

  # make final dataframe row
  fin <- c(coeff[2], coeff[1], type_gam, coeff[3], 2*pi/coeff[3], coeff[4], phase_hours, coeff[5])
  echo_param_n <- c("Initial_Amplitude", "AC_Coefficient", "Oscillation_Type", "Radian_Frequency", "Period", "Phase_Shift", "Hours_Shifted", "Equilibrium_Value")
  names(fin) <- echo_param_n

  # generating p-values
  all_pred <- oscillator.fit$m$predict() # fitted values
  testing <- suppressWarnings(cor.test(all_pred,temp$y, method = "kendall"))
  pval <- testing$p.value

  # results list
  res <- list("mod_name" = "echo",
              "model" = oscillator.fit,
              "param" = res,
              "pval" = pval,
              "final" = fin,
              "type_gam" = type_gam)

  return(res)
}

#' Function to calculate echo linear model parameters
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#' @param avg_genes matrix of averaged gene expressions, over replicates
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list which contains: model name, fit model object, parameters, p value, data frame of parameters for output, ac coefficient category
#' @import minpack.lm
#' @importFrom stats cor.test
#' @importFrom stats nls.control
#' @importFrom stats coef
#' @keywords internal
#' @noRd
calc_param_echo_lin <- function(current_gene, timen, resol, num_reps, genes, avg_genes, harm_cut, over_cut, low, high){
  gene_n <- as.character(genes[current_gene,1]) # gene name

  # calculating linear model to remove for starting values
  # calculate linear model parameters
  lin.fit <- lm(as.numeric(genes[current_gene,-1])~(rep(timen, each = num_reps)))
  coeff <- lin.fit$coefficients
  y_shift0 <- coeff[1]
  slo0 <- coeff[2]

  # calculating averages for initial values
  if (num_reps == 1){
    y_val <- as.numeric(genes[current_gene,-1]) # all the gene values
  } else{
    y_val <- avg_genes[current_gene,] # starting values determined by average of replicates
  }

  # get starting parameters
  start_param <- calc_start_echo_lin(y_val, slo0, y_shift0, over_cut, timen, resol, low, high)

  # preallocate df for data
  temp <- data.frame()
  if (num_reps == 1){ # one replicate
    # put the timen into a data frame
    temp <- data.frame(y=y_val,t=timen)
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    # fitting
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_single_one_rep,
                              fcall = echo_lin_mod,
                              lower=c(-over_cut, -Inf, high, -Inf, -Inf, min(temp$y)),
                              upper=c(over_cut, Inf, low, Inf, Inf, max(temp$y)),
                              t = temp$t,
                              y = temp$y,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    # get coefficients
    coeff <- coef(oscillator_init)

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ echo_lin_mod(a, gam, omega, phi, slo, y_shift, t),
      data=temp,
      start = list(gam=coeff[1], a=coeff[2], omega = coeff[3], phi = coeff[4], slo = coeff[5], y_shift=coeff[6]),
      control = nls.control(maxiter = 0)
    )

  } else{ # multiple replicates (add weights)
    #put the timen and data point into a data frame
    weights <- calc_weights(current_gene,num_reps, genes)
    temp <- data.frame(y=cbind(unlist(genes[current_gene,-1])),t=cbind(rep(timen,each = num_reps)),w=cbind(rep(weights,each = num_reps)))
    temp <- temp[!is.na(temp$y),] # remove any missing data points


    #fitting
    oscillator_init <- nls.lm(par = start_param,
                              fn = fcn_single,
                              fcall = echo_lin_mod,
                              lower=c(-over_cut, -Inf, high, -Inf, -Inf, min(temp$y)),
                              upper=c(over_cut, Inf, low, Inf, Inf, max(temp$y)),
                              t = temp$t,
                              y = temp$y,
                              w = temp$w,
                              control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                       ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    # get coefficients
    coeff <- coef(oscillator_init)

    # put coefficient estimates into an nls object
    oscillator.fit <- nls_edit(
      formula = y ~ echo_lin_mod(a, gam, omega, phi, slo, y_shift, t),
      data=temp,
      start = list(gam=coeff[1], a=coeff[2], omega = coeff[3], phi = coeff[4], slo = coeff[5], y_shift=coeff[6]),
      control = nls.control(maxiter = 0)
    )
  }

  did_conv <- oscillator.fit$convInfo$isConv # did the fit converge
  num_iter <- oscillator.fit$convInfo$finIter # amount of iterations

  coeff <- coef(oscillator.fit) #extract parameter estimates

  # alt_form parameters:
  # the parameters go in the order of: gam,a,omega,phi,y_shift
  res <- data.frame("gene_n"=gene_n,
                    "gam" =  coeff[1],
                    "a" =  coeff[2],
                    "omega" =  coeff[3],
                    "phi" =  coeff[4],
                    "slo" = coeff[5],
                    "y_shift" = coeff[6],
                    stringsAsFactors = F)

  # save data for the final dataframe
  # get oscillation type
  gam <- coeff[1]
  type_gam <- get_type_gam(gam, harm_cut, over_cut)

  # get hours shifted
  a <- coeff[2]
  phi <- coeff[4]
  omega <- coeff[3]
  phase_hours <- get_hs(a,phi,omega)

  # row for final data frame
  fin <- c(coeff[2], coeff[1], type_gam, coeff[3], 2*pi/coeff[3], coeff[4], phase_hours, coeff[5], coeff[6])
  names(fin) <- echo_lin_param_n <- c("Initial_Amplitude", "AC_Coefficient", "Oscillation_Type", "Radian_Frequency", "Period", "Phase_Shift", "Hours_Shifted", "Slope", "Equilibrium_Value")

  # generating p-values
  all_pred <- oscillator.fit$m$predict() # fitted values
  testing <- suppressWarnings(cor.test(all_pred,temp$y, method = "kendall"))
  pval <- testing$p.value

  # results list
  res <- list("mod_name" = "echo_lin",
              "model" = oscillator.fit,
              "param" = res,
              "pval" = pval,
              "final" = fin,
              "type_gam" = type_gam)

  return(res)
}

#' Function to calculate linear model parameters
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#' @param avg_genes matrix of averaged gene expressions, over replicates
#'
#' @return list which contains: model name, fit model object, parameters, fit p value, slope p value, data frame of parameters for output
#' @importFrom stats cor.test
#' @importFrom stats lm
#' @keywords internal
#' @noRd
calc_param_lin <- function(current_gene, timen, resol, num_reps, genes, avg_genes){
  # calculate linear model parameters
  # put data in a df
  temp <- data.frame(y=as.numeric(genes[current_gene,-1]),t=(rep(timen, each = num_reps)))
  temp <- temp[!is.na(temp$y),] # remove any missing data points

  # get linear fit
  lin.fit <- lm(temp$y~temp$t)
  # get coefficients
  coeff <- lin.fit$coefficients
  y_shift <- coeff[1]
  slo <- coeff[2]

  # get results dataframe
  res <- as.data.frame(t(c(slo,y_shift)))
  # add gene name
  gene_n <- genes[current_gene,1]
  res <- cbind(data.frame("gene_n"=gene_n, stringsAsFactors = F), res)

  # make final dataframe row
  fin <- c(coeff[2], coeff[1])
  names(fin) <- lin_param_n <- c("Slope", "Equilibrium_Value")

  # generating p-values
  all_pred <- lin.fit$fitted.values # fitted values
  testing <- suppressWarnings(cor.test(all_pred,temp$y, method = "kendall"))
  pval <- testing$p.value

  # generate slope p-value
  sumfit <- summary(lin.fit)
  slo_pval <- sumfit$coefficients[2,4]

  # results list
  res <- list("mod_name" = "lin",
              "model" = lin.fit,
              "param" = res,
              "pval" = pval,
              "slo_pval" = slo_pval,
              "final" = fin)

  return(res)
}

#' Function to calculate exponential model parameters
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#' @param avg_genes matrix of averaged gene expressions, over replicates
#'
#' @return list which contains: model name, fit model object, parameters, p value, data frame of parameters for output
#' @import minpack.lm
#' @importFrom stats cor.test
#' @importFrom stats nls.control
#' @importFrom stats coef
#' @importFrom stats AIC
#' @importFrom stats lm
#' @keywords internal
#' @noRd
calc_param_exp <- function(current_gene, timen, resol, num_reps, genes, avg_genes){
  # calculating averages for initial values
  if (num_reps == 1){
    y_val <- as.numeric(genes[current_gene,-1]) # all the gene values
  } else{
    y_val <- avg_genes[current_gene,] # starting values determined by average of replicates
  }

  # fitting based on actual values (assume positive amplitude)
  # shift values to get rid of y shift
  y_shift_e0 <- min(y_val,na.rm = T)
  y_edit <- y_val
  y_edit <- y_val-y_shift_e0
  # make it the next worst value, after the edit, so it doesn't skew the points (because log(0)=-Inf)
  y_edit[which.min(y_val)] <- abs(min(y_edit[-which.min(y_val)], na.rm = T))
  # do the fitting and assignment
  exp_mod_coeff <- lm(log(y_edit)~timen)$coefficients
  gam_e0 <- exp_mod_coeff[2]
  a_e0 <- exp(exp_mod_coeff[1])

  # fitting based on negative values (assuming negative ampltide)
  y_shift_e0_neg <- min(-y_val,na.rm = T)
  y_edit <- -y_val
  y_edit <- -y_val-y_shift_e0_neg
  # make it the next worst value, after the edit, so it doesn't skew the points (because log(0)=-Inf)
  y_edit[which.min(-y_val)] <- abs(min(y_edit[-which.min(-y_val)], na.rm = T))
  # do the fitting and assignment
  exp_mod_coeff <- lm(log(y_edit)~timen)$coefficients
  gam_e0_neg <- exp_mod_coeff[2]
  a_e0_neg <- -exp(exp_mod_coeff[1])
  y_shift_e0_neg <- -y_shift_e0_neg

  if (num_reps == 1){
    # set up data in df
    temp <- data.frame(y=cbind(unlist(genes[current_gene,-1])),t=cbind(rep(timen,each = num_reps)))
    temp <- temp[!is.na(temp$y),] # remove any missing data points


    #first: fitting with starting points based on positive amplitude assumption
    exp_mod_pos <- nls.lm(par = list(a=a_e0,gam=gam_e0, y_shift=y_shift_e0),
                          fn = fcn_single_one_rep,
                          fcall = exp_mod,
                          t = temp$t,
                          y = temp$y,
                          control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                   ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    #second: fitting with starting points based on negative amplitude assumption
    exp_mod_neg <- nls.lm(par = list(a=a_e0_neg,gam=gam_e0_neg, y_shift=y_shift_e0_neg),
                          fn = fcn_single_one_rep,
                          fcall = exp_mod,
                          t = temp$t,
                          y = temp$y,
                          control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                   ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )
  } else { #replicates
    # calculate weights
    weights <- calc_weights(current_gene,num_reps, genes)
    # set up data in df
    temp <- data.frame(y=cbind(unlist(genes[current_gene,-1])),t=cbind(rep(timen,each = num_reps)),w=cbind(rep(weights,each = num_reps)))
    temp <- temp[!is.na(temp$y),] # remove any missing data points

    #first: fitting with starting points based on positive amplitude assumption
    exp_mod_pos <- nls.lm(par = list(a=a_e0,gam=gam_e0, y_shift=y_shift_e0),
                          fn = fcn_single,
                          fcall = exp_mod,
                          t = temp$t,
                          y = temp$y,
                          w = temp$w,
                          control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                   ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )

    #second: fitting with starting points based on negative amplitude assumption
    exp_mod_neg <- nls.lm(par = list(a=a_e0_neg,gam=gam_e0_neg, y_shift=y_shift_e0_neg),
                          fn = fcn_single,
                          fcall = exp_mod,
                          t = temp$t,
                          y = temp$y,
                          w = temp$w,
                          control = nls.lm.control(nprint=0, maxiter = 1000, maxfev = 2000,
                                                   ftol=1e-6, ptol=1e-6, gtol=1e-6)
    )
  }

  # get coefficients
  coeff_pos <- coef(exp_mod_pos)
  coeff_neg <- coef(exp_mod_neg)

  # put coefficient estimates into an nls object
  # positive amplitude assumption
  exp_mod.fit_pos <- nls_edit(
    formula = y ~ exp_mod(a, gam, y_shift, t),
    data=temp,
    start = list(a=coeff_pos[1],gam=coeff_pos[2], y_shift=coeff_pos[3]),
    control = nls.control(maxiter = 0)
  )

  # negative amplitude assumption
  exp_mod.fit_neg <- nls_edit(
    formula = y ~ exp_mod(a, gam, y_shift, t),
    data=temp,
    start = list(a=coeff_neg[1],gam=coeff_neg[2], y_shift=coeff_neg[3]),
    control = nls.control(maxiter = 0)
  )

  # choose final fit based on lowest aic
  aic_res <- AIC(exp_mod.fit_pos, exp_mod.fit_neg)
  exp_mod.fit<- list(exp_mod.fit_pos, exp_mod.fit_neg)[[which.min(aic_res$AIC)]] # fitted object
  coeff <- list(coeff_pos, coeff_neg)[[which.min(aic_res$AIC)]] # coefficient

  gene_n <- genes[current_gene,1] # gene name
  # parameters
  res <- data.frame("gene_n"=gene_n,
                    "a" = coeff[1],
                    "gam" = coeff[2],
                    "y_shift" = coeff[3],
                    stringsAsFactors = F)

  # final df row
  fin <- coeff
  names(fin) <- exp_param_n <- c("Initial_Amplitude", "Growth_Rate", "Equilibrium_Value")

  # generating p-values
  all_pred <- exp_mod.fit$m$predict() # fitted values
  testing <- suppressWarnings(cor.test(all_pred,temp$y, method = "kendall"))
  pval <- testing$p.value

  # results list
  res <- list("mod_name" = "exp",
              "model" = exp_mod.fit,
              "param" = res,
              "pval" = pval,
              "final" = fin)

  return(res)
}

# mosaic run functions ----

#' Function to get the fit of the best model chosen
#'
#' @param best_mod a list containing the model parameters and model name
#' @param timen time points vector
#' @param resol resolution
#'
#' @return vector of fitted values
#' @keywords internal
#' @noRd
get_mod_fits <- function(best_mod, timen, resol){
  # turn parameters into a vector
  prm <- suppressWarnings(as.numeric(best_mod$param))

  # get the fitted values for each model, based on the parameters
  fit_val <-
    if (best_mod$mod_name == "echo"){
      alt_form(prm[3],prm[2],prm[4],prm[5],prm[6], timen)
    } else if (best_mod$mod_name == "echo_lin"){
      echo_lin_mod(prm[3],prm[2],prm[4],prm[5],prm[6],prm[7], timen)
    } else if (best_mod$mod_name == "exp"){
      exp_mod(prm[2],prm[3],prm[4],timen)
    } else if (best_mod$mod_name == "lin") {
      lin_mod(prm[2], prm[3], timen)
    } else {
      s1 <- rep(c(1,0), c(length(timen), length(timen)))
      s2 <- rep(c(0,1), c(length(timen), length(timen)))
      if (best_mod$mod_name == "echo_joint"){

        echo_joint_res_constr(
          prm[3],prm[9], # a
          prm[2],prm[8], # gam
          prm[4],prm[10], # omega
          prm[6],prm[12], # phi
          prm[7],prm[13], # yshift
          timen,s1,s2,resol)
      } else if (best_mod$mod_name == "echo_lin_joint"){
        echo_lin_joint_res_constr(
          prm[3],prm[10], # a
          prm[2],prm[9], # gam
          prm[4],prm[11], # omega
          prm[6],prm[13], # phi
          prm[7],prm[14], # slo
          prm[8],prm[15], # yshift
          timen,s1,s2,resol)
      }
    }

  return(fit_val)
}

#' Function to choose the best model (echo, echo linear, linear, exponential) for each dataset (not including joint model)
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param genes dataframe of gene expressions (first column is names, other columns are numeric)
#' @param avg_genes matrix of averaged gene expressions, over replicates
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list containing the best model object (see the calc_param functions), and the dataframe with bic values, model names, pvalues, ac category
#' @importFrom stats BIC
#' @keywords internal
#' @noRd
choose_best_mod <- function(current_gene, timen, resol, num_reps, genes, avg_genes, harm_cut, over_cut, low, high){
  # model names
  model_n <- c("lin", "exp", "echo", "echo_lin")

  # values from fitting each of the models with each of the gene data
  lin_val <- calc_param_lin(current_gene, timen, resol, num_reps, genes, avg_genes)
  exp_val <- calc_param_exp(current_gene, timen, resol, num_reps, genes, avg_genes)
  echo_val <- calc_param_echo(current_gene, timen, resol, num_reps, genes, avg_genes, harm_cut, over_cut, low, high)
  echo_lin_val <- calc_param_echo_lin(current_gene, timen, resol, num_reps, genes, avg_genes, harm_cut, over_cut, low, high)

  # get the bic values for each of the fits (aic is a misnomer based on original tests)
  aic_vals <- BIC(lin_val$model, exp_val$model, echo_val$model, echo_lin_val$model)
  # get the pvalues for each of the fits
  pvals <- c(lin_val$pval, exp_val$pval, echo_val$pval, echo_lin_val$pval)

  # save results -- choose the best model based on the lowest bic
  best_mod <- list(lin_val, exp_val, echo_val, echo_lin_val)[[which.min(aic_vals$BIC)]]

  # save all the data based on the lowest bic
  aic_df <- data.frame(rbind(c(0, aic_vals$BIC, NA, NA, NA, NA, 0, 0, NA, NA)))
  aic_df[1,1] <- aic_df[1,9] <- model_n[which.min(aic_vals$BIC)] # best model name
  aic_df[1,10] <- aic_df[1,11] <- pvals[which.min(aic_vals$BIC)] # best model pvalue
  aic_df[1,12] <- aic_df[1,13] <- if (!is.null(best_mod$type_gam)) best_mod$type_gam else "" # best ac coeff category
  colnames(aic_df) <- c("best_mod_orig", "lin_val", "exp_val", "echo_val", "echo_lin_val", "echo_joint_val", "echo_lin_joint_val", "free_joint_val", "best_mod_after", "best_mod_pval_orig", "best_mod_pval_after", "best_mod_type_gam_orig", "best_mod_type_gam_after") # names

  # results list
  all_mod <- list("best_mod" = best_mod, "aic_df" = aic_df)

  return(all_mod)
}

#' Function to run the full mosaic pipeline
#'
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param resol resolution
#' @param num_reps number of replicates
#' @param final_df_row row for final mosaic data frame, unfilled
#' @param genes_rna dataframe of rna gene expressions (first column is names, other columns are numeric)
#' @param genes_protein dataframe of protein gene expressions (first column is names, other columns are numeric)
#' @param avg_genes_rna matrix of averaged gene expressions, over replicates, for rna
#' @param avg_genes_pro matrix of averaged gene expressions, over replicates, for protein
#' @param rem_unexpr_combine a logical vector indicating whether the gene is unexpressed in rna OR protein
#' @param harm_cut postive number indicating the cutoff for a gene to be considered harmonic
#' @param over_cut postive number indicating the cutoff for a gene to be considered repressed/overexpressed
#' @param low lower limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#' @param high upper limit when looking for rhythms, in hours. Will not be used if finding rhythms of any length within timecouse (run_all_per is TRUE).
#'
#' @return list containing the final filled mosaic data frame row, and the bic dataframes for rna and protein (before and after joint modeling results)
#' @keywords internal
#' @noRd
multi_pipeline <- function(current_gene, timen, resol, num_reps, final_df_row, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, rem_unexpr_combine, harm_cut = .03, over_cut = .15, low = 20, high = 28){
  # preallocating
  final_df <- final_df_row

  # remove unexpressed genes
  if (rem_unexpr_combine[current_gene]){
    final_df$Best_Model_RNA <- final_df$Best_Model_Protein <- "Unexpressed"
    return(list(
      "final_df" = final_df,
      "aic_rna" = data.frame(),
      "aic_pro" = data.frame()
    ))
  }

  # remove genes with less than 3 datapoints -- should be unexpressed
  if (sum(!is.na(genes_rna[,-1])) < 3 | sum(!is.na(genes_pro[,-1])) < 3){
    final_df$Best_Model_RNA <- final_df$Best_Model_Protein <- "Unexpressed"
    return(list(
      "final_df" = final_df,
      "aic_rna" = data.frame(),
      "aic_pro" = data.frame()
    ))
  }

  # get names of parameters in order
  # model name to pretty mapping
  mod_name_map <- c("echo_lin" = "ECHO Linear",
                    "echo" = "ECHO",
                    "echo_lin_joint" = "ECHO Linear Joint",
                    "echo_joint" = "ECHO Joint",
                    "exp_lin" = "Exponential Linear",
                    "exp" = "Exponential",
                    "lin" = "Linear",
                    "const" = "Constant")

  # parameter names
  echo_lin_param_n <- c("Initial_Amplitude", "AC_Coefficient", "Oscillation_Type", "Radian_Frequency", "Period", "Phase_Shift", "Hours_Shifted", "Slope", "Equilibrium_Value")
  echo_lin_joint_param_n <- c(paste0(echo_lin_param_n, "_RNA"), paste0(echo_lin_param_n,"_Protein"))
  echo_param_n <- c("Initial_Amplitude", "AC_Coefficient", "Oscillation_Type", "Radian_Frequency", "Period", "Phase_Shift", "Hours_Shifted", "Equilibrium_Value")
  echo_joint_param_n <- c(paste0(echo_param_n, "_RNA"), paste0(echo_param_n,"_Protein"))
  exp_param_n <- c("Initial_Amplitude", "Growth_Rate", "Equilibrium_Value")
  lin_param_n <- c("Slope", "Equilibrium_Value")

  # put all the specific parameter names in a list
  param_map <- list(
    "echo_lin" = echo_lin_param_n,
    "echo_lin_joint" = echo_lin_joint_param_n,
    "echo" = echo_param_n,
    "echo_joint" = echo_joint_param_n,
    "exp" = exp_param_n,
    "lin" = lin_param_n
  )

  # so, we start by getting the best model for RNA and protein
  # rna:
  genes <- genes_rna
  avg_genes <- avg_genes_rna
  rna_mods <- choose_best_mod(current_gene, timen, resol, num_reps, genes_rna, avg_genes_rna, harm_cut, over_cut, low, high)
  aic_rna <- rna_mods$aic_df # save the bic (aic is a misnomer based on earlier tests)

  # protein
  genes <- genes_pro
  avg_genes <- avg_genes_pro
  pro_mods <- choose_best_mod(current_gene, timen, resol, num_reps, genes_pro, avg_genes_pro, harm_cut, over_cut, low, high)
  aic_pro <- pro_mods$aic_df # save the bic (aic is a misnomer based on earlier tests)

  # now, are both models oscillatory, or is one oscillatory and one is not?
  send_joint <- which((aic_rna$best_mod_orig == "echo" | aic_pro$best_mod_orig == "echo" |
                         aic_rna$best_mod_orig == "echo_lin" | aic_pro$best_mod_orig == "echo_lin") &
                        !(aic_rna$best_mod_orig == "echo" & aic_pro$best_mod_orig == "echo") &
                        !(aic_rna$best_mod_orig == "echo_lin" & aic_pro$best_mod_orig == "echo_lin") &
                        !(aic_rna$best_mod_orig == "echo_lin" & aic_pro$best_mod_orig == "echo") &
                        !(aic_rna$best_mod_orig == "echo" & aic_pro$best_mod_orig == "echo_lin"))

  if (length(send_joint) > 0){ # if we have a joint model
    # first, we need to get the joint model

    # get what the oscillatory model to jointly model on is supposed to be (echo or echo linear)
    if ("echo" %in% c(aic_rna$best_mod_orig[1], aic_pro$best_mod_orig[1])){
      # get one choice for starting points, using a completely joint model
      # echo completely joint model
      ej_mod <- calc_param_echo_joint(current_gene, timen, resol, num_reps, "orig", harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high)
      res <- ej_mod$param #extract parameter estimates

      # final fit: echo joint model with slack for protein
      ejc_mod <- calc_param_echo_joint_constr(current_gene, timen, resol, num_reps, "orig", res, harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high)
    } else {
      # get one choice for starting points, using a completely joint model
      # echo lin completely joint model
      ej_mod <- calc_param_echo_lin_joint(current_gene, timen, resol, num_reps, "orig", harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high)
      res <- ej_mod$param #extract parameter estimates

      # final fit: echo linear joint model with slack for protein
      ejc_mod <- calc_param_echo_lin_joint_constr(current_gene, timen, resol, num_reps, "orig", res, harm_cut, over_cut, genes_rna, genes_pro, avg_genes_rna, avg_genes_pro, low, high)
    }

    # then, compare with best free joint model
    # get best free joint model for rna and protein
    rna_mod <- rna_mods$best_mod
    pro_mod <- pro_mods$best_mod

    # compare joint to free model
    best_mod <- reeval_best_mod(ejc_mod, rna_mod, pro_mod, current_gene, timen, num_reps, resol, genes_rna, genes_pro)

    if (best_mod$mod_name != "free"){ # joint model is best model
      joint_mod <- best_mod$joint_mod
      # we want to substitute the joint model into the bic dataframes, updating the best model
      aic_rna$best_mod_after[1] <- aic_pro$best_mod_after[1] <- best_mod$mod_name # name
      aic_rna[1,paste0(best_mod$mod_name,"_val")] <-
        aic_pro[1,paste0(best_mod$mod_name,"_val")] <- best_mod$aic_joint # bic
      aic_rna$best_mod_pval_after[1] <- joint_mod$rna_pval # rna pvalue
      aic_pro$best_mod_pval_after[1] <- joint_mod$pro_pval # protein pvalue

      aic_rna$free_joint_val[1] <- aic_pro$free_joint_val[1] <- best_mod$aic_free # free model name
      aic_rna$best_mod_type_gam_after[1] <- joint_mod$type_gam_rna # ac coefficient category, rna
      aic_pro$best_mod_type_gam_after[1] <- joint_mod$type_gam_pro # ac coefficient category, protein

      # update the final mosaic dataframes

      # add the designation and the model names
      final_df$Best_Model_RNA[1] <- final_df$Best_Model_Protein[1] <- mod_name_map[[best_mod$mod_name]]
      final_df$P_Value_RNA[1] <- aic_rna$best_mod_pval_after[1]
      final_df$P_Value_Protein[1] <- aic_pro$best_mod_pval_after[1]
      final_df$P_Value_Joint[1] <- joint_mod$joint_pval

      # add the parameters
      final_df[1, param_map[[joint_mod$mod_name]]] <- joint_mod$final

      # add the fits
      final_df[1,(33+(length(timen)*num_reps)+1):(33+(length(timen)*num_reps)+length(timen))] <-
        get_mod_fits(joint_mod, timen, resol)[1:length(timen)]
      final_df[1,(33+(length(timen)*num_reps*2)+length(timen)+1):ncol(final_df)] <-
        get_mod_fits(joint_mod, timen, resol)[(length(timen)+1):(length(timen)*2)]
    } else { # the free model was best
      aic_rna$best_mod_after[1] <- aic_pro$best_mod_after[1] <- "free"
      aic_rna[1,paste0(best_mod$mod_name,"_val")] <-
        aic_pro[1,paste0(best_mod$mod_name,"_val")] <- best_mod$aic_joint
      aic_rna$free_joint_val[1] <- aic_pro$free_joint_val[1] <- best_mod$aic_free

      # add the designation and the model names
      final_df$Best_Model_RNA[1] <- mod_name_map[aic_rna$best_mod_orig[1]]
      final_df$Best_Model_Protein[1] <- mod_name_map[aic_pro$best_mod_orig[1]]
      final_df$P_Value_RNA[1] <- aic_rna$best_mod_pval_orig[1]
      final_df$P_Value_Protein[1] <- aic_pro$best_mod_pval_orig[1]

      # add the linear slope p-value, if it exists
      if (final_df$Best_Model_RNA[1] == "Linear"){
        final_df$P_Value_Linear_Slope_RNA[1] <- rna_mods$best_mod$slo_pval
      }
      if (final_df$Best_Model_Protein[1] == "Linear"){
        final_df$P_Value_Linear_Slope_Protein[1] <- pro_mods$best_mod$slo_pval
      }

      # add the parameters
      final_df[1, paste0(param_map[[aic_rna$best_mod_orig[1]]],"_RNA")] <-
        rna_mods$best_mod$final
      final_df[1, paste0(param_map[[aic_pro$best_mod_orig[1]]],"_Protein")] <-
        pro_mods$best_mod$final

      # add the fitted values
      final_df[1,(33+(length(timen)*num_reps)+1):(33+(length(timen)*num_reps)+length(timen))] <-
        get_mod_fits(rna_mods$best_mod, timen, resol)
      final_df[1,(33+(length(timen)*num_reps*2)+length(timen)+1):ncol(final_df)] <-
        get_mod_fits(pro_mods$best_mod, timen, resol)
    }
  } else { # there is no reason to joint model
    # add the designation and the model names
    final_df$Best_Model_RNA[1] <- mod_name_map[aic_rna$best_mod_orig[1]]
    final_df$Best_Model_Protein[1] <- mod_name_map[aic_pro$best_mod_orig[1]]
    final_df$P_Value_RNA[1] <- aic_rna$best_mod_pval_orig[1]
    final_df$P_Value_Protein[1] <- aic_pro$best_mod_pval_orig[1]

    # add the linear slope p-value, if it exists
    if (final_df$Best_Model_RNA[1] == "Linear"){
      final_df$P_Value_Linear_Slope_RNA[1] <- rna_mods$best_mod$slo_pval
    }
    if (final_df$Best_Model_Protein[1] == "Linear"){
      final_df$P_Value_Linear_Slope_Protein[1] <- pro_mods$best_mod$slo_pval
    }

    # add the parameters
    final_df[1, paste0(param_map[[aic_rna$best_mod_orig[1]]],"_RNA")] <-
      rna_mods$best_mod$final
    final_df[1, paste0(param_map[[aic_pro$best_mod_orig[1]]],"_Protein")] <-
      pro_mods$best_mod$final

    # add the fitted values
    final_df[1,(33+(length(timen)*num_reps)+1):(33+(length(timen)*num_reps)+length(timen))] <-
      get_mod_fits(rna_mods$best_mod, timen, resol)
    final_df[1,(33+(length(timen)*num_reps*2)+length(timen)+1):ncol(final_df)] <-
      get_mod_fits(pro_mods$best_mod, timen, resol)
  }

  # results list
  fin <- list(
    "final_df" = final_df,
    "aic_rna" = aic_rna,
    "aic_pro" = aic_pro
  )

  return(fin)

}

#' Function to combine respectively fitted models into one "free" model
#'
#' @param rna_mod best model object for rna (see the calc_param functions)
#' @param pro_mod best model object for protein (see the calc_param functions)
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param num_reps number of replicates
#' @param resol resolution
#' @param genes_rna dataframe of rna gene expressions (first column is names, other columns are numeric)
#' @param genes_protein dataframe of protein gene expressions (first column is names, other columns are numeric)
#'
#' @return nls model object of combined free models
#' @importFrom stats nls.control
#' @keywords internal
#' @noRd
get_free_comb_mod <- function(rna_mod, pro_mod, current_gene, timen, num_reps, resol, genes_rna, genes_pro){
  # model names
  model_n <- c("echo_lin","echo","exp_lin","exp","lin","const")

  # get the concatenated time points and weights
  t_stack <- c(rep(timen, each=num_reps), (rep(timen, each = num_reps)))
  weights <- c(rep(calc_weights_boot(unlist(genes_rna[current_gene,-1]), num_reps), each = num_reps),
               rep(calc_weights_boot(unlist(genes_pro[current_gene,-1]), num_reps), each = num_reps))

  # time points, repeated for each replicate
  t_rep <- length(rep(timen, each=num_reps))
  # binary vectors to indicate rna and protein in concatenation
  s1 <- rep(c(1,0), c(t_rep,t_rep)) # rna
  s2 <- rep(c(0,1), c(t_rep,t_rep)) # protein

  # figure out which omics type is the assumed oscillatory one
  # rna oscillatory
  if (which(model_n == rna_mod$mod_name) <= which(model_n == pro_mod$mod_name)){
    # order: rna is concatenated first
    rna_ord <- 1
    pro_ord <- 2

    # order models
    first_mod <- rna_mod
    second_mod <- pro_mod

    # concatenate data accordingly
    y_stack <- c(unlist(genes_rna[current_gene,-1]), unlist(genes_pro[current_gene,-1]))

    # model names put together
    mod_name <- paste0(rna_mod$mod_name,"_",pro_mod$mod_name)
  } else { # protein oscillatory
    # order: protein is concatenated first
    rna_ord <- 2
    pro_ord <- 1

    # order models
    first_mod <- pro_mod
    second_mod <- rna_mod

    # concatenate data accordingly
    y_stack <- c(unlist(genes_pro[current_gene,-1]), unlist(genes_rna[current_gene,-1]))

    # model names put together
    mod_name <- paste0(pro_mod$mod_name,"_",rna_mod$mod_name)
  }
  # put together the data
  temp <- data.frame(y = y_stack, t =t_stack, s1 = s1, s2 = s2, resol = rep(resol,length(y_stack)))
  temp <- temp[!is.na(temp$y),] # remove any missing data points

  # get the coefficients
  coeff1 <- as.numeric(first_mod$param[-1]) # oscillatory
  coeff2 <- as.numeric(second_mod$param[-1]) # non-oscillatory

  # model choice - very long switch statement!
  # nls model object of the free model
  free.fit <- {
    switch(mod_name,
           "echo_lin_lin" = { # echo linear, linear concatenation
             nls_edit(
               formula = y ~ echo_lin_lin(a1,gam1,omega1,phi1,slo1,y_shift1,slo2,y_shift2,t, s1, s2),
               data=temp,
               start = list(gam1=coeff1[1], a1=coeff1[2], omega1 = coeff1[3], phi1 = coeff1[4], slo1 = coeff1[5], y_shift1=coeff1[6],
                            slo2 = coeff2[1], y_shift2 = coeff2[2]
               ),
               control = nls.control(maxiter = 0)
             )
           },
           "echo_lin_exp" = { # echo linear, exponential concatenation
             nls_edit(
               formula = y ~ echo_lin_exp(a1,gam1,omega1,phi1,slo1,y_shift1,a2,gamma2,y_shift2,t, s1, s2),
               data=temp,
               start = list(gam1=coeff1[1], a1=coeff1[2], omega1 = coeff1[3], phi1 = coeff1[4], slo1 = coeff1[5], y_shift1=coeff1[6],
                            a2= coeff2[1],gamma2 = coeff2[2], y_shift2 = coeff2[3]
               ),
               control = nls.control(maxiter = 0)
             )
           },
           "echo_lin" = { # echo, linear concatenation
             nls_edit(
               formula = y ~ echo_lin(a1,gam1,omega1,phi1,y_shift1,slo2,y_shift2,t, s1, s2),
               data=temp,
               start = list(gam1=coeff1[1], a1=coeff1[2], omega1 = coeff1[3], phi1 = coeff1[4], y_shift1=coeff1[5],
                            slo2 = coeff2[1], y_shift2 = coeff2[2]
               ),
               control = nls.control(maxiter = 0)
             )
           },
           "echo_exp" = { # echo, exponential concatenation
             nls_edit(
               formula = y ~ echo_exp(a1,gam1,omega1,phi1,y_shift1,a2,gamma2,y_shift2,t, s1, s2),
               data=temp,
               start = list(gam1=coeff1[1], a1=coeff1[2], omega1 = coeff1[3], phi1 = coeff1[4], y_shift1=coeff1[5],
                            a2= coeff2[1],gamma2 = coeff2[2], y_shift2 = coeff2[3]
               ),
               control = nls.control(maxiter = 0)
             )
           }
    )
  }

  return(free.fit)
}

#' Function to compare the fitted joint model to the fitted "free" model
#'
#' @param joint model  best joint model object (see the calc_param functions)
#' @param rna_mod best model object for rna (see the calc_param functions)
#' @param pro_mod best model object for protein (see the calc_param functions)
#' @param current_gene index for gene that we're calculating parameters for
#' @param timen time points vector
#' @param num_reps number of replicates
#' @param resol resolution
#' @param genes_rna dataframe of rna gene expressions (first column is names, other columns are numeric)
#' @param genes_protein dataframe of protein gene expressions (first column is names, other columns are numeric)
#'
#' @return list containing which fitted model is better, the BICs of both, and the model (if joint is best)
#' @importFrom stats BIC
#' @keywords internal
#' @noRd
reeval_best_mod <- function(joint_mod, rna_mod, pro_mod, current_gene, timen, num_reps, resol, genes_rna, genes_pro){
  # get the concatenated "free" model
  free.fit <- get_free_comb_mod(rna_mod, pro_mod, current_gene, timen, num_reps, resol, genes_rna, genes_pro)

  # compare free to joint models with BIC
  mod_ord <- c("free","joint")
  aic <- BIC(free.fit, joint_mod$model)

  # choose the model with the lowest BIC
  if (mod_ord[which.min(aic$BIC)] == "free"){
    # free is best
    return(
      list(
        "mod_name" = "free",
        "aic_free" = aic$BIC[1],
        "aic_joint" = aic$BIC[2]
      )
    )
  } else {
    # joint is best
    return(
      list(
        "mod_name" = joint_mod$mod_name,
        "joint_mod" = joint_mod,
        "aic_free" = aic$BIC[1],
        "aic_joint" = aic$BIC[2]
      )
    )
  }
}

Try the mosaic.find package in your browser

Any scripts or data that you put into this service are public.

mosaic.find documentation built on Nov. 20, 2020, 9:06 a.m.