R/PSSLwithWeightsPrior.R

Defines functions PSSLwithWeightsPrior

#' A function rather aimed at developers
#' @noRd

PSSLwithWeightsPrior = function(newdat, weights, classProbCut = 0.9, ratioPrior = 1/2,  PShighGroup = "PShigh", 
                    PSlowGroup = "PSlow", imputeNA = FALSE, byrow = TRUE, imputeValue = c("median","mean")){
  imputeValue = imputeValue[1]
  ## imputee NA if imputeNA is true
  if(imputeNA){
    newdat = imputeNAs(dataIn = newdat, byrow = byrow, imputeValue = imputeValue)
  }
  
  # for PS approach, should standardize data first
  newdat = standardize(newdat)
  
  ### in case that some features in weight list are actually not in data, do the following
  tmp = intersect(names(weights), rownames(newdat))
  weights = weights[tmp]
  newdat = newdat[tmp,]

  #### in order to get mean and sd for each feature, use a ratio prior
  meansd = getMeanSdAllTraits(testdat = newdat, selectedTraits=names(weights), selectedTraitWeights=weights,
                              group1ratioPrior = ratioPrior)
  
  #  now, get mean of groups' means for each selected top features
  mean_2means = rowMeans(meansd[,1:2])
  mean_2means = cbind(mean_2means, meansd[,2], meansd[,1])
  colnames(mean_2means) = c("meanOfGroupMeans","refGroupMean","testGroupMean")
  
  # in fact, the above two objects can be combined into one matrix, and put the two most important items into col1 and col2
  PS_pars = cbind( mean_2means[,1],weights, mean_2means[,-1])
  colnames(PS_pars)[1] = colnames(mean_2means)[1]
  
  # get PS scores for all samples
  PS_score = apply(newdat[names(weights),], 2, getPS1sample, PSpars = PS_pars[,1:2])
  
  PS_class0 = ifelse(PS_score > 0,  PShighGroup,  PSlowGroup)  
  
  # #### 20190418: add classification calls based on Empirical Bayesian probabilities with EM (Expectation-Maximization)
  # emcut = AdaptGauss::EMGauss(PS_score, K = 2,fast=TRUE, MaxNumberofIterations = EMmaxRuns)
  # 
  # ### add a plot, hist with two distribution lines, do not need to save, just plot it
  # hist(PS_score, prob = TRUE, breaks = breaks)
  # curve(emcut$Weights[1]*dnorm(x, mean=emcut$Means[1], sd=emcut$SDs[1]), 
  #       col="red", lwd=2, add=TRUE, yaxt="n")
  # curve(emcut$Weights[2]*dnorm(x, mean=emcut$Means[2], sd=emcut$SDs[2]), 
  #       col="green", lwd=2, add=TRUE, yaxt="n")
  # abline(v=0, col = "red")
  # 

  #### 20190904, use the 0 theoretical cutoff to get two groups, which are used for empirial Bayesian prob calculation
  ttmp = PS_score[which(PS_score >= 0)]
  rtmp = PS_score[which(PS_score < 0)]
  testPSmean = mean(ttmp)
  refPSmean = mean(rtmp)
  testPSsd = sd(ttmp)
  refPSsd = sd(rtmp)
  
  PS_prob1 = getProb(PS_score, groupMeans = c(testPSmean, refPSmean), groupSds = c(testPSsd, refPSsd))
  
  PS_prob2= getProb(PS_score, groupMeans = c(refPSmean, testPSmean), groupSds = c(refPSsd, testPSsd))
  
  PS_class = rep("UNCLASS",length(PS_score))
  PS_class[which(PS_prob1 >= classProbCut)] = PShighGroup
  PS_class[which(PS_prob2 >= classProbCut)] = PSlowGroup
  
  # tmp = c(emcut$Means, emcut$SDs)
  # names(tmp) = c("testPSmean_EM","refPSmean_EM","testPSsd_EM","refPSsd_EM")
  
  #####################################################
  ########### after I used all PS_score, then define it as data.frame, otherwise, many of code will get trouble
  PS_score = data.frame(PS_score)
  # PS_test = cbind(PS_score, PS_class, PS_prob1, PS_prob2, PS_class0, PS_class_EM, PS_prob1_EM, PS_prob2_EM,
  #                   stringsAsFactors =F)
  # 
  # weights = data.frame(weights)
  # 
  # PS_pars =  list(weights, meansds = c(testPSmean, refPSmean, testPSsd, refPSsd), traitsmeansds = meansd,
  #                  tmp)
  # names(PS_pars) = c("weights","meansds","traitsmeansds", "meansds_EM")
  # 
  PS_test = cbind(PS_score, PS_class, PS_prob1, PS_prob2, PS_class0, stringsAsFactors =F)
  
  #weights = data.frame(weights)
  
  PS_pars =  list(weights, meansds = c(testPSmean, refPSmean, testPSsd, refPSsd), traitsmeans = mean_2means)
  names(PS_pars) = c("weights","meansds","traitsmeans")
  
  outs = list(PS_pars, PS_test)
  names(outs) = c("PS_pars","PS_test")
  
  return(outs)
  
}
ajiangsfu/PRPS documentation built on April 29, 2023, 10:13 p.m.