R/functions.R

Defines functions HiCRatio.Plot HiC.Plot ratio.color.bar color.bar ave.mean.median ave.filter matrixE.old ratioF workspace.folder.old matrixMultiE table.create combine.list testf data.combine filename.shortcut read.iced chip.selector inverse.matrix main.subsetter matrixMultiEt facs_density_plot3

Documented in ave.filter ave.mean.median chip.selector color.bar combine.list data.combine facs_density_plot3 filename.shortcut HiC.Plot HiCRatio.Plot inverse.matrix main.subsetter matrixE.old matrixMultiE matrixMultiEt ratio.color.bar ratioF read.iced table.create testf workspace.folder.old

#' This function plots a Ratio HiC plot.
#' The function uses global variables provided default by the pileup.r script.

#' @param plotData Data input in a matrix format.
#' @param plotTitle Title of the plot.
#' @details zoomC1 = zoomC/2
#' @details plotData$change <- log2(plotData[,3])
#' @details plotData$colvalue <- ifelse(plotData$change<0,-plotData$change, plotData$change)
#' @details rbPal <- colorRampPalette(c("white","blue"))
#' @details rbPal1 <- colorRampPalette(c("white","red"))
#' @details plotData$colvalue <- ifelse(plotData$colvalue<=ratioSBR, plotData$colvalue, ratioSBR)  #makes color values above the max = to the max range
#' @details plotData$colB <- ifelse(plotData$change<=0, plotData$colvalue, 0)
#' @details plotData$colR <- ifelse(plotData$change>=0, plotData$colvalue, 0)
#' @details plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
#' @details     ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
#' @details axis(2, at = ylabel,labels = y2label, las = 2)
#' @details axis(1, at = xlabel,labels = xlabel, las = 1)
#' @details palette(paste0(rbPal1(shades*ratioSBR ),"FF"))
#' @details points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
#' @details       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
#' @details       pch=18,
#' @details      cex=sz,
#' @details      col=shades*plotData$colR)
#' @details palette(paste0(rbPal(shades*ratioSBR ),"FF"))
#' @details points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
#' @details        ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
#' @details        pch=18,
#' @details        cex=sz,
#' @details        col=shades*plotData$colB)
#' @details title(main=plotTitle,line = -plotTitleOffset)
#' @details if(arrowYes == 1){
#' @details   arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
#' @details   arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
#' @details }
#' @details }
#' @param ArrowYes Boolean, 1 = arrows at CHiP peaks, 0 = No arrow.
#' @return A Ratio HiC plot
#' @export
HiCRatio.Plot <- function(plotData,plotTitle,arrowYes=0,offsetf = 1) {
  zoomC1 = zoomC/2
  plotData$change <- log2(plotData[,3])
plotData$colvalue <- ifelse(plotData$change<0,-plotData$change, plotData$change)
rbPal <- colorRampPalette(c("white","blue"))
rbPal1 <- colorRampPalette(c("white","red"))
plotData$colvalue <- ifelse(plotData$colvalue<=ratioSBR, plotData$colvalue, ratioSBR)  #makes color values above the max = to the max range
plotData$colB <- ifelse(plotData$change<=0, plotData$colvalue, 0)
plotData$colR <- ifelse(plotData$change>=0, plotData$colvalue, 0)

plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
     ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
axis(2, at = ylabel,labels = y2label, las = 2)
axis(1, at = xlabel,labels = xlabel, las = 1)
palette(paste0(rbPal1(shades*ratioSBR ),"FF"))
points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
       pch=18,
       cex=sz,
       col=shades*plotData$colR)
palette(paste0(rbPal(shades*ratioSBR ),"FF"))
points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
       pch=18,
       cex=sz,
       col=shades*plotData$colB)
title(main=plotTitle,line = -plotTitleOffset)
if(arrowYes == 1){
  arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
  arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
}

}
#' This function plots a HiC plot.
#' The function uses global variables provided default by the pileup.r script.
#' @param plotData Data input in a matrix format.
#' @param plotTitle Title of the plot.
#' @param ArrowYes Boolean, 1 = arrows at CHiP peaks, 0 = No arrows.
#' @details HiC.Plot <- function(plotData,plotTitle,arrowYes=0,offsetf=1) \{
#' @details palette(paste0(colfunc(r*log2(maxC)),"FF"))
#' @details plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
#' @details     ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
#' @details axis(2, at = ylabel,labels = y2label, las = 2)
#' @details axis(1, at = xlabel,labels = xlabel, las = 1)
#' @details points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
#' @details       ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
#' @details       pch=18,
#' @details       cex=sz,col=r*log2(plotData$V3))
#' @details title(main=plotTitle,line = -plotTitleOffset)
#' @details if(arrowYes == 1)\{
#' @details arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
#' @details arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
#' @details \}
#' @details \}
#' @return A HiC plot.
#' @export
HiC.Plot <- function(plotData,plotTitle,arrowYes=0,offsetf=1) {

  palette(paste0(colfunc(r*log2(maxC)),"FF"))
  plot(1,1,xlim=c(-zoomC1,zoomC1), ylim=c(-zoomC*(0.5*mirrorPlot),minY*1.5),
       ylab=yAxisTitle,xlab=xAxisTitle,yaxt="n",xaxt="n",yaxs="i",xaxs ="i") # added *2 to fit inverted plot
  axis(2, at = ylabel,labels = y2label, las = 2)
  axis(1, at = xlabel,labels = xlabel, las = 1)
  points(((plotData$V1-0.5)*resKB+(plotData$V2-plotData$V1+offsetf)*resKB*offset)-zoomC,
         ((plotData$V2)*resKB-(plotData$V1)*resKB)-zoomC,
         pch=18,
         cex=sz,col=r*log2(plotData$V3))
  #points((HiCPlot[[i]][,1]-minbin+0.5)*1+(HiCPlot[[i]][,2]-HiCPlot[[i]][,1]+0.5)*1*offset,-((HiCPlot[[i]][,2]-minbin+0.5)*1-(HiCPlot[[i]][,1]-minbin+0.5)*1*1),pch=18,cex=sz,col=r*log2(HiCPlot[[i]][,3]), xlim=c(0,maxs), ylim=c(0,maxs))
  # arrows(0,0-(zoomC+1),0,0-(zoomC+1),col="black",lwd=2,length=.1) #Add CENtromere
  # p = HiCPlot[[2]]
  title(main=plotTitle,line = -plotTitleOffset)


  if(arrowYes == 1){
    arrows(minarrowmax,arrowposx,minarrowmin,arrowposx,col="black",lwd=1,length=.025,code = 3)
    arrows(maxarrowmin,arrowposx,maxarrowmax,arrowposx,col="black",lwd=1,length=.025,code = 3)
  }

}
#' Plots a ratio color bar
#' Example using pileup.r
#' @example ratio.color.bar(colorRampPalette(c("red","white","blue"))(shades*2), ratioSBR,round(ratioSBR+1,0))
#' @param lut A color vector for example red, white, blue
#' @param min Minimum value for the color bar
#' @param max Maximum value for the color bar
#' @param nticks Number of ticks for the color bar
#' @details ratio.color.bar <- function(lut,min,nticks, title='') \{
#' @details max=-min
#' @details ticks=seq(min, 0, len=nticks)\}
#' @details scale = (length(lut)-1)/(max-min)
#' @details tes <- 2^ticks
#' @details p = length(tes)
#' @details tes[p] = 0
#' @details plot(c(5,5), c(min,max), pch=15,cex=sz*1, type='n', bty='n', xlim=c(0,10), xaxt='n', xlab='', yaxt='n', ylab="Folds of Change Scalebar", main=title)
#' @details axis(2, ticks,tes ,las=1)  #red scalebar
#' @details axis(2, -ticks,tes ,las=1) #blue scalebar
#' @details for (i in 1:(length(lut)-1)) \{
#' @details   y = (i-1)/scale + min
#' @details   rect(1,y,9,y+1/scale, col=lut[i], border=lut[i])
#' @details \}
#' @details \}
#' @return A ratio color bar
#' @export
ratio.color.bar <- function(lut,min,nticks, title='') {
  max=-min
  ticks=seq(min, 0, len=nticks)
  scale = (length(lut)-1)/(max-min)
  tes <- 2^ticks
  p = length(tes)
  tes[p] = 0
  # tes2 = tes/2^4
  plot(c(5,5), c(min,max), pch=15,cex=sz*1, type='n', bty='n', xlim=c(0,10), xaxt='n', xlab='', yaxt='n', ylab="Folds of Change Scalebar", main=title)
  axis(2, ticks,tes ,las=1)  #red scalebar
  axis(2, -ticks,tes ,las=1) #blue scalebar
  for (i in 1:(length(lut)-1)) {
    y = (i-1)/scale + min
    rect(1,y,9,y+1/scale, col=lut[i], border=lut[i])
  }
}
#' Plots a color bar
#' @details color.bar <- function(C,sz) \{
#' @details #New scale bar plot:
#' @details  scalebar=1:C
#' @details  scalebar=2^(seq(log2(1), log2(C), length.out = 50))
#' @details  scalebar2=tail(scalebar,-1)
#' @details  scalebar=head(scalebar,-1)
#' @details  #plot:
#' @details  plot(5,5,pch=15,cex=sz*1, type="n",las=2, log="y", xlim=c(0,10), ylim=c(1,C), xaxt="n",xlab=NA, ylab="Interaction freq per 10 million pairs",at=c(1,2,5,10,20,50,100,200,500,1000,2000,5000,10000))
#' @details  rect(rep(1, length(scalebar)),scalebar,
#' @details       rep(9, length(scalebar)),scalebar2,pch=15,cex=sz*1, col=r*log2(scalebar),border = r*log2(scalebar))
#' @details \}
#' @param C An arbitary maximum for the colour scale
#' @param sz Scaling of bin pixels
#' @return A color bar
#' @export
color.bar <- function(C,sz) {
  #New scale bar plot:
  scalebar=1:C
  scalebar=2^(seq(log2(1), log2(C), length.out = 50))
  scalebar2=tail(scalebar,-1)
  scalebar=head(scalebar,-1)
  #plot:
  plot(5,5,pch=15,cex=sz*1, type="n",las=2, log="y", xlim=c(0,10), ylim=c(1,C), xaxt="n",xlab=NA, ylab="Interaction freq per 10 million pairs",at=c(1,2,5,10,20,50,100,200,500,1000,2000,5000,10000))

  rect(rep(1, length(scalebar)),scalebar,
       rep(9, length(scalebar)),scalebar2,pch=15,cex=sz*1, col=r*log2(scalebar),border = r*log2(scalebar))

}
#' This function plots pileups of each selected chromosome
#' @details save.mean.median <- function(aveType,aveLArg,rc,pT,arrowsYes=0,offsetf=1, noReflect=1)\{
#' @details if (noReflect != 1)\{
#' @details  aveLArg[[1]]  = ave.filter(list(aveLArg[[1]],inverse.matrix(aveLArg[[1]])),aveType)
#' @details  aveLArg[[2]]  = ave.filter(list(aveLArg[[2]],inverse.matrix(aveLArg[[2]])),aveType)
#' @details \}
#' @details HiC.Plot(matrix.sparse(aveLArg[[1]]),pT[1],arrowsYes,offsetf)
#' @details HiC.Plot(matrix.sparse(aveLArg[[2]]),pT[2],arrowsYes,offsetf)
#' @details color.bar(maxC,sz)
#' @details HiCRatio.Plot(ratioF(aveLArg[[1]],aveLArg[[2]],rc),pT[3],arrowsYes,offsetf)
#' @details ratio.color.bar(colorRampPalette(c("red","white","blue"))(shades*2), ratioSBR,round(ratioSBR+1,0))
#' @details \}
#' @param aveType Average type mean or median
#' @param aveLArg A list of chromosomes
#' @param pT A list of plot titles
#' @param arrowsYes Boolean. 1 = arrows at CHiP sights, 2 = no arrows
#' @param offsetf The plot offset. Default = 1 Path to the input file
#' @param noReflect Boolean.1 No x axis reflection. 0 means x axis reflection
#' @example ave.mean.median(m,pileListAve,ratioConstant,plotTitle,arrowTrue,noReflect = noReflectMatrix)
#' @return Plots pileups of selected chromosomes.
#' @export
ave.mean.median <- function(aveType,aveLArg,rc,pT,arrowsYes=0,offsetf=1, noReflect=1){
  if (noReflect != 1){
    aveLArg[[1]]  = ave.filter(list(aveLArg[[1]],inverse.matrix(aveLArg[[1]])),aveType)
    aveLArg[[2]]  = ave.filter(list(aveLArg[[2]],inverse.matrix(aveLArg[[2]])),aveType)
  }
  HiC.Plot(matrix.sparse(aveLArg[[1]]),pT[1],arrowsYes,offsetf)
  HiC.Plot(matrix.sparse(aveLArg[[2]]),pT[2],arrowsYes,offsetf)
  color.bar(maxC,sz)
  HiCRatio.Plot(ratioF(aveLArg[[1]],aveLArg[[2]],rc),pT[3],arrowsYes,offsetf)
  ratio.color.bar(colorRampPalette(c("red","white","blue"))(shades*2), ratioSBR,round(ratioSBR+1,0))
}
#' Filters out null points in the matrix.
#' Applies an average every to all the hits in the pileup.
#' @details ave.filter <-function(input,aveType)\{
#' @details t1 = Filter(Negate(is.null), input)
#' @details output = aaply(laply(t1, as.matrix), c(2, 3), aveType,na.rm=TRUE)
#' @details return (output)
#' @details \}
#' @param input a list of matrices of the same dimensions required
#' @param aveType The type of average. 'mean' or 'median'.
#' @return A pileup of averaged values calculated from a list of matrices.
#' @example ave.filter(pileUpList[[d]],av)
#' @export
#' @import plyr
ave.filter <-function(input,aveType){
  t1 = Filter(Negate(is.null), input)
  output = aaply(laply(t1, as.matrix), c(2, 3), aveType,na.rm=TRUE)
  return (output)
}
#' Converts a dataframe to a sparsematrix. required for ploting the HiC plots
#' @details matrix.sparse <- function (input)\{
#' @details i <- Matrix::Matrix(as.matrix(input), sparse = TRUE) # Converts dataframe back to a matrix and converts this to a sparseMatrix format
#' @details output = summary(i) #Expands back to a list
#' @details colnames(output)= c("V1","V2","V3")
#' @details return(output)
#' @details \}
#' @param input A dataframe of HiC interactions
#' @return A sparsematrix 3 columns.
#' @example HiC.Plot(matrix.sparse(aveLArg[[1]]),pT[1],arrowsYes,offsetf)
#' @export
#' @import Matrix
matrix.sparse <- function (input){
  i <- Matrix::Matrix(as.matrix(input), sparse = TRUE) # Converts dataframe back to a matrix and converts this to a sparseMatrix format
  output = summary(i) #Expands back to a list
   colnames(output)= c("V1","V2","V3")
  return(output)
}
#' Load a Matrix
#' Expands and aligns the matrices
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#' @details matrixE <- function(n,input,pres,minbinL,dimZ) \{
#' @details t <- sparseMatrix(i=input[,1]+pres-minbinL+1, j=input[,2]+pres-minbinL+1, x=input[,3],symmetric=T,dims=c(dimZ, dimZ)-1)
#' @details o = as.data.frame(as.matrix(t))
#' @details o[ o  ==0] = NA
#' @details kPlot <- matrix.sparse(o)
#' @details output=list(o,kPlot)
#' @details return(output)
#' @details \}
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export

matrixE.old <- function(n,input,pres,minbinL,dimZ) {
  t <- sparseMatrix(i=input[,1]+pres-minbinL+1, j=input[,2]+pres-minbinL+1, x=input[,3],symmetric=T,dims=c(dimZ, dimZ)-1)
  o = as.data.frame(as.matrix(t))
  o[ o  ==0] = NA
  kPlot <- matrix.sparse(o)
  output=list(o,kPlot)
  return(output)
}
#' Finds the ratio between two inputted matrices
#'
#' @details ratioF <- function(input1,input2,rc) \{
#' @details  input1[is.na(input1)] = 0
#' @details  input2[is.na(input2)] = 0
#' @details  input1 = input1 + rc
#' @details  input2 = input2 + rc
#' @details  ratio = input1/input2
#' @details  #Recompress sparsematrix:
#' @details k <- matrix.sparse(ratio)
#' @details  return (k)
#' @details \}
#' @param input1 matrix 1
#' @param input2 matrix 2
#' @param rc ratio constant
#' @return The ratio of change between two matrices output as a sparse matrix
#' @export

ratioF <- function(input1,input2,rc){
  input1[is.na(input1)] = 0
  input2[is.na(input2)] = 0
  input1 = input1 + rc

  input2 = input2 + rc
  ratio = input1/input2
  # ratio[ ratio  ==NA] = 2
  # # is.na(ratio)
  # ratio[is.na(ratio)] = 2 # Remove all NaNs


  #Recompress sparsematrix:


  k <- matrix.sparse(ratio)

  return (k)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
workspace.folder.old <- function(inputStr, toRemove) {
  toRemove=c("completed","dev","centromere","proteincolocalDBS","proteinDBS")
  d <- unlist(strsplit(inputStr, "/"))
  paste(d[!d %in% toRemove], collapse = "/")
}
#' Input a sparseMatrix dataframe.
#' Outputs a list of 3. 1. Averaged matrix data frame. 2. Averaged sparse data frame. 3. full matrix dataframe
#' This function first converts each element of the inputed list into a full matrix
#' The function then averages the full matrix using the \link[georgeFunctions::ave.filter]{ave.filter} function
#' The function then converts the full matrix back to a sparse matrix
#' All the data is outputed as a list
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
#' @import plyr
matrixMultiE <- function(n,input,pres,minbinL,dimZ,averg,BSlist) {
  lo = list()
  for (j in 1:length(BSlist$V1)){
    t <- sparseMatrix(i=input[[j]]$V1+pres[[j]]-minbinL[[j]]+1, j=input[[j]]$V2+pres[[j]]-minbinL[[j]]+1, x=input[[j]]$V3,symmetric=T,dims=c(dimZ, dimZ)-1)

    o = as.data.frame(as.matrix(t))
    o[ o  ==0] = NA
    lo[[j]] = o

  }
  c = o
  if (j > 1){
    c = ave.filter(lo,averg)
  }

  kPlot <- matrix.sparse(c)
  output=list(c,kPlot,lo)
  return(output)
}

#' Load a Matrix tests
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
table.create <- function(input1,n,input2=NULL,coLRange=500,noII=1,randTrue=0,seed){

  output = subset(input1,V1 ==n)
  if (randTrue == "1"){
    set.seed(seed)
    output$V2=sort(round(runif(length(output$V2),min=0,max=chrSize[n])))

  }
  if (!is.null(input2)){
  input2 = subset(input2,V1 ==n)
  output$V3 = mapply(testf,output$V2,MoreArgs = list(input2$V2,coLRange),SIMPLIFY=FALSE)
  if (noII == 1){output= output[which(output$V3 ==TRUE),]}
  if (noII == 0){output= output[which(output$V3 ==FALSE),]}
  output <- output[ -3 ]  #drops col 3
}
  if (length(output$V2)>1){
  output$V3=filter(output$V2,c(1,-1))#reports the difference between rows
  output= output[-length(output$V1),]#removes last row of the collumn, not needed as the distance of loop is calc from the row before
  output$V4 =  round((output$V3/2)+output$V2, digits = 0)
  }else{
    output$V3=0
    output$V4=0
    }

  return (output)


}

#' Flattens list elements
#'
#' removes the chromosome origin from a list.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
combine.list <-function(input,listpo = 3){
  #combines each chromosome loop pile up into 1 list
  a = length(input)
  output = list()

  input<-input[1:16]#fills list with nulls to 16
  iv = c(input[[1]][[listpo]],input[[2]][[listpo]],input[[3]][[listpo]],input[[4]][[listpo]],input[[5]][[listpo]],input[[6]][[listpo]],input[[7]][[listpo]],input[[8]][[listpo]],input[[9]][[listpo]],input[[10]][[listpo]],input[[11]][[listpo]],input[[12]][[listpo]],input[[13]][[listpo]],input[[14]][[listpo]],input[[15]][[listpo]],input[[16]][[listpo]])


  return (iv)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
testf <-function(a,b,coR){
  for (j in b){
    # max = a - tempC
    # min = tempC -a
    output = findInterval(a,c(j-coR,j+coR)) == 1
    if (output == TRUE) {return(output)}

  }
  # if (!is.null(max)){
  #   if (max<=coLR){
  #     output = 1
  #   }
  #   if (max<=coLR){
  #     output = 1
  #   }}

  return (FALSE)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
data.combine <-function(n,input,HiCbed,zoomC,resKB,HiC){
ti1 = list()
ti2 = list()

spres=list()
sminbinL=list()
d = length(input)
if (d >0) {


  for (j in 1:length(input))
  {

    #Subset data for chromosome of interest
    Dbed1=subset(HiCbed, V1==n)
    #subset data for region around centromere
    Dbed1 = subset(Dbed1, V2 %in% (input[j]-(zoomC*1000)):(input[j]+(zoomC*1000)) & V3 %in% (input[j]-(zoomC*1000)):(input[j]+(zoomC*1000)))
    minbin=min(Dbed1$V4)
    maxbin=max(Dbed1$V4)

    # # for (g in 2:3){ Dbed1[,g]= Dbed1[,g]}
    # maxs=(maxbin-minbin)*resKB #max x and y value to plot based on resolution selected
    p = zoomC - input[j]/1000
    if (p<0){p=0}
    spres[[j]] = p/resKB
    sminbinL[[j]] = minbin
    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####

    ti1[[j]] = subset(HiC[[1]], V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin) #breaks the data per chromosome and stores ina list
    ti2[[j]] = subset(HiC[[2]], V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin)
  }


  output = list(ti1,ti2,spres,sminbinL)
} else {


  output = list(NULL,NULL,1,1)
  # chrNR  <- chrN [ chrN != n ]

  # chrNR = chrN[-n]

}


# HiClist1[[n]]=ti1
# HiClist2[[n]]=ti2
# pres[[n]] = spres
# minbinL[[n]]=sminbinL
#
return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
filename.shortcut <-function(input){
  if (input == "spo11"){return ("HiC_SSY12_ndt80Dspo11Y135F_1B2_8h_down5")}
if (input == "wt"){return ("HiC_SSY14_ndt80D_2A2_8h_down5")}
if (input == "rec8"){return ("HiC_SSY20_ndt80Drec8D_1A2_8h_down5")}
if (input == "zip1"){return ("HiC_SSY25_ndt80Dzip1D_2A_8h_down5")}
if (input == "hop1"){return ("HiC_SSY49_ndt80Dhop1D_1A_8h_down5")}

}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
read.iced <-function(filename,binSize=10000){
  output=read.delim(paste0("matrix/",filename,"/new_sizes/iced/",binSize,"/",filename,"_",binSize,"_iced.matrix"), header=FALSE)
  return(output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
chip.selector <-function(input1,input2 = NULL,randTrue=0,seed=453){
  readsvec <- c()
  proteinOutput = list()
  for (s in chrN){

    proteinOutput[[s]] =table.create(input1=input1,n=s,input2=input2,coLRange=coLR,noII=noInverseInverse,randTrue=randTrue,seed=seed)


    proteinOutput[[s]] = subset(proteinOutput[[s]], V3 %in% (loopSMin:loopSMax))


    if (length(proteinOutput[[s]]$V1)>0){



      readsvec[s] = length(proteinOutput[[s]]$V1)
    }else{
      readsvec[s] = 0
      proteinOutput[[s]] = data.frame(V1=character())
    }

  }
  readsvec=ifelse(is.na(readsvec),0,readsvec)
  output = list(proteinOutput,readsvec)
  return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
inverse.matrix <- function(input){

  output = input[c(nrow(input):1),c(nrow(input):1),drop = FALSE]
  return (output)
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
main.subsetter <-function(input,midpoint = 1,chrN,HiCbed,zoomC,resKB,HiC){
  output = list()
  minbinL = list()
  pres = list()
  for (n in chrN){
    ti1 = list()

    spres=list()
    sminbinL=list()
    d = length(input[[n]]$V3)
    if (d >0) {


      for (j in 1:length(input[[n]]$V4))
      {

        #Subset data for chromosome of interest
        Dbed1=subset(HiCbed, V1==n)
        #subset data for region around centromere
        if (midpoint == 0){v=input[[n]]$V2[j]}else{v=input[[n]]$V4[j]}
        Dbed1 = subset(Dbed1, V2 %in% (v-(zoomC*1000)):(v+(zoomC*1000)) & V3 %in% (v-(zoomC*1000)):(v+(zoomC*1000)))
        minbin=min(Dbed1$V4)
        maxbin=max(Dbed1$V4)


        p = zoomC - v/1000
        if (p<0){p=0}
        spres[[j]] = p/resKB
        sminbinL[[j]] = minbin
        ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####    ####

        ti1[[j]] = subset(HiC, V1>=minbin & V1 <=maxbin & V2>=minbin & V2 <=maxbin) #breaks the data per chromosome and stores ina list

      }

      output[[n]] =ti1
      pres[[n]] <- spres
      minbinL[[n]]<-sminbinL
    } else {
      output[[n]]=NULL
      pres[[n]] <- 1
      minbinL[[n]]<-1
    }
  }


  return (list(output,minbinL,pres))
}
#' Load a Matrix
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
matrixMultiEt <- function(n,input,pres,minbinL,dimZ,averg,BSlist,noReflect=1,aveOnly=0) {
  lo = list()
  for (j in 1:length(BSlist$V1)){
    t <- sparseMatrix(j=input[[j]]$V1+pres[[j]]-minbinL[[j]]+1, i=input[[j]]$V2+pres[[j]]-minbinL[[j]]+1, x=input[[j]]$V3,symmetric=T,dims=c(dimZ, dimZ))
    o = as.data.frame(symmpart(as.matrix(t)))
    o[ o  ==0] = NA

    o = o[-nrow(o),-nrow(o)]
    lo[[j]] = o

  }
  if (aveOnly != 1){
  c = o
  if (j > 1){
    c = ave.filter(lo,averg)
  }
  if (noReflect != 1){c = ave.filter(list(inverse.matrix(c),c),averg)}
  kPlot <- matrix.sparse(c)
  }else{
    kPlot = 1
    c = 1
    }
  output=list(c,kPlot,lo)
  return(output)
}
#' facs density plot
#'
#' This function loads a file as a matrix. It assumes that the first column
#' contains the rownames and the subsequent columns are the sample identifiers.
#' Any rows with duplicated row names will be dropped with the first one being
#' kepted.
#'
#' @param infile Path to the input file
#' @return A matrix of the infile
#' @export
facs_density_plot3 <- function(strain_code, dir='./', output_dir="output", gate,
                               plot_color='black',
                               file_format='jpeg', user_input=TRUE,plotmindensity=TRUE,
                               gate_range_blue=c(300000,380000), 
                               gate_range_green=c(620000,750000),
                               gate_range_yellow=c(900000,910000),
                               gate_range_purple=c(1000000,1300000),
                               date,
                               peakcalib=50000){  
  # IO checks
  gate_range1 = gate_range_blue +peakcalib
  gate_range2 = gate_range_green +peakcalib
  gate_range3 = gate_range_yellow +peakcalib
  gate_range4 = gate_range_purple + peakcalib
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    # Ask user to save file
    question <- 'install BiocManager?'
    
    title <- question
    choices <- c('No', 'Yes')
    answer <- menu(choices, graphics = FALSE, title)
    if(answer == 0 | answer == 1){
      stop('Requires R package "BiocManager". Please install it.\n', call. = FALSE)
    }
    install.packages("BiocManager")
  } 
  if (!requireNamespace("flowCore", quietly = TRUE)) {
    # Ask user to save file
    question <- 'install flowCore?'
    
    title <- question
    choices <- c('No', 'Yes')
    answer <- menu(choices, graphics = FALSE, title)
    if(answer == 0 | answer == 1){
      stop('Requires R package "flowCore". Please install it.\n',
           '          source("http://bioconductor.org/biocLite.R")\n',
           '          biocLite("flowCore")', call. = FALSE)
    }
    BiocManager::install("flowCore")
  }
  
  
  if (!requireNamespace("ggcyto", quietly = TRUE)) {
    # Ask user to save file
    question <- 'install ggcyto?'
    
    title <- question
    choices <- c('No', 'Yes')
    answer <- menu(choices, graphics = FALSE, title)
    if(answer == 0 | answer == 1){
      stop('Requires R package "ggcyto". Please install it.\n',
           '          source("http://bioconductor.org/biocLite.R")\n',
           '          biocLite("ggcyto")', call. = FALSE)
    }
    BiocManager::install("ggcyto")
  }
  
  if (!requireNamespace("openCyto", quietly = TRUE)) {
    # Ask user to save file
    question <- 'install openCyto?'
    
    title <- question
    choices <- c('No', 'Yes')
    answer <- menu(choices, graphics = FALSE, title)
    if(answer == 0 | answer == 1){
      stop('Requires R package "openCyto". Please install it.\n',
           '          source("http://bioconductor.org/biocLite.R")\n',
           '          biocLite("openCyto")', call. = FALSE)
    }
    BiocManager::install("openCyto")
  }
  library(ggcyto)
  library(openCyto)
  
  
  if(!file_format %in% c('jpeg', 'pdf')){
    stop('"file_format" must be one of "jpg" and "pdf".', call. = FALSE)
  }
  
  # Import .fcs files
  message('Loading fcs files...')
  files <- list.files(fileloc)
  target_files <- files[grep(strain_code, files)]
  # Found files for required strain?
  if (!exists('target_files') | length(target_files) == 0) {
    stop('Could not find any files containing "', strain_code,
         '" in their name.', call. = FALSE)
  }
  target_files <- target_files[grep("fcs", target_files)]
  # Found .fcs files for required strain?
  if (!exists('target_files') | length(target_files) == 0) {
    stop('Could not find any ".fcs" files containing "', strain_code,
         '" in their name.', call. = FALSE)
  }
  
  # Load all files into list
  samples <- vector("list")
  for (file in target_files) {
    # Determine the time point of the file. This assumes that the format of 
    # the file name is: date_yeastLine_timePoint.fcs
    time_point <- strsplit(strsplit(file, "\\.") [[1]][1], "_")[[1]][2]
    
    # Read the FCS file
    samples[[time_point]] <- flowCore::read.FCS(paste0(fileloc, file))
  }
  
  # Convert to "flowSet"
  fs <- as(samples, "flowSet")
  
  message('Plotting data...')
  # Plot
  
  gate_range1[2]
  # Show ungated plot on screen
  den.gates.1 <- fsApply(fs, openCyto:::.mindensity2, channel = "FL1-A",pivot=TRUE,max=1)
  den.gates.2 <- fsApply(fs, openCyto:::.mindensity2, channel = "FL1-A",pivot=TRUE,min=gate_range1[1],max=gate_range1[2])
  den.gates.3 <- fsApply(fs, openCyto:::.mindensity2, channel = "FL1-A",pivot=TRUE,min=gate_range2[1],max=gate_range2[2])
  den.gates.4 <- fsApply(fs, openCyto:::.mindensity2, channel = "FL1-A",pivot=TRUE,min=gate_range3[1],max=gate_range3[2])
  den.gates.5 <- fsApply(fs, openCyto:::.mindensity2, channel = "FL1-A",pivot=TRUE,min=gate_range4[1],max=gate_range4[2])
  
  p= ggcyto(fs, aes(x = `FL1-A`)) + geom_density(fill = plot_color, aes(y = ..scaled..)) + coord_cartesian(xlim = gate)
  if (plotmindensity==TRUE){ 
    p= p +  geom_gate(den.gates.1)+geom_gate(den.gates.2,colour="blue") +geom_gate(den.gates.3,colour="green")+geom_gate(den.gates.4,colour="yellow")+geom_gate(den.gates.5,colour="purple") 
    
  }
  
  print(p)
  one=compute_stats(fs,den.gates.1,type = "count")
  totalc=one
  two=compute_stats(fs,den.gates.2,type = "count")
  three=compute_stats(fs,den.gates.3,type = "count")
  four=compute_stats(fs,den.gates.4,type = "count")
  five=compute_stats(fs,den.gates.5,type = "count")
  
  percentcalc1 <- function(input,previous){
    temp=input
    input[,3]=previous[,3]-input[,3]
    return(input)
  }
  # https://bioconductor.org/packages/devel/bioc/manuals/ggcyto/man/ggcyto.pdf
  percentcalc2 <- function(input,total){
    temp=(input/total)*100
    input=round(temp)
    return(input)
  }
  m <- data.frame(matrix(0, ncol = 5, nrow = nrow(one)))
  peaks <- data.frame(matrix(0, ncol = 12, nrow = nrow(one)))
  colnames(peaks) <- c("names","Debris", "G1","S","G2","Rest","Debris", "G1","S","G2","Rest","strainid")
  colnames(m) <- c(".rownames","G1", "G2","names","strainid")
  m[,1]=one[,1]
  m[,4]=one[,4]
  m[,5]=strain_code
  
  peaks[,1]=one[,4]
  peaks[,12]=strain_code
  
  one=percentcalc1(two,one)
  two=percentcalc1(three,two)
  three=percentcalc1(four,three)
  four=percentcalc1(five,four)
  
  totalg1g2= two[,3]+four[,3]
  m[,2]=percentcalc2(two[,3], totalg1g2)
  m[,3]=percentcalc2(four[,3], totalg1g2)
  print(m)
  peaks[,2]=round(one[,3])
  peaks[,3]=round(two[,3])
  peaks[,4]=round(three[,3])
  peaks[,5]=round(four[,3])
  peaks[,6]=round(five[,3])
  one[,3]=percentcalc2(one[,3], totalc[,3])
  two[,3]=percentcalc2(two[,3], totalc[,3])
  three[,3]=percentcalc2(three[,3], totalc[,3])
  four[,3]=percentcalc2(four[,3], totalc[,3])
  five[,3]=percentcalc2(five[,3], totalc[,3])
  
  print(one[1,3]+two[1,3]+three[1,3]+four[1,3]+five[1,3])
  
  peaks[,7]=one[,3]
  peaks[,8]=two[,3]
  peaks[,9]=three[,3]
  peaks[,10]=four[,3]
  peaks[,11]=five[,3]
  print(peaks[,7]+peaks[,8]+peaks[,9]+peaks[,10]+peaks[,11])
  print(p)
  file_name <- paste0(dir,output_dir,'/',strain_code)
  message(file_name)
  if (!(file.exists(output_dir))){
    
    dir.create(output_dir)
  }
  
  if (!(file.exists(paste0(output_dir,'/', date)))){
    
    dir.create(paste0(output_dir,'/', date))
  }
  if (!(file.exists(paste0(output_dir,'/',date,'/',strain_code)))){
    
    dir.create(paste0(output_dir,'/',date,'/',strain_code))
  }
  
  save.file.loc=paste0(output_dir,'/',date,'/',strain_code)
  print(save.file.loc)
  if (!missing(gate)) print(p + xlim(gate))
  message('Saving data:')
  message('   ', save.file.loc)
  print(save.file.loc)
  if (file_format == 'jpeg') {
    dev.copy(jpeg, paste0(save.file.loc,'/',strain_code,'.jpg'),width=1200,height=1200)
  } else {
    dev.copy(pdf, paste0(save.file.loc,'/',strain_code,'.pdf'))
  }
  
  dev.off()
  write.table(peaks,paste0(save.file.loc,'/',strain_code,"_peakdist.txt"),sep="\t",row.names=FALSE)
  write.table(m,paste0(save.file.loc,'/',strain_code,"_g1_g2_dist.txt"),sep="\t",row.names=FALSE)
  message('---')
  message('Done!')
}
gb305/Rpackages documentation built on Feb. 18, 2020, 2:55 p.m.