R/champ.Block.R

Defines functions champ.Block

Documented in champ.Block

if(getRversion() >= "3.1.0") utils::globalVariables(c("myNorm","myLoad"))

champ.Block <- function(beta=myNorm,
                        pheno=myLoad$pd$Sample_Group,
                        arraytype="450K",
                        maxClusterGap=250000,
                        B=500,
                        bpSpan=250000,
                        minNum=10,
                        cores=3)
{
  message("[===========================]")
  message("[<<<< ChAMP.Block START >>>>]")
  message("-----------------------------")
  
  
  #############  ConstResgion.R  ################
  # if(arraytype=="EPIC"){
  #   RSobject <- RatioSet(beta, annotation = c(array = "IlluminaHumanMethylationEPIC",annotation = "ilm10b4.hg19"))
  # }else{
  #   RSobject <- RatioSet(beta, annotation = c(array = "IlluminaHumanMethylation450k",annotation = "ilmn12.hg19"))
  # }
  
  if(arraytype %in% c("EPIC", "EPICv2")) {
    data("probe.features.epicv2")
  } else if(arraytype == "EPICv1") {
    data("probe.features.epicv1")
  } else if(arraytype == "450K") { 
    data("probe.features")
  } else (
    stop("arraytype must be `EPICv2`, `EPICv1`, `450K`")
  )
  
  
  if(cores > detectCores()) cores <- detectCores()
  registerDoParallel(cores = cores)
  
  # RSanno <- getAnnotation(RSobject)[,c("chr","pos","Relation_to_Island")]
  # RSanno <- RSanno[order(RSanno$chr,RSanno$pos),]
  # sbeta.m <- beta[rownames(RSanno),]
  # sregion.v <- RSanno$Relation_to_Island
  
  RSanno <- probe.features[rownames(beta), c("CHR", "MAPINFO", "cgi")]
  RSanno <- RSanno[order(RSanno$CHR,RSanno$MAPINFO),]
  colnames(RSanno) <- c("chr", "pos", "Relation_to_Island")
  sbeta.m <- beta
  sregion.v <- RSanno$Relation_to_Island
  
  message("<< Load Annotation Successfully >>")
  
  ### define open sea clusters
  opensea.idx <- which(sregion.v=="OpenSea")
  openseaCLID.v <- boundedClusterMaker(RSanno$chr[opensea.idx],RSanno$pos[opensea.idx],assumeSorted=TRUE,maxGap=500,maxClusterWidth=1500)
  names(openseaCLID.v) <- rownames(RSanno)[opensea.idx]
  
  ### define island clusters
  island.idx <- which(sregion.v=="Island")
  cpgiCLID.v <- boundedClusterMaker(RSanno$chr[island.idx],RSanno$pos[island.idx],assumeSorted=TRUE,maxGap=300,maxClusterWidth=1500)
  names(cpgiCLID.v) <- rownames(RSanno)[island.idx]
  
  ### define shore/shelve clusters
  sh.idx <- setdiff(1:length(sregion.v),c(opensea.idx,island.idx))
  shCLID.v <- boundedClusterMaker(RSanno$chr[sh.idx],RSanno$pos[sh.idx],assumeSorted=TRUE,maxGap=500,maxClusterWidth=1500)
  names(shCLID.v) <- rownames(RSanno)[sh.idx]
  
  allCLID.v <- rep(NA,length(sregion.v))
  allCLID.v[opensea.idx] <- paste("OS",openseaCLID.v,sep="")
  allCLID.v[island.idx] <- paste("CPGI",cpgiCLID.v,sep="")
  allCLID.v[sh.idx] <- paste("SH",shCLID.v,sep="")
  names(allCLID.v) <- rownames(RSanno)
  
  nf <- length(unique(allCLID.v))
  npg.v <- summary(factor(allCLID.v),maxsum=nf)
  
  message("<< Get Clusters by cgi-info Successfully >>")
  
  ### average profiles
  avbetaCL.m <- rowsum(sbeta.m,allCLID.v,reorder=FALSE)
  nf.v <- npg.v[match(rownames(avbetaCL.m),names(npg.v))]
  avbetaCL.m <- avbetaCL.m/nf.v
  rownames(sbeta.m) -> sortedCpGsbeta.v;
  rm(sbeta.m);
  
  message("<< Calculate Average Beta Value Successfully >>")
  ### find midpoint, start, end pos of each region
  tmpPOS.v <- RSanno$pos[match(names(allCLID.v),rownames(RSanno))]
  tmpCHR.v <- substr(RSanno$chr[match(names(allCLID.v),rownames(RSanno))],4,100)
  tmpCHR.v[which(tmpCHR.v=="X")] <- "23"
  tmpCHR.v[which(tmpCHR.v=="Y")] <- "24"
  tmpCHR.v <- as.numeric(tmpCHR.v)
  
  posCL.m <- rowsum(cbind(tmpPOS.v,tmpCHR.v),allCLID.v,reorder=FALSE);
  nf.v <- npg.v[match(rownames(posCL.m),names(npg.v))]
  posCL.m <- posCL.m/nf.v;
  colnames(posCL.m) <- c("Pos","Chr");
  
  message("<< Generate Block Position Successfully >>")
  os.idx <- grep("OS",rownames(avbetaCL.m));
  avbetaOS.m <- avbetaCL.m[os.idx,];
  posOS.m <- posCL.m[os.idx,];
  
  design.m <- data.frame(1,Sample_Group=as.numeric(as.factor(pheno))-1)
  selAUT.idx <- which(posOS.m[,2]<=22);
  blocks <- clusterMaker(chr=posOS.m[selAUT.idx,2],pos=posOS.m[selAUT.idx,1],assumeSorted = TRUE, maxGap = maxClusterGap);
  blockID.v <- levels(as.factor(blocks));
  message("<< New Clusters are generated for blocks >>")
  
  blockPROP.m <- data.frame(CHR=aggregate(posOS.m[selAUT.idx,2], by=list(blocks), function(x) x[1])[,2],
                            aggregate(posOS.m[selAUT.idx,1],by=list(blocks),function(x) c(mean(x),min(x),max(x),max(x)-min(x),length(x)))[,2])
  colnames(blockPROP.m) <- c("CHR","AvPos","Start","End","Size","NumberOS");
  
  message("<< Generate information for New Clusters >>")
  
  bh2.o <- bumphunter(avbetaOS.m[selAUT.idx,],design.m,chr=posOS.m[selAUT.idx,2],pos=posOS.m[selAUT.idx,1],cluster=blocks,coef=2, pickCutoff=TRUE, pickCutoffQ=0.75,smooth=TRUE,smoothFunction=loessByCluster, useWeights=FALSE, B=B,bpSpan=bpSpan,minNum=minNum);
  message("<< Run Bumphunter Successfully >>")
  
  Block <- bh2.o$tab
  rownames(Block) <- paste("Block",1:nrow(Block),sep="_")
  
  
  message("[<<<<< ChAMP.BLOCK END >>>>>]")
  message("[===========================]")
  message("[You may want to process Block.GUI() next.]\n")
  return(list(Block=Block,clusterInfo=blockPROP.m,allCLID.v=allCLID.v,avbetaCL.m=avbetaCL.m,posCL.m=posCL.m))
}
YuanTian1991/ChAMP documentation built on Feb. 21, 2023, 1:13 p.m.