wrMiscVignette1.R
In wrMisc: Analyze Experimental High-Throughput (Omics) Data

## ----setup0, include=FALSE, echo=FALSE, messages=FALSE, warnings=FALSE--------
suppressPackageStartupMessages({
    library(wrMisc)
})

## ----install, echo=TRUE, eval=FALSE-------------------------------------------
#  ## If not already installed, you'll have to install the package first.
#  ## This is the basic installation commande in R
#  install.packages("wrMisc")

## ----install2, echo=TRUE, eval=FALSE------------------------------------------
#  packages <- c("knitr", "rmarkdown", "BiocManager", "kableExtra", "boot", "data.tree", "data.table",
#    "fdrtool", "RColorBrewer", "Rcpp", "wrMisc", "wrGraph", "wrProteo")
#  checkInstallPkg <- function(pkg) {       # install function
#    if(!requireNamespace(pkg, quietly=TRUE)) install.packages(pkg) }
#  
#  ## install if not yet present
#  sapply(packages, checkInstallPkg)

## ----install3, echo=TRUE, eval=FALSE------------------------------------------
#  ## Installation of limma
#  BiocManager::install("limma")

## ----install4, echo=TRUE, eval=FALSE------------------------------------------
#  ## Now you can open this vignette out of R:
#  vignette("wrMiscVignette1", package="wrMisc")

## ----setup1-------------------------------------------------------------------
library("wrMisc")
library("knitr")

## This is 'wrMisc' version number :
packageVersion("wrMisc")

## ----basicVariability, echo=TRUE----------------------------------------------
grp1 <- rep(LETTERS[1:3], c(3,4,3))
sampNa1 <- paste0(grp1, c(1:3,1:4,1:3))
set.seed(2016); dat1 <- matrix(round(c(runif(50000) +rep(1:1000,50)),3), 
  ncol=10, dimnames=list(NULL,sampNa1))
dim(dat1)
head(dat1)

## ----sdForEachRow, echo=TRUE--------------------------------------------------
head(rowSds(dat1))
system.time(sd1 <- rowSds(dat1))
system.time(sd2 <- apply(dat1, 1, sd))

## ----usingApply, echo=TRUE----------------------------------------------------
table(round(sd1, 13)==round(sd2, 13))

## ----calculateRowCV, echo=TRUE------------------------------------------------
system.time(cv1 <- rowCVs(dat1))
system.time(cv2 <- apply(dat1, 1, sd) / rowMeans(dat1))
# typically the calculation using rowCVs is much faster
head(cv1)
# results from the 'conventional' way
head(cv2)

## ----rowGrpMeans1, echo=TRUE--------------------------------------------------
# we already defined the grouping :
grp1

## the mean for each group and row
system.time(mean1Gr <- rowGrpMeans(dat1, grp1))

## ----sdOrCVbyGrp, echo=TRUE---------------------------------------------------
## Now the sd for each row and group
system.time(sd1Gr <- rowGrpSds(dat1, grp1))
# will give us a matrix with the sd for each group & line 
head(sd1Gr)

# Let's check the results of the first line :
sd1Gr[1,] == c(sd(dat1[1,1:3]), sd(dat1[1,4:7]), sd(dat1[1,8:10]))

# The CV :
system.time(cv1Gr <- rowGrpCV(dat1, grp1))
head(cv1Gr)

## ----rowGrpNA1, echo=TRUE-----------------------------------------------------
mat2 <- c(22.2, 22.5, 22.2, 22.2, 21.5, 22.0, 22.1, 21.7, 21.5, 22, 22.2, 22.7,
   NA, NA, NA, NA, NA, NA, NA, 21.2,   NA, NA, NA, NA,
   NA, 22.6, 23.2, 23.2,  22.4, 22.8, 22.8, NA,  23.3, 23.2, NA, 23.7,
   NA, 23.0, 23.1, 23.0,  23.2, 23.2, NA, 23.3,  NA, NA, 23.3, 23.8)
mat2 <- matrix(mat2, ncol=12, byrow=TRUE)
## The definition of the groups (ie replicates)
gr4 <- gl(3, 4, labels=LETTERS[1:3])

## ----rowGrpNA2, echo=TRUE-----------------------------------------------------
rowGrpNA(mat2,gr4)

## ----naOmit, echo=TRUE--------------------------------------------------------
aA <- c(11:13,NA,10,NA)
 
str(naOmit(aA))

# the 'classical' na.omit also stores which elements were NA
str(na.omit(aA))

## ----minDiff, echo=TRUE-------------------------------------------------------
set.seed(2017); aa <- 10 *c(0.1 +round(runif(20),2), 0.53, 0.53)
head(aa)

minDiff(aa,ppm=FALSE)

## ----partUnlist_1, echo=TRUE--------------------------------------------------
bb <- list(fa=gl(2,2), ve=31:33, L2=matrix(21:28,ncol=2), li=list(li1=11:14,li2=data.frame(41:44)))
partUnlist(bb)
partUnlist(lapply(bb,.asDF2))

## ----unlist_1, echo=TRUE------------------------------------------------------
head(unlist(bb, recursive=FALSE))

## ----asSepList, echo=TRUE-----------------------------------------------------
bb <- list(fa=gl(2,2), ve=31:33, L2=matrix(21:28,ncol=2), li=list(li1=11:14,li2=data.frame(41:44)))
asSepList(bb)

## ----lappend1, echo=TRUE------------------------------------------------------
li1 <- list(a=1, b=2, c=3)
li2 <- list(A=11, b=2, C=13)
append(li1, li2)

## ----lappend2, echo=TRUE------------------------------------------------------
appendNR(li1, li2)

## ----lrbind, echo=TRUE--------------------------------------------------------
dat2 <- matrix(11:34, ncol=3, dimnames=list(letters[1:8], colnames=LETTERS[1:3]))
lst2 <- by(dat2, rep(1:3,c(3,2,3)), as.matrix)
lst2

# join list-elements (back) into single matrix
lrbind(lst2)

## ----mergeMatrixList, echo=TRUE-----------------------------------------------
mat1 <- matrix(11:18, ncol=2, dimnames=list(letters[3:6],LETTERS[1:2]))
mat2 <- matrix(21:28, ncol=2, dimnames=list(letters[2:5],LETTERS[3:4]))
mat3 <- matrix(31:38, ncol=2, dimnames=list(letters[c(1,3:4,3)],LETTERS[4:5]))
#
mergeMatrixList(list(mat1, mat2), useColumn="all")

# with custom names for the individual matrices
mergeMatrixList(list(m1=mat1, m2=mat2, mat3), mode="union", useColumn=2)

## ----mergeMatrices, echo=TRUE-------------------------------------------------
mergeMatrices(mat1, mat2)

mergeMatrices(mat1, mat2, mat3, mode="union", useColumn=2)

## custom names for matrix-origin
mergeMatrices(m1=mat1, m2=mat2, mat3, mode="union", useColumn=2)

## flexible/custom selection of columns
mergeMatrices(m1=mat1, m2=mat2, mat3, mode="union", useColumn=list(1,1:2,2))

## ----fuseCommonListElem, echo=TRUE--------------------------------------------
val1 <- 10 +1:26
names(val1) <- letters
(lst1 <- list(c=val1[3:6], a=val1[1:3], b=val1[2:3] ,a=val1[12], c=val1[13]))

## here the names 'a' and 'c' appear twice :
names(lst1)

## now, let's fuse all 'a' and 'c'
fuseCommonListElem(lst1)

## ----listBatchReplace1, echo=TRUE---------------------------------------------
lst1 <- list(m1=matrix(11:18, ncol=2), m2=matrix(21:30, ncol=2), indR=31:34,
  m3=matrix(c(21:23,NA,25:27,NA), ncol=2))
filterLiColDeList(lst1, useLines=2:3)
filterLiColDeList(lst1, useLines="allNA", ref=3)

## ----replInList1, echo=TRUE---------------------------------------------------
(lst1 <- list(aa=1:4, bb=c("abc","efg","abhh","effge"), cc=c("abdc","efg","efgh")))
listBatchReplace(lst1, search="efg", repl="EFG", silent=FALSE)

## ----listGroupsByNames, echo=TRUE---------------------------------------------
ser1 <- 1:7; names(ser1) <- c("AA","BB","AA.1","CC","AA.b","BB.e","A")

listGroupsByNames(ser1)

## ----listGroupsByNames2, echo=TRUE--------------------------------------------
listGroupsByNames((1:10)/5)

## ----filterList, echo=TRUE----------------------------------------------------
set.seed(2020); dat1 <- round(runif(80),2)
list1 <- list(m1=matrix(dat1[1:40], ncol=8), m2=matrix(dat1[41:80], ncol=8), other=letters[1:8])
rownames(list1$m1) <- rownames(list1$m2) <- paste0("line",1:5)
# Note: the list-element list1$other has a length different to that of filt. Thus, it won't get filtered.
filterList(list1, list1$m1[,1] >0.4)       # filter according to 1st column of $m1 ...
filterList(list1, list1$m1 >0.4) 

## ----matr2list, echo=TRUE-----------------------------------------------------
(mat1 <- matrix(1:12, ncol=3, dimnames=list(letters[1:4],LETTERS[1:3])))
str(matr2list(mat1))

## ----array0, echo=TRUE--------------------------------------------------------
(arr1 <- array(c(6:4,4:24), dim=c(4,3,2), dimnames=list(c(LETTERS[1:4]),
  paste("col",1:3,sep=""),c("ch1","ch2"))))

## ----arrayCV1, echo=TRUE------------------------------------------------------
arrayCV(arr1)

# this is equivalent to
cbind(rowCVs(arr1[,,1]), rowCVs(arr1[,,2]))

## ----arrayCV2, echo=TRUE------------------------------------------------------
arrayCV(arr1, byDim=2)

## ----cutArrayInCluLike, echo=TRUE---------------------------------------------
cutArrayInCluLike(arr1, cluOrg=c(2,1,2,1))

## ----filt3dimArr, echo=TRUE---------------------------------------------------
filt3dimArr(arr1, displCrit=c("col1","col2"), filtCrit="col2", filtVal=7, filtTy=">")

## ----repeated1, echo=TRUE-----------------------------------------------------
## some text toy data
tr <- c("li0","n",NA,NA, rep(c("li2","li3"),2), rep("n",4))

## ----repeated2, echo=TRUE-----------------------------------------------------
table(tr)
unique(tr) 
duplicated(tr, fromLast=FALSE)

## ----repeated3, echo=TRUE-----------------------------------------------------
aa <- c(11:16,NA,14:12,NA,14)
names(aa) <- letters[1:length(aa)]
aa

## ----findRepeated, echo=TRUE--------------------------------------------------
findRepeated(aa) 

## ----firstOfRepeated, echo=TRUE-----------------------------------------------
firstOfRepeated(aa)

aa[firstOfRepeated(aa)$indUniq]          # only unique with their names

unique(aa)                               # unique() does not return any names !

## ----correctToUnique1, echo=TRUE----------------------------------------------
correctToUnique(aa)

correctToUnique(aa, sep=".", NAenum=FALSE)       # keep NAs (ie without transforming to character)

## ----nonAmbiguousNum, echo=TRUE-----------------------------------------------
unique(aa)                                    # names are lost

nonAmbiguousNum(aa)
nonAmbiguousNum(aa, uniq=FALSE, asLi=TRUE)    # separate in list unique and repeated 

## ----sortByNRepeated, echo=TRUE-----------------------------------------------
cities <- c("Bangkok","London","Paris", "Singapore","New York City", "Istambul","Delhi","Rome","Dubai")
sortByNRepeated(x=cities[c(1:4)], y=cities[c(2:3,5:8)])

## or (unlimited) multiple inputs via list
choices1 <- list(Mary=cities[c(1:4)], Olivia=cities[c(2:3,5:8)], Paul=cities[c(5:3,9,5)])    # Note : Paul cited NYC twice !
table(unlist(choices1))
sortByNRepeated(choices1)
sortByNRepeated(choices1, filterIntraRep=FALSE)  # without correcting multiple citation of NYC by Paul

## ----cbindNR, echo=TRUE-------------------------------------------------------
## First we'll make some toy data :
(ma1 <- matrix(1:6, ncol=3, dimnames=list(1:2,LETTERS[3:1])))
(ma2 <- matrix(11:16, ncol=3, dimnames=list(1:2,LETTERS[3:5])))

## now we can join 2 or more matrixes  
cbindNR(ma1, ma2, summarizeAs="mean")       # average of both columns 'C'

## ----firstLineOfDat, echo=TRUE------------------------------------------------
(mat1 <- matrix(c(1:6, rep(1:3,1:3)), ncol=2, dimnames=list(letters[1:6],LETTERS[1:2])))

## ----firstLineOfDat2, echo=TRUE-----------------------------------------------
firstLineOfDat(mat1, refCol=2)

## ----firstOfRepLines, echo=TRUE-----------------------------------------------
mat2 <- matrix(c("e","n","a","n","z","z","n","z","z","b", 
  "","n","c","n","","","n","","","z"), ncol=2)
firstOfRepLines(mat2, out="conc")

# or as index :
firstOfRepLines(mat2)

## ----nonredDataFrame, echo=TRUE-----------------------------------------------
(df1 <- data.frame(cbind(xA=letters[1:5], xB=c("h","h","f","e","f"), xC=LETTERS[1:5])))

## ----nonredDataFrame2, echo=TRUE----------------------------------------------
nonredDataFrame(df1, useCol=c("xB","xC")) 

# without counter or concatenating
df1[which(!duplicated(df1[,2])),]
# or
df1[firstOfRepLines(df1,useCol=2),]

## ----get1stOfRepeatedByCol, echo=TRUE-----------------------------------------
mat2 <- cbind(no=as.character(1:20), seq=sample(LETTERS[1:15], 20, repl=TRUE),
  ty=sample(c("full","Nter","inter"),20,repl=TRUE), ambig=rep(NA,20), seqNa=1:20)
(mat2uniq <- get1stOfRepeatedByCol(mat2, sortBy="seq", sortSupl="ty"))

# the values from column 'seq' are indeed unique
table(mat2uniq[,"seq"])

# This will return all first repeated (may be >1) but without furter sorting 
#  along column 'ty' neither marking in comumn 'ambig').
mat2[which(duplicated(mat2[,2],fromLast=FALSE)),]

## ----nonAmbiguousMat, echo=TRUE-----------------------------------------------
nonAmbiguousMat(mat1,by=2)

## ----nonAmbiguousMat2, echo=TRUE----------------------------------------------
set.seed(2017); mat3 <- matrix(c(1:100,round(rnorm(200),2)), ncol=3,
  dimnames=list(1:100,LETTERS[1:3]));
head(mat3U <- nonAmbiguousMat(mat3, by="B", na="_", uniqO=FALSE), n=15)
head(get1stOfRepeatedByCol(mat3, sortB="B", sortS="B"))

## ----combineReplFromListToMatr, echo=TRUE-------------------------------------
lst2 <- list(aa_1x=matrix(1:12, nrow=4, byrow=TRUE), ab_2x=matrix(24:13, nrow=4, byrow=TRUE))
combineReplFromListToMatr(lst2)

## ----combineRedundLinesInListAcRef, echo=TRUE---------------------------------
x1 <- list(quant=matrix(11:34, ncol=3, dimnames=list(letters[8:1], LETTERS[11:13])), 
  annot=matrix(paste0(LETTERS[c(1:4,6,3:5)],LETTERS[c(1:4,6,3:5)]), ncol=1, 
  dimnames=list(paste(letters[1:8]),"xx")) )
combineRedundLinesInList(lst=x1, refNa="annot", datNa="quant", refColNa="xx")

## ----nonRedundLines, echo=TRUE------------------------------------------------
mat4 <- matrix(rep(c(1,1:3,3,1),2), ncol=2, dimnames=list(letters[1:6],LETTERS[1:2]))
nonRedundLines(mat4)

## ----filtSizeUniq, echo=TRUE--------------------------------------------------
# input: c and dd are repeated  :
filtSizeUniq(list(A="a", B=c("b","bb","c"), D=c("dd","d","ddd","c")), filtUn=TRUE, minSi=NULL)

# here a,b,c and dd are repeated  :
filtSizeUniq(list(A="a", B=c("b","bb","c"), D=c("dd","d","ddd","c")), ref=c(letters[c(1:26,1:3)],
  "dd","dd","bb","ddd"), filtUn=TRUE, minSi=NULL)   

## ----makeNRedMatr, echo=TRUE--------------------------------------------------
t3 <- data.frame(ref=rep(11:15,3), tx=letters[1:15],
  matrix(round(runif(30,-3,2),1), nc=2), stringsAsFactors=FALSE)
  
# First we split the data.frame in list  
by(t3,t3[,1], function(x) x)
t(sapply(by(t3,t3[,1],function(x) x), summarizeCols, me="maxAbsOfRef"))
(xt3 <- makeNRedMatr(t3, summ="mean", iniID="ref"))
(xt3 <- makeNRedMatr(t3, summ=unlist(list(X1="maxAbsOfRef")), iniID="ref"))

## ----makeNRedMatr2, echo=TRUE-------------------------------------------------
set.seed(2024)
df2 <- data.frame(transcrID=paste("TrID", 101:124, sep=""), geneID=paste("geID",rep(201:206,each=4)), 
  geneLe=round(runif(24, min=50, max=1500)), a=101:124, b=224:201)
df2 <- df2[-1*c(1,4:7,12:15),]  

(dfLongest <- makeNRedMatr(df2, summ=unlist(list(X1="maxOfRef")), iniID="geneID"))

 summarizeCols(df2[1:2,c(1:5,3)], me="min")  
 summarizeCols(df2[1:2,c(1:5,3)], me="mean") 
 summarizeCols(df2[1:2,c(1:5,3)], me="maxOfRef") 
 summarizeCols(df2[1:6,c(1:5,3)], me="maxOfRef")

(xt3 <- makeNRedMatr(t3, summ=unlist(list(X1="maxOfRef")), iniID=c("ref")))  

## ----combineRedBasedOnCol, echo=TRUE------------------------------------------
matr <- matrix(c(letters[1:6],"h","h","f","e",LETTERS[1:5]), ncol=3,
  dimnames=list(letters[11:15],c("xA","xB","xC")))
combineRedBasedOnCol(matr, colN="xB")
combineRedBasedOnCol(rbind(matr[1,],matr), colN="xB")

## ----convMatr2df, echo=TRUE---------------------------------------------------
x <- 1
dat1 <- matrix(1:10, ncol=2)
rownames(dat1) <- letters[c(1:3,2,5)]
## as.data.frame(dat1)  ...  would result in an error
convMatr2df(dat1)
convMatr2df(data.frame(a=as.character((1:3)/2), b=LETTERS[1:3], c=1:3))
tmp <- data.frame(a=as.character((1:3)/2), b=LETTERS[1:3], c=1:3, stringsAsFactors=FALSE)
convMatr2df(tmp)
tmp <- data.frame(a=as.character((1:3)/2), b=1:3, stringsAsFactors=FALSE)
convMatr2df(tmp) 

## ----combineOverlapInfo, echo=TRUE--------------------------------------------
set.seed(2013)
datT2 <- matrix(round(rnorm(200)+3,1), ncol=2, dimnames=list(paste("li",1:100,sep=""),
  letters[23:24]))
# (mimick) some short and longer names for each line
inf2 <- cbind(sh=paste(rep(letters[1:4],each=26), rep(letters,4),1:(26*4),sep=""),
  lo=paste(rep(LETTERS[1:4],each=26), rep(LETTERS,4), 1:(26*4), ",", 
  rep(letters[sample.int(26)],4), rep(letters[sample.int(26)],4), sep=""))[1:100,] 
## We'll use this to test :  
head(datT2, n=10)
## let's assign to each pair of x & y values a 'cluster' (column _clu_, the column _combInf_ tells us which lines/indexes are in this cluster)
head(combineOverlapInfo(datT2, disThr=0.03), n=10)
## it is also possible to rather display names (eg gene or protein-names) instead of index values
head(combineOverlapInfo(datT2, suplI=inf2[,2], disThr=0.03), n=10)

## ----getValuesByUnique, echo=TRUE---------------------------------------------
dat <- 11:19
names(dat) <- letters[c(6:3,2:4,8,3)]
## Here the names are not unique.
## Thus, the values can be binned by their (non-unique) names and a representative values calculated.

## Let's make a 'datUniq' with the mean of each group of values :
datUniq <- round(tapply(dat, names(dat), mean),1)
## now we propagate the mean values to the full vector 
getValuesByUnique(dat, datUniq)
cbind(ini=dat,firstOfRep=getValuesByUnique(dat, datUniq),
  indexUniq=getValuesByUnique(dat, datUniq, asIn=TRUE))

## ----combineByEitherFactor, echo=TRUE-----------------------------------------
nn <- rep(c("a","e","b","c","d","g","f"),c(3,1,2,2,1,2,1))
qq <- rep(c("m","n","p","o","q"),c(2,1,1,4,4))
nq <- cbind(nn,qq)[c(4,2,9,11,6,10,7,3,5,1,12,8),]
## Here we consider 2 columns 'nn' and 'qq' whe trying to regroup common values
##  (eg value 'a' from column 'nn' and value 'o' from 'qq') 
combineByEitherFactor(nq, 1, 2, nBy=FALSE)

## ----combineByEitherFactor2, echo=TRUE----------------------------------------
## the same, but including n by group/cluster
combineByEitherFactor(nq, 1, 2, nBy=TRUE)
## Not running further iterations works faster, but you may not reach 'convergence' immediately
combineByEitherFactor(nq,1, 2, nBy=FALSE)

## ----combineByEitherFactor3, echo=TRUE----------------------------------------
##  another example
mm <- rep(c("a","b","c","d","e"), c(3,4,2,3,1))
pp <- rep(c("m","n","o","p","q"), c(2,2,2,2,5))
combineByEitherFactor(cbind(mm,pp), 1, 2, con=FALSE, nBy=TRUE)

## ----multiCharReplace1, echo=TRUE---------------------------------------------
# replace character content
x1 <- c("ab","bc","cd","efg","ghj")
multiCharReplace(x1, cbind(old=c("bc","efg"), new=c("BBCC","EF")))

# works also on matrix and/or to replace numeric content : 
x3 <- matrix(11:16, ncol=2)
multiCharReplace(x3, cbind(12:13,112:113))

## ----multiCharReplace2, echo=TRUE---------------------------------------------
# replace and return logical vactor
x2 <- c("High","n/a","High","High","Low")
multiCharReplace(x2,cbind(old=c("n/a","Low","High"), new=c(NA,FALSE,TRUE)), convTo="logical")

## ----multiMatch1, echo=TRUE---------------------------------------------------
aa <- c("m","k","j; aa","m; aa; bb; o","n; dd","aa","cc")
bb <- c("aa","dd","aa; bb; q","p; cc") 
## result as list of indexes
(bOnA <- multiMatch(aa, bb, method="asIndex"))   # match bb on aa
## more convenient to the human reader
(bOnA <- multiMatch(aa, bb))                     # match bb on aa
(bOnA <- multiMatch(aa, bb, method="matchedL"))  # match bb on aa

## ----compGlobPat1, echo=TRUE--------------------------------------------------
aa <- letters[rep(c(3:1,4), each=2)]
ab <- letters[rep(c(5,8:6), each=2)]        # 'same general' pattern to aa
ac <- letters[c(1:2,1:3,3:4,4)]             # NOT 'same general' pattern to any other
ad <- letters[c(6:8,8:6,7:6)]               # NOT 'same general' pattern to any other

## ----compGlobPat2, echo=TRUE--------------------------------------------------
## get global patterns
cbind(aa= match(aa, unique(aa)),
  ab= match(ab, unique(ab)),
  ac= match(ac, unique(ac)),
  ad= match(ad, unique(ad)) )

## ----compGlobPat3, echo=TRUE--------------------------------------------------
bb <- data.frame(ind=1:length(aa), a=aa, b=ab, c=ac, d=ad)

## ----compGlobPat4, echo=TRUE--------------------------------------------------
replicateStructure(bb)

## ----compGlobPat5, echo=TRUE--------------------------------------------------
replicateStructure(bb, method="combAll")

## ----compGlobPat6, echo=TRUE--------------------------------------------------
replicateStructure(bb, method="combNonOrth")

## ----checkSimValueInSer, echo=TRUE--------------------------------------------
va1 <- c(4:7,7,7,7,7,8:10) + (1:11)/28600
checkSimValueInSer(va1, ppm=5)
data.frame(va=sort(va1), simil=checkSimValueInSer(va1))

## ----findCloseMatch1, echo=TRUE-----------------------------------------------
aA <- c(11:17); bB <- c(12.001,13.999); cC <- c(16.2,8,9,12.5,15.9,13.5,15.7,14.1,5)
(cloMa <- findCloseMatch(x=aA, y=cC, com="diff", lim=0.5, sor=FALSE))       

## ----closeMatchMatrix2, echo=TRUE---------------------------------------------
# all matches (of 2d arg) to/within limit for each of 1st arg ('x'); 'y' ..to 2nd arg = cC
# first let's display only one single closest/best hit
(maAa <- closeMatchMatrix(cloMa, aA, cC, lim=TRUE))  #

## ----closeMatchMatrix3, echo=TRUE---------------------------------------------
(maAa <- closeMatchMatrix(cloMa, aA, cC, lim=FALSE,origN=TRUE))  #
(maAa <- closeMatchMatrix(cloMa, cbind(valA=81:87, aA), cbind(valC=91:99, cC), colM=2,
  colP=2, lim=FALSE))
(maAa <- closeMatchMatrix(cloMa, cbind(aA,valA=81:87), cC, lim=FALSE, deb=TRUE))  #
a2 <- aA; names(a2) <- letters[1:length(a2)];  c2 <- cC; names(c2) <- letters[10 +1:length(c2)]
(cloM2 <- findCloseMatch(x=a2, y=c2, com="diff", lim=0.5, sor=FALSE)) 
(maA2 <- closeMatchMatrix(cloM2, predM=cbind(valA=81:87, a2),
  measM=cbind(valC=91:99, c2), colM=2, colP=2, lim=FALSE, asData=TRUE))
(maA2 <- closeMatchMatrix(cloM2, cbind(id=names(a2), valA=81:87,a2), cbind(id=names(c2),
  valC=91:99,c2), colM=3, colP=3, lim=FALSE, deb=FALSE)) 

## ----findSimilFrom2sets, echo=TRUE--------------------------------------------
aA <- c(11:17); bB <- c(12.001,13.999); cC <- c(16.2,8,9,12.5,12.6,15.9,14.1)
aZ <-  matrix(c(aA,aA+20), ncol=2, dimnames=list(letters[1:length(aA)],c("aaA","aZ")))
cZ <-  matrix(c(cC,cC+20), ncol=2, dimnames=list(letters[1:length(cC)],c("ccC","cZ")))
findCloseMatch(cC, aA, com="diff", lim=0.5, sor=FALSE)
findSimilFrom2sets(aA, cC)
findSimilFrom2sets(cC, aA)
findSimilFrom2sets(aA, cC, best=FALSE)
findSimilFrom2sets(aA, cC, comp="ppm", lim=5e4)
findSimilFrom2sets(aA, cC, comp="ppm", lim=9e4, bestO=FALSE)
# below: find fewer 'best matches' since search window larger (ie more good hits compete !)
findSimilFrom2sets(aA, cC, comp="ppm", lim=9e4, bestO=TRUE)

## ----fusePairs, echo=TRUE-----------------------------------------------------
(daPa <- matrix(c(1:5,8,2:6,9), ncol=2))
fusePairs(daPa, maxFuse=4)

## ----elimCloseCoord1, echo=TRUE-----------------------------------------------
da1 <- matrix(c(rep(0:4,5),0.01,1.1,2.04,3.07,4.5), ncol=2); da1[,1] <- da1[,1]*99; head(da1)
elimCloseCoord(da1)

## ----stableMode, echo=TRUE----------------------------------------------------
set.seed(2012); dat <- round(c(rnorm(120,0,1.2), rnorm(80,0.8,0.6), rnorm(25,-0.6,0.05), runif(200)),3)
dat <- dat[which(dat > -2 & dat <2)]
stableMode(dat)

## ----stableMode2, fig.height=8, fig.width=9, fig.align="center",  echo=TRUE----
layout(1:2)
plot(1:length(dat), sort(dat), type="l", main="Sorted Values", xlab="rank", las=1)
abline(h=stableMode(dat, silent=TRUE), lty=2,col=2)
legend("topleft",c("stableMode"), text.col=2, col=2, lty=2, lwd=1, seg.len=1.2, cex=0.8, xjust=0, yjust=0.5) 


plot(density(dat, kernel="gaussian", adjust=0.7), xlab="Value of dat", main="Density Estimate Plot")
useCol <- c("red","green","blue","grey55")
legend("topleft",c("dens","binning","BBmisc","allModes"), text.col=useCol, col=useCol,
  lty=2, lwd=1, seg.len=1.2, cex=0.8, xjust=0, yjust=0.5) 
abline(v=stableMode(dat, method="dens", silent=TRUE), lty=2, col="red", lwd=2)
abline(v=stableMode(dat, method="binning", silent=TRUE), lty=2, col="green")
abline(v=stableMode(dat, method="BBmisc", silent=TRUE), lty=2, col="blue")  
abline(v=stableMode(dat, method="allModes"), lty=2, col="grey55")  

## ----stableMode3, echo=TRUE---------------------------------------------------
set.seed(2021)
x <- sample(letters, 50000, replace=TRUE)
stableMode(dat, method="mode")
stableMode(dat, method="allModes")

## ----protectSpecChar1, echo=TRUE----------------------------------------------
aa <- c("abc","abcde","ab.c","ab.c.e","ab*c","ab\\d")
grepl("b.", aa)             # all TRUE
grepl(protectSpecChar("b."), aa)

## ----trimRedundText1, echo=TRUE-----------------------------------------------
txt1 <- c("abcd","abcde","abcdefg","abcdE",NA,"abcdEF")
trimRedundText(txt1)

## ----rmSharedWords1, echo=TRUE------------------------------------------------
txt2 <- c("abc d","abc de","abc defg","abc dE",NA,"abc dEF")
rmSharedWords(txt2)

## ----rmSharedWords2, echo=TRUE------------------------------------------------
txt3 <- c("ab 100 m","ab 120 m","ab 1000 m")
trimRedundText(txt3)
rmSharedWords(txt3)

## ----keepCommonText1, echo=TRUE-----------------------------------------------
txt1 <- c("abcd","abcde","abcdefg","abcdE",NA,"abcdEF")
trimRedundText(txt1, side="left")         # remove redundant 
keepCommonText(txt1, side="terminal")     # keep redundant
keepCommonText(txt1, side="center")       # computationally easier   

## ----keepCommonText2, echo=TRUE-----------------------------------------------
txt2 <- c("abcd_abc_kjh", "bcd_abc123", "cd_abc_po")
keepCommonText(txt2, side="center")       

## ----rmEnumeratorName1, echo=TRUE---------------------------------------------
xx <- c("hg_Re1","hjRe2_Re2","hk-Re3_Re33")
rmEnumeratorName(xx)
rmEnumeratorName(xx, newSep="--")
rmEnumeratorName(xx, incl="anyCase")

## ----rmEnumeratorName2, echo=TRUE---------------------------------------------
xy <- cbind(a=11:13, b=c("11#11","2_No2","333_samp333"), c=xx)
rmEnumeratorName(xy)
rmEnumeratorName(xy,incl=c("anyCase","trim2","rmEnumL"))

## ----rmEnumeratorName3, echo=TRUE---------------------------------------------
xz <- cbind(a=11:13, b=c("23#11","4#2","567#333"), c=xx)
apply(xz, 2, rmEnumeratorName, sepEnum=c("","_"), newSep="_", silent=TRUE)

## ----unifyEnumerator1, echo=TRUE----------------------------------------------
unifyEnumerator(c("ab-1","ab-2","c-3"))
unifyEnumerator(c("ab-R1","ab-R2","c-R3"))
unifyEnumerator(c("ab-1","c3-2","dR3"), stringentMatch=FALSE)

## ----checkUnitPrefix1, echo=TRUE----------------------------------------------
x1 <- c("10fg WW","xx 10fg 3pW"," 1pg 2.0W")
checkUnitPrefix(x1)

## different separators between digit and prefix:
x2 <- c("10fg WW","xx 8_fg 3pW"," 1 pg-2.0W")
checkUnitPrefix(x2, stringentSearch=TRUE)
checkUnitPrefix(x2, stringentSearch=FALSE)

## ----adjustUnitPrefix1, echo=TRUE---------------------------------------------
adjustUnitPrefix(c("10.psec", "2 fsec"), unit="sec")

## ----adjustUnitPrefix2, echo=TRUE---------------------------------------------
x2 <- c("abCc 500_nmol ABC", "abEe5_umol", "", "abFF_100_nmol_G", "abGg 2_mol", "abH.1 mmol")
rbind( adjustUnitPrefix(x2, unit="mol", returnType="allText") , 
  adjustUnitPrefix(x2, unit="mol", returnType="trim"),
  adjustUnitPrefix(x2, unit="mol", returnType=""))

## ----adjustUnitPrefix3, echo=TRUE---------------------------------------------
x3 <- c("2.psec abc","300 fsec etc", "34 5fsec")
adjustUnitPrefix(x3, unit=checkUnitPrefix(x3))

## ----mergeVectors1, echo=TRUE-------------------------------------------------
x1 <- c(a=1, b=11, c=21)
x2 <- c(b=12, c=22, a=2)
x3 <- c(a=3, d=43)
mergeVectors(vect1=x1, vect2=x2, vect3=x3)

## ----mergeVectors2, echo=TRUE-------------------------------------------------
mergeVectors(vect1=x1, vect2=x2, vect3=x3, inclInfo=TRUE)   # return list with additional info

## ----mergeVectors3, echo=TRUE-------------------------------------------------
x4 <- 41:44            # no names - not conform for merging and will be ignored
mergeVectors(x1, x2, x3, x4)

## ----matchMatrixLinesToRef1, echo=TRUE----------------------------------------
## Note : columns b and e allow non-ambigous match, not all elements of e are present in a
mat0 <- cbind(a=c("mvvk","axxd","bxxd","vv"),b=c("iwwy","iyyu","kvvh","gxx"), c=rep(9,4),
  d=c("hgf","hgf","vxc","nvnn"), e=c("_vv_","_ww_","_xx_","_yy_"))
matchMatrixLinesToRef(mat0[,1:4], ref=mat0[,5])
matchMatrixLinesToRef(mat0[,1:4], ref=mat0[1:3,5], inclInfo=TRUE)

matchMatrixLinesToRef(mat0[,-2], ref=mat0[,2], inclInfo=TRUE)   # needs 'reverse grep'

## ----orderMatrToRef1, echo=TRUE-----------------------------------------------
mat1 <- matrix(paste0("__",letters[rep(c(1,1,2,2,3),3) +rep(0:2,each=5)], rep(1:5)), ncol=3)
orderMatrToRef(mat1, paste0(letters[c(3,4,5,3,4)],c(1,3,5,2,4)))

mat2 <- matrix(paste0("__",letters[rep(c(1,1,2,2,3),3) +rep(0:2,each=5)], c(rep(1:5,2),1,1,3:5 )), ncol=3)
orderMatrToRef(mat2, paste0(letters[c(3,4,5,3,4)],c(1,3,5,1,4)))

mat3 <- matrix(paste0(letters[rep(c(1,1,2,2,3),3) +rep(0:2,each=5)], c(rep(1:5,2),1,1,3,3,5 )), ncol=3)
orderMatrToRef(mat3, paste0("__",letters[c(3,4,5,3,4)],c(1,3,5,1,3)))

## ----concatMatch1, echo=TRUE--------------------------------------------------
## simple example without concatenations or text-extensions
x0 <- c("ZZ","YY","AA","BB","DD","CC","D")
tab0 <- c("AA","BB,E","CC","FF,U")
match(x0, tab0)
concatMatch(x0, tab0)         # same result as match(), but with names

## now let's construct somthing similar but with concatenations and text-extensions
x1 <- c("ZZ","YY","AA","BB-2","DD","CCdef","Dxy")            # modif of single ID (no concat)
tab1 <- c("AA","WW,Vde,BB-5,E","CCab","FF,Uef")
match(x1, tab1)                   # match finds only the 'simplest' case (ie "AA")
concatMatch(x1, tab1)             # finds all hits as in example above

x2 <- c("ZZ,Z","YY,Y","AA,Z,Y","BB-2","DD","X,CCdef","Dxy")  # conatenated in 'x'
tab2 <- c("AA","WW,Vde,BB-5,E","CCab,WW","FF,UU")
concatMatch(x2, tab2)               # concatenation in both 'x' and 'table'

## ----checkStrictOrder1, echo=TRUE---------------------------------------------
set.seed(2005); mat1 <- rbind(matrix(round(runif(40),1),nc=4), rep(1,4))
head(mat1)
checkStrictOrder(mat1); mat1[which(checkStrictOrder(mat1)[,2]==0),]

## ----checkGrpOrder1, echo=TRUE------------------------------------------------
head(mat1)
checkGrpOrder(mat1)
checkGrpOrder(mat1, revRank=FALSE)    # only constant 'up' tested

## ----linModelSelect1, echo=TRUE-----------------------------------------------
li1 <- rep(c(4,3,3:6), each=3) + round(runif(18)/5,2)
names(li1) <- paste0(rep(letters[1:5], each=3), rep(1:3,6))
li2 <- rep(c(6,3:7), each=3) + round(runif(18)/5, 2)
dat2 <- rbind(P1=li1, P2=li2)
exp2 <- rep(c(11:16), each=3)
exp4 <- rep(c(3,10,30,100,300,1000), each=3)

## Check & plot for linear model 
linModelSelect("P1", dat2, expect=exp2)
linModelSelect("P2", dat2, expect=exp2)

## ----plotLinModelCoef1, echo=TRUE---------------------------------------------
set.seed(2020)
x1 <- matrix(rep(c(2,2:5),each=20) + runif(100) +rep(c(0,0.5,2:3,5),20), 
  byrow=FALSE, ncol=10, dimnames=list(LETTERS[1:10],NULL))
## just the 1st regression :
   summary(lm(b~a, data=data.frame(b=x1[,1], a=rep(1:5,each=2))))
## all regressions
x1.lmSum <- t(sapply(lapply(rownames(x1), linModelSelect, dat=x1, 
  expect=rep(1:5,each=2), silent=TRUE, plotGraph=FALSE), 
  function(x) c(x$coef[2,c(4,1)], startFr=x$startLev)))
x1.lmSum <- cbind(x1.lmSum, medQuantity=apply(x1,1,median))
x1.lmSum[,1] <- log10(x1.lmSum[,1])
head(x1.lmSum)

## ----plotLinModelCoef2, echo=TRUE---------------------------------------------
wrGraphOK <- requireNamespace("wrGraph", quietly=TRUE)      # check if package is available
if(wrGraphOK) wrGraph::plotW2Leg(x1.lmSum, useCol=c("Pr(>|t|)","Estimate","medQuantity","startFr"), 
  legendloc="topleft", txtLegend="start at")

## ----ratioAllComb0, echo=TRUE-------------------------------------------------
set.seed(2014); ra1 <- c(rnorm(9,2,1), runif(8,1,2))

## ----ratioAllComb1, echo=TRUE-------------------------------------------------
median(ra1[1:9]) / median(ra1[10:17])

## ----ratioAllComb2, echo=TRUE-------------------------------------------------
summary( ratioAllComb(ra1[1:9], ra1[10:17]))
boxplot(list(norm=ra1[1:9], unif=ra1[10:17], rat=ratioAllComb(ra1[1:9],ra1[10:17])))

## ----combineAsN1, echo=TRUE---------------------------------------------------
tm1 <- list(a1=LETTERS[1:7], a2=LETTERS[3:9], a3=LETTERS[6:10], a4=LETTERS[8:12])
combineAsN(tm1, nCombin=3, lev=gl(1,4))[,1,]

## ----combineAsN2, echo=TRUE---------------------------------------------------
## different levels/groups in list-elements
tm4 <- list(a1=LETTERS[1:15], a2=LETTERS[3:16], a3=LETTERS[6:17], a4=LETTERS[8:19],
  b1=LETTERS[5:19], b2=LETTERS[7:20], b3=LETTERS[11:24], b4=LETTERS[13:25], c1=LETTERS[17:26],
  d1=LETTERS[4:12], d2=LETTERS[5:11], d3=LETTERS[6:12], e1=LETTERS[7:10])
te4 <- combineAsN(tm4, nCombin=4, lev=substr(names(tm4),1,1))
str(te4)
te4[,,1]           # the counts part only

## ----readCsvBatch, echo=TRUE--------------------------------------------------
path1 <- system.file("extdata", package="wrMisc")
fiNa <-  c("pl01_1.csv","pl01_2.csv","pl02_1.csv","pl02_2.csv")
datAll <- readCsvBatch(fiNa, path1, silent=TRUE)
str(datAll)

## ----readCsvBatch2, echo=TRUE-------------------------------------------------
## batch reading of all csv files in specified path :
datAll2 <- readCsvBatch(fileNames=NULL, path=path1, silent=TRUE)
str(datAll2)

## ----readTabulatedBatch1, echo=TRUE-------------------------------------------
path1 <- system.file("extdata", package="wrMisc")
fiNa <-  c("a1.txt","a2.txt")
allTxt <- readTabulatedBatch(fiNa, path1)
str(allTxt)

## ----readVarColumns, echo=TRUE------------------------------------------------
path1 <- system.file("extdata", package="wrMisc")
fiNa <- "Names1.tsv"
datAll <- readVarColumns(fiName=file.path(path1,fiNa), sep="\t")
str(datAll)

## ----readGit1, echo=TRUE------------------------------------------------------
## An example url with tabulated data :
url1 <- "https://github.com/bigbio/proteomics-metadata-standard/blob/master/annotated-projects/PXD001819/PXD001819.sdrf.tsv"
gitDataUrl(url1)

## ----readGit2, echo=TRUE------------------------------------------------------
dataPxd <- try(read.delim(gitDataUrl(url1), sep='\t', header=TRUE))
str(dataPxd)

## ----presenceGrpFilt1, echo=TRUE----------------------------------------------
dat1 <- matrix(1:56,ncol=7)
dat1[c(2,3,4,5,6,10,12,18,19,20,22,23,26,27,28,30,31,34,38,39,50,54)] <- NA
grp1 <- gl(3,3)[-(3:4)]
dat1

## now let's filter
presenceGrpFilt(dat1, gr=grp1, presThr=0.75)  # stringent
presenceGrpFilt(dat1, gr=grp1, presThr=0.25)  # less stringent


## ----presenceFilt, echo=TRUE--------------------------------------------------
presenceFilt(dat1, gr=grp1, maxGr=1, ratM=0.1)
presenceFilt(dat1, gr=grp1, maxGr=2, rat=0.5)

## ----cleanReplicates, echo=TRUE-----------------------------------------------

(mat3 <- matrix(c(19,20,30,40, 18,19,28,39, 16,14,35,41, 17,20,30,40), ncol=4))
cleanReplicates(mat3, nOutl=1)
cleanReplicates(mat3, nOutl=3)


## ----normalizeThis0, echo=TRUE------------------------------------------------
set.seed(2015); rand1 <- round(runif(300) +rnorm(300,0,2),3)
dat1 <- cbind(ser1=round(100:1 +rand1[1:100]), ser2=round(1.2*(100:1 +rand1[101:200]) -2),
  ser3=round((100:1 +rand1[201:300])^1.2-3))
dat1 <- cbind(dat1, ser4=round(dat1[,1]^seq(2,5,length.out=100) +rand1[11:110],1))
## Let's introduce some NAs
dat1[dat1 <1] <- NA
## Let's get a quick overview of the data
summary(dat1)
## some selected lines (indeed, the 4th column appears always much higher)
dat1[c(1:5,50:54,95:100),]

## ----normalizeThis1, echo=TRUE------------------------------------------------
no1 <- normalizeThis(dat1, refGrp=1:3, meth="mean")
no2 <- normalizeThis(dat1, refGrp=1:3, meth="trimMean", trim=0.4)
no3 <- normalizeThis(dat1, refGrp=1:3, meth="median")
no4 <- normalizeThis(dat1, refGrp=1:3, meth="slope", quantFa=c(0.2,0.8))

## ----normalizeThis_plot1, echo=FALSE,eval=TRUE--------------------------------
boxplot(dat1, main="raw data", las=1)

## ----normalizeThis_plot2, echo=FALSE,eval=TRUE--------------------------------
layout(matrix(1:4, ncol=2))
boxplot(no1, main="mean normalization", las=1)
boxplot(no2, main="trimMean normalization", las=1)
boxplot(no3, main="median normalization", las=1)
boxplot(no4, main="slope normalization", las=1)

## ----rowNormalize1, echo=TRUE-------------------------------------------------
set.seed(2); AA <- matrix(rbinom(110, 10, 0.05), nrow=10)
AA[,4:5] <- AA[,4:5] *rep(4:3, each=nrow(AA))

AA1 <- rowNormalize(AA)
round(AA1, 2)

## ----rowNormalize2, echo=TRUE-------------------------------------------------
AC <- AA
AC[which(AC <1)] <- NA

(AC1 <- rowNormalize(AC))

## ----rowNormalize3, echo=TRUE-------------------------------------------------
(AC3 <- rowNormalize(AC, refLines=1:5, omitNonAlignable=TRUE))

## ----coordOfFilt1, echo=TRUE--------------------------------------------------
set.seed(2021); ma1 <- matrix(sample.int(n=40, size=27, replace=TRUE), ncol=9)
## let's test which values are >37
which(ma1 >37)      # doesn't tell which row & col
coordOfFilt(ma1, ma1 >37)

## ----trimmedMean1, echo=TRUE--------------------------------------------------
x <- c(17:11,27:28)
mean(x)
mean(x, trim=0.15)       # symmetric trimming
mean(x[x < 25])          # manual trimming
trimmedMean(x, trim=c(l=0, u=0.7))   # asymmetric trim

## ----rnormW1, echo=TRUE-------------------------------------------------------
## some sample data :
x1 <- (11:16)[-5]
mean(x1); sd(x1)

## ----rnormW2, echo=TRUE-------------------------------------------------------
## the standard way for gerenating normal random values
ra1 <- rnorm(n=length(x1), mean=mean(x1), sd=sd(x1))
## In particular with low n, the random values deviate somehow from expected mean and sd :
mean(ra1) -mean(x1) 
sd(ra1) -sd(x1)

## ----rnormW3, echo=TRUE-------------------------------------------------------
## random numbers with close fit to expected mean and sd :
ra2 <- rnormW(length(x1), mean(x1), sd(x1))
mean(ra2) -mean(x1) 
sd(ra2) -sd(x1)   # much closer to expected value

## ----moderTest2grp, echo=TRUE-------------------------------------------------
set.seed(2017); t8 <- matrix(round(rnorm(1600,10,0.4),2), ncol=8,
  dimnames=list(paste("l",1:200), c("AA1","BB1","CC1","DD1","AA2","BB2","CC2","DD2")))
t8[3:6,1:2] <- t8[3:6,1:2]+3     # augment lines 3:6 for AA1&BB1
t8[5:8,5:6] <- t8[5:8,5:6]+3     # augment lines 5:8 for AA2&BB2 (c,d,g,h should be found)
t4 <- log2(t8[,1:4]/t8[,5:8])
fit4 <- moderTest2grp(t4, gl(2,2))
## now we'll use limma's topTable() function to look at the 'best' results
if("list" %in% mode(fit4)) {  # if you have limma installed we can look further
  library(limma)
  topTable(fit4, coef=1,n=5)                      # effect for 3,4,7,8
  fit4in <- moderTest2grp(t4, gl(2,2), testO="<")
  if("list" %in% mode(fit4in)) topTable(fit4in, coef=1,n=5) }

## ----moderTestXgrp, echo=TRUE-------------------------------------------------
grp <- factor(rep(LETTERS[c(3,1,4)], c(2,3,3)))
set.seed(2017); t8 <- matrix(round(rnorm(208*8,10,0.4),2), ncol=8,
  dimnames=list(paste(letters[], rep(1:8,each=26),sep=""), paste(grp,c(1:2,1:3,1:3),sep="")))
t8[3:6,1:2] <- t8[3:6,1:2] +3                    # augment lines 3:6 (c-f) 
t8[5:8,c(1:2,6:8)] <- t8[5:8,c(1:2,6:8)] -1.5    # lower lines 
t8[6:7,3:5] <- t8[6:7,3:5] +2.2                  # augment lines 
## expect to find C/A in c,d,g, (h)
## expect to find C/D in c,d,e,f
## expect to find A/D in f,g,(h)  
test8 <- moderTestXgrp(t8, grp) 
head(test8$p.value, n=8) 

## ----pVal2lfdr, echo=TRUE-----------------------------------------------------
set.seed(2017); t8 <- matrix(round(rnorm(160,10,0.4),2), ncol=8, dimnames=list(letters[1:20],
  c("AA1","BB1","CC1","DD1","AA2","BB2","CC2","DD2")))
t8[3:6,1:2] <- t8[3:6,1:2] +3   # augment lines 3:6 (c-f) for AA1&BB1
t8[5:8,5:6] <- t8[5:8,5:6] +3   # augment lines 5:8 (e-h) for AA2&BB2 (c,d,g,h should be found)
head(pVal2lfdr(apply(t8, 1, function(x) t.test(x[1:4], x[5:8])$p.value)))

## ----fcCI, echo=TRUE----------------------------------------------------------
set.seed(2022); ran <- rnorm(50)
confInt(ran, alpha=0.05)
## plot points and confindence interval of mean
plot(ran, jitter(rep(1, length(ran))), ylim=c(0.95, 1.05), xlab="random variable 'ran'",main="Points and Confidence Interval of Mean (alpha=0.05)", ylab="", las=1)
points(mean(ran), 0.97, pch=3, col=4)     # mean
lines(mean(ran) +c(-1, 1) *confInt(ran, 0.05), c(0.97, 0.97), lwd=4, col=4)  # CI
legend("topleft","95% conficence interval of mean", text.col=4,col=4,lty=1,lwd=1,seg.len=1.2,cex=0.9,xjust=0,yjust=0.5)

## ----matchSampToPairw, echo=TRUE----------------------------------------------
## make example if limma is not installed
if(!requireNamespace("limma", quietly=TRUE)) test8 <- list(FDR=matrix(1, nrow=2, ncol=3, dimnames=list(NULL,c("A-C","A-D","C-D"))))
matchSampToPairw(unique(grp), colnames(test8$FDR)) 

## ----pairWiseConc1, echo=TRUE-------------------------------------------------
mat1 <- matrix(1:8, nrow=2, dimnames=list(NULL, paste0(1:4,"-",6:9)))
numPairDeColNames(mat1)

## ----replicateStructure1, echo=TRUE-------------------------------------------
## column a is all different, b is groups of 2,
## c & d  are groups of 2 nut NOT 'same general' pattern as b
strX <- data.frame(a=letters[18:11], b=letters[rep(c(3:1,4), each=2)],
 c=letters[rep(c(5,8:6), each=2)], d=letters[c(1:2,1:3,3:4,4)],
 e=letters[rep(c(4,8,4,7),each=2)], f=rep("z",8) )
strX

replicateStructure(strX[,1:2])
replicateStructure(strX[,1:4], method="combAll")
replicateStructure(strX[,1:4], method="combAll", exclNoRepl=FALSE)
replicateStructure(strX[,1:4], method="combNonOrth", exclNoRepl=TRUE)
replicateStructure(strX, method="lowest")

## ----std1, echo=TRUE----------------------------------------------------------
dat <- matrix(2*round(runif(100),2), ncol=4)
mean(dat); sd(dat)

datS <- scale(dat)
apply(datS, 2, sd)
# each column was teated separately
mean(datS); sd(datS); range(datS)
# the mean is almost 0.0 and the sd almost 1.0

datB <- scale(dat, center=TRUE, scale=FALSE)
mean(datB); sd(datB); range(datB)              # mean is almost 0

## ----std2, echo=TRUE----------------------------------------------------------
datS2 <- standardW(dat)
apply(datS2, 2, sd)
summary(datS2)
mean(datS2); sd(datS2)

datS3 <- standardW(dat, byColumn=TRUE)
apply(datS3, 2, sd)
summary(datS3)
mean(datS3); sd(datS3)

## ----scale1, echo=TRUE--------------------------------------------------------
datR2 <- apply(dat, 2, scaleXY, 1, 100)
summary(datR2); sd(datR2)

## ----clu01, echo=TRUE---------------------------------------------------------
nGr <- 3
irKm <- stats::kmeans(iris[,1:4], nGr, nstart=nGr*4)             # no need to standardize
   table(irKm$cluster, iris$Species)
   #wrGraph::plotPCAw(t(as.matrix(iris[,1:4])), sampleGrp=irKm,colBase=irKm$cluster,useSymb=as.numeric(as.factor(iris$Species)))

## ----clu02, echo=TRUE---------------------------------------------------------
## sort results by cluster number
head(reorgByCluNo(iris[,-5], irKm$cluster))
tail(reorgByCluNo(iris[,-5], irKm$cluster))

## ----clu03, echo=TRUE---------------------------------------------------------
## median an CV
ir2 <- reorgByCluNo(iris[,-5], irKm$cluster, addInfo=FALSE, retList=TRUE)

## ----clu04, echo=TRUE---------------------------------------------------------
sapply(ir2, function(x) apply(x, 2, median))

## ----clu05, echo=TRUE---------------------------------------------------------
sapply(ir2, colSds)

## ----rmOrphans1, echo=TRUE----------------------------------------------------
x=c(3:1,3:4,4:6,5:3)
cbind(x, def=rmOrphans(x), assign1=rmOrphans(x, reassign=TRUE), 
  assign1=rmOrphans(x, minN=0.2, reassign=TRUE) )

## ----filterNetw0, echo=TRUE---------------------------------------------------

lst2 <- list('121'=data.frame(ID=as.character(c(141,221,228,229,449)),11:15), 
  '131'=data.frame(ID=as.character(c(228,331,332,333,339)),11:15), 
  '141'=data.frame(ID=as.character(c(121,151,229,339,441,442,449)),c(11:17)), 
  '151'=data.frame(ID=as.character(c(449,141,551,552)),11:14),
  '161'=data.frame(ID=as.character(171),11),
  '171'=data.frame(ID=as.character(161),11),
  '181'=data.frame(ID=as.character(881:882),11:12) )

## ----filterNetw1, echo=TRUE---------------------------------------------------
(nw1 <- filterNetw(lst2, limInt=20, sandwLim=NULL, remOrphans=FALSE))

## ----filterNetw2, echo=TRUE---------------------------------------------------
(nw2 <- filterNetw(lst2, limInt=20, sandwLim=NULL, remOrphans=TRUE))

## ----filterNetw3, echo=TRUE---------------------------------------------------
(nw3 <- filterNetw(lst2, limInt=20, sandwLim=14, remOrphans=TRUE))

## ----propMatr1, echo=TRUE-----------------------------------------------------
pairs3L <- matrix(LETTERS[c(1,3,3, 2,2,1)], ncol=2)      # loop of 3
(netw13pr <- pairsAsPropensMatr(pairs3L))                # as prop matr

## ----contribToContigPerFrag, echo=TRUE----------------------------------------
path1 <- matrix(c(17,19,18,17, 4,4,2,3), ncol=2,
  dimnames=list(c("A/B/C/D","A/B/G/D","A/H","A/H/I"), c("sumLen","n")))
contribToContigPerFrag(path1)

## ----simpleFragFig, echo=TRUE-------------------------------------------------
frag1 <- cbind(beg=c(2,3,7,13,13,15,7,9,7, 3,3,5), end=c(6,12,8,18,20,20,19,12,12, 4,5,7))
rownames(frag1) <- letters[1:nrow(frag1)]
simpleFragFig(frag1)

## ----countSameStartEnd, echo=TRUE---------------------------------------------
countSameStartEnd(frag1)

## ----pasteC, echo=TRUE--------------------------------------------------------
pasteC(1:4)
pasteC(letters[1:4],quoteC="'")

## ----color-gradient1, echo=TRUE-----------------------------------------------
set.seed(2015); dat1 <- round(runif(15),2)
plot(1:15, dat1, pch=16, cex=2, las=1, col=colorAccording2(dat1),
  main="Color gradient according to value in y")
# Here we modify the span of the color gradient
plot(1:15, dat1, pch=16, cex=2, las=1, 
  col=colorAccording2(dat1, nStartO=0, nEndO=4, revCol=TRUE), main="blue to red")
# It is also possible to work with scales of transparency
plot(1:9, pch=3, las=1)
points(1:9, 1:9, col=transpGraySca(st=0, en=0.8, nSt=9,trans=0.3), cex=42, pch=16)

## ----convColorToTransp, fig.height=6, fig.width=3, echo=TRUE------------------
col0 <- c("#998FCC","#5AC3BA","#CBD34E","#FF7D73")
col1 <- convColorToTransp(col0,alph=0.7)
layout(1:2)
pie(rep(1,length(col0)), col=col0, main="no transparency")
pie(rep(1,length(col1)), col=col1, main="new transparency")

## ----sysDate1, echo=TRUE------------------------------------------------------
## To get started
Sys.Date()

## Compact English names (in European order), no matter what your local settings are :
sysDate() 

## ----DateTab, echo=TRUE-------------------------------------------------------
tabD <- cbind(paste0("univ",1:6), c(sysDate(style="univ1"), sysDate(style="univ2"), 
    sysDate(style="univ3"), sysDate(style="univ4"), as.character(sysDate(style="univ5")), 
    sysDate(style="univ6")), paste0("   local",1:6), 
  c(sysDate(style="local1"), sysDate(style="local2"), sysDate(style="local3"), 
    sysDate(style="local4"), sysDate(style="local5"), sysDate(style="local6")))   
knitr::kable(tabD, caption="Various ways of writing current date")

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
wrMisc documentation built on Sept. 11, 2024, 6:10 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
wrMisc
Analyze Experimental High-Throughput (Omics) Data

inst/doc/wrMiscVignette1.R
In wrMisc: Analyze Experimental High-Throughput (Omics) Data

Try the wrMisc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

wrMisc Analyze Experimental High-Throughput (Omics) Data

inst/doc/wrMiscVignette1.R In wrMisc: Analyze Experimental High-Throughput (Omics) Data

Try the wrMisc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

wrMisc
Analyze Experimental High-Throughput (Omics) Data

inst/doc/wrMiscVignette1.R
In wrMisc: Analyze Experimental High-Throughput (Omics) Data