# checkQ --------------------------------------------------------------------
#' @title Internal: Check input filetype.
#' @description Internal: Check input filetype.
#' @param files A character or character vector of one or more input text files
#' or a list of dataframes.
#' @param warn A logical indicating if a warning must be displayed for file
#' those are not STRUCTURE, TESS or BASIC file.
#' @return A character or character vector indicating input type 'STRUCTURE',
#' 'TESS', 'BASIC', 'CLUMPP', 'list', 'data.frame', 'UNIDENTIFIED' for all
#' selected files.
#' @noRd
#' @keywords internal
#' @importFrom utils read.table
#'
checkQ <- function(files=NULL,warn=FALSE) {
if(is.null(files)) stop("checkQ: Input is empty.")
if(class(files) != "list" && class(files) != "character") stop("checkQ: Input is not a character or list datatype.")
len1 <- length(files)
checkvec <- rep("UNIDENTIFIED",length=len1)
subtype <- rep(NA,length=len1)
for(i in seq_along(files))
{
chk <- FALSE
if(class(files)=="list")
{
if(class(files[[i]])=="data.frame")
{
chk <- TRUE
checkvec[i] <- "data.frame"
}
}
if(!chk)
{
read1 <- readLines(files[i],n=7,warn=FALSE)
# read BAPS file
if(!chk)
{
chk <- any(grepl("RESULTS OF ADMIXTURE ANALYSIS BASED",toupper(read1)))
if(chk) checkvec[i] <- "BAPS"
}
# read TESS file
if(!chk)
{
chk <- grepl("ESTIMATED CLUSTERING PROBABILITIES",toupper(read1)[1])
if(chk) checkvec[i] <- "TESS"
}
# read STRUCTURE file
if(!chk)
{
chk <- grepl("STRUCTURE BY PRITCHARD",toupper(read1)[4])
if(chk) checkvec[i] <- "STRUCTURE"
}
# read BASIC files
rm(read1)
if(!chk)
{
seps <- c("","\t",",")
subtypes <- c("SPACE","TAB","COMMA")
k=1
while(!chk)
{
if(class(try(suppressWarnings(read.table(files[i],header=FALSE,sep=seps[k],nrows=1,quote="",stringsAsFactors=FALSE))))!="try-error")
{
df <- read.table(files[i],header=FALSE,sep=seps[k],nrows=1,quote="",stringsAsFactors=FALSE)
if(all(sapply(df,is.numeric))) {
checkvec[i] <- "BASIC"
subtype[i] <- subtypes[k]
chk <- TRUE
}else{
if((ncol(df) > 2) && (is.character(df[,1])))
{
checkvec[i] <- "CLUMPP"
chk <- TRUE
}
}
}
k=k+1
if(k>3) break
}
}
}
if((!chk) && warn) warning(paste0("checkQ: ",files[i]," is not a STRUCTURE, TESS, BAPS, BASIC or CLUMPP file.\n"))
}
return(list(type=checkvec,subtype=subtype))
}
# readQ ------------------------------------------------------------------------
#' @title Convert run files (q-matrices) to qlist.
#' @description Takes one or more STRUCTURE, TESS, BAPS, BASIC (numeric delimited runs)
#' or CLUMPP format files and converts them to a qlist (list of dataframes).
#' @param files A character or character vector of one or more files.
#' @param filetype A character indicating input filetype. Options are 'auto',
#' 'structure','tess2','baps','basic' or 'clumpp'. See details.
#' @param indlabfromfile A logical indicating if individual labels must be read
#' from input file and used as row names for resulting dataframe. Spaces in
#' labels may be replaced with _. Currently only applicable to STRUCTURE runs.
#' @param readci A logical indicating if confidence intervals from the STRUCTURE
#' run file (if available) should be read. Set to FALSE by default as it take up
#' excess space. This argument is only applicable to STRUCTURE run files.
#' @return A list of lists with dataframes is returned. List items are named by
#' input filenames. File extensions such as '.txt','.csv','.tsv' and '.meanQ'
#' are removed from filename. In case filenames are missing or not available,
#' lists are named sample1, sample2 etc. For STRUCTURE runs, if individual
#' labels are present in the run file and \code{indlabfromfile=TRUE}, they are
#' added to the dataframe as row names. Structure metadata including loci,
#' burnin, reps, elpd, mvll, and vll is added as attributes to each dataframe.
#' When \code{readci=TRUE} and if CI data is available in STRUCTURE run files,
#' it is read in and attached as attribute named ci.
#' For CLUMPP files, multiple runs within one file are suffixed by -1, -2 etc.
#' @details
#' STRUCTURE, TESS2 and BAPS run files have unique layout and format (See
#' vignette). BASIC files can be Admixture run files, fastStructure meanQ files
#' or any tab-delimited, space-delimited or comma-delimited tabular data
#' without a header. CLUMPP files can be COMBINED, ALIGNED or MERGED files.
#' COMBINED files are generated from \code{clumppExport}. ALIGNED and
#' MERGED files are generated by CLUMPP.
#'
#' To convert TESS3 R objects to pophelper qlist, see \code{\link{readQTess3}}.
#'
#' See the \href{http://royfrancis.github.io/pophelper/articles/index.html#readq}{vignette} for more details.
#'
#' @seealso \code{\link{readQTess3}}
#'
#' @examples
#'
#' # STRUCTURE files
#' sfiles <- list.files(path=system.file("files/structure",package="pophelper"),
#' full.names=TRUE)
#' # create a qlist of all runs
#' slist <- readQ(sfiles)
#' slist <- readQ(sfiles,filetype="structure")
#'
#' # use ind names from file
#' slist <- readQ(sfiles[1],indlabfromfile=TRUE)
#'
#' # access the first run
#' slist <- readQ(sfiles)[[1]]
#'
#' # access names of runs
#' names(slist)
#'
#' # get attributes of a run
#' attributes(slist[[1]])
#'
#' # get attributes of all runs
#' lapply(slist,attributes)
#'
#' # TESS files
#' tfiles <- list.files(path=system.file("files/tess",package="pophelper"),
#' full.names=TRUE)
#' # create a qlist
#' tlist <- readQ(tfiles)
#'
#' # BASIC files
#' afiles <- list.files(path=system.file("files/admixture",package="pophelper"),
#' full.names=TRUE)
#' # create a qlist
#' alist <- readQ(afiles)
#'
#' # CLUMPP files
#' cfiles1 <- system.file("files/STRUCTUREpop_K4-combined.txt",
#' package="pophelper")
#' cfiles2 <- system.file("files/STRUCTUREpop_K4-combined-aligned.txt",
#' package="pophelper")
#' cfiles3 <- system.file("files/STRUCTUREpop_K4-combined-merged.txt",
#' package="pophelper")
#'
#' # create a qlist
#' clist1 <- readQ(cfiles1)
#' clist2 <- readQ(cfiles2)
#' clist3 <- readQ(cfiles3)
#'
#' # manually create qlist
#' df1 <- data.frame(Cluster1=c(0.2,0.4,0.6,0.2),Cluster2=c(0.8,0.6,0.4,0.8))
#' df2 <- data.frame(Cluster1=c(0.3,0.1,0.5,0.6),Cluster2=c(0.7,0.9,0.5,0.4))
#'
#' # one-element qlist
#' q1 <- list("sample1"=df1)
#' str(q1)
#'
#' # two-element qlist
#' q2 <- list("sample1"=df1,"sample2"=df2)
#' str(q2)
#'
#' @export
#'
readQ <- function(files=NULL,filetype="auto",indlabfromfile=FALSE,readci=FALSE) {
if(is.null(files) || (length(files)==0)) stop("readQ: No input files.")
if(!is.character(files)) stop("readQ: Argument 'files' is not a character datatype.")
flen <- length(files)
len <- length(files)
dlist <- vector("list")
for (i in seq_along(files))
{
# check file
if(filetype=="auto")
{
chk <- tolower(checkQ(files[i])$type)
if(chk %in% c("structure","tess","baps","basic","clumpp"))
{
if(chk=="structure") dfr <- readQStructure(files[i],indlabfromfile=indlabfromfile,readci=readci)
if(chk=="tess") dfr <- readQTess(files[i])
if(chk=="basic") dfr <- readQBasic(files[i])
if(chk=="clumpp") dfr <- readQClumpp(files[i])
if(chk=="baps") dfr <- readQBaps(files[i])
dlist <- append(dlist,dfr)
}else{
warning(paste0("readQ: Input file ",files[i]," was not identified as a STRUCTURE, TESS, BAPS, BASIC or CLUMPP filetype. Specify 'filetype' manually or check input.\n"))
}
}else{
if(filetype=="structure") dfr <- readQStructure(files[i],indlabfromfile=indlabfromfile,readci=readci)
if(filetype=="tess") dfr <- readQTess(files[i])
if(filetype=="basic") dfr <- readQBasic(files[i])
if(filetype=="clumpp") dfr <- readQClumpp(files[i])
if(filetype=="baps") dfr <- readQBaps(files[i])
dlist <- append(dlist,dfr)
}
}
return(dlist)
}
# readQStructure ------------------------------------------------------------
#' @title Convert STRUCTURE run files to qlist.
#' @description Takes one or more STRUCTURE run files and converts them to a
#' list of dataframes.
#' @param files A character or character vector of one or more STRUCTURE run
#' files. Use \code{choose.files(multi=TRUE)}
#' to select interactively.
#' @param indlabfromfile A logical indicating if individual labels must be read
#' from input file and used as row names for resulting dataframe. Spaces in
#' labels may be replaced with _.
#' @param readci A logical indicating if confidence intervals from the structure
#' file (if available) should be read. Set to FALSE by default as it take up
#' excess space.
#' @return A list of lists with dataframes is returned. If individual labels are
#' present in the STRUCTURE file, they are added to the dataframe as row names.
#' Structure metadata including loci, burnin, reps, elpd, mvll, and vll is added
#' as attributes to each dataframe. When \code{readci=TRUE} and if CI data is
#' available, it is read in and attached as attribute named ci. List items are
#' named by input filenames.
#' @details See the \href{http://royfrancis.github.io/pophelper/articles/index.html#readq}{vignette} for more details.
#' @examples
#' sfiles <- list.files(path=system.file("files/structure",package="pophelper"),
#' full.names=TRUE)
#' # create a qlist of all runs
#' slist <- readQStructure(sfiles)
#'
#' # use ind names from file
#' slist <- readQStructure(sfiles[1],indlabfromfile=TRUE)
#'
#' # access the first run
#' slist <- readQStructure(sfiles)[[1]]
#'
#' # access names of runs
#' names(slist)
#' @importFrom utils read.delim
#' @export
#'
readQStructure <- function(files=NULL,indlabfromfile=FALSE,readci=FALSE) {
if(is.null(files) || (length(files)==0)) stop("readQStructure: No input files.")
# number of files selected
flen <- length(files)
#check file
if(any(checkQ(files)$type != "STRUCTURE")) stop("readQStructure: Input may be in incorrect format.")
i <- 1
dlist <- vector("list",length=flen)
len1 <- length(files)
for (i in seq_along(files))
{
fname <- basename(files[i])
file1 <- readLines(as.character(files[i]),warn=FALSE)
# find individuals and get number of individuals
ind <- as.numeric(as.character(gsub("\\D","",grep("\\d individuals",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1])))
if(is.na(ind)) warning(paste0("Number of individuals is NA in file: ",fname,"\n"))
# get value of k & error check
k <- as.numeric(as.character(gsub("\\D","",grep("\\d populations assumed",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1])))
if(is.na(k)) warning(paste0("Value of K is NA in file: ",fname,"\n"))
# get number of loci & error check
loci <- as.numeric(gsub("\\D","",grep("\\d loci",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1]))
if(is.na(loci)) warning(paste0("Number of Loci is NA in file: ",files[i],"\n"))
# get burn-in value & error check
burnin <- as.numeric(gsub("\\D","",grep("\\d Burn-in period",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1]))
if(is.na(burnin)) warning(paste0("Burn-in value is NA in file: ",files[i],"\n"))
# get burn-in value & error check
reps <- as.numeric(gsub("\\D","",grep("\\d Reps",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1]))
if(is.na(reps)) warning(paste0("Reps value is NA in file: ",files[i],"\n"))
# get est ln prob of data & error check
elpd <- as.numeric(gsub("=","",gsub("Estimated Ln Prob of Data","",grep("Estimated Ln Prob of Data",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1])))
if(is.na(elpd)) warning(paste0("Estimated Ln Prob of Data is NA in file: ",files[i],"\n"))
# get mn value of ln likelihood & error check
mvll <- as.numeric(gsub("=","",gsub("Mean value of ln likelihood","",grep("Mean value of ln likelihood",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1])))
if(is.na(mvll)) warning(paste0("Mean value of ln likelihood is NA in file: ",files[i],"\n"))
# get variance of ln likelihood else NA
vll <- as.numeric(gsub("=","",gsub("Variance of ln likelihood","",grep("Variance of ln likelihood",file1,perl=TRUE,ignore.case=TRUE,value=TRUE)[1])))
if(is.na(vll)) warning(paste0("Variance of ln likelihood is NA in file: ",files[i],"\n"))
file1 <- file1[grep(".+\\(\\d+\\).+\\:.+",file1)]
if(length(file1)==0)
{
cstart <- charmatch("Inferred ancestry of individuals",file1)
cend <- charmatch("Estimated Allele Frequencies in each",file1)
file1 <- file1[(cstart+2):(cend-1)]
}
file_a <- file1[file1 != ""]
rm(file1)
# error check
tc_file_a <- textConnection(file_a)
file_b <- read.delim(tc_file_a,header=FALSE,sep="",stringsAsFactors=FALSE)
close(tc_file_a)
suppressWarnings(
errorcheck <- try(
file_b[1,as.integer(grep(":",file_b[1,])+1):as.integer(max(grep("^[0-9]|[.]+$",file_b[1,]))),drop=FALSE],
silent=TRUE)
)
rm(file_b)
if(class(errorcheck)=="try-error")
{
# using manual substring
file_a <- gsub("\\([0-9.,]+\\)","",file_a)
file_b <- gsub(": ","",substr(file_a,regexpr(":\\W+\\d\\.\\d+",file_a),nchar(file_a)-1))
file_b <- sub("\\s+$","",sub("^\\s+","",file_b))
rm(file_a)
file_c <- as.vector(as.numeric(as.character(unlist(strsplit(file_b," ")))))
rm(file_b)
dframe <- as.data.frame(matrix(file_c,nrow=ind,byrow=TRUE),stringsAsFactors=FALSE)
}else{
# using textconnection
tc_file_a <- textConnection(file_a)
file_b <- read.delim(tc_file_a,header=FALSE,sep="",stringsAsFactors=FALSE)
close(tc_file_a)
dframe <- file_b[,as.integer(grep(":",file_b[1,])+1):as.integer(max(grep("^[0-9]|[.]+$",file_b[1,]))),drop=FALSE]
}
dframe <- as.data.frame(sapply(dframe,as.numeric),stringsAsFactors=FALSE)
colnames(dframe) <- paste0("Cluster",1:ncol(dframe))
row.names(dframe) <- 1:nrow(dframe)
#row.names(dframe) <- sprintf(paste0("%",paste0(rep(0,nchar(nrow(dframe))),collapse=""),nchar(nrow(dframe)),"d"),1:nrow(dframe))
# add labels
if(indlabfromfile)
{
labeldf <- file_b[,(grep("[0-9]",file_b[1,])[1]+1):(grep("[(]",file_b[1,])[1]-1),drop=FALSE]
if(ncol(labeldf) > 1) labeldf <- data.frame(V2=do.call(paste,c(labeldf,sep="_")),stringsAsFactors=FALSE)
if(nrow(labeldf)==nrow(dframe))
{
if(any(duplicated(labeldf[,1])))
{
warning(paste0("readQStructure: Individual names in file ",fname," not used due to presence of duplicate names.\n"))
}else{
row.names(dframe) <- as.character(labeldf[,1])
}
}else{
warning(paste0("readQStructure: Individual names in file ",fname," not used due to incorrect length.\n"))
}
}
attr(dframe,"ind") <- nrow(dframe)
attr(dframe,"k") <- ncol(dframe)
attr(dframe,"loci") <- loci
attr(dframe,"burnin") <- burnin
attr(dframe,"reps") <- reps
attr(dframe,"elpd") <- elpd
attr(dframe,"mvll") <- mvll
attr(dframe,"vll") <- vll
# confidence intervals
if(readci) {
cichk <- grep("([0-9.]+,[0-9.]+)",file_b[1,])
if(length(cichk)!=0) {
file_b <- apply(file_b[,cichk,drop=FALSE],1,paste0,collapse="")
file_b <- gsub("[()]","",gsub(")(",",",file_b,fixed=TRUE))
cframe <- as.data.frame(matrix(as.numeric(unlist(strsplit(file_b,","))),ncol=ncol(dframe)*2,byrow=TRUE))
colnames(cframe) <- as.vector(t(outer(paste0("Cluster",1:ncol(dframe)),c("L","H"),paste,sep="")))
row.names(cframe) <- row.names(dframe)
attr(dframe,"ci") <- cframe
}else{
warning("plotQStructure: Confidence intervals could not be read.\n")
}
}
dlist[[i]] <- dframe
}
fnames <- sub(".txt","",basename(files))
names(dlist) <- fnames
return(dlist)
}
# readQTess -----------------------------------------------------------------
#' @title Convert TESS cluster files to qlist.
#' @description Takes one or more TESS cluster run files and converts them to a
#' list of dataframes.
#' @param files A character or character vector of one or more TESS cluster run
#' files. Use \code{choose.files(multi=TRUE)} to select interactively.
#' @return A list of lists with dataframes is returned. List items are named by
#' input filename.
#' @details See the \href{http://royfrancis.github.io/pophelper/articles/index.html#readq}{vignette} for more details. Use \code{collectRunsTess()} to collect TESS runs into one directory.
#' @examples
#' tfiles <- list.files(path=system.file("files/tess",package="pophelper"),
#' full.names=TRUE)
#' # create a qlist
#' tlist <- readQTess(tfiles)
#' @importFrom utils read.delim
#' @export
#'
readQTess <- function(files=NULL) {
if(is.null(files) || (length(files)==0)) stop("readQTess: No input files.")
# number of files selected
flen <- length(files)
# check file
if(any(checkQ(files)$type != "TESS")) warning("readQTess: Input may contain incorrect input format.\n")
i <- 1
dlist <- vector("list",length=flen)
len1 <- length(files)
for (i in seq_along(files))
{
# read whole file in
file1 <- readLines(files[i],warn=FALSE)
# extract the cluster table part
file1 <- file1[3:c(grep("Estimated Allele Frequencies",file1)-1)]
# remove empty lines
file1 <- file1[file1 != ""]
# create a text connection
tc_file1 <- textConnection(file1)
# read as a table
file2 <- read.delim(tc_file1,header=FALSE,sep="\t",stringsAsFactors=FALSE)
# close text connection
close(tc_file1)
# choose columns 2 to numofcols-2
dframe <- file2[,2:(ncol(file2)-2)]
# remove temporary files
rm(file1,file2)
# convert all columns to numeric
dframe <- as.data.frame(sapply(dframe,as.numeric),stringsAsFactors=FALSE)
# add column names
colnames(dframe) <- paste0("Cluster",1:ncol(dframe))
# add attributes
attr(dframe,"ind") <- nrow(dframe)
attr(dframe,"k") <- ncol(dframe)
# add to list
dlist[[i]] <- dframe
}
# add file names as qlist names
fnames <- sub(".txt","",basename(files))
names(dlist) <- fnames
return(dlist)
}
# readQBasic ---------------------------------------------------------------
#' @title Convert delimited text files to qlist.
#' @description Takes one or more delimited numeric text files and converts each
#' of them to separate dataframes.
#' @param files A character or character vector of one or more delimited text
#' files. Use \code{choose.files(multi=TRUE)} to select interactively.
#' @return A list of lists with dataframes is returned. List items are named by
#' input filename.
#' @details Input files can be Admixture run files, fastStructure meanQ files.
#' or any tab-delimited, space-delimited or comma-delimited tabular data without
#' header. See the \href{http://royfrancis.github.io/pophelper/articles/index.html#readq}{vignette} for more details.
#' @examples
#' afiles <- list.files(path=system.file("files/admixture",package="pophelper"),
#' full.names=TRUE)
#' # create a qlist
#' alist <- readQBasic(afiles)
#' @importFrom utils read.delim
#' @export
#'
readQBasic <- function(files=NULL) {
if(is.null(files) || (length(files)==0)) stop("readQBasic: No input files.")
# number of files selected
flen <- length(files)
# check input file type
chk <- checkQ(files)
if(any(chk$type != "BASIC")) warning("readQBasic: Input may be in incorrect format.\n")
if(any(is.na(chk$subtype))) warning("readQBasic: Input may be in incorrect format.\n")
i <- 1
dlist <- vector("list",length=flen)
len1 <- length(files)
for (i in seq_along(files))
{
# read in delimited files
if(chk$subtype[i]=="SPACE") dframe <- read.delim(files[i],header=FALSE,sep="",dec=".",stringsAsFactors=FALSE)
if(chk$subtype[i]=="TAB") dframe <- read.delim(files[i],header=FALSE,sep="\t",dec=".",stringsAsFactors=FALSE)
if(chk$subtype[i]=="COMMA") dframe <- read.delim(files[i],header=FALSE,sep=",",dec=".",stringsAsFactors=FALSE)
# error if columns contain non-numeric
if(!all(sapply(dframe,is.numeric))) stop("readQBasic: One or more columns are not numeric.")
colnames(dframe) <- paste0("Cluster",1:ncol(dframe))
# add attributes
attr(dframe,"ind") <- nrow(dframe)
attr(dframe,"k") <- ncol(dframe)
dlist[[i]] <- dframe
}
# remove file name extensions
fnames <- sub(".txt","",basename(files))
fnames <- sub(".tsv","",basename(files))
fnames <- sub(".csv","",basename(files))
fnames <- sub(".meanQ","",basename(files))
# add file names to qlist
names(dlist) <- fnames
return(dlist)
}
# readQClumpp ---------------------------------------------------------------
#' @title Convert CLUMPP format numeric text files to qlist.
#' @description Takes one or more CLUMPP format numeric text files and converts
#' them to a list of dataframes.
#' @param files A character or character vector of one or more COMBINED, ALIGNED
#' or MERGED files. COMBINED files are generated from \code{clumppExport}.
#' ALIGNED and MERGED files are generated by CLUMPP.
#' Use \code{choose.files(multi=TRUE)} to select interactively.
#' @return A list of lists with dataframes is returned. Each list item is named
#' by input filename. Multiple runs within one file are suffixed by -1, -2 etc.
#' @details See the \href{http://royfrancis.github.io/pophelper/articles/index.html#readq}{vignette} for more details.
#' @examples
#' cfiles1 <- system.file("files/STRUCTUREpop_K4-combined.txt",package="pophelper")
#' cfiles2 <- system.file("files/STRUCTUREpop_K4-combined-aligned.txt",
#' package="pophelper")
#' cfiles3 <- system.file("files/STRUCTUREpop_K4-combined-merged.txt",
#' package="pophelper")
#'
#' # create a qlist
#' clist1 <- readQClumpp(cfiles1)
#' clist2 <- readQClumpp(cfiles2)
#' clist3 <- readQClumpp(cfiles3)
#'
#' @importFrom utils read.table
#' @export
#'
readQClumpp <- function(files=NULL) {
if(is.null(files) || (length(files)==0)) stop("readQClumpp: No input files.")
# number of files selected
flen <- length(files)
# check file
chk <- checkQ(files)
if(any(chk$type != "CLUMPP")) warning("readQClumpp: Input may be in incorrect format.\n")
i <- 1
k <- 1
dlist <- vector("list")
snames <- vector()
for (i in seq_along(files))
{
fname <- gsub(".txt","",basename(files[i]))
df1 <- read.table(files[i],header=FALSE,sep="",dec=".",quote="",stringsAsFactors=FALSE)
if(class(df1)!="data.frame") stop("readQClumpp: Read error. Check input format.")
df1[,1] <- factor(df1[ ,1])
indlev <- levels(df1[,1])
# error check
if((nrow(df1) %% length(indlev)) != 0) stop("readQClumpp: Number of individuals is not a multiple of the total number of rows.")
Ind <- as.numeric(as.character(length(indlev)))
tempb <- as.numeric(nrow(df1))
numruns <- as.numeric(tempb/Ind)
numk <- ncol(df1) - 2
df2 <- data.frame(Num=factor(rep(1:numruns,1,each=Ind)),
Ind=factor(rep(1:Ind,numruns)),
df1[,2:(numk+1)],stringsAsFactors=FALSE)
colnames(df2)[3:ncol(df2)] <- paste0("Cluster",1:(ncol(df2)-2))
for(j in 1:numruns)
{
dframe <- subset(df2,df2$Num==j)
dframe$Num <- NULL
dframe$Ind <- NULL
snames <- c(snames,paste0(fname,"-",j))
if(!all(sapply(dframe,is.numeric))) stop("readQClumpp: One or more columns are not numeric.")
attr(dframe,"ind") <- nrow(dframe)
attr(dframe,"k") <- ncol(dframe)
dlist[[k]] <- dframe
k <- k+1
}
}
snames <- sub(".txt","",snames)
snames <- sub(".tsv","",snames)
snames <- sub(".csv","",snames)
snames <- sub(".meanQ","",snames)
names(dlist) <- snames
return(dlist)
}
# readQTess3 -------------------------------------------------------------------
#' @title Convert TESS3 R object to pophelper qlist.
#' @description Takes a TESS3 R object and convert to pophelper qlist for use
#' with pophelper.
#' @param t3list A TESS3 object. An output from function \code{tess3()} from
#' package \code{tess3r}. This is not an external object. If imported from outside
#' of R, it must be read in as an Rdata or Rds file.
#' @param progressbar A logical indicating if execution progress must be shown.
#' @return A list of lists with dataframes (qlist) is returned. Each list item
#' is named by as sample1, sample2 etc. Within TESS3, 'tess3Main' attributes L,
#' n, ploidy, K, rmse and crossentropy are preserved as attributes in the qlist
#' dataframe.
#' @details See the \href{http://royfrancis.github.io/pophelper/}{vignette} for
#' more details.
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @export
#'
readQTess3 <- function(t3list=NULL,progressbar=FALSE) {
if(is.null(t3list)) stop("readQTess3: Input is empty.")
if(!any("tess3" %in% class(t3list))) warning("readQTess3: Input cannot be identified as a valid tess3 class object.\n")
length(t3list)
# initialise loop variables
len <- length(t3list)
qlist <- vector("list",length=len)
if(progressbar) pb <- txtProgressBar(min=0,max=len,style=3)
# loop to read in data
for(i in seq_along(t3list))
{
if(progressbar) setTxtProgressBar(pb,i)
if(!"tess3.run" %in% names(t3list[[i]])) stop("readQTess3: 'tess3.run' slot not found in list item ",i,".")
dlist <- t3list[[i]]$tess3.run[[1]]
dframe <- as.data.frame(dlist$Q,stringsAsFactors=FALSE)
colnames(dframe) <- paste0("Cluster",1:ncol(dframe))
# attribute values as added if available else set to NA
if("n" %in% names(dlist)) {attr(dframe,"ind") <- dlist$n} else {attr(dframe,"ind") <-NA}
if("K" %in% names(dlist)) {attr(dframe,"k") <- dlist$K} else {attr(dframe,"k") <- NA}
if("L" %in% names(dlist)) {attr(dframe,"loci") <- dlist$L} else {attr(dframe,"loci") <- NA}
if("gif" %in% names(dlist)) {attr(dframe,"gif") <- dlist$gif} else {attr(dframe,"gif") <- NA}
if("rmse" %in% names(dlist)) {attr(dframe,"rmse") <- dlist$rmse} else {attr(dframe,"rmse") <- NA}
if("crossentropy" %in% names(dlist)) {attr(dframe,"crossentropy") <- dlist$crossentropy} else {attr(dframe,"crossentropy") <- NA}
if("ploidy" %in% names(dlist)) {attr(dframe,"ploidy") <- dlist$ploidy} else {attr(dframe,"ploidy") <- NA}
qlist[[i]] <- dframe
}
if(progressbar) close(pb)
# list items are labelled
names(qlist) <- paste0("sample",1:len)
return(qlist)
}
# readQBaps -----------------------------------------------------------------
#' @title Convert BAPS cluster files to qlist.
#' @description Takes one or more BAPS cluster run files and converts them to a
#' list of dataframes.
#' @param files A character or character vector of one or more BAPS cluster run
#' files. Use \code{choose.files(multi=TRUE)} to select interactively.
#' @return A list of lists with dataframes is returned. List items are named by
#' input filename.
#' @details See the \href{http://royfrancis.github.io/pophelper/articles/index.html#readq}{vignette} for more details.
#' @examples
#' bfiles <- list.files(path=system.file("files/baps",package="pophelper"),
#' full.names=TRUE)
#' # create a qlist
#' blist <- readQBaps(bfiles)
#' @export
#'
readQBaps <- function(files=NULL) {
if(is.null(files) || (length(files)==0)) stop("readQBaps: No input files.")
# number of files selected
flen <- length(files)
# check if file type is BAPS
if(any(checkQ(files)$type != "BAPS")) warning("readQBaps: Input may be in incorrect format.\n")
i <- 1
dlist <- vector("list",length=flen)
for (i in seq_along(files))
{
# read in all lines from file
file1 <- readLines(files[i],warn=FALSE)
# extract the cluster table part
file1 <- file1[grep("^1:",file1):length(file1)]
# read table using delimiter : and use column V2
tc_file1 <- textConnection(file1)
file2 <- read.delim(tc_file1,sep=":",header=FALSE,stringsAsFactors=FALSE)$V2
# read table using delimiter space
tc_file2 <- textConnection(file2)
dframe <- read.delim(tc_file2,sep="",header=FALSE,stringsAsFactors=FALSE)
# close text connections
close(tc_file1,tc_file2)
# remove temporary objects
rm(file1,file2)
# convert all columns to numeric
dframe <- as.data.frame(sapply(dframe,as.numeric),stringsAsFactors=FALSE)
# create valid column names
colnames(dframe) <- paste0("Cluster",1:ncol(dframe))
# attach attributes to dataframe
attr(dframe,"ind") <- nrow(dframe)
attr(dframe,"k") <- ncol(dframe)
# place dataframe in a list
dlist[[i]] <- dframe
}
# remove .txt in all file names
fnames <- sub(".txt","",basename(files))
# label qlist objects with file names
names(dlist) <- fnames
return(dlist)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.