Nothing
f.sort.alleles.new <- function(data, design = "triad", xchrom = F, sex) {
if(xchrom & missing(sex)) stop("Sex variable must be specified for X-chromosome analyses!\n", call. = F)
## PREPARATIONS:
.alleles <- data$aux$alleles
.nalleles <- sapply(.alleles, length)
gen.data <- data$gen.data
# gen.data <- f.extract.ff.numeric( data$gen.data )
# # gen.data <- matrix( data$gen.data[,], ncol = ncol( data$gen.data ) )
# # gen.data[ gen.data == 0 ] <- NA
if( design %in% c("triad", "cc.triad") ){
fam.size <- 3
mark.size <- 8
fam <- "mf"
} else {
fam.size <- 1
mark.size <- 2
fam <- "c"
}
.nmarkers <- dim( gen.data )[2]/( fam.size*2 )
## SET IN "CANONICAL" ORDERING, WITH SMALLEST FIRST:
for(i in seq(1, .nmarkers * fam.size * 2, 2)){
.tmpcol1 <- pmin(gen.data[,i], gen.data[,i+1])
.tmpcol2 <- pmax(gen.data[,i], gen.data[,i+1])
gen.data[,i] <- .tmpcol1
gen.data[,i+1] <- .tmpcol2
}
## AGGREGATE DATA WITH A FREQUENCY COUNT:
if(!xchrom) .tmpd <- gen.data
if(xchrom) .tmpd <- cbind( gen.data, sex = sex )
.data.agg <- f.aggregate( .tmpd )
if(xchrom){# STORE SEX VARIABLE SEPARATELY
.sex <- .data.agg$sex
.data.agg$sex <- NULL
if(any(is.na(.sex))){
stop(paste(sum(.data.agg$freq[is.na(.sex)]), " missing values found in sex variable! Must be removed from file before analysis.\n", sep = ""), call. = F)
}
.tmp <- sort(unique(.sex))
if(!all(is.element(.tmp, c(1,2)))){
stop(paste("The sex variable is coded", paste(.tmp, collapse = " "), ". It should be coded 1 (males) and 2 (females).", sep = ""), call. = F)
}
#if(verbose) cat("\nNote: The following sex variable coding has been assumed: males = 1, females = 2")
}
.lines <- attr(.data.agg, "orig.lines")
.nlines.unique <- dim(.data.agg)[1]
## RESHAPE AND PERMUTE COLUMNS (ALL POSSIBLE PERMUTATIONS) WITHIN MOTHER, FATHER
## AND CHILD, RESULTING
## IN 6 (or 2) COLUMNS (ROW)SORTED BY MARKER AND BY PERMUTATION WITHIN MARKER, 8 (or 2) ROWS PR.
## TRIAD (or CHILD) PR MARKER.
.data.long <- as.matrix(.data.agg[,1:(fam.size * 2 * .nmarkers)])
.data.long <- t(matrix(as.numeric(t(.data.long)), nrow = fam.size * 2)) # STACK DATA LINE BY LINE
if( design %in% c("triad", "cc.triad") ){
.swap <- c(c(1,2,3,4,5,6), c(2,1,3,4,5,6), c(1,2,4,3,5,6), c(2,1,4,3,5,6), c(1,2,3,4,6,5), c(2,1,3,4,6,5), c(1,2,4,3,6,5), c(2,1,4,3,6,5))
dimn <- list(NULL, c("m1", "m2", "f1", "f2", "c1", "c2"))
} else {
.swap <- c(c(1,2), c(2,1))
dimn <- list(NULL, c("c1", "c2"))
}
.data.long <- .data.long[,.swap]
.data.long <- t(matrix(as.numeric(t(.data.long)), nrow = fam.size * 2))
dimnames(.data.long) <- dimn
## ADD A VECTOR ind.unique.line WHICH COUNTS LINE NUMBER AMONG THE UNIQUE LINES
## (DOES NOT REFER TO THE ORIGINAL LINE NUMBERS), AND A VECTOR ind.unique.line.marker
## WHICH IS A UNIQUE TAG FOR EACH UNIQUE LINE/MARKER COMBINATION:
if(!xchrom) {
.data.long <- cbind(.data.long, ind.unique.line = rep(1:.nlines.unique, each = mark.size*.nmarkers), ind.marker = rep(1:.nmarkers, rep(mark.size, .nmarkers)))
} else {
.data.long <- cbind(.data.long, sex = rep(.sex, each = mark.size * .nmarkers), ind.unique.line = rep(1:.nlines.unique, each = mark.size*.nmarkers), ind.marker = rep(1:.nmarkers, rep(mark.size, .nmarkers)))
}
.ind.unique.line.marker <- f.pos.in.grid(A = c(.nmarkers, .nlines.unique), comb = .data.long[,c("ind.marker", "ind.unique.line")])
.data.long <- cbind(.data.long, ind.unique.line.marker = .ind.unique.line.marker)
## IDENTIFY MENDELIANLY CONSISTENT COMBINATIONS:
if(!xchrom){
if( design != "cc" ){
.valid2 <- (.data.long[,2] == .data.long[,5]) # SECOND IN MOTHER EQUALS FIRST IN CHILD
.valid2[is.na(.valid2)] <- T # IF ONE OR BOTH ARE MISSING THE COMBINATION IS VALID
.valid4 <- (.data.long[,4] == .data.long[,6]) # SECOND IN FATHER EQUALS SECOND IN CHILD
.valid4[is.na(.valid4)] <- T # IF ONE OR BOTH ARE MISSING THE COMBINATION IS VALID
.valid <- .valid2 & .valid4
} else {
#----- taken from f.sort.alleles.cc.R ----
.valid <- rep(T, nrow(.data.long)) # CANNOT DETECT IF NOT ON xchrom
}
#----------
}
if(xchrom){
if( design != "cc" ){
## CHECK IF FATHER HAS TWO EQUAL ALLELES
.ia3 <- is.na(.data.long[,3])
.ia4 <- is.na(.data.long[,4])
.valid.f <- (.data.long[,3] == .data.long[,4]) # FATHER'S ALLELES EQUAL
.valid.f[.ia3 & .ia4] <- T # IF BOTH ARE MISSING THE COMBINATION IS VALID
.valid.f[is.na(.valid.f)] <- F # ELSE IT IS NOT
## CHECK THAT BOYS HAVE TWO EQUAL ALLELES
.ia5 <- is.na(.data.long[,5])
.ia6 <- is.na(.data.long[,6])
.girl <- (.data.long[,"sex"] == 2)
.valid.c <- (.girl | (.ia5 & .ia6) | (.data.long[,5] == .data.long[,6])) # IF A BOY, ALLELES SHOULD BE EQUAL OR BOTH MISSING
.valid.c[is.na(.valid.c)] <- F # ELSE IT IS NOT VALID
## CHECK THAT SECOND ALLELE OF MOTHER IS INHERITED
.valid2 <- (.data.long[,2] == .data.long[,5]) # SECOND IN MOTHER EQUALS FIRST IN CHILD
.valid2[is.na(.valid2)] <- T # IF ONE OR BOTH ARE MISSING THE COMBINATION IS VALID
## CHECK THAT SECOND ALLELE OF FATHER IS INHERITED BY DAUGHTERS
.valid4 <- (!.girl | .data.long[,4] == .data.long[,6]) # IF A GIRL, SECOND IN FATHER EQUALS SECOND IN CHILD
.valid4[is.na(.valid4)] <- T # IF ONE OR BOTH ARE MISSING THE COMBINATION IS VALID
.valid <- .valid2 & .valid4 & .valid.f & .valid.c
} else { # design == "cc"
## CHECK THAT BOYS HAVE TWO EQUAL ALLELES
.ia1 <- is.na(.data.long[,1])
.ia2 <- is.na(.data.long[,2])
.girl <- (.data.long[,"sex"] == 2)
.valid.c <- (.girl | (.ia1 & .ia2) | (.data.long[,1] == .data.long[,2])) # IF A BOY, ALLELES SHOULD BE EQUAL OR BOTH MISSING
.valid.c[is.na(.valid.c)] <- F # ELSE IT IS NOT VALID
.valid <- .valid.c
}
}# END if(xchrom)
.valid.markers <- tapply(.valid, as.dframe(.data.long[,c("ind.unique.line", "ind.marker")]), any)
.valid.unique.lines <- apply(.valid.markers, 1, all) # NOTE: ASSUMES COMPLETE LIST OF INTEGERS FOR ind.unique.line, AND SORTED..
.rows.with.Mendelian.inconsistency <- unlist(.lines[!.valid.unique.lines])
## REMOVE ALL INCONSISTENT LINES (WARNING: COMPLETELY INCONSISTENT WILL HERE DISAPP.!):
.keep <- .valid.unique.lines[.data.long[,"ind.unique.line"]] & .valid # KEEP ALL COSISTENT FOR THOSE WITH AT LEAST ONE CONSISTENT
.data.long <- .data.long[.keep,]
if( design != "cc" ){
## REDUCE TO A 4-COLUMN MATRIX (STILL LONG FORMAT) FOR ONLY MOTHER AND FATHER, REPLACE THE NAs WHENEVER
# POSSIBLE, AND LEAVE REAL NAs OPEN:
.ia2 <- is.na(.data.long[,2])
.ia4 <- is.na(.data.long[,4])
if(!xchrom){
.data.long[.ia2, 2] <- .data.long[.ia2, 5]
.data.long[.ia4, 4] <- .data.long[.ia4, 6]
}
if(xchrom){
.girl <- (.data.long[,"sex"] == 2)
.data.long[.ia2, 2] <- .data.long[.ia2, 5]
.data.long[.ia4 & .girl, 3] <- .data.long[.ia4 & .girl, 6]
.data.long[.ia4 & .girl, 4] <- .data.long[.ia4 & .girl, 6]
}
.data.long <- .data.long[,-c(5,6)]
}
## REMOVE DUPLICATE ROWS (FOR CC THESE ARE JUST ONE OF THE TWO HOMOZYGOTES):
.tag.allcol <- f.create.tag(.data.long)
.ind.unique.allcol <- !duplicated(.tag.allcol)
.data.long <- .data.long[.ind.unique.allcol, , drop = F]
## EXPAND ALL NAs WITH SEQUENCE OF ALL POSSIBLE ALLELES FOR THE CORRESPONDING MARKER:
# MERK: SKULLE VEL EGENTLIG UNNGAATT AT GUTTER
# BLE EKSPANDERT DOBBELT I xchrom, SLIK SOM DET
# UNNGAAS AT FEDRE BLIR DET!
#------------- combining with f.sort.alleles.cc -------------
# if(!xchrom){
# for(i in 4:1){# FOR EACH OF THE 4 ALLELES
# for(j in 1:.nmarkers){
# # HOW MANY LINES TO EXPAND EACH MISSING INTO:
# .nexpand <- rep(1, dim(.data.long)[1])
# .nexpand[is.na(.data.long[,i]) & (.data.long[,"ind.marker"] == j)] <- .nalleles[j]
# # CREATE THE INDEX FOR EXPANSION AND EXPAND:
# .ind.expand <- rep(seq(along = .nexpand), .nexpand)
# .data.long <- .data.long[.ind.expand, , drop = F]
# # REPLACE THE NAs IN THE EXPANDED DATA WITH SEQUENCE OF ALL POSSIBLE ALLELES AT MARKER:
# .data.long[is.na(.data.long[,i]) & (.data.long[,"ind.marker"] == j),i] <- 1:.nalleles[j]
# }
# }
# } else {
# for(i in 3:1){# FOR EACH OF THE 4 ALLELES, BUT SIMULTANEOUSLY FOR THE TWO ALLELES OF THE FATHER
# for(j in 1:.nmarkers){
# # HOW MANY LINES TO EXPAND EACH MISSING INTO:
# .nexpand <- rep(1, dim(.data.long)[1])
# .nexpand[is.na(.data.long[,i]) & (.data.long[,"ind.marker"] == j)] <- .nalleles[j]
# # CREATE THE INDEX FOR EXPANSION AND EXPAND:
# .ind.expand <- rep(seq(along = .nexpand), .nexpand)
# .data.long <- .data.long[.ind.expand,]
# # REPLACE THE NAs IN THE EXPANDED DATA WITH SEQUENCE OF ALL POSSIBLE ALLELES AT MARKER:
# if(i == 3) .data.long[is.na(.data.long[,3]) & (.data.long[,"ind.marker"] == j),3:4] <- 1:.nalleles[j]
# else .data.long[is.na(.data.long[,i]) & (.data.long[,"ind.marker"] == j),i] <- 1:.nalleles[j]
# }
# }
# } # THIS MATRIX DOES NOT HAVE ANY REDUNDANT ROWS (?), BUT MAYBE AFTER PLACING ON SINGLE LINE...(?)
#-------------------------------------
if( design == "cc" ){
.data.long <- f.expand.alleles( data = .data.long, max.allele = 2, nmarkers = .nmarkers, nalleles = .nalleles )
} else {
if( !xchrom ){
.data.long <- f.expand.alleles( data = .data.long, max.allele = 4, nmarkers = .nmarkers, nalleles = .nalleles )
} else {
.data.long <- f.expand.alleles( data = .data.long, max.allele = 3, nmarkers = .nmarkers, nalleles = .nalleles )
}
}
##
## SET MARKERS SIDE BY SIDE, WITH ALL POSSIBLE COMBINATIONS:
.line.long <- 1:(dim(.data.long)[1])
# MATRIX OF LINE NUMBERS CORRESPONDING TO EACH COMB OF UNIQUE LINES AND MARKERS:
.line.bits <- tapply(.line.long, as.dframe(.data.long[,c("ind.unique.line", "ind.marker"), drop = F]), function(x)x, simplify = F)
# CREATE ALL POSSIBLE COMBINATIONS OF MARKER GENOTYPES WITH OTHER MARKER GENOTYPES:
.line.seq <- apply(.line.bits, 1, function(x) as.numeric(t(as.matrix(do.call("expand.grid", x))))) # WARNING: IF ALL EXPANSIONS HAVE SAME LENGTH, THIS IS A MATRIX, NOT A VECTOR OF MODE LIST.
.line.seq <- as.numeric(unlist(.line.seq)) # as.numeric ALSO HANDLES .line.seq WHEN A MATRIX
# EXPAND DATA WITH ALL POSSIBLE COMBINATIONS:
.data.long <- .data.long[.line.seq,,drop = F]
.navn <- dimnames(.data.long)[[2]] # SAVE NAMES BEFORE RESHAPING
# REFORMAT DATA TO SET SIDE BY SIDE:
.data.long <- t(matrix(as.numeric(t(.data.long)), nrow = dim(.data.long)[2]*.nmarkers))
## SET CORRECT NAMES AFTER RESHAPING:
.navn <- as.character(t(outer(paste("l", 1:.nmarkers, sep = ""), .navn, function(x,y) paste(x,y,sep="."))))
dimnames(.data.long) <- list(NULL, .navn)
## REORDER COLUMNS AND FIX COLNAMES:
if(!xchrom){
if( design != "cc" ){
.indtmp <- c(as.numeric(outer(7*(0:(.nmarkers - 1)), 1:4, "+")), 5)
} else {
.indtmp <- c(as.numeric(outer(5*(0:(.nmarkers - 1)), 1:2, "+")), 3)
}
} else { #xchrom
if( design != "cc" ){
.indtmp <- c(as.numeric(outer(8*(0:(.nmarkers - 1)), 1:4, "+")), c(6, 5)) # SEX IS ADDED
} else {
.indtmp <- c(as.numeric(outer(6*(0:(.nmarkers - 1)), 1:2, "+")), c(4,3))
}
}
.data.long <- .data.long[, .indtmp, drop = F]
dimnames(.data.long)[[2]][dimnames(.data.long)[[2]] == "l1.ind.unique.line"] <- "ind.unique.line"
if(xchrom) dimnames(.data.long)[[2]][dimnames(.data.long)[[2]] == "l1.sex"] <- "sex"
## COMPUTE HAPLOTYPE NUMBER FOR MARKER COMBINATIONS:
.d2 <- ncol(.data.long)
if(!xchrom){
.pos.haplo <- f.pos.to.haplocomb( A = .nalleles, comb = .data.long[, 1:(.d2 - 1)], fam = fam )
} else { #xchrom
.pos.haplo <- f.pos.to.haplocomb( A = .nalleles, comb = .data.long[,1:(.d2 - 2)], fam = fam )
}
.haplo.comb <- f.pos.to.haplocomb( pos = .pos.haplo, A = prod(.nalleles), fam = fam )
if( design != "cc" ){
colnames(.haplo.comb) <- c("m1", "m2", "f1", "f2")
} else {
colnames(.haplo.comb) <- c("c1", "c2")
}
## PREPARE OUTPUT WITH HAPLOTYPE NUMBERS, INDEX OF UNIQUE LINES (THOSE REMAINING AFTER REMOVING MENDLIAN INCONSISTENCIES)
## AND THE CORRESPONDING FREQUENCIES:
## NOTE: ind.unique.line REFERS TO THE SEQUENCE 1:.nlines.unique (THOSE REMAINING) AND NOT THE ORIGINAL LINE NUMBERS
### BOER SJEKKE DENNE?:
if(!xchrom){
.ut <- cbind( comb = .haplo.comb, ind.unique.line = .data.long[,"ind.unique.line"], freq = .data.agg$freq[.data.long[,"ind.unique.line"]] )
} else {
.ut <- cbind( comb = .haplo.comb, sex = .data.long[,"sex"], ind.unique.line = .data.long[,"ind.unique.line"], freq = .data.agg$freq[.data.long[,"ind.unique.line"]] )
}
.ut <- as.dframe( .ut, row.names = as.character( 1:nrow( .ut ) ) )
# rownames( .ut ) <- rownames( .haplo.comb )
attr(.ut, "alleles") <- .alleles
attr(.ut, "rows.with.Mendelian.inconsistency") <- .rows.with.Mendelian.inconsistency
attr(.ut, "orig.lines") <- .lines # NOTE: .lines CAN BE INDEXED BY .tag AND .tag.unique (AS CHARACTER) OR BY ind.unique.line
return(.ut)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.