R/haplo.group.q
In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

Documented in haplo.group

#  $Author: sinnwell $
#  $Date: 2007/03/22 15:41:32 $ -->
#  $Header: /projects/genetics/cvs/cvsroot/haplo.stats/R/haplo.group.q,v 1.12 2007/03/22 15:41:32 sinnwell Exp $ -->
#  $Locker:  $  
#   $Log: haplo.group.q,v $
#   Revision 1.12  2007/03/22 15:41:32  sinnwell
#   fix call to haplo.em
#
#   Revision 1.11  2007/03/13 15:56:53  sinnwell
#   add weight for haplo.em on all people
#
#   Revision 1.10  2003/12/08 20:10:59  sinnwell
#    changed T,F to TRUE,FALSE
#
#   Revision 1.9  2003/10/15 16:13:10  schaid
#   changed em.control to control, to be consistent with haplo.em
#
#   Revision 1.8  2003/08/28 14:22:50  sinnwell
#   re-fix for rows.rem
#
#   Revision 1.7  2003/08/27 21:02:29  sinnwell
#   update for release 1.2.0 with PIN, keep rows.rem
#
#   Revision 1.6  2003/08/26 16:39:04  sinnwell
#   change license statement
#
#   Revision 1.5  2003/05/20 14:49:53  sinnwell
#   add more em control parameters
#
#   Revision 1.4  2003/04/22 20:29:27  sinnwell
#   use is.R function
#
#   Revision 1.3  2003/03/06 20:58:13  sinnwell
#   updates to make return obj a list and, and gnames
#
#   Revision 1.2  2003/01/17 16:56:39  sinnwell
#   revision for haplo.score version 1.2
#
# Revision 1.2  2002/09/16 14:46:42  sinnwell
# Fix RCS keywords
#
# License: 
# 
# Copyright 2003 Mayo Foundation for Medical Education and Research. 
# 
# This program is free software; you can redistribute it and/or modify it under the terms of 
# the GNU General Public License as published by the Free Software Foundation; either 
# version 2 of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY 
# WARRANTY; without even the implied warranty of MERCHANTABILITY or 
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
# more details.
# 
# You should have received a copy of the GNU General Public License along with this 
# program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, 
# Boston, MA 02111-1307 USA
# 
# For other licensing arrangements, please contact Daniel J. Schaid.
# 
# Daniel J. Schaid, Ph.D.
# Division of Biostatistics
# Harwick Building Room 775
# Mayo Clinic
# 200 First St., SW
# Rochester, MN 55905
# 
# phone: 507-284-0639
# fax:      507-284-9542
# email: schaid@mayo.edu
# 

haplo.group <- function(group, geno, locus.label=NA,
                        miss.val=0, weight=NULL,
                        control=haplo.em.control())
    ## Developed by JP Sinnwell  & DJ Schaid ##
    ## Mayo HSR Biostatistics  ##
    ## 7/2002 ##
    ## update for allowing missing alleles 11/2002
    ## update for haplo.em with Progressive Insertion 8/2003
{
# Build a haplo.group object with a dataframe, number of loci, and counts by group
# The dataframe will contain all possible haplotypes and expected frequencies
# for the entire data set and the groups where the haplotype could exist

  group.name <- deparse(substitute(group))

 # Check dims of group and geno
  nr <- nrow(geno)
  if(length(group)!=nr) stop("Dims of group and geno are not compatible")

 # Set number of loci  
  n.loci <- ncol(geno)/2
  if(n.loci != (floor(ncol(geno)/2)) ) stop("Odd number of cols of geno")

 # Make groups either numeric or character  
  if (!is.numeric(group)) group <-  as.character(group)
 
 # Check for  missing group data 
  miss.group <- is.na(group) | group=="NA" 

 # Subset to non-missing group values:
  group <- group[!miss.group]
  geno <- geno[!miss.group,]

 # Create a haplo object (using haplo.em) using size-evaluation criteria
  haplo.all <- haplo.em(geno, locus.label, miss.val=miss.val, weight=weight,
                        control=control)

   # Check convergence of EM
  if(!haplo.all$converge) stop("EM for haplo failed to converge")

  ## new subset of removed genorows based on enumeration criteria of haplo.em

  rem.geno <- haplo.all$rows.rem
  if(length(rem.geno)>0) {
    geno <- geno[-rem.geno,]
    group <- group[-rem.geno]
  }

 # Make initial dataframe with total groups
  haplotype.all <-  haplo.all$haplotype
  prob.all <- haplo.all$hap.prob
  haplo.freq <- data.frame(haplotype.all, prob.all)

 # Subset haplotypes by group variable
 # Do not weight because sub-samples are not over-sampled.
  ugroup <-  sort(unique(group))
  gnames <- character(0)
  for (i.group in ugroup) {
     gnames <- c(gnames, paste(group.name, i.group, sep= "="))
     group.sub <- group==i.group
     geno.sub <- geno[group.sub,]
    # Create haplo object for i.group
    # rows to be removed are already removed from first run of haplo.em
     haplo.tmp <- haplo.em(geno.sub, locus.label, miss.val=miss.val,
                           control=control)

    # Check convergence of EM
     if(!haplo.tmp$converge) cat("Warning: EM failed to converge for group", i.group, "\n")
    # Create dataframe for the group with haplotypes and prob
     haplotype.sub <- haplo.tmp$haplotype
     prob.sub <- haplo.tmp$hap.prob

     df.sub <- data.frame(haplotype.sub,prob.sub)

    # Merge matching the haplotypes (cols 1:n.loci) and keep all rows
    # This will leave NA's where no match in sub group
    haplo.freq <- merge(haplo.freq, df.sub, by.x=1:n.loci, by.y=1:n.loci, all.x=TRUE, all.y=TRUE)

  }   # end for i.group
  
 # Create locus label if missing:
  if (all(is.na(locus.label))) {
     locus.label<- paste("loc-",1:n.loci,sep="")
  }
 # Set names for the dataframe
  names(haplo.freq) <-  c(locus.label, "Total", gnames)
 # Get group frequencies
  group.count <-  table(group)

  hapgroup <- list(group.df=haplo.freq, group.count=group.count, n.loci=n.loci)

  if(exists("is.R") && is.function(is.R) && is.R()) {
    class(hapgroup) <- c("haplo.group","data.frame")
  } else {
    setOldClass(c("haplo.group","data.frame"))
    oldClass(hapgroup) <- "haplo.group"
  }
  
  return(hapgroup)
}

Any scripts or data that you put into this service are public.

haplo.stats documentation built on May 29, 2024, 9:53 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

haplo.stats
Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

R/haplo.group.q
In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

Defines functions haplo.group

Documented in haplo.group

Try the haplo.stats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

haplo.stats Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

R/haplo.group.q In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

Defines functions haplo.group

Documented in haplo.group

Try the haplo.stats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

haplo.stats
Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

R/haplo.group.q
In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous