R/dataframe_sexing.R
In PELVIS: Probabilistic Sex Estimate using Logistic Regression, Based on VISual Traits of the Human Os Coxae

Documented in dataframe_sexing sexingFromFile

sexingFromFile <- function(data, ref, updateProgressBar = NULL,
                           conf_level = 0.95, strategy = c("BIC", "AIC", "None"),
                           trace = 1){
    .Deprecated("dataframe_sexing")
    dataframe_sexing(data, ref, updateProgressBar,
                     conf_level, strategy, trace)
}

dataframe_sexing <- function(data, ref, updateProgressBar = NULL,
                             conf_level = 0.95, strategy = c("BIC", "AIC", "None"),
                             trace = 1) {
### data: dataframe (individuals to estimate) with at least the eleven trichotomic traits
### ref: dataframe (learning dataset)
### updateProgressBar: function for progress bar in shiny app
### conf_level: 0.95 by default, confidence level needed to produce a sex estimate
### strategy: passed to indivSexing
### trace: passed to indivSexing
### (NB : les colonnes doivent contenir les labels adéquats, mais il peut éventuellement y avoir d'autres colonnes illustratives [e.g. age, ...] dans le jeu de données)

#################################################
### 1. Add the main characters PrSu, GrSN, InfP #
#################################################
    data <- data[ , c("PrSu1", "PrSu2", "PrSu3", "GrSN1", "GrSN2", "GrSN3",
                    "CArc", "InfP1", "InfP2", "InfP3", "IsPu")]
    metadat <- add_metavars(data)

########################################################
### 2. Prepare and initialize the dataframe of results #
########################################################
    df_res <- matrix(NA, ncol = 21, nrow = nrow(data))
    df_res <- as.data.frame(df_res, stringsAsFactors = FALSE)
    colnames(df_res) <- c("PrSu1", "PrSu2", "PrSu3", "PrSu",
                          "GrSN1", "GrSN2", "GrSN3", "GrSN",
                          "CArc", "InfP1", "InfP2", "InfP3", "InfP", "IsPu",
                          "Sex estimate (Bruzek 2002)",
                          "Statistical sex estimate (2019)",
                          "Prob(F)", "Prob(M)",
                          "Selected predictors in LR model",
                          "10-fold CV accuracy (%)", "Indet. rate in CV (%)")
    rownames(df_res) <- rownames(data)

########################################
### 3. Sex estimation with Bruzek 2002 #
########################################
    b02 <- apply(metadat[ , c("PrSu", "GrSN", "InfP", "CArc", "IsPu")],
                 MARGIN = 1, FUN = bruzek02)
    df_res[ , "Sex estimate (Bruzek 2002)"] <- factor(b02)

#################################
### 4. Sex estimation with GLMs #
#################################
    index_models <- 1
    dtf_models <- data.frame(InitialVars = character(0),
                             tenCV_perfRate = numeric(0),
                             tenCV_indetRate = numeric(0),
                             stringsAsFactors = FALSE)
    list_models <- list()

    for (i in 1:nrow(data)) { # for each individual,
        ## retrieve the observed traits on this individual:
        obs_traits <- paste(colnames(data)[which(!is.na(data[i, ]))],
                            collapse = ", ")
        if ((! obs_traits %in% dtf_models$InitialVars) & (sum(!is.na(data[i, ])) > 0)) {
            ## if those variables do not correspond to a GLM that has already been used:
            res <- indiv_sexing(ref = ref, new_ind = data[i, ],
                                strategy = strategy, trace = trace,
                                conf_level = conf_level) # then we compute this GLM,
            df_res[i, "Statistical sex estimate (2019)"] <- res$PredictedSex # and we store the results in the dataframe.
            df_res[i, "Prob(F)"] <- round(1 - res$PostProb, 3)
            df_res[i, "Prob(M)"] <- round(res$PostProb, 3)
            df_res[i, "Selected predictors in LR model"] <- res$VariablesUsed
            df_res[i, "10-fold CV accuracy (%)"] <- round(100 * res$cvRate, 2)
            df_res[i, "Indet. rate in CV (%)"] <- round(100 * res$cvIndet, 2)
            list_models[[index_models]] <- res$BestModel
            dtf_models[index_models, "InitialVars"] <- obs_traits
            dtf_models[index_models, "tenCV_perfRate"] <- round(100 * res$cvRate, 2)
            dtf_models[index_models, "tenCV_indetRate"] <- round(100 * res$cvIndet, 2)
            index_models <- index_models + 1 # here we increment the index of GLMs that have been used.
        } else if (sum(!is.na(data[i,])) > 0) { # # if those variables do correspond to a GLM that has already been used,
            where.mod <- which(dtf_models$InitialVars == obs_traits) # retrieve this model,
            mod_current <- list_models[[where.mod]]
            ## and use this model to return a sex estimate:
            postprob <- predict(mod_current, newdata = as.data.frame(data[i, ]), type = "response")
            df_res[i, "Statistical sex estimate (2019)"] <- finalSE(postprob, conf_level)
            df_res[i, "Prob(F)"] <- round(1 - postprob, 3)
            df_res[i, "Prob(M)"] <- round(postprob, 3)
            df_res[i, "Selected predictors in LR model"] <- paste(attr(mod_current$terms, "term.labels"), collapse = ", ")
            df_res[i, "10-fold CV accuracy (%)"] <- dtf_models[where.mod, "tenCV_perfRate"]
            df_res[i, "Indet. rate in CV (%)"] <- dtf_models[where.mod, "tenCV_indetRate"]
        } 
        ## Update progress bar:
        if (is.function(updateProgressBar)) { # i.e., if not-NULL,
            text <- paste("Currently processing individual ", rownames(data)[i], "...", sep = "")
            updateProgressBar(detail = text, total = nrow(data))
        }
    }
    
##########################################
### 5. Return the results in a dataframe #
##########################################
    df_res[ , c("PrSu1", "PrSu2", "PrSu3", "GrSN1", "GrSN2", "GrSN3", "CArc", "InfP1", "InfP2", "InfP3", "IsPu")] <- data
    df_res[ , c("PrSu", "GrSN", "InfP", "CArc", "IsPu")] <- metadat[ , c("PrSu", "GrSN", "InfP", "CArc", "IsPu")]
    return(df_res)
}

Any scripts or data that you put into this service are public.

PELVIS documentation built on Aug. 8, 2023, 5:09 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

PELVIS
Probabilistic Sex Estimate using Logistic Regression, Based on VISual Traits of the Human Os Coxae

R/dataframe_sexing.R
In PELVIS: Probabilistic Sex Estimate using Logistic Regression, Based on VISual Traits of the Human Os Coxae

Defines functions dataframe_sexing sexingFromFile

Documented in dataframe_sexing sexingFromFile

Try the PELVIS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PELVIS Probabilistic Sex Estimate using Logistic Regression, Based on VISual Traits of the Human Os Coxae

R/dataframe_sexing.R In PELVIS: Probabilistic Sex Estimate using Logistic Regression, Based on VISual Traits of the Human Os Coxae

Defines functions dataframe_sexing sexingFromFile

Documented in dataframe_sexing sexingFromFile

Try the PELVIS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PELVIS
Probabilistic Sex Estimate using Logistic Regression, Based on VISual Traits of the Human Os Coxae

R/dataframe_sexing.R
In PELVIS: Probabilistic Sex Estimate using Logistic Regression, Based on VISual Traits of the Human Os Coxae