#major to-do list:
#nice to have
#'
## ==== RAKESVY + RAKEW8 ====
#These are the workhorse functions for the package
#they are designed to allow flexible input formats for targets - however, flexibility can also be dangerous!
#TO DO
#Don't rename columns of data frames when converting to w8margin
#' Flexibly Calculate Rake Weights
#' @import survey
#' @description Calculate rake weights on a data frame, or on a
#' \code{survey.design} object from [survey::svydesign()]. Targets may be counts or
#' percentages, in vector, matrix, data frame, or w8margin form. Before
#' weighting, targets are converted to w8margins, checked for validity, and
#' matched to variables in observed data. \code{rakesvy} returns a weighted
#' \code{svydesign} object, while \code{rakew8} returns a vector of weights.
#' @param design A \code{survey.design} object from [survey::svydesign()],
#' or a data frame that can be coerced to one. When a data frame is coerced, the
#' coercion assumes no clustering or design weighting.
#' @param ... Formulas specifying weight targets, with an object that can be coerced
#' to class w8margin (see [as.w8margin()]) on the right-hand side, and
#' a matching variable or transformation of it on the left-hand side.
#' Objects that can be coerced to w8margin include named numeric vectors and matrices,
#' and data frames in the format accepted by \code{rake}.
#' @param samplesize Either a number specifying the desired post-raking sample
#' size, or a character string "from.data" or "from.targets" specifying how to
#' calculate the desired sample size (see details).
#' @param match.levels.by A character string that specifies how to match levels in
#' the target with the observed data, either "name" (the default) or "order"
#' (see details).
#' @param na.targets A character string that specifies how to handle NAs in *targets*,
#' either "fail" (the default) or "observed" (see details).
#' @param rebase.tol Numeric between 0 and 1. If targets are rebased, and
#' the rebased sample size differs from the original sample size by more than
#' this percentage, a warning is generated.
#' @param control Parameters passed to the \code{control} argument of [survey::rake()],
#' to control the details of convergence in weighting.
#' @details rakesvy and rakew8 wrangle observed data and targets into compatible formats,
#' before using [survey::rake()] to make underlying weighting calculations.
#' The function matches weight targets to observed
#' variables, cleans both targets and observed variables, and then checks the
#' validity of weight targets (partially by calling
#' [w8margin_matched()]) before raking. It also allows a weight
#' target of zero, and assigns an automatic weight of zero to cases on this target
#' level.
#' @details Weight target levels can be matched with observed variable levels in
#' two ways, specified via the \code{match.levels.by} parameter. "name" (the
#' default) matches based on name, disregarding order (so a "male" level in
#' the weight target will be matched with a "male" level in the observed
#' data). "order" matches based on order, disregarding name (so the first
#' level or row of the target will match with the first level of the observed
#' factor variable).
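#' @details A minimal sketch of the difference (the data frame, variable, and
#' level names here are hypothetical):
#' ```
#' # default "name" matching: target names pair with observed factor levels,
#' # regardless of order
#' rakew8(mydata, gender ~ c(Female = 0.52, Male = 0.48))
#' # with match.levels.by = "order", the names would be disregarded and the
#' # first target entry would instead pair with the first observed factor level
#' ```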
#' @details By default (with parameter \code{na.targets = "fail"}), an NA in weight targets
#' will cause an error. With \code{na.targets = "observed"}, rakesvy() and rakew8() will instead
#' compute a target that matches the observed data. The category with an NA target will
#' therefore have a similar incidence rate in the pre-raking and post-raking dataset.
#' This is accomplished by calling [impute_w8margin()] before raking; see
#' the impute_w8margin documentation for more details. Note that NAs in \emph{observed}
#' data (as opposed to targets) will always cause failure, and are not affected by this parameter.
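#' @details For instance, a sketch with a hypothetical region variable, where the
#' target share for "Other" is unknown and is filled in from the observed data:
#' ```
#' rakew8(mydata, region ~ c(North = 0.30, South = 0.45, Other = NA),
#'        na.targets = "observed")
#' ```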
#' @details The desired sample size (in other words, the desired sum of weights
#' after raking) is specified via the \code{samplesize} parameter. This can
#' be a numeric value. Alternatively, "from.data" specifies that the observed
#' sample size before weighting should be used (taken from \code{sum(weights(design))} if
#' applicable, or \code{nrow()} if not); "from.targets" specifies that the total
#' sample sizes in target objects should be followed, and should only be used
#' if all targets specify the same sample size.
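#' @details For example (again with hypothetical names), a numeric
#' \code{samplesize} rescales the targets so that the returned weights sum to
#' that value, regardless of the number of observed cases:
#' ```
#' rakew8(mydata, gender ~ c(Female = 0.52, Male = 0.48), samplesize = 1000)
#' ```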
#' @return \code{rakesvy()} returns a \code{survey.design} object with rake weights applied. Any changes
#' made to the variables in \code{design} in order to call \code{rake}, such as
#' dropping empty factor levels, are temporary and \emph{not} returned in the
#' output object.
#' @return \code{rakew8()} returns a vector of weights. This avoids creating
#' duplicated \code{survey.design} objects, which can be useful when calculating multiple
#' sets of weights for the same data.
#' @example inst/examples/rake_examples.R
#' @export
rakesvy <- function(design, ...,
samplesize = "from.data", match.levels.by = "name", na.targets = "fail",
rebase.tol = .01, control = list(maxit = 10, epsilon = 1, verbose = FALSE)){
if("data.frame" %in% class(design)){
#Notice that we are suppressing the warning here - svydesign will otherwise produce a warning that no input weights are provided
suppressWarnings(design <- survey::svydesign(~0, data = design, control = list(verbose = FALSE)))
}
w8 <- rakew8(design = design, ...,
samplesize = samplesize, na.targets = na.targets,
match.levels.by = match.levels.by, rebase.tol = rebase.tol, control = control)
design$prob <- 1/w8
return(design)
}
#' @rdname rakesvy
#' @export
rakew8 <- function(design, ...,
samplesize = "from.data", match.levels.by = "name", na.targets = "fail",
rebase.tol = .01, control = list(maxit = 10, epsilon = 1, verbose = FALSE)){
## ==== HOUSEKEEPING ====
# ---- Convert misc objects to needed classes ----
# Convert ... to list
target_formulas <- list(...)
# If a single list was passed to ... originally, then unlist it
if(length(target_formulas) == 1 && "list" %in% class(target_formulas[[1]]))
target_formulas <- target_formulas[[1]]
# Convert data frame to svydesign object
if("data.frame" %in% class(design)){
#Notice that we are suppressing the warning here - svydesign will otherwise produce a warning that no input weights are provided
suppressWarnings(design <- survey::svydesign(~0, data = design, control = list(verbose = FALSE)))
}
# ---- Check for valid values on inputs ----
if(sum(!(match.levels.by %in% c("name", "order", "exact"))) > 0){
stop("Invalid value(s) ",
paste(match.levels.by[!(match.levels.by %in% c("name", "order", "exact"))]),
" in match.levels.by"
)
}
if(length(na.targets) != 1 || !na.targets %in% c("fail", "observed"))
stop('Invalid value for na.targets; must be one of "fail" or "observed"') # To add "weighted.observed"
na.targets.allow <- na.targets == "observed" # Value to be passed to w8margin_matched and as.w8margin
# if match.levels.by is a scalar, repeat it for every variable
if(length(match.levels.by) == 1)
match.levels.by <- rep(match.levels.by, length(target_formulas))
else if(length(match.levels.by) != length(target_formulas))
stop("Incorrect length for match.levels.by")
## ==== EVALUATE TARGETS ====
# Extract targets from formula
targets <- extractTargets(target_formulas)
# Parse formulas to generate new data frame columns
# And change any weight target names that conflict with existing variables
isDataFrame <- sapply(targets, function(x) ("w8margin" %in% class(x) | "data.frame" %in% class(x)))
parsed_out <- parseTargetFormulas(
target_formulas = target_formulas,
weightTargetNames = getWeightTargetNames(
targets = targets,
target_formulas = target_formulas,
isDataFrame = isDataFrame),
design = design
)
design <- parsed_out$design
weightTargetNames <- parsed_out$weightVarNames
targets <- setWeightTargetNames(
weightTargetNames = weightTargetNames,
targets = targets,
isDataFrame = isDataFrame
)
# now that we have the names of weighting variables, convert the weight target variables to factors
design$variables[,weightTargetNames] <- lapply(
design$variables[, weightTargetNames, drop = FALSE], as.factor
)
## ==== PROCESS TARGETS FOR COMPATIBILITY WITH DATA ====
# ---- Get info we need in order to process ----
# Define sample size
if(samplesize == "from.data"){ #"from.data" means we want to take a centrally-specified sample size
nsize <- sum(weights.survey.design(design))
} else if(samplesize == "from.targets"){ #"from.targets" means we want to take the sample size found in the targets, IE not specify one here
nsize <- NULL
} else nsize <- samplesize
# ---- Do the processing ----
# Change target level names if match.levels.by = "order"
# (this parameter setting tells us that the first row of the target should automatically be the first level of the variable, and so on)
forcedLevels <- mapply(function(target, observed, varname, isForced){
if(isForced == FALSE)
return(NULL)
else{
if(length(levels(observed)) == target.length(target))
return(levels(observed)) #If length of target matches levels of observed, return that
else if(length(levels(factor(observed))) == target.length(target))
return(levels(factor(observed))) #If length doesn't match, see if refactoring to drop empty levels will help
else
stop("Length of target for variable '", varname, "' did not match number of levels in observed data")
}
},
target = targets,
observed = design$variables[weightTargetNames],
varname = weightTargetNames,
isForced = match.levels.by == "order",
SIMPLIFY = FALSE
)
# save original sample sizes (for diagnostics later); a targetsum function with methods might be a better way to handle this
origTargetSums <- mapply(function(targets, forcedLevels, weightTargetNames){
sum(as.w8margin(
target = targets,
varname = weightTargetNames,
levels = forcedLevels,
na.allow = na.targets.allow)$Freq, na.rm = TRUE)
}, targets = targets, forcedLevels = forcedLevels, weightTargetNames = weightTargetNames)
# if we are getting sample sizes from targets, make sure the sizes are consistent
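# e.g. with the default rebase.tol = .01, origTargetSums of c(1000, 1005) give a rebase ratio of 1.005
# (within tolerance), so nsize becomes mean(c(1000, 1005)) = 1002.5; sums of c(1000, 1100) would stop with an error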
if(samplesize == "from.targets" & length(origTargetSums) > 1){
rebaseRatio <- origTargetSums[-1] / origTargetSums[1]
isRebaseTolerated <- rebaseRatio > (1 - rebase.tol) & rebaseRatio < (1 + rebase.tol) #check if the ratio is 1 +- some tolerance
if(!all(isRebaseTolerated))
stop("Target sample sizes must be consistent when samplesize = 'from.targets'")
nsize <- mean(origTargetSums)
}
#Convert targets to class w8margin
targets <- mapply(
as.w8margin,
target = targets,
varname = weightTargetNames,
na.allow = na.targets.allow,
levels = forcedLevels, #forcedLevels is NULL if we aren't forcing the level
MoreArgs = list(samplesize = nsize), SIMPLIFY = FALSE)
unimputedTargets <- targets
# Impute NA values in targets
# Generate initial list of which targets need imputation
imputedTargetNames <- weightTargetNames[
sapply(targets, function(onetarget){any(is.na(onetarget$Freq))})
]
# NB: Ideally we would add some tryCatch here so that, if there is a bad match between the observed data and a target,
# NB: we don't produce an error at this point, but instead keep the NA weight target (and then produce an error below in w8margin_matched)
if(na.targets.allow){
targets[imputedTargetNames] <- mapply(
impute_w8margin,
w8margin = targets[imputedTargetNames],
observed = design$variables[, imputedTargetNames, drop = FALSE],
MoreArgs = list(weights = weights.survey.design(design), rebase = TRUE),
SIMPLIFY = FALSE
)
}
# Don't throw any errors here if na.targets.allow == FALSE
# This will get caught by isTargetMatch below, so adding more error catching is redundant and adds problematic complexity
## ==== PROCESS DATA BY DROPPING ZERO TARGETS ====
# remove cases from the observed data
zeroTargetLevels <- lapply(targets, function(onetarget)
as.character(onetarget[!is.na(onetarget$Freq) & onetarget$Freq == 0, 1])) #identify zero levels
design <- dropZeroTargets(design = design, zeroTargetLevels = zeroTargetLevels)
# remove levels from the w8margin object (both imputed and unimputed version)
zeroTargetRows <- lapply(targets, function(onetarget){
!is.na(onetarget$Freq) & onetarget$Freq == 0
})
targets <- mapply(function(target, drop) target[!drop,], target = targets, drop = zeroTargetRows, SIMPLIFY = FALSE)
unimputedTargets <- mapply(function(target, drop) target[!drop,], target = unimputedTargets, drop = zeroTargetRows, SIMPLIFY = FALSE)
# Check which targets needed "real" imputation
# NB: In future versions of the package, unimputedTargets will be used for re-imputing NA targets at each raking iteration
# NB: But if an imputed target is zero, we want to handle it here and then NOT re-impute at each iteration
imputedTargetNames <- weightTargetNames[
sapply(targets, function(onetarget){any(is.na(onetarget$Freq))})
]
## ==== CHECK THAT TARGETS ARE VALID ====
#Check if targets currently match
isTargetMatch <- mapply(
w8margin_matched,
w8margin = targets,
observed = design$variables[, weightTargetNames, drop = FALSE],
MoreArgs = list(na.targets.allow = na.targets.allow)
)
#Check if targets would match after re-factoring (re-factoring might produce less helpful messages)
suppressWarnings(isRefactoredMatch <- mapply(w8margin_matched,
w8margin = targets, observed = design$variables[, weightTargetNames, drop = FALSE],
MoreArgs = list(na.targets.allow = na.targets.allow, refactor = TRUE)))
#Solve issues that can be solved with refactoring, stop if refactoring can't solve issues
if(any(!isRefactoredMatch)) stop(
"Target does not match observed data on variable(s) ",
paste(weightTargetNames[!isTargetMatch], collapse = ", "),
"\n",
paste0(names(warnings()), collapse = ", ")
)
else if(any(!isTargetMatch))
design$variables[,weightTargetNames][!isTargetMatch] <- lapply(
design$variables[, weightTargetNames, drop = FALSE][!isTargetMatch],
factor
)
## ==== CHECK FOR CONSISTENT SAMPLE SIZES ====
#consider moving this earlier, right after we compute origSum
#to handle objects that were *not* coerced to a set sample size, check that samplesize for each w8margin is the same
finalTargetSums <- sapply(targets, function(x) sum(x$Freq))
isSizeTolerated <- (finalTargetSums / nsize) < (1 + rebase.tol) & (finalTargetSums / nsize) > (1 - rebase.tol)
if(any(isSizeTolerated == FALSE))
stop("Target sample sizes vary by more than specified tolerance; try changing rebase.tol")
#to handle objects that were coerced to a set sample size, check if any of the sample sizes required substantive rebasing
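# e.g. targets summing to 1 (proportions), 100 (percentages), or nsize (counts already on the desired scale)
# need no substantive rebasing; a target summing to 2000 when nsize is 1000 triggers the warning below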
#Compute the ratio of 1, 100, and the desired sample size (nsize) to each original target sum
rebaseRatio <- lapply(origTargetSums, function(origSum) c(1, 100, nsize) / origSum)
isRebaseTolerated <- sapply(rebaseRatio, function(ratio)
any((ratio > (1 - rebase.tol)) & (ratio < (1 + rebase.tol)))) #check if the ratio is 1 +- some tolerance
if(any(isRebaseTolerated == FALSE))
warning("targets for variable ", toString(names(targets)[!isRebaseTolerated], sep = ", "),
" sum to ", toString(origTargetSums[!isRebaseTolerated], sep = ", "), " and were rebased")
## ==== RUN WEIGHTS ====
# Compute weights for valid cases
sample.margins <- lapply((paste0(" ~ \`", weightTargetNames, "\`")), stats::as.formula)
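# e.g. a weighting variable named "gender" yields the one-sided formula ~ `gender`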
population.margins <- targets
weighted <- survey::rake(
design = design,
sample.margins = sample.margins,
population.margins = population.margins,
control = control)
# Merge valid case weights with zero weights
design$keep_cases$weight <- 0
design$keep_cases$weight[design$keep_cases$keep_yn == TRUE] <- weights.survey.design(weighted)
return(design$keep_cases$weight)
}
## ==== INTERNAL FUNCTIONS ====
#Gets weight target names from the left-hand side of each target formula
#(a left-hand side is required, even when targets are w8margin objects or data frames;
# the first column name of such targets is later overwritten to match, in setWeightTargetNames)
#Returns a character vector, weightTargetNames
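# e.g. a formula with left-hand side interaction(gender, region) yields the sanitized name "interaction.gender.region."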
getWeightTargetNames <- function(targets, target_formulas, isDataFrame){
weightTargetNames <- sapply(target_formulas, function(onearg){
if(!("formula" %in% class(onearg))) stop("Weight target argument is not specified as a formula")
# Check that the formula has a left-hand side, and stop if it doesn't
if(length(onearg) != 3) stop("Weight target formula ", deparse(onearg), " must have left-hand side")
# If the formula does have a left-hand side, extract that side
lhs_char <- paste0(deparse(onearg[[2]], backtick = TRUE), collapse = " ")
# Replace special characters, except for "." and "_"
lhs_char <- gsub("[+-/*\\^=<>&|!@$\"'`%{}(),~:\\ ]+", ".", lhs_char)
lhs_char <- gsub("[][]+", ".", lhs_char)
return(lhs_char)
})
return(weightTargetNames)
}
# Renames targets, after using getWeightTargetNames to ensure a consistent format
# returns a modified targets object
setWeightTargetNames <- function(weightTargetNames, targets, isDataFrame){
old_column_names <- lapply(targets[isDataFrame], function(w8margin) colnames(w8margin)[1])
targets[isDataFrame] <- mapply(function(target, varname){ #for any targets that were originally in w8margin or data frame format: change the first column name to match the weight target name
if(colnames(target)[1] != varname){
colnames(target)[1] <- varname
}
return(target)
}, target = targets[isDataFrame], varname = weightTargetNames[isDataFrame], SIMPLIFY = FALSE)
names(targets) <- weightTargetNames
return(targets)
}
# Removes rows from the dataset that have a weight of zero
# Either because their design weight is zero, or because they belong to a group where the target is zero
# Inputs:
# design: an svydesign object, where we want to drop cases from
# zeroTargetLevels: a list, where each named element is a character vector indicating which levels of a variable should be dropped
# e.g., list(agecat = c("under 18", "90+"), gender = c(), education = c("Don't Know"))
# Returns:
# a modified svydesign object, with some cases dropped from the "variables" subobject
# and a new subobject "keep_cases", with one row per case in the original (pre-drop) data frame, whose keep_yn column indicates which rows of the original data frame have been kept
# currently generates a warning if this dropping process leads to all cases in a given (nonzero) level being dropped
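# Illustration (hypothetical values): with zeroTargetLevels = list(gender = character(0), agecat = "under 18"),
# cases with agecat == "under 18" get keep_yn = FALSE and are dropped, and the "under 18" level
# is removed from the agecat factor in the returned design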
dropZeroTargets <- function(design, zeroTargetLevels){
design$keep_cases <- data.frame(index = rownames(design$variables), keep_yn = TRUE)
weightTargetNames <- names(zeroTargetLevels) # Note that this is defined locally, not passed as a parameter
if(any(sapply(zeroTargetLevels, length) > 0) | any(weights.survey.design(design) == 0)){
#Identify cases that will be dropped because they belong to a zero target
design$keep_cases$keep_yn <-
rowSums(
simplify2array(
mapply(
function(var, levelsToDrop, varname){
if(length(levelsToDrop) > 0){
factorLevels <- levels(as.factor(var))
isValidLevel <- levelsToDrop %in% factorLevels
if(any(!isValidLevel))
warning("Empty target level(s) ", toString(levelsToDrop[!isValidLevel], sep = ", "),
" do not match with any observed data on variable ", varname)
var %in% levelsToDrop
}
else rep(FALSE, length(var))
}, var = design$variables[weightTargetNames], levelsToDrop = zeroTargetLevels, varname = weightTargetNames
)
)
) == 0
#Identify cases that will be dropped because they have a design weight of zero
design$keep_cases$keep_yn[weights.survey.design(design) == 0] <- FALSE
#Check which factor levels have valid cases, before dropping cases
predrop.tab <- lapply(design$variables[weightTargetNames], table)
#drop cases
design <- subset(design, design$keep_cases$keep_yn)
#remove the unneeded factor levels
design$variables[weightTargetNames] <- mapply(
function(factorvar, levelsToDrop){
if(length(levelsToDrop) > 0)
factor(factorvar, levels = levels(factorvar)[!(levels(factorvar) %in% levelsToDrop)])
else
factorvar
},
factorvar = design$variables[weightTargetNames],
levelsToDrop = zeroTargetLevels,
SIMPLIFY = FALSE
)
#Check if we are accidentally losing all cases (on factor levels with valid targets) by dropping some cases
postdrop.tab <- lapply(design$variables[weightTargetNames], table) #Table after dropping cases
mapply(
function(pre, post, levelsToDrop, varname){ #Compare tables before and after dropping
pre <- pre[!(names(pre) %in% levelsToDrop)]
post <- post[!(names(post) %in% levelsToDrop)]
lostAllCases <- post == 0 & pre != 0
if(any(lostAllCases))
warning("All valid cases for ", varname, " level(s) ",
toString(names(pre)[lostAllCases]), " had weight zero and were dropped")
}, pre = predrop.tab, post = postdrop.tab, levelsToDrop = zeroTargetLevels, varname = weightTargetNames)
}
return(design)
}
# copy of weights.survey.design from survey package
# survey package doesn't export this, so we need to recreate it here
weights.survey.design <- function(object,...){
return(1/object$prob)
}
## Evaluate formulas used for weight targets
# Input:
# target_formulas - a list of formulas:
# each formula has, on the left-hand side, a variable or expression to be evaluated against the survey design object;
# the expression must produce a single column, and must not drop any rows
# the right-hand side names a w8margin object (or something that can be coerced to one);
# the w8margin object is searched for in the environment specified by the formula
# weightTargetNames - the names to give the newly created columns, unless there is a clash with existing variables
# design - a survey design object, which is intended to be weighted
# Output: a list with two elements
# Design - svydesign object, including any columns not in the original svydesign object
# weightVarNames - the names of columns to be used for weighting
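# Illustration (hypothetical names): for the formula I(age >= 35) ~ age_targets, model.frame() evaluates
# I(age >= 35) against design$variables, yielding a single logical column; that column is added to
# design$variables under the sanitized name from getWeightTargetNames, or under a ".rakew8_"-prefixed name
# if a column with that name (but different contents) already exists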
parseTargetFormulas <- function(target_formulas, weightTargetNames, design){
# ---- Convert to list, in case there is only one formula ----
if(!("list" %in% class(target_formulas))) target_formulas <- list(target_formulas)
# ---- Evaluate the formula function calls ----
parsed_data.list <- lapply(target_formulas, function(onearg){
if(!("formula" %in% class(onearg))) stop("Weight target argument is not specified as a formula")
# Check if formula has left-hand side and return NULL if it doesn't
if(length(onearg) != 3) return(NULL)
# If formula does have left hand-side, extract that side
data_call <- onearg[-3]
data_object <- stats::model.frame(
data_call,
data = design$variables,
na.action = NULL,
drop.unused.levels = FALSE
)
return(data_object)
})
# Set names of new variables we've created
names(parsed_data.list) <- weightTargetNames
# Drop null outputs and transform non-null outputs into dataframe
parsed_data.list <- parsed_data.list[!sapply(parsed_data.list, is.null)]
if(length(parsed_data.list) > 0){
# Check validity of non-NULL output
parsed_nrows <- sapply(parsed_data.list, nrow)
if(any(parsed_nrows != nrow(design$variables)))
stop("Weight target formulas ",
target_formulas[parsed_nrows != nrow(design$variables)],
"do not produce expected ",
nrow(design$variables),
" cases",
call. = FALSE)
parsed_ncols <- sapply(parsed_data.list, ncol)
if(any(parsed_ncols != 1))
stop("Weight target formulas ",
target_formulas[parsed_ncols !=1],
" do not produce 1 column of target data",
call. = FALSE)
parsed_data.df <- data.frame(parsed_data.list)
names(parsed_data.df) <- names(parsed_data.list)
#Check for parsed data names that already exist in the dataset
duplicate_names.yn <- names(parsed_data.df) %in% names(design$variables)
duplicate_names.varnames <- names(parsed_data.df)[duplicate_names.yn]
#Check whether duplicated variable names have the same content
duplicate_contents.yn <- mapply(
function(x, y){
out <- isTRUE(all.equal(x, y))
},
x = parsed_data.df[, duplicate_names.yn, drop = FALSE],
y = design$variables[, duplicate_names.varnames, drop = FALSE]
)
# Define and rename conflicting names
to_rename.varnames <- names(parsed_data.df)[duplicate_names.yn][!duplicate_contents.yn]
if(length(to_rename.varnames) > 0){
renamed.varnames <- paste0(".rakew8_", names(parsed_data.df)[duplicate_names.yn][!duplicate_contents.yn])
weightTargetNames[match(to_rename.varnames, weightTargetNames)] <- renamed.varnames
names(parsed_data.df)[match(to_rename.varnames, names(parsed_data.df))] <- renamed.varnames
}
# Merge variables together
design$variables <- cbind(
design$variables,
parsed_data.df[!(names(parsed_data.df) %in% names(design$variables))]
)
}
out <- list(design = design, weightVarNames = weightTargetNames)
return(out)
}
# Get targets from environment
# Technically speaking, rakew8 gets passed formulas that specify the *name* of targets
# And the environment they're in
# This function pulls the targets based on name and environment
# Input:
# target_formulas: a list of one or more formulas, where the right-hand side of the formula names an object
# Output: a list of objects (each of which can presumably be coerced to class w8margin, although we don't check that here)
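# e.g. for the formula gender ~ gender_targets, the object `gender_targets` is looked up in the environment
# attached to the formula (typically the environment where rakew8/rakesvy was called) and returned as-is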
extractTargets <- function(target_formulas){
# ---- Convert to list, in case there is only one formula ----
if(!("list" %in% class(target_formulas))) target_formulas <- list(target_formulas)
# ---- Get the arguments specifying the names of targets ----
target_rhs <- lapply(target_formulas, function(onearg){
# The right-hand side names the target object; it is evaluated later in the formula's environment
# Get the right-hand side of the formula (third element if a left-hand side exists, second element otherwise)
if(length(onearg) == 3){
target_call <- onearg[[3]]
} else target_call <- onearg[[2]]
return(target_call)
})
# ---- Evaluate arguments in appropriate environments ----
parsed_targets <- lapply(
1:length(target_rhs),
function(x)
eval(
target_rhs[[x]],
envir = environment(target_formulas[[x]]))
)
if(any(sapply(parsed_targets, is.null)))
stop(
"Right-hand side of target(s) ",
paste0(target_rhs[sapply(parsed_targets, is.null)], collapse = ", "),
" is NULL or could not be found in specified environments"
)
return(parsed_targets)
}