#' Deploy a cluster of Linux Data Science Virtual Machines on Azure.
#'
#' Creates a cluster of Data Science Virtual Machines and enables the
#' DSVMs to communicate acros the cluster via public key based
#' credentials for high performance computation. All DSVMs in the
#' cluster are based on Linux OS and use public key cryptogrphy for
#' log in.
#'
#' @inheritParams deployDSVM
#'
#' @param count If provided this is the number of DSVM instances to be
#' created. If not provided the number of DSVMs created will be
#' either the number of names provided or the number of username
#' provided.
#'
#' @details
#'
#' Note clustering of DSVMs for high performance computing will be
#' deprecated. Users are encouraged to use azureDoParallel package
#' for the same purpose with Azure Batch Services.
#'
#' The current function supports for deployment of identical DSVMs
#' (i.e., size, OS, etc.) or heterogeneous DSVMs with differerent
#' specifications.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # The following deploys a cluster of 5 Linux DSVMs.
#'
#' deployDSVMCluster(context, resource.group="<resource_group>",
#' location="<location>", hostname="<machine_name>", username="<user_name>",
#' os="Windows", pubkey="<a_valid_public_key_string_in_SSH_format>", count=5)
#'
#' # The following deploys a collection of 3 Linux DSVMs with different
#' names and public keys.
#'
#' deployDSVMCluster(context, resource.group="<resource_group>",
#' location="<location>", hostname=c("<machine_name_1>", "<machine_name_2>",
#' "<machine_name_3>", username=c("<user_name_1>", "<user_name_2>",
#' "<user_name_3>"), os="Windows",
#' pubkey="<a_valid_public_key_string_in_SSH_format>"}
deployDSVMCluster <- function(context,
resource.group,
location,
hostname,
username,
authen="Key",
pubkey="",
password="",
os="Ubuntu",
size="Standard_D1_v2",
dns.label=hostname,
count)
{
# Check if token is valid.
AzureSMR::azureCheckToken(context)
# Check argument pre-conditions.
if(missing(context)) {
stop("Please specify a context (contains TID, CID, KEY).")
assert_that(AzureSMR:::is.azureActiveContext(context))
}
if(missing(resource.group)) {
stop("Please specify an Azure resouce group.")
assert_that(AzureSMR:::is_resource_group(resource.group))
}
if(missing(location)) {
stop("Please specify a data centre location.")
assert_that(AzureSMR:::is_location(location))
}
if(missing(hostname)) {
stop("Please specify virtual machine hostname(s).")
assert_that(all(sapply(hostname, AzureSMR:::is_vm_name)))
}
if(missing(username)) {
stop("Please specify virtual machine username(s).")
assert_that(all(sapply(username, AzureSMR:::is_admin_user)))
}
if(missing(authen))
stop("Please specify authentication method(s).")
if(authen == "Password" && missing(password))
stop("Please specify virtual machine password(s).")
if(authen == "Key" && missing(pubkey))
stop("Please specify virtual machine public key(s).")
# length of hostname, username, pubkey, os, size, and dns.label
# should always be the same.
input_args_number <- c(length(hostname),
length(username),
length(dns.label))
if (!identical(input_args_number,
rep(input_args_number[1], length(input_args_number))))
stop("Input host names, user names, and dns.label
should all have the same length.")
# If no count is provided then set it to the number of hostname or
# username supplied.
if (missing(count))
count <- ifelse(length(hostname) == 1,
ifelse(length(username) == 1,
1, length(username)),
length(hostname))
if (count == 1)
stop("If 'count' is specified, it should be greater than 1.")
# If the count is greater than 1 then ensure we have the right
# lengths of hostname, username, and public keys.
if (length(hostname) == 1) {
hostname <- sprintf("%s%03d", hostname, 1:count)
dns.label <- hostname
}
if (length(username) == 1)
username <- rep(username, count)
if (length(authen) == 1)
authen <- rep(authen, count)
if (length(pubkey) == 1)
pubkey <- rep(pubkey, count)
if (length(password) == 1)
password <- rep(password, count)
if (length(os) == 1)
os <- rep(os, count)
if (length(size) == 1)
size <- rep(size, count)
# Check the number of DSVM deployments to be a reasonable number but
# allow the user to override. This is only useful if interactive.
if(count > 10)
{
ans <- readline(paste("More than 10 DSVMs have been requested.",
"Continue? (y/n)"))
if(tolower(ans) == "n")
stop("The deployment is aborted.")
}
# Deploy the DSVMs. All but the last one are deployed asynchronosly
# and the last is deployed synchonously so that the function returns
# once the last has successfully deployed. TODO maybe we have a mode
# argument defaults to "Sync" so user could choose all to be "AScyn"
# if desired.
for (i in 1:count)
{
if (i == count) cat("\n") # Tidy the progress messages.
deployDSVM(context=context,
resource.group=resource.group,
location=location,
hostname=hostname[i],
username=username[i],
size=size[i],
os=os[i],
authen=authen[i],
pubkey=pubkey[i],
password=password[i],
dns.label=hostname[i],
mode=ifelse(i == count, "Sync", "Async"))
}
# For a cluster set up public credentials for the DSVM cluster to
# allow DSVMs to communicate with each other. This is required if
# one wants to execute analytics on the cluster with parallel
# compute context in ScaleR.
# NOTE: this will be soon deprecated. The users are encouraged to use
# doAzureParallel package for high-performance computation.
if (length(unique(username)) == 1 &&
authen == "Key" &&
(os == "Ubuntu" ||
os == "CentOS" ||
os == "RServer"))
{
# sleep for a while as ssh to a ubuntu LDSVM cannot be immediately
# executed after deployment.
Sys.sleep(60)
df <- keyDistribution(location=location,
hostname=hostname,
username=username,
count=count,
dns.label=dns.label)
}
return(TRUE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.