This is an example of running an R version of Google Datalab
Google Datalab is a service that lets you easily interact with your data in the Google Cloud. This document is an exercise in trying to replicate the same functionality:
* `googleComputeEngineR` within its own Docker container
* `tensorflow`
* helper library `tflearn`
library(googleAuthR)
## this reuses the authentication of the GCE instance we are on
gar_gce_auth()

library(bigQueryR)
## list authenticated projects
myproject <- bqr_list_projects()

library(googleCloudStorageR)
## list Cloud Storage buckets for the first project
gcs_list_buckets(myproject$id[[1]])
Demo of running python in same document:
```{python, echo=TRUE} hiss = 'sssssssss' print "Pythons go %s." % hiss
Also works with `SQL` and `bash`:

```{bash, echo=TRUE}
pip freeze
```
From the example intro blog post for feather:
library(feather)

## Write the built-in mtcars data set out in feather format so the
## Python chunk below can read the same file back in.
df <- mtcars
path <- "my_data.feather"
write_feather(df, path)
```{python, echo=TRUE} import feather path = 'my_data.feather' df = feather.read_dataframe(path) df.head
## Tensorflow ### Hello world Python ```{python, echo=TRUE} from __future__ import print_function import tensorflow as tf # Simple hello world using TensorFlow # Create a Constant op # The op is added as a node to the default graph. # # The value returned by the constructor represents the output # of the Constant op. hello = tf.constant('Hello, TensorFlow!') # Start tf session sess = tf.Session() # Run the op print(sess.run(hello))
library(tensorflow)

## Same hello-world as the Python chunk, via the R tensorflow bindings.
## Fix: use `<-` for assignment instead of `=` (R style convention).
sess <- tf$Session()
hello <- tf$constant('Hello, TensorFlow!')
sess$run(hello)
From the `tflearn` quickstart:
```{python, echo = TRUE} from future import print_function
import numpy as np import tflearn
from tflearn.datasets import titanic titanic.download_dataset('titanic_dataset.csv')
from tflearn.data_utils import load_csv data, labels = load_csv('titanic_dataset.csv', target_column=0, categorical_labels=True, n_classes=2)
def preprocess(data, columns_to_ignore): # Sort by descending id and delete columns for id in sorted(columns_to_ignore, reverse=True): [r.pop(id) for r in data] for i in range(len(data)): # Converting 'sex' field to float (id is 1 after removing labels column) data[i][1] = 1. if data[i][1] == 'female' else 0. return np.array(data, dtype=np.float32)
to_ignore=[1, 6]
data = preprocess(data, to_ignore)
net = tflearn.input_data(shape=[None, 6]) net = tflearn.fully_connected(net, 32) net = tflearn.fully_connected(net, 32) net = tflearn.fully_connected(net, 2, activation='softmax') net = tflearn.regression(net)
model = tflearn.DNN(net)
model.fit(data, labels, n_epoch=10, batch_size=16, show_metric=True)
dicaprio = [3, 'Jack Dawson', 'male', 19, 0, 0, 'N/A', 5.0000] winslet = [1, 'Rose DeWitt Bukater', 'female', 17, 1, 2, 'N/A', 100.0000]
dicaprio, winslet = preprocess([dicaprio, winslet], to_ignore)
pred = model.predict([dicaprio, winslet]) print("DiCaprio Surviving Rate:", pred[0][1]) print("Winslet Surviving Rate:", pred[1][1])
## Build details

This was run in a local R session to start up this RStudio instance with the right libraries installed:

```r
library(googleComputeEngineR)

## make an RStudio instance to base upon
vm <- gce_vm(template = "rstudio",
             name = "r-datalab-build",
             username = "mark",
             password = "mark1234",
             predefined_type = "n1-standard-1")

## once RStudio loaded at the IP, build the Dockerfile below on instance
## this takes a while
docker_build(vm,
             dockerfolder = get_dockerfolder("cloudDataLabR"),
             new_image = "r-datalab")

## send to the Container Registry
gce_push_registry(vm,
                  save_name = "datalab-r-image",
                  image_name = "r-datalab")

## Can now launch instances using this image via:
## NOTE(review): the image is saved above as "datalab-r-image" but
## referenced here as "datalab-r" -- confirm which tag is correct.
vm2 <- gce_vm(template = "rstudio",
              name = "r-datalab",
              predefined_type = "n1-standard-1",
              dynamic_image = gce_tag_container("datalab-r"),
              username = "mark",
              password = "mark1234")
```
The Dockerfile used is below:
FROM rocker/hadleyverse
MAINTAINER Mark Edmondson (r@sunholo.com)

# install cron and nano and tensorflow and tflearn
RUN apt-get update && apt-get install -y \
    cron nano \
    python-pip python-dev \
    && pip install numpy \
    && pip install pandas \
    && export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.11.0-cp27-none-linux_x86_64.whl \
    && pip install --upgrade $TF_BINARY_URL \
    && pip install git+https://github.com/tflearn/tflearn.git \
    && pip install cython \
    && pip install feather-format \
    ## clean up
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/ \
    && rm -rf /tmp/downloaded_packages/ /tmp/*.rds

## Install packages from CRAN
RUN install2.r --error \
    -r 'http://cran.rstudio.com' \
    googleAuthR googleAnalyticsR searchConsoleR googleCloudStorageR bigQueryR htmlwidgets feather rPython \
    ## install Github packages
    && Rscript -e "devtools::install_github(c('MarkEdmondson1234/youtubeAnalyticsR', 'MarkEdmondson1234/googleID', 'MarkEdmondson1234/googleAuthR'))" \
    && Rscript -e "devtools::install_github(c('bnosac/cronR'))" \
    && Rscript -e "devtools::install_github(c('rstudio/tensorflow'))" \
    ## clean up
    && rm -rf /tmp/downloaded_packages/ /tmp/*.rds
# Fix: the original ended the final RUN with a dangling `\` line
# continuation with nothing after it, which is invalid; removed it here.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.