# Global chunk options: keep warnings and messages out of the rendered report.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

library(reticulate)

# Python virtual environment used by the python chunks of this report.
virtualenv_create("shiny-app-env", python = "python3")
use_virtualenv("shiny-app-env", required = TRUE)

# Install all Python dependencies with a single call instead of one call per
# package.
virtualenv_install(
  "shiny-app-env",
  packages = c("seaborn", "scipy", "tabulate", "statsmodels")
)
library(knitr)

# Get data; df_code holds a copy taken before any column selection.
df <- params$data
df_code <- df

# Evaluation switches read by later chunks; all start switched off.
eval0 <- eval <- eval_num <- eval_num2 <- eval_factor <- eval_factor2 <- FALSE

# Restrict the data to the selected columns. Any failure is re-raised as a
# user-facing error.
tryCatch(
  {
    df <- df[, params$vars1, drop = FALSE]
    df2 <- df
    # Initialize next computations
    eval0 <- TRUE
  },
  error = function(e) {
    stop(safeError("Please try other column names for the following columns: "))
  }
)

# Tabulate the selected columns that are not present in the data.
if (length(setdiff(params$vars1, colnames(df))) > 0) {
  equal <- intersect(colnames(df), params$vars1)
  kable(setdiff(params$vars1, equal), col.names = "Column")
}
# Prepare the selected data: drop empty columns and rows, recode logical and
# low-cardinality numeric columns as character, then split the columns into
# continuous (df_cont) and categorical (df_cat) sets.

# Initialize next computations
eval <- FALSE
eval_rows <- FALSE

tryCatch({
  # Drop columns if all observations are missing
  col_names_missing <- vapply(df, function(col) all(is.na(col)), logical(1))
  df <- df[, !col_names_missing, drop = FALSE]
  df_list <- df

  # Drop empty rows. is.na() on the whole data frame keeps the
  # rows-by-columns layout even when there is only a single row.
  rowsums <- as.data.frame(is.na(df))
  all_missing_rows <- which(rowSums(rowsums) == ncol(df))
  if (ncol(df) > 0L && length(all_missing_rows) > 0L) {
    eval_rows <- TRUE
    rows_drop <- all_missing_rows
    length_non_complete <- length(rows_drop)
    df <- df[-rows_drop, , drop = FALSE]
  }

  # Convert logical variables to character
  cols_logical <- vapply(df, is.logical, logical(1))
  if (any(cols_logical)) {
    df[cols_logical] <- lapply(df[cols_logical], as.character)
  }

  # Convert numerical variables with less than 7 unique values to character
  # (missing values omitted)
  col_names_numeric <- vapply(
    df,
    function(col) is.numeric(col) && length(unique(na.omit(col))) < 7L,
    logical(1)
  )
  if (any(col_names_numeric)) {
    df[col_names_numeric] <- lapply(df[col_names_numeric], as.character)
  }

  # Extract numerical variables
  df_num <- df[vapply(df, is.numeric, logical(1))]

  # Extract approximate continuous variables and non-continuous variables.
  # continuous() is defined outside this chunk; its result is used as a
  # per-column logical selector here.
  if (ncol(df_num) > 0) {
    rateunique_df <- sapply(df_num, function(col) continuous(col))
    if (params$continuity == "severe") {
      # numeric, continuous resp. assumption fulfilled
      df_cont <- df_num[, rateunique_df, drop = FALSE]
      # numeric, non-continuous
      df_noncont <- df_num[, !rateunique_df, drop = FALSE]
    } else {
      df_cont <- df_num
    }
  } else {
    df_cont <- df_num
  }

  # Extract character variables
  df_factor <- df[vapply(df, is.character, logical(1))]

  # Categorical: character columns plus the non-continuous numeric ones.
  # Both frames come from the same rows of df, so they are bound column-wise;
  # this keeps the original row order.
  if (exists("df_noncont")) {
    df_cat <- cbind(df_factor, df_noncont)
  } else {
    df_cat <- df_factor
  }

  # Sort by variable name
  df_cont <- df_cont[, order(colnames(df_cont)), drop = FALSE]
  df_cat <- df_cat[, order(colnames(df_cat)), drop = FALSE]

  # Initialize next computations
  eval <- TRUE
}, error = function(e) {
  stop(safeError("Dataset cannot be prepared. 
Try other upload settings and check your data for compatibility reasons."))
})
# Call used libraries
library(kableExtra)

# First page of the report: file name, upload settings and data dimensions.
cat("\n# Basic Information", fill = TRUE)
cat("\\small ", fill = TRUE)

cat("Automatic statistics for the file:", fill = TRUE)
dataname <- params$filename[1]
kable(dataname, col.names = "File", linesep = "", longtable = TRUE)

cat("Your selection for the encoding:", fill = TRUE)
cat(if (params$fencoding == "unknown") "Auto" else "UTF-8")
cat("\\newline", fill = TRUE)

cat("Your selection for the decimal character:", fill = TRUE)
cat(if (params$decimal == "auto") "Auto" else params$decimal)
cat("\\newline", fill = TRUE)

cat("Observations (rows with at least one non-missing value): ", fill = TRUE)
cat(nrow(df))
cat("\\newline", fill = TRUE)

# Missing rows
if (exists("length_non_complete")) {
  cat("Number of rows that are dropped because they contain no values (all values are missing):", length_non_complete)
  cat("\\newline", fill = TRUE)
}

cat("Variables (columns with at least one non-missing value): ", fill = TRUE)
cat(ncol(df_list))
cat("\\newline", fill = TRUE)

# Missing columns
if (exists("col_names_missing") && sum(col_names_missing) != 0L) {
  cat("Number of columns that are dropped because they contain no values (all values are missing):", sum(col_names_missing), fill = TRUE)
  cat("\\newline", fill = TRUE)
}

# The table must stay the last expression of its branch so that knitr prints it.
if (exists("df_cont")) {
  cat("Variables considered continuous: ", fill = TRUE)
  if (ncol(df_cont) > 0) {
    cat(ncol(df_cont), fill = TRUE)
    knitr::kable(
      colnames(df_cont),
      col.names = "Variables considered continuous",
      linesep = "",
      longtable = TRUE
    ) %>%
      kable_styling(
        font_size = 8,
        position = "center",
        full_width = FALSE,
        latex_options = c("HOLD_position", "repeat_header")
      )
  } else {
    cat("0", fill = TRUE)
    cat("\\newline", fill = TRUE)
  }
}

if (exists("df_cat")) {
  cat("Variables considered categorical: ", fill = TRUE)
  if (ncol(df_cat) > 0) {
    cat(ncol(df_cat), fill = TRUE)
    knitr::kable(
      colnames(df_cat),
      col.names = "Variables considered categorical",
      linesep = "",
      longtable = TRUE
    ) %>%
      kable_styling(
        font_size = 8,
        position = "center",
        full_width = FALSE,
        latex_options = c("HOLD_position", "repeat_header")
      )
  } else {
    cat("0", fill = TRUE)
    cat("\\newline", fill = TRUE)
  }
}
# Were numeric columns falsely read as character?

#' Share of the non-missing values of a column that parse as numbers
#'
#' @param col A vector (character columns are passed in below).
#' @return A single number in [0, 1]; 0 when `col` has no non-missing value.
check_reading <- function(col) {
  n_present <- sum(!is.na(col))
  if (n_present == 0L) {
    # Avoid 0 / 0 (NaN), which would leak NA entries into the warning below.
    return(0)
  }
  # as.numeric() warns for every value it cannot parse; that is expected here.
  parsed <- suppressWarnings(as.numeric(col))
  sum(!is.na(parsed)) / n_present
}

df_char2 <- df2[vapply(df2, is.character, logical(1))]
numeric_percent <- vapply(df_char2, check_reading, numeric(1))

# Character columns in which more than 90% of the values look numeric.
mostly_numeric <- !is.na(numeric_percent) & numeric_percent > 0.9
if (any(mostly_numeric)) {
  cat("**Warning: More than 90% of the values of these columns could be treated as numeric. Nevertheless, because of some values or the selected decimal character, the columns must be treated as discrete. Are all the values plausible? Please check the data once more before uploading! Column(s):**", names(numeric_percent)[mostly_numeric], fill = TRUE)
}
\pagebreak
# Copy: the later chunks work on df_num / df_factor.
if (exists("df_cont")) {
  df_num <- df_cont
}
if (exists("df_cat")) {
  df_factor <- df_cat
}
# Send df to Python
## Call Python environment
main <- import_main()

## For each frame: replace NA with NaN so reticulate converts it correctly,
## then add the frame to the Python environment. The replacement is guarded
## by the same exists() check as the export, so a frame that was never
## created is not touched.
if (exists("df_cont")) {
  df_num[is.na(df_num)] <- NaN
  main$df_num <- r_to_py(df_num)
}
if (exists("df_cat")) {
  df_cat[is.na(df_cat)] <- NaN
  main$df_cat <- r_to_py(df_cat)
}
```{python, eval=eval, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, results="asis"} import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt from scipy import stats import math import statsmodels.api as sm from tabulate import tabulate import sys if not sys.warnoptions: import warnings warnings.simplefilter("ignore")
pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 20) pd.set_option('display.width', 1000)
```r
# Title: enable the numeric section when at least one numeric variable exists.
if (exists("df_num") && ncol(df_num) != 0L) {
  eval_num <- TRUE
  cat("# Results for Numerical Variables", fill = TRUE)
  cat("## Descriptive Statistics", fill = TRUE)
  cat("Variables are sorted alphabetically. Missings are omitted in stats. CV only for positive variables. ", fill = TRUE)
}
```{python, eval=eval_num, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, results="asis"}
# Descriptive statistics for the numerical variables in df_num, printed as a
# LaTeX longtable with one row per variable.
df_num.sort_index(axis=1, inplace=True, key=lambda x: x.str.lower())

df_num_stats = df_num.describe().transpose()
df_num_stats.drop(columns=['25%', '75%'], inplace=True)
df_num_stats.rename(columns={"count": "N Valid", "mean": "Mean", "50%": "Median",
                             "std": "SD", "min": "Min", "max": "Max"}, inplace=True)

df_num_stats["N Obs"] = df_num.shape[0]
df_num_stats["N Missing"] = df_num.isna().sum()
df_num_stats["% Complete"] = (df_num_stats["N Valid"] / df_num_stats["N Obs"])*100
df_num_stats["N Unique"] = df_num.nunique(dropna=True)
df_num_stats["MAD"] = df_num.apply(lambda x: stats.median_abs_deviation(x, scale="normal", nan_policy='omit'))
df_num_stats["Skewness"] = df_num.skew(axis=0)
df_num_stats["Kurtosis"] = df_num.kurtosis(axis=0)

# Coefficient of variation only for strictly positive variables.
positive = df_num_stats["Min"] > 0
df_num_stats["CV"] = (df_num_stats["SD"] / df_num_stats["Mean"]).where(positive)
df_num_stats = df_num_stats.round(2)

# The CV column ends up mixing numbers and blanks. Cast it to object before
# writing "" so no string is assigned into a float column.
df_num_stats["CV"] = df_num_stats["CV"].astype(object)
df_num_stats.loc[df_num_stats["Min"] <= 0, "CV"] = ""

df_num_stats = df_num_stats.reindex(columns=["N Obs", "N Missing", "N Valid", "% Complete", "N Unique", "Mean", "SD", "Median", "MAD", "Min", "Max", "Skewness", "Kurtosis", "CV"])

# Cut variable names to at most 25 characters for the table.
cnames = np.array(df_num_stats.index).astype('str')
csize = min(np.max(np.char.str_len(cnames)), 25)
df_num_stats.index = list(map(lambda x: x[0:csize], cnames))

print(tabulate(df_num_stats, headers='keys', tablefmt="latex_longtable"))
\pagebreak ```r
# Section text for the histograms when there are several numeric variables.
if (exists("df_num") && ncol(df_num) > 1) {
  cat("## Graphics", fill = TRUE)
  cat("### Histograms", fill = TRUE)
  cat("Details: Density Histograms. One large figure per page for each variable, sorted alphabetically. The blue line represents the normal density approximation. The green line represents a special kernel density approximation. ", fill = TRUE)
  cat("\\newline", fill = TRUE)
  cat("See figures on next page. ", fill = TRUE)
}
# Section text for the histogram when there is exactly one numeric variable.
if (exists("df_num") && ncol(df_num) == 1) {
  cat("## Graphics", fill = TRUE)
  cat("### Histograms", fill = TRUE)
  cat("Details: Density Histogram. The blue line represents the normal density approximation. The green line represents a special kernel density approximation. ", fill = TRUE)
}
```{python histolarge, eval=eval_num, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
def histo(ax, var, ylabel, xsize, title):
    """Draw a density histogram with KDE and a fitted normal curve.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    var : positional index of the column of ``df_num`` to plot.
    ylabel : label of the y axis.
    xsize : font size of the x-axis label (the column name).
    title : title of the axes.
    """
    series = df_num.iloc[:, var]
    sns.histplot(series, ax=ax, stat='density',
                 color='#2fa42d', kde=True)
    # Evaluate the normal density on a fixed-size grid over the observed
    # range. A fixed step of 0.1 yields a single point for ranges below 0.1
    # and a huge array for very wide ranges.
    xaxis = np.linspace(series.min(), series.max(), 200)
    fit_normal = stats.norm.pdf(xaxis, series.mean(), series.std())
    ax.plot(xaxis, fit_normal, lw=1, color='#396e9f')
    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.set_xlabel(df_num.columns[var], fontsize=xsize)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
# One large histogram per numeric variable.
sns.set_style()
for var in range(df_num.shape[1]):
    fig, ax = plt.subplots(nrows = 1, ncols=1)
    histo(ax, var, "Density", 25, title="Histogram")
    plt.show()
    if (var==0):
        plt.plot()
    # Release the figure once it has been shown (as the box-plot loop does),
    # so open figures do not pile up with the number of variables.
    plt.close(fig)
\pagebreak ```r
# Title: enable the multi-panel summaries when there are several numeric
# variables.
if (exists("df_num") && ncol(df_num) > 1) {
  eval_num2 <- TRUE
  cat("### Histograms Summary", fill = TRUE)
  cat("Multiple Relative Frequency Histogram in one figure. Variables are sorted alphabetically. The blue line represents the normal density approximation. The green line represents a special kernel density approximation. ", fill = TRUE)
}
```{python, eval=eval, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
def trim_axs(axs, N):
    """Keep the first N axes of a grid and remove the rest.

    ``axs`` is an array of axes; it is read in flattened order. ``remove()``
    is called on every element from position N onwards, and the first N
    elements are returned as a 1-D array.
    """
    flat_axes = axs.flat
    surplus = flat_axes[N:]
    for extra in surplus:
        extra.remove()
    return flat_axes[:N]
```{python, eval=eval_num2, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)} ## Histograms Summary (tested for ncols <=25) ### Define number of columns and rows for the multi-plot nrow = math.ceil(np.sqrt(df_num.shape[1])) ncol = math.ceil(df_num.shape[1] / nrow) fig, ax = plt.subplots(nrow, ncol) fig.tight_layout(pad=3.0) ax = trim_axs(ax, df_num.shape[1]) fig.subplots_adjust(hspace = 1) for ax, var in zip(ax.flatten(), range(df_num.shape[1])): histo(ax, var, "", 8, title="") plt.show()
\pagebreak
# Section text for the box-plots.
if (exists("df_num") && ncol(df_num) != 0L) {
  cat("### Box-Plots", fill = TRUE)
  if (ncol(df_num) > 1) {
    cat("One Box-Plot per page for each variable. Variables are sorted alphabetically. ", fill = TRUE)
  }
}
```{python, eval=eval_num, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
def box(ax, var, xsize):
    """Draw a horizontal box-plot of one column of ``df_num``.

    ax : matplotlib axes to draw on.
    var : positional index of the column to plot.
    xsize : font size of the x-axis label (the column name).
    """
    column_name = df_num.columns[var]
    sns.boxplot(x=df_num.iloc[:, var], ax=ax, color='#2fa42d')
    ax.set_xlabel(column_name, fontsize=xsize)
    ax.set_title("Box-Plot of " + str(column_name))
# One large box-plot per numeric variable.
sns.set_theme(style="whitegrid")
n_box_plots = df_num.shape[1]
for var in range(n_box_plots):
    fig, ax = plt.subplots(nrows=1, ncols=1)
    box(ax, var, 25)
    plt.show()
    if var == 0:
        plt.plot()
    # NOTE(review): the collapsed source line does not show whether this call
    # sits inside the loop; it is read here as closing every figure.
    plt.close(fig)
\pagebreak ```r
# Title
if (exists("df_num") && ncol(df_num) > 1) {
  cat("### Box-Plots Summary", fill = TRUE)
  cat("Multiple Box-Plots of variables in one figure. Variables are sorted alphabetically. ", fill = TRUE)
}
```{python, eval=eval_num2, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
# All box-plots in one figure, one panel per numeric variable.
fig, ax = plt.subplots(nrow, ncol)
fig.tight_layout(pad=3.0)
ax = trim_axs(ax, df_num.shape[1])
fig.subplots_adjust(hspace = 1)
for var, panel in enumerate(ax):
    box(panel, var, 15)
plt.show()
\pagebreak ```r
# Section text for the ECDF plots; the wording depends on whether there is one
# numeric variable or several.
if (exists("df_num") && ncol(df_num) >= 1) {
  cat("### ECDF Plots", fill = TRUE)
  if (ncol(df_num) == 1) {
    cat("ECDF (Empirical Cumulative Distribution Function) Plot. The blue line represents the CDF of a normal distribution. If the variable is normally distributed, the blue line approximates well the ECDF. ", fill = TRUE)
  } else {
    cat(" One ECDF (Empirical Cumulative Distribution Function) Plot per page for each variable. Variables are sorted alphabetically. The blue line represents the CDF of a normal distribution. If the variable is normally distributed, the blue line approximates well the ECDF. ", fill = TRUE)
  }
}
```{python, eval=eval_num, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
def ecdf(ax, var, xsize):
    """Draw the empirical CDF of one column of ``df_num`` with a normal CDF.

    ax : matplotlib axes to draw on.
    var : positional index of the column to plot.
    xsize : font size of the x-axis label (the column name).
    """
    series = df_num.iloc[:, var]
    sns.ecdfplot(series, ax=ax, color='#2fa42d')
    # Evaluate the normal CDF on a fixed-size grid over the observed range.
    # A fixed step of 0.1 yields a single point for ranges below 0.1 and a
    # huge array for very wide ranges.
    xaxis = np.linspace(series.min(), series.max(), 200)
    fit_normal = stats.norm.cdf(xaxis, series.mean(), series.std())
    ax.plot(xaxis, fit_normal, lw=1, color='#396e9f')
    ax.set_xlabel(df_num.columns[var], fontsize=xsize)
    ax.set_ylabel("ECDF")
# One large ECDF plot per numeric variable.
for var in range(df_num.shape[1]):
    fig, ax = plt.subplots(nrows = 1, ncols=1)
    ecdf(ax, var, 15)
    plt.show()
    if (var==0):
        plt.plot()
    # Release the figure once it has been shown (as the box-plot loop does),
    # so open figures do not pile up with the number of variables.
    plt.close(fig)
\pagebreak ```r
# Title
if (exists("df_num") && ncol(df_num) > 1) {
  cat("### ECDF Plots Summary", fill = TRUE)
  cat("Multiple ECDF Plots of variables in one figure. Variables are sorted alphabetically. ", fill = TRUE)
}
```{python, eval=eval_num2, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
# All ECDF plots in one figure, one panel per numeric variable.
fig, ax = plt.subplots(nrow, ncol)
fig.tight_layout(pad=3.0)
ax = trim_axs(ax, df_num.shape[1])
fig.subplots_adjust(hspace = 1)
for var, panel in enumerate(ax):
    ecdf(panel, var, 10)
plt.show()
\pagebreak ```r
# Section text for the QQ-plots; the explanatory sentence is only printed when
# there are several numeric variables.
if (exists("df_num") && ncol(df_num) >= 1) {
  cat("### QQ-Plots", fill = TRUE)
  if (ncol(df_num) > 1) {
    cat("One QQ-Plot per page for each variable. Variables are sorted alphabetically. ", fill = TRUE)
  }
}
```{python, eval=eval_num, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
def qqplotf(var, ax, xlabel, ylabel, title):
    """Draw a QQ-plot of one column of ``df_num`` against a fitted normal.

    var : positional index of the column to plot (missing values dropped).
    ax : matplotlib axes to draw on.
    xlabel : label of the x axis.
    ylabel : prefix of the y-axis label; the column name is appended to it.
    title : title of the axes.
    """
    sample = df_num.iloc[:, var].dropna()
    sm.qqplot(sample, fit=True, ax=ax, line='45')

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel + df_num.columns[var])
    ax.set_title(title)

    # Recolour the first line object of the axes.
    first_line = ax.get_lines()[0]
    first_line.set_markerfacecolor('#2fa42d')
    first_line.set_markeredgecolor('#2fa42d')
# One large QQ-plot per numeric variable.
for var in range(df_num.shape[1]):
    fig, ax = plt.subplots(nrows = 1, ncols=1)
    qqplotf(var, ax, "Normal Quantiles", "Sample Quantiles for ", "QQ-Plot")
    plt.show()
    if (var==0):
        plt.plot()
    # Release the figure once it has been shown (as the box-plot loop does),
    # so open figures do not pile up with the number of variables.
    plt.close(fig)
\pagebreak ```r
# Title
if (exists("df_num") && ncol(df_num) > 1) {
  cat("### QQ-Plots Summary", fill = TRUE)
  cat("Multiple QQ-Plots of variables in one figure. Variables are sorted alphabetically. ", fill = TRUE)
}
```{python, eval=eval_num2, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
# All QQ-plots in one figure, one unlabelled panel per numeric variable.
fig, ax = plt.subplots(nrow, ncol)
fig.tight_layout(pad=3.0)
ax = trim_axs(ax, df_num.shape[1])
fig.subplots_adjust(hspace = 1)
for var, panel in enumerate(ax):
    qqplotf(var, panel, "", "", "")
plt.show()
\pagebreak ```r
# Title: enable the discrete section when at least one discrete variable
# exists.
if (exists("df_factor") && ncol(df_factor) != 0L) {
  eval_factor <- TRUE
  cat("# Results for Discrete Variables", fill = TRUE)
  cat("## Descriptive Statistics", fill = TRUE)
  cat("### Totals", fill = TRUE)
  cat("The table is sorted by the variable name. If any, N Unique contains the missing category.", fill = TRUE)
}
```{python, eval=eval_factor, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8), results="asis"}
# Totals table for the categorical variables in df_cat (one row per variable).

# Number of observations, repeated once per variable.
nobs = np.ones(df_cat.shape[1]) * df_cat.shape[0]
nobs = nobs.astype(int)

df_cat.sort_index(axis=1, inplace=True, key=lambda x: x.str.lower())
df_cat = df_cat.astype("string")

df_cat_stats = pd.DataFrame(nobs, columns=['N Obs'], index=df_cat.columns)
# Missing values are counted both as real missings and as the literal text
# 'NaN'.
df_cat_stats["N Missing"] = df_cat.apply(lambda x: (x == 'NaN').sum() + x.isna().sum())
df_cat_stats["N Valid"] = df_cat_stats["N Obs"] - df_cat_stats["N Missing"]
df_cat_stats["% Complete"] = (df_cat_stats["N Valid"] / df_cat_stats["N Obs"]) * 100
df_cat_stats["N Unique"] = df_cat.nunique(dropna=False)
df_cat_stats = df_cat_stats.round(2)
# Cut variable names to at most 25 characters, then print the table as a
# LaTeX longtable.
cnames = np.array(df_cat_stats.index).astype('str')
csize = min(np.max(np.char.str_len(cnames)), 25)
df_cat_stats.index = [name[0:csize] for name in cnames]

print(tabulate(df_cat_stats, headers='keys', tablefmt="latex_longtable"))
\pagebreak ```r
# Title
if (exists("df_factor") && ncol(df_factor) != 0L) {
  cat("### Frequencies", fill = TRUE)
  cat("The table is sorted by the variable name. For each variable, a maximum of 20 unique values are considered, sorted in decreasing order of their frequency. If any, missings are counted as a category. ", fill = TRUE)
}
```{python, eval=eval_factor, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8), results="asis"}
# Copy of df_cat in which missing values (real missings and the literal text
# 'NaN') form their own category 'Missing'.
df_cat_miss = df_cat.fillna('Missing').replace(to_replace='NaN', value='Missing')

# Keep only the first 19 characters of every value.
df_cat_miss = df_cat_miss.applymap(lambda x: x[0:19])
def freq(var):
    """Frequency table of one column of ``df_cat_miss``.

    Parameters
    ----------
    var : name of the column to tabulate.

    Returns
    -------
    DataFrame with the columns Variable, Category, Frequency and Percent
    (Percent is the relative frequency). Only the 20 most frequent categories
    are kept; rows are sorted by decreasing frequency, ties by category.
    """
    # Compute Frequency and Percent columns
    counts = df_cat_miss[var].value_counts(sort=True)
    frequencies = counts.to_numpy()
    df = pd.DataFrame({
        "Variable": var,
        "Category": counts.index,
        "Frequency": frequencies,
        "Percent": frequencies / frequencies.sum(),
    })
    # Only the first 20 categories
    df = df.head(n=20)
    # Sort also by Category. The sort directions must be an ordered list;
    # a set has no defined order.
    df = df.sort_values(by=['Frequency', 'Category'], ascending=[False, True])
    return df
# Stack the frequency tables of all categorical variables and print them as
# one LaTeX longtable.
listdf = [freq(column) for column in df_cat_miss.columns]

df_freq = pd.concat(listdf, ignore_index=True)
print(tabulate(df_freq, headers='keys', tablefmt="latex_longtable", showindex=False))
\pagebreak ```r
# Title: with several discrete variables the multi-panel summaries are enabled
# as well.
if (exists("df_factor") && ncol(df_factor) != 0L) {
  cat("## Graphics", fill = TRUE)
  cat("### Bar-Plots", fill = TRUE)
  if (ncol(df_factor) > 1) {
    cat("One Bar-Plot per page for each variable. Variables are sorted alphabetically. No labels for variables with more than 40 categories.", fill = TRUE)
    eval_factor2 <- TRUE
  }
}
```{python, eval=eval_factor, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
def barplot(var, ax, type):
    """Draw a bar-plot of the category counts of one column of ``df_cat_miss``.

    var : positional index of the column to plot.
    ax : matplotlib axes to draw on.
    type : "Summary" suppresses the tick labels; any other value keeps them
        (subject to the category-count rules below).
    """
    column_name = df_cat_miss.columns[var]
    values = df_cat_miss.iloc[:, var]
    sns.countplot(x=column_name, ax=ax, color='#2fa42d', data=df_cat_miss,
                  order=values.value_counts(ascending=True).index)

    n_categories = values.nunique()
    # Rotate labels
    if 10 <= n_categories <= 40:
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha="right")
    # No labels for variables with ncats > 40 or type summary
    if n_categories > 40 or type == "Summary":
        ax.set_xticklabels([])

    ax.set_xlabel(column_name)
    ax.set_ylabel("Frequency")
    ax.set_title("Bar-Plot of " + str(column_name))
# One large bar-plot per categorical variable.
for var in range(df_cat_miss.shape[1]):
    fig, ax = plt.subplots(nrows = 1, ncols=1)
    barplot(var, ax, type="Large")
    plt.show()
    if (var==0):
        plt.plot()
    # Release the figure once it has been shown (as the box-plot loop does),
    # so open figures do not pile up with the number of variables.
    plt.close(fig)
\pagebreak ```r
# Title
if (exists("df_factor") && ncol(df_factor) > 1) {
  cat("### Bar-Plots Summary")
  cat("\n\n\ Multiple Bar-Plots of variables in one figure. Variables are sorted alphabetically. No labels displayed. ", fill = TRUE)
}
```{python, eval=eval_factor2, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
# All bar-plots in one figure. nrow and ncol are recomputed for the
# categorical variables and reused by a later chunk.
nrow = math.ceil(np.sqrt(df_cat.shape[1]))
ncol = math.ceil(df_cat.shape[1] / nrow)

fig, ax = plt.subplots(nrow, ncol)
fig.tight_layout(pad=3.0)
ax = trim_axs(ax, df_cat.shape[1])
fig.subplots_adjust(hspace = 1)
for var, panel in enumerate(ax):
    barplot(var, panel, type="Summary")
plt.show()
\pagebreak ```r
# Section text for the pie plots.
if (exists("df_factor") && ncol(df_factor) != 0L) {
  cat("### Pie Plots", fill = TRUE)
  if (ncol(df_factor) > 1) {
    cat("One Pie Plot per page for each variable. Variables are sorted alphabetically. ", fill = TRUE)
  }
}
```{python, eval=eval_factor, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,8)}
def pieplot(var, ax, titlesize, textsize, radius):
    """Draw a pie plot of the category counts of one column of ``df_cat_miss``.

    var : positional index of the column to plot.
    ax : matplotlib axes to draw on.
    titlesize : font size of the title.
    textsize : font size of the wedge labels and percentages.
    radius : radius of the pie.
    """
    cats = df_cat_miss.iloc[:, var].value_counts()
    labels = cats.index
    pie_options = dict(
        autopct="%.1f%%",
        explode=[0] * len(labels),
        labels=labels,
        pctdistance=0.7,
        textprops={'fontsize': textsize},
        shadow=True,
        radius=radius,
    )
    ax.pie(cats, **pie_options)
    ax.set_title("Pie-Plot of " + df_cat_miss.columns[var], fontsize=titlesize, pad=10)
# One large pie plot per categorical variable.
for var in range(df_cat_miss.shape[1]):
    fig, ax = plt.subplots(nrows = 1, ncols=1)
    pieplot(var, ax, titlesize=14, textsize=8, radius=1)
    plt.show()
    if (var==0):
        plt.plot()
    # Release the figure once it has been shown (as the box-plot loop does),
    # so open figures do not pile up with the number of variables.
    plt.close(fig)
\pagebreak ```r
# Title
if (exists("df_factor") && ncol(df_factor) > 1) {
  cat("### Pie Plots Summary", fill = TRUE)
  cat("Multiple Pie Plots of variables in one figure. Variables are sorted alphabetically. ", fill = TRUE)
  cat("\\newline", fill = TRUE)
  cat("See figures on next page. ", fill = TRUE)
}
```{python, eval=eval_factor2, echo = FALSE, comment='', message = FALSE, error = TRUE, warning=FALSE, fig.dim = c(14,7)}
plt.rcParams.update(plt.rcParamsDefault) fig, ax = plt.subplots(nrow, ncol, sharex=True, sharey=True, figsize=(10,10)) ax = trim_axs(ax, df_cat.shape[1]) fig.subplots_adjust(hspace = 1) for ax, var in zip(ax.flatten(), range(df_cat.shape[1])): pieplot(var, ax, titlesize=7, textsize=6, radius=1.2) plt.subplots_adjust(hspace=0.3, wspace=0.01) plt.show() ```
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.