Josh Goldberg 11/7/2017
library(tidyverse)
library(gridExtra)
library(scales)
library(forcats)
# Load the survey exports. The bare file names are resolved against the
# current working directory.
MC_raw_data <- read_csv("multipleChoiceResponses.csv")
FF_raw_data <- read_csv("freeformResponses.csv")
schema <- read_csv("schema.csv")
conversion_rates <- read_csv("conversionRates.csv")
We’ll start with a quick glimpse of the data.
# Print the dimensions (rows, then columns) of the multiple-choice responses.
dim(MC_raw_data)
## [1] 16716 228
This dataset has 16,716 rows (survey entries) and 228 columns.
Create working copies of the raw data frames so they can be cleaned and manipulated without altering the originals.
# Bind working names to the raw data; later cleaning steps should target
# these so the *_raw_data objects stay as loaded.
clean_MC_data <- MC_raw_data
clean_FF_data <- FF_raw_data
# Bar chart of the number of survey responses per country, with countries
# ordered from most to least frequent. Rows with a missing Country are
# dropped before plotting.
clean_MC_data %>%
  filter(!is.na(Country)) %>%
  ggplot(aes(x = fct_infreq(Country))) +
  geom_bar() +
  labs(
    title = "Survey Responses By Country",
    x = "Country",
    y = "Responses"
  ) +
  # All theme tweaks gathered in one call; each element is set exactly once.
  theme(
    plot.title = element_text(
      size = 14, face = "bold", color = "black", vjust = -1
    ),
    plot.subtitle = element_text(size = 8, face = "italic", color = "black"),
    # Angle the country names so the long labels do not overlap.
    axis.text.x = element_text(size = 10, angle = 50, hjust = 1),
    axis.title.x = element_text(size = 10, vjust = -0.2),
    axis.title.y = element_text(size = 10),
    panel.background = element_rect(fill = "white")
  )
Is the field of data science diverse? Let’s look at the gender breakdown of respondents within each country.
# Stacked bar chart showing, for each country, the share of responses by
# gender. Countries are ordered from most to least frequent; rows missing
# either Country or GenderSelect are dropped before plotting.
clean_MC_data %>%
  filter(!is.na(Country), !is.na(GenderSelect)) %>%
  ggplot(aes(x = fct_infreq(Country), fill = GenderSelect)) +
  # position = "fill" rescales every bar to height 1, so the y axis is a
  # within-country proportion rather than a count.
  geom_bar(position = "fill") +
  labs(
    title = "Survey Responses by Country and Gender",
    x = "Country",
    y = "Proportion"
  ) +
  scale_y_continuous(labels = percent) +
  scale_fill_discrete(
    breaks = c(
      "A different identity", "Female", "Male",
      "Non-binary, genderqueer, or gender non-conforming"
    ),
    # Shorter legend labels, matched positionally to `breaks`.
    labels = c("Different Identity", "Female", "Male", "Non-conforming"),
    # The legend title is deliberately hidden (title = NULL). The former
    # `name = 'Gender'` and theme(legend.title = ...) settings were overridden
    # by this and never rendered, so they have been removed.
    guide = guide_legend(title = NULL, keywidth = 0.75, keyheight = 0.75)
  ) +
  # All theme tweaks gathered in one call; each element is set exactly once.
  theme(
    plot.title = element_text(
      size = 14, face = "bold", color = "black", vjust = -1
    ),
    plot.subtitle = element_text(size = 8, face = "italic", color = "black"),
    # Angle the country names so the long labels do not overlap.
    axis.text.x = element_text(size = 10, angle = 50, hjust = 1),
    axis.title.x = element_text(size = 10, vjust = -0.2),
    axis.title.y = element_text(size = 10),
    legend.text = element_text(size = 10),
    legend.position = "top",
    panel.background = element_rect(fill = "white")
  )