Libraries

Reading the data

We’ll be working with data/animal_sounds_summary.csv.

Challenge #1: Know your data!

sounds <- read.csv('data/animal_sounds_summary.csv')

How many variables?

glimpse(sounds)
## Rows: 33
## Columns: 7
## $ age             <int> 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, …
## $ sound           <chr> "cockadoodledoo", "meow", "woof woof", "cockadoodledoo…
## $ kids_produce    <int> 1, 0, 3, 0, 2, 2, 0, 5, 4, 0, 5, 12, 0, 12, 28, 9, 125…
## $ kids_understand <int> 3, 10, 12, 2, 21, 22, 9, 41, 40, 4, 36, 32, 16, 59, 59…
## $ kids_respond    <int> 35, 35, 35, 91, 93, 93, 139, 145, 143, 94, 94, 94, 141…
## $ prop_produce    <dbl> 0.02857143, 0.00000000, 0.08571429, 0.00000000, 0.0215…
## $ prop_understand <dbl> 0.08571429, 0.28571429, 0.34285714, 0.02197802, 0.2258…

Which are continuous?

We will treat age, kids_understand, kids_respond, prop_produce, and prop_understand as continuous.

### Which are categorical and ordinal?

sound is categorical (no meaningful order).

How many total kids?

# Sum the respondent counts over every row of the summary table.
total_kids <- sum(sounds$kids_respond)
total_kids
## [1] 7226

How many different ages?

unique(sounds$age)
##  [1]  8  9 10 11 12 13 14 15 16 17 18

How many kids per age?

# NOTE(review): count() tallies rows per age (one row per sound), not children;
# a per-age child total would have to come from kids_respond -- confirm intent.
sounds %>%  group_by(age) %>% count(age)
## # A tibble: 11 × 2
## # Groups:   age [11]
##      age     n
##    <int> <int>
##  1     8     3
##  2     9     3
##  3    10     3
##  4    11     3
##  5    12     3
##  6    13     3
##  7    14     3
##  8    15     3
##  9    16     3
## 10    17     3
## 11    18     3

How many types of animal sounds, and what are they?

uni <- unique(sounds$sound)
uni
## [1] "cockadoodledoo" "meow"           "woof woof"
length(uni)
## [1] 3

Initial EDA Plots

How many kids produce each kind of sound?

# Total number of kids producing each sound, summed over all ages.
sounds %>%
  group_by(sound) %>%
  summarize(sum(kids_produce))
## # A tibble: 3 × 2
##   sound          `sum(kids_produce)`
##   <chr>                        <int>
## 1 cockadoodledoo                 148
## 2 meow                           681
## 3 woof woof                      940

Adding age

# Table: total kids producing each sound, rendered with kable.
sounds %>%
  group_by(sound) %>%
  summarise(total_produce = sum(kids_produce)) %>%
  knitr::kable()
sound total_produce
cockadoodledoo 148
meow 681
woof woof 940
# Bar plot: geom_col() stacks the per-age rows, so each bar's height is the
# total number of kids producing that sound.
ggplot(sounds, aes(x = sound, y = kids_produce)) +
  geom_col() +
  # The x label must be passed as a named argument; an unnamed string such as
  # "x = Sound" does not set the axis title.
  labs(x = "Sound", y = "Total Kids")

# Bar plot of the proportion producing each sound by age, one panel per sound.
ggplot(sounds, aes(x = age, y = prop_produce)) +
  geom_col() +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds") +
  facet_wrap(~sound)

### Scatter plot

# Same data as points instead of bars, still one panel per sound.
ggplot(sounds, aes(x = age, y = prop_produce)) +
  geom_point() +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds") +
  facet_wrap(~sound)

Initial (uncolored) plot

# All sounds in a single panel, with no colour mapping yet.
ggplot(sounds, aes(x = age, y = prop_produce)) +
  geom_point() +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds")

## Challenge #2: Mapping at the Geom Level

# Base scatter plot; colour is mapped to sound inside geom_point() only, so
# layers added later do not inherit it.
plot <- ggplot(sounds, aes(x = age, y = prop_produce)) +
  geom_point(aes(color = sound)) +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds")
plot

Challenge #3: geom_line()

plot + geom_line()

#?help_geom()

Challenge #4: Combining Points and Lines

# One line per sound in the default line colour, over the coloured points.
plot +
  geom_line(aes(group = sound))

# One line per sound, coloured to match the points.
plot +
  geom_line(aes(group = sound, color = sound))

# Smoothed trend per sound, without the confidence ribbon.
plot +
  geom_smooth(aes(group = sound, color = sound), se = FALSE, lwd = 0.75)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## Challenge #5: Fill and Color Aesthetics

library(forcats)
# Inspect the level order fct_reorder2() assigns to sound when reordering by
# age (x) and prop_produce (y).
levels(
  fct_reorder2(as.factor(sounds$sound), sounds$age, sounds$prop_produce)
)
## [1] "woof woof"      "meow"           "cockadoodledoo"
# Smoothed trend per sound on top of the base plot; the colour mapping uses the
# fct_reorder2()-reordered sound factor.
sound_plot <- plot +
  geom_smooth(
    aes(group = sound, color = fct_reorder2(sound, age, prop_produce)),
    se = FALSE,
    lwd = 0.75
  )
sound_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

sound_plot +
  scale_color_hue(h = c(0, 90), l = 65, c = 100)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

sound_plot +
  scale_color_hue( l = 45)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

sound_plot +
  scale_color_hue(h = c(10, 90), l = 90, c= 90)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

sound_plot +
  scale_color_manual(values = c("cornflowerblue", 
                                "seagreen", "coral"))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

The code blocks are not changing the colors because you are using scale_fill_manual instead of scale_color_manual.

## Challenge #6: Combining Aesthetic Encodings

# Filled points (shape 21) plus smoothed lines, with fill and colour both
# mapped to the reordered sound factor.
ggplot(
  sounds,
  aes(x = age, y = prop_produce, fill = fct_reorder2(sound, age, prop_produce))
) +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds") +
  geom_smooth(aes(color = fct_reorder2(sound, age, prop_produce)), se = FALSE) +
  geom_point(shape = 21)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Manual three-colour palette applied to the colour scale.
color <- c("pink", "skyblue", "lavender")
sound_plot +
  scale_color_manual(values = color)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## Challenge #7: Custom Color Palettes

# Custom palette given as hex codes.
color_new <- c("#800000", "#f032e6", "#e6194B")
sound_plot +
  scale_colour_manual(values = color_new)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

library(RColorBrewer)
sound_plot + scale_color_brewer(palette = 'Dark2')
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## Challenge #8: Viridis

sound_plot + scale_color_viridis_d()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

sound_plot + scale_color_viridis_d() + geom_point(shape = 21, color = "midnightblue") + theme_minimal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

## Challenge #9: Branching Out on Your Own

### Part 1: Good and careful color!

library(babynames) # babynames data 
library(ghibli)
## Registered S3 method overwritten by 'ghibli':
##   method        from   
##   print.palette beyonce
# Keep the female records for five Slavic spellings of Katherine.
slav_kat <- babynames %>%
  filter(
    name %in% c("Katarina", "Kateryna", "Katarzyna", "Ekaterina", "Katerina"),
    sex == "F"
  )
glimpse(slav_kat)
## Rows: 199
## Columns: 5
## $ year <dbl> 1954, 1958, 1958, 1959, 1960, 1961, 1962, 1962, 1963, 1963, 1964,…
## $ sex  <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", …
## $ name <chr> "Katerina", "Katarina", "Katerina", "Katerina", "Katerina", "Kate…
## $ n    <int> 5, 5, 5, 5, 5, 13, 14, 11, 11, 5, 17, 9, 18, 6, 22, 13, 22, 25, 1…
## $ prop <dbl> 2.510e-06, 2.420e-06, 2.420e-06, 2.410e-06, 2.400e-06, 6.260e-06,…
custom_order <- c("Katarina", "Katerina", "Ekaterina", "Kateryna", "Katarzyna")
slav_kat$name <- factor(slav_kat$name, levels = custom_order)

# Line chart of each spelling's yearly proportion, one coloured line per name,
# using the viridis discrete palette.
name.plot <- ggplot(
  slav_kat,
  aes(x = year, y = prop, group = name, color = name)
) +
  # linewidth replaces the size aesthetic for lines, which was deprecated in
  # ggplot2 3.4.0 and triggers a lifecycle warning.
  geom_line(linewidth = 1) +
  labs(
    x = "Year",
    y = "Proportion of Total Applications Per Year",
    color = "Names"
  ) +
  ggtitle("Variations on a Theme: Slavic Forms of Katherine") +
  scale_color_viridis_d() +
  theme_minimal()
name.plot

#check color-blindness
library(colorblindr)
cvd_grid(name.plot)


I wanted to plot how the popularity of Slavic forms of Katherine changed over time in the US. Growing up, I always wondered why people would lean towards the Katerina spelling instead of Katarina, and now I see that after my birth year (2001), the popularity of Katarina and that of Katerina are similar. What’s interesting is that before my birth there was quite the boom for Katarinas. I wonder if it will make a comeback…

Part 2: Greyscale!

library(babynames) # babynames data 
library(ghibli)

# Keep the female records for five Slavic spellings of Katherine.
slav_kat <- babynames %>%
  filter(
    name %in% c("Katarina", "Kateryna", "Katarzyna", "Ekaterina", "Katerina"),
    sex == "F"
  )
glimpse(slav_kat)
## Rows: 199
## Columns: 5
## $ year <dbl> 1954, 1958, 1958, 1959, 1960, 1961, 1962, 1962, 1963, 1963, 1964,…
## $ sex  <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", …
## $ name <chr> "Katerina", "Katarina", "Katerina", "Katerina", "Katerina", "Kate…
## $ n    <int> 5, 5, 5, 5, 5, 13, 14, 11, 11, 5, 17, 9, 18, 6, 22, 13, 22, 25, 1…
## $ prop <dbl> 2.510e-06, 2.420e-06, 2.420e-06, 2.410e-06, 2.400e-06, 6.260e-06,…
custom_order <- c("Katarina", "Katerina", "Ekaterina", "Kateryna", "Katarzyna")
slav_kat$name <- factor(slav_kat$name, levels = custom_order)

# Line chart of each spelling's yearly proportion, one line per name, drawn
# with the greyscale discrete palette.
name.plot <- ggplot(
  slav_kat,
  aes(x = year, y = prop, group = name, color = name)
) +
  # linewidth replaces the size aesthetic for lines, which was deprecated in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  labs(
    x = "Year",
    y = "Proportion of Total Applications Per Year",
    color = "Names"
  ) +
  ggtitle("Variations on a Theme: Slavic Forms of Katherine") +
  scale_color_grey() +
  theme_minimal()

name.plot

### Part 3: Bad and careful color!

library(babynames) # babynames data 
library(ghibli)

# Keep the female records for five Slavic spellings of Katherine.
slav_kat <- babynames %>%
  filter(
    name %in% c("Katarina", "Kateryna", "Katarzyna", "Ekaterina", "Katerina"),
    sex == "F"
  )
glimpse(slav_kat)
## Rows: 199
## Columns: 5
## $ year <dbl> 1954, 1958, 1958, 1959, 1960, 1961, 1962, 1962, 1963, 1963, 1964,…
## $ sex  <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", …
## $ name <chr> "Katerina", "Katarina", "Katerina", "Katerina", "Katerina", "Kate…
## $ n    <int> 5, 5, 5, 5, 5, 13, 14, 11, 11, 5, 17, 9, 18, 6, 22, 13, 22, 25, 1…
## $ prop <dbl> 2.510e-06, 2.420e-06, 2.420e-06, 2.410e-06, 2.400e-06, 6.260e-06,…
custom_order <- c("Katarina", "Katerina", "Ekaterina", "Kateryna", "Katarzyna")
slav_kat$name <- factor(slav_kat$name, levels = custom_order)

# Line chart of each spelling's yearly proportion, one line per name, drawn
# with the reversed "MarnieDark1" ghibli palette.
name.plot <- ggplot(
  slav_kat,
  aes(x = year, y = prop, group = name, color = name)
) +
  # linewidth replaces the size aesthetic for lines, which was deprecated in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  labs(
    x = "Year",
    y = "Proportion of Total Applications Per Year",
    color = "Names"
  ) +
  ggtitle("Variations on a Theme: Slavic Forms of Katherine") +
  scale_color_ghibli_d("MarnieDark1", direction = -1) +
  theme_minimal()

name.plot

library(colorblindr)
cvd_grid(name.plot)


The overall dark color palette, though pretty, makes it hard to distinguish one line from the others. It is especially rough in greyscale, and I bet it will look wonky on other monitors.