We’ll be working with
data/animal_sounds_summary.csv
.
# Load the animal-sounds summary table and inspect its columns.
sounds <- read.csv(file = "data/animal_sounds_summary.csv")
sounds %>% glimpse()
## Rows: 33
## Columns: 7
## $ age <int> 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, …
## $ sound <chr> "cockadoodledoo", "meow", "woof woof", "cockadoodledoo…
## $ kids_produce <int> 1, 0, 3, 0, 2, 2, 0, 5, 4, 0, 5, 12, 0, 12, 28, 9, 125…
## $ kids_understand <int> 3, 10, 12, 2, 21, 22, 9, 41, 40, 4, 36, 32, 16, 59, 59…
## $ kids_respond <int> 35, 35, 35, 91, 93, 93, 139, 145, 143, 94, 94, 94, 141…
## $ prop_produce <dbl> 0.02857143, 0.00000000, 0.08571429, 0.00000000, 0.0215…
## $ prop_understand <dbl> 0.08571429, 0.28571429, 0.34285714, 0.02197802, 0.2258…
I will treat age, kids_understand, kids_respond, prop_produce, and prop_understand as continuous.
### Which are categorical and ordinal?
Sound is categorical (no meaningful order).
# Sum the per-row respondent counts into a single total.
total_kids <- sum(sounds$kids_respond)
total_kids
## [1] 7226
# List the distinct ages present in the data.
sounds$age %>% unique()
## [1] 8 9 10 11 12 13 14 15 16 17 18
# Count the rows recorded at each age (the result stays grouped by age).
sounds %>%
  group_by(age) %>%
  count(age)
## # A tibble: 11 × 2
## # Groups: age [11]
## age n
## <int> <int>
## 1 8 3
## 2 9 3
## 3 10 3
## 4 11 3
## 5 12 3
## 6 13 3
## 7 14 3
## 8 15 3
## 9 16 3
## 10 17 3
## 11 18 3
# Collect the distinct sound labels and report how many there are.
uni <- sounds$sound %>% unique()
uni
## [1] "cockadoodledoo" "meow" "woof woof"
uni %>% length()
## [1] 3
# Number of kids producing each sound, summed over all ages.
sounds %>%
  group_by(sound) %>%
  summarize(sum(kids_produce))
## # A tibble: 3 × 2
## sound `sum(kids_produce)`
## <chr> <int>
## 1 cockadoodledoo 148
## 2 meow 681
## 3 woof woof 940
# Per-sound production totals rendered as a table.
sounds %>%
  group_by(sound) %>%
  summarise(total_produce = sum(kids_produce)) %>%
  knitr::kable()
sound | total_produce |
---|---|
cockadoodledoo | 148 |
meow | 681 |
woof woof | 940 |
# Bar plot of the number of kids producing each sound.
# geom_col() stacks the per-age rows, so each bar's height is the sum of
# kids_produce for that sound.
ggplot(sounds, aes(x = sound, y = kids_produce)) +
  geom_col() +
  # Name the x argument: a quoted "x = Sound" is just an unnamed string, so
  # the x-axis label was never set.
  labs(x = "Sound", y = "Total Kids")
# Proportion of kids producing each sound by age, one panel per sound.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  geom_col() +
  facet_wrap(~sound) +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds")
### Scatter plot
# Same relationship drawn as points: first one panel per sound, then all
# sounds pooled in a single panel.
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  geom_point() +
  facet_wrap(~sound) +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds")
ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  geom_point() +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds")
## Challenge #2: Mapping at the Geom Level
# Base scatter plot; colour is mapped to sound on the point layer only.
plot <- ggplot(data = sounds, mapping = aes(x = age, y = prop_produce)) +
  geom_point(mapping = aes(color = sound)) +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds")
plot
# A bare layer call that is not added to any plot.
geom_line()
# Lines with no grouping, then grouped by sound, then grouped and coloured.
plot + geom_line()
#?help_geom()
plot + geom_line(mapping = aes(group = sound))
plot + geom_line(mapping = aes(group = sound, color = sound))
# Smoothed trend per sound, with the standard-error band switched off.
plot +
  geom_smooth(
    mapping = aes(group = sound, color = sound),
    se = FALSE,
    lwd = 0.75
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Challenge #5: Fill and Color Aesthetics
library(forcats)
# Show the level order fct_reorder2() derives from age and prop_produce.
levels(
  fct_reorder2(as.factor(sounds$sound), sounds$age, sounds$prop_produce)
)
## [1] "woof woof" "meow" "cockadoodledoo"
# Smoothed lines coloured by sound, with the factor reordered by
# fct_reorder2() on age and prop_produce.
sound_plot <- plot +
  geom_smooth(
    mapping = aes(
      group = sound,
      color = fct_reorder2(sound, age, prop_produce)
    ),
    se = FALSE,
    lwd = 0.75
  )
sound_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Hue-based colour scales with different h, l and c settings.
sound_plot + scale_color_hue(h = c(0, 90), l = 65, c = 100)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
sound_plot + scale_color_hue(l = 45)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
sound_plot + scale_color_hue(h = c(10, 90), l = 90, c = 90)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Hand-picked named colours.
sound_plot +
  scale_color_manual(values = c("cornflowerblue", "seagreen", "coral"))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
The code blocks are not changing the colors because you are using
`scale_fill_manual()` instead of `scale_color_manual()`: the lines are mapped to the `color` aesthetic, so a fill scale has nothing to act on.
## Challenge #6: Combining Aesthetic Encodings
# Smoothed lines coloured by the reordered sound factor, with shape-21 points
# on top whose fill is mapped to the same factor.
ggplot(
  data = sounds,
  mapping = aes(
    x = age,
    y = prop_produce,
    fill = fct_reorder2(sound, age, prop_produce)
  )
) +
  labs(x = "Age", y = "Proportion of Kids Producing Sounds") +
  geom_smooth(
    mapping = aes(color = fct_reorder2(sound, age, prop_produce)),
    se = FALSE
  ) +
  geom_point(shape = 21)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Manual palette given as named colours.
color <- c("pink", "skyblue", "lavender")
sound_plot + scale_color_manual(values = color)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Challenge #7: Custom Color Palettes
# Manual palette given as hex codes.
color_new <- c("#800000", "#f032e6", "#e6194B")
sound_plot + scale_color_manual(values = color_new)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
library(RColorBrewer)
# ColorBrewer "Dark2" palette.
sound_plot + scale_color_brewer(palette = "Dark2")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Challenge #8: Viridis
# Discrete viridis colour scale.
sound_plot +
  scale_color_viridis_d()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Same scale, plus an extra shape-21 point layer and a minimal theme.
sound_plot +
  scale_color_viridis_d() +
  geom_point(shape = 21, color = "midnightblue") +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Challenge #9: Branching Out on Your Own
### Part 1: Good and careful color!
library(babynames) # babynames data
library(ghibli)
## Registered S3 method overwritten by 'ghibli':
## method from
## print.palette beyonce
# Keep the female records for five Slavic spellings of Katherine.
slav_kat <- babynames %>%
  filter(
    name %in% c("Katarina", "Kateryna", "Katarzyna", "Ekaterina", "Katerina"),
    sex == "F"
  )
glimpse(slav_kat)
## Rows: 199
## Columns: 5
## $ year <dbl> 1954, 1958, 1958, 1959, 1960, 1961, 1962, 1962, 1963, 1963, 1964,…
## $ sex <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", …
## $ name <chr> "Katerina", "Katarina", "Katerina", "Katerina", "Katerina", "Kate…
## $ n <int> 5, 5, 5, 5, 5, 13, 14, 11, 11, 5, 17, 9, 18, 6, 22, 13, 22, 25, 1…
## $ prop <dbl> 2.510e-06, 2.420e-06, 2.420e-06, 2.410e-06, 2.400e-06, 6.260e-06,…
# Turn name into a factor with an explicit level order.
custom_order <- c("Katarina", "Katerina", "Ekaterina", "Kateryna", "Katarzyna")
slav_kat$name <- factor(slav_kat$name, levels = custom_order)
# Yearly proportion for each spelling, one line per name, viridis colours.
name.plot <- ggplot(
  slav_kat,
  aes(x = year, y = prop, group = name, color = name)
) +
  # linewidth replaces size for lines; size was deprecated for lines in
  # ggplot2 3.4.0 and triggered a lifecycle warning.
  geom_line(linewidth = 1) +
  labs(
    x = "Year",
    y = "Proportion of Total Applications Per Year",
    color = "Names"
  ) +
  ggtitle("Variations on a Theme: Slavic Forms of Katherine") +
  scale_color_viridis_d() +
  theme_minimal()
name.plot
# Check the plot under simulated colour-vision deficiencies.
library(colorblindr)
cvd_grid(name.plot)
I wanted to plot how the popularity of Slavic forms of
Katherine changed over time in the US. Growing up, I always wondered why
people would lean towards the Katerina spelling instead of Katarina, and
now I see that after my birth year (2001), the popularity of Katarina
and Katerina is similar. What’s interesting is that before my birth
there was quite the boom for Katarinas. I wonder if it will make a
comeback…
library(babynames) # babynames data
library(ghibli)
# Keep the female records for five Slavic spellings of Katherine.
slav_kat <- babynames %>%
  filter(
    name %in% c("Katarina", "Kateryna", "Katarzyna", "Ekaterina", "Katerina"),
    sex == "F"
  )
glimpse(slav_kat)
## Rows: 199
## Columns: 5
## $ year <dbl> 1954, 1958, 1958, 1959, 1960, 1961, 1962, 1962, 1963, 1963, 1964,…
## $ sex <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", …
## $ name <chr> "Katerina", "Katarina", "Katerina", "Katerina", "Katerina", "Kate…
## $ n <int> 5, 5, 5, 5, 5, 13, 14, 11, 11, 5, 17, 9, 18, 6, 22, 13, 22, 25, 1…
## $ prop <dbl> 2.510e-06, 2.420e-06, 2.420e-06, 2.410e-06, 2.400e-06, 6.260e-06,…
# Turn name into a factor with an explicit level order.
custom_order <- c("Katarina", "Katerina", "Ekaterina", "Kateryna", "Katarzyna")
slav_kat$name <- factor(slav_kat$name, levels = custom_order)
# Yearly proportion for each spelling, one line per name, greyscale colours.
name.plot <- ggplot(
  slav_kat,
  aes(x = year, y = prop, group = name, color = name)
) +
  # linewidth replaces size for lines; size was deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  labs(
    x = "Year",
    y = "Proportion of Total Applications Per Year",
    color = "Names"
  ) +
  ggtitle("Variations on a Theme: Slavic Forms of Katherine") +
  scale_color_grey() +
  theme_minimal()
name.plot
### Part 3: Bad and careful color!
library(babynames) # babynames data
library(ghibli)
# Keep the female records for five Slavic spellings of Katherine.
slav_kat <- babynames %>%
  filter(
    name %in% c("Katarina", "Kateryna", "Katarzyna", "Ekaterina", "Katerina"),
    sex == "F"
  )
glimpse(slav_kat)
## Rows: 199
## Columns: 5
## $ year <dbl> 1954, 1958, 1958, 1959, 1960, 1961, 1962, 1962, 1963, 1963, 1964,…
## $ sex <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", …
## $ name <chr> "Katerina", "Katarina", "Katerina", "Katerina", "Katerina", "Kate…
## $ n <int> 5, 5, 5, 5, 5, 13, 14, 11, 11, 5, 17, 9, 18, 6, 22, 13, 22, 25, 1…
## $ prop <dbl> 2.510e-06, 2.420e-06, 2.420e-06, 2.410e-06, 2.400e-06, 6.260e-06,…
# Turn name into a factor with an explicit level order.
custom_order <- c("Katarina", "Katerina", "Ekaterina", "Kateryna", "Katarzyna")
slav_kat$name <- factor(slav_kat$name, levels = custom_order)
# Yearly proportion for each spelling, one line per name, drawn with the
# reversed "MarnieDark1" ghibli palette.
name.plot <- ggplot(
  slav_kat,
  aes(x = year, y = prop, group = name, color = name)
) +
  # linewidth replaces size for lines; size was deprecated for lines in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  labs(
    x = "Year",
    y = "Proportion of Total Applications Per Year",
    color = "Names"
  ) +
  ggtitle("Variations on a Theme: Slavic Forms of Katherine") +
  scale_color_ghibli_d("MarnieDark1", direction = -1) +
  theme_minimal()
name.plot
# Check the plot under simulated colour-vision deficiencies.
library(colorblindr)
cvd_grid(name.plot)
The overall dark color palette, though pretty, makes it hard to
distinguish one line from the others. It is especially rough in
greyscale, and I would bet that on other monitors it will look wonky.