Part 1: Tidying
# Read the broadband-subscriptions workbook and standardise its column
# names with janitor::clean_names(), then inspect the column types.
df <- janitor::clean_names(
  read_excel("gapminder_broadband_per_100.xlsx")
)
glimpse(df)
## Rows: 213
## Columns: 15
## $ fixed_broadband_internet_subscribers_per_100_people <chr> "Afghanistan", "Al…
## $ x1998 <dbl> NA, NA, NA, NA, NA…
## $ x1999 <dbl> NA, NA, NA, NA, NA…
## $ x2000 <dbl> NA, NA, NA, NA, NA…
## $ x2001 <dbl> 0.0000000000, 0.00…
## $ x2002 <dbl> 0.0000000000, 0.00…
## $ x2003 <dbl> 0.000000e+00, 0.00…
## $ x2004 <dbl> 6.880265e-04, 0.00…
## $ x2005 <dbl> 7.356639e-04, 8.65…
## $ x2006 <dbl> 0.001625928, NA, 0…
## $ x2007 <dbl> 0.001581161, 0.315…
## $ x2008 <dbl> 0.001537626, 2.011…
## $ x2009 <dbl> 0.00299058, 2.8815…
## $ x2010 <dbl> 0.004362367, 3.292…
## $ x2011 <lgl> NA, NA, NA, NA, NA…
# Reshape the wide table (one column per year, x1998 ... x2011) into long
# format with one row per country-year pair. The "x" prefix on the year
# columns is stripped and the remainder is parsed as an integer year; the
# measurements land in the default `value` column.
tidy_df <- df %>%
  pivot_longer(
    cols = x1998:x2011,
    names_to = "year",
    names_prefix = "x",
    names_transform = list(year = as.integer)
  ) %>%
  # The first column is labelled with the indicator name but actually holds
  # the country, so give it a meaningful name. rename() fails loudly if the
  # column is missing instead of silently leaving the table unchanged.
  rename(country = fixed_broadband_internet_subscribers_per_100_people)
glimpse(tidy_df)
## Rows: 2,982
## Columns: 3
## $ country <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "A…
## $ year <int> 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 20…
## $ value <dbl> NA, NA, NA, 0.0000000000, 0.0000000000, 0.0000000000, 0.000688…
# Preview the first six rows of the long-format table.
head(tidy_df, n = 6L)
## # A tibble: 6 × 3
## country year value
## <chr> <int> <dbl>
## 1 Afghanistan 1998 NA
## 2 Afghanistan 1999 NA
## 3 Afghanistan 2000 NA
## 4 Afghanistan 2001 0
## 5 Afghanistan 2002 0
## 6 Afghanistan 2003 0
Min vs. Max GDP per Capita for Each Continent
Table
library(gapminder)
# Restrict to the 2007 observations, then compute the lowest and highest
# GDP per capita within each continent (one output row per continent).
min_max <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    min_value = min(gdpPercap, na.rm = TRUE),
    max_value = max(gdpPercap, na.rm = TRUE)
  )
glimpse(min_max)
## Rows: 5
## Columns: 3
## $ continent <fct> Africa, Americas, Asia, Europe, Oceania
## $ min_value <dbl> 277.5519, 1201.6372, 944.0000, 5937.0295, 25185.0091
## $ max_value <dbl> 13206.48, 42951.65, 47306.99, 49357.19, 34435.37
# Show the per-continent summary (at most the first six rows).
head(min_max, n = 6L)
## # A tibble: 5 × 3
## continent min_value max_value
## <fct> <dbl> <dbl>
## 1 Africa 278. 13206.
## 2 Americas 1202. 42952.
## 3 Asia 944 47307.
## 4 Europe 5937. 49357.
## 5 Oceania 25185. 34435.
# Round every numeric column to two decimal places; the continent factor is
# not numeric and is left untouched. Note that the printed tibble shows the
# same abbreviated figures before and after rounding.
rounded_minmax <- mutate(
  min_max,
  across(where(is.numeric), function(col) round(col, digits = 2))
)
rounded_minmax
## # A tibble: 5 × 3
## continent min_value max_value
## <fct> <dbl> <dbl>
## 1 Africa 278. 13206.
## 2 Americas 1202. 42952.
## 3 Asia 944 47307.
## 4 Europe 5937. 49357.
## 5 Oceania 25185. 34435.
Companion Graph
# Stack the min and max columns into long format for plotting: one row per
# continent and statistic, with the statistic name in `min_max_col` and the
# figure in `value`.
tidy_minmax <- pivot_longer(
  rounded_minmax,
  cols = c(min_value, max_value),
  names_to = "min_max_col",
  values_to = "value"
)
head(tidy_minmax)
## # A tibble: 6 × 3
## continent min_max_col value
## <fct> <chr> <dbl>
## 1 Africa min_value 278.
## 2 Africa max_value 13206.
## 3 Americas min_value 1202.
## 4 Americas max_value 42952.
## 5 Asia min_value 944
## 6 Asia max_value 47307.
# Horizontal bar chart comparing the 2007 minimum and maximum GDP per capita
# of each continent, with the figure printed just past the end of each bar.
#
# The bars are dodged rather than stacked (geom_col()'s default position):
# stacking would draw each continent's bar with length min + max while the
# labels sit at the individual values. The labels use the same dodge so they
# line up with their bars. `value` is mapped through aes() instead of
# `tidy_minmax$value`, which ggplot2 warns against.
plot <- ggplot(
  tidy_minmax,
  aes(x = value, y = continent, fill = min_max_col)
) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_text(
    aes(label = value),
    position = position_dodge(width = 0.9),
    hjust = -0.1, # all values are non-negative: always label to the right
    size = 2
  ) +
  # Upper limit leaves room to the right of the longest bar for its label.
  scale_x_continuous(limits = c(0, 70000)) +
  scale_fill_manual(
    name = "GDP",
    # Fill levels sort alphabetically: "max_value" comes before "min_value".
    labels = c("Maximum", "Minimum"),
    values = c("lavender", "cadetblue")
  ) +
  labs(
    x = "GDP per capita",
    y = "Continent",
    title = "Min and Max GDP per Capita per Continent in the Year 2007"
  ) +
  theme_minimal()
plot
## Warning: Use of `tidy_minmax$value` is discouraged.
## ℹ Use `value` instead.
