USA Population Demographics
Graphs of population demographics in USA using Census Bureau data
Data
CDC age & sex tables
Prepare Data
# devtools::install_github("derekmichaelwright/agData")
library(agData)
library(readxl)
# Prep data
<- "www.dblogr.com/ or derekmichaelwright.github.io/dblogr/ | Data: USCB"
myCaption <- c("steelblue", "palevioletred3")
myColorsMF <- c("0 to 4 years", "5 to 9 years", "10 to 14 years",
myAges "15 to 19 years", "20 to 24 years", "25 to 29 years",
"30 to 34 years", "35 to 39 years", "40 to 44 years",
"45 to 49 years", "50 to 54 years", "55 to 59 years",
"60 to 64 years", "65 to 69 years", "70 to 74 years",
"75 to 79 years", "80 to 84 years", "85 years and over")
#
<- function(xx, myYear) {
fixSheet colnames(xx) <- c("Age", "Both sexes", "Both sexes - Percent",
"Males", "Males - Percent",
"Females", "Females - Percent")
<- xx %>% select(Age, `Both sexes`, Males, Females) %>%
xx mutate(Age = gsub("\\.", "", Age),
Year = myYear) %>%
select(Year, everything()) %>%
gather(Sex, Population, 3:ncol(.)) %>%
mutate(Population = 1000 * Population)
xx
}<- read_xlsx("data_usa_population.xlsx", "2007", range = "A7:G25")
xx <- bind_rows(
p1 read_xlsx("data_usa_population.xlsx", "2007", range = "A7:G25") %>% fixSheet(myYear = 2007),
read_xlsx("data_usa_population.xlsx", "2008", range = "A7:G25") %>% fixSheet(myYear = 2008),
read_xlsx("data_usa_population.xlsx", "2009", range = "A7:G25") %>% fixSheet(myYear = 2009),
read_xlsx("data_usa_population.xlsx", "2010", range = "A7:G25") %>% fixSheet(myYear = 2010),
read_xlsx("data_usa_population.xlsx", "2011", range = "A7:G25") %>% fixSheet(myYear = 2011),
read_xlsx("data_usa_population.xlsx", "2012", range = "A7:G25") %>% fixSheet(myYear = 2012),
read_xlsx("data_usa_population.xlsx", "2013", range = "A7:G25") %>% fixSheet(myYear = 2013),
read_xlsx("data_usa_population.xlsx", "2014", range = "A7:G25") %>% fixSheet(myYear = 2014),
read_xlsx("data_usa_population.xlsx", "2015", range = "A7:G25") %>% fixSheet(myYear = 2015),
read_xlsx("data_usa_population.xlsx", "2016", range = "A7:G25") %>% fixSheet(myYear = 2016),
read_xlsx("data_usa_population.xlsx", "2017", range = "A7:G25") %>% fixSheet(myYear = 2017),
read_xlsx("data_usa_population.xlsx", "2018", range = "A7:G25") %>% fixSheet(myYear = 2018),
read_xlsx("data_usa_population.xlsx", "2019", range = "A7:G25") %>% fixSheet(myYear = 2019),
read_xlsx("data_usa_population.xlsx", "2020", range = "A7:G25") %>% fixSheet(myYear = 2020),
read_xlsx("data_usa_population.xlsx", "2021", range = "A7:G25") %>% fixSheet(myYear = 2021) ) %>%
mutate(Age = factor(Age, levels = myAges),
Sex = factor(Sex, levels = c("Both sexes", "Males", "Females")))
#
<- bind_rows(read.csv("nation-populationpyramid-2000-2010-2020.csv"),
p2 read.csv("state-populationpyramid-2000-2010-2020.csv") )
Population Pyramid 2021
# Prep data
<- p1 %>%
xx filter(Year == 2021, Age != "Median age", Sex != "Both sexes")
<- xx %>% spread(Sex, Population) %>%
yy mutate(Population = Females - Males,
Sex = ifelse(Population < 0, "Males", "Females"))
<- xx %>%
xx mutate(Population = ifelse(Sex == "Males", -Population, Population))
# Plot
<- ggplot(xx, aes(y = Population / 1000000, x = Age, fill = Sex)) +
mp geom_col(color = "black", alpha = 0.7) +
geom_col(data = yy, color = "black", alpha = 0.7) +
scale_fill_manual(name = NULL, values = myColorsMF) +
facet_grid(. ~ Year) +
theme_agData(legend.position = "bottom") +
labs(title = "Population In The United States", x = NULL,
y = "Million People", caption = myCaption) +
coord_cartesian(ylim = c(-max(xx$Population), max(xx$Population))) +
coord_flip()
ggsave("usa_population_01.png", mp, width = 6, height = 4)
Population Pyramid 2007 - 2021
# Prep data
<- p1 %>%
xx filter(Year %in% c(2007, 2021),
!= "Median age", Sex != "Both sexes")
Age <- xx %>% spread(Sex, Population) %>%
yy mutate(Population = Females - Males,
Sex = ifelse(Population < 0, "Males", "Females"))
<- xx %>%
xx mutate(Population = ifelse(Sex == "Males", -Population, Population))
# Plot
<- ggplot(xx, aes(y = Population / 1000000, x = Age, fill = Sex)) +
mp geom_col(color = "black", alpha = 0.7) +
geom_col(data = yy, color = "black", alpha = 0.7) +
scale_fill_manual(name = NULL, values = myColorsMF) +
facet_grid(. ~ Year) +
theme_agData(legend.position = "bottom") +
labs(title = "Population Change in The United States", x = NULL,
y = "Million People", caption = myCaption) +
coord_cartesian(ylim = c(-max(xx$Population), max(xx$Population))) +
coord_flip()
ggsave("usa_population_02.png", mp, width = 8, height = 4)
Dual Year Population Pyramid 2007 - 2021
# Prep data
<- p1 %>% filter(Year %in% c(2007,2021), Sex == "Both sexes")
xx <- xx %>% spread(Year, Population) %>%
yy mutate(Population = `2021` - `2007`) %>%
mutate(Year = ifelse(Population < 0, 2007, 2021),
Year = factor(Year))
<- xx %>%
xx mutate(Population = ifelse(Year == 2007, -Population, Population),
Year = factor(Year))
# Plot
<- ggplot(xx, aes(y = Population / 1000000, x = Age, fill = Year)) +
mp geom_col(color = "black", alpha = 0.7) +
geom_col(data = yy, color = "black", alpha = 0.7) +
scale_fill_manual(name = NULL, values = c("darkgreen","purple4")) +
theme_agData(legend.position = "bottom") +
labs(title = "Population Change in The United States", x = NULL,
y = "Million People", caption = myCaption) +
coord_cartesian(ylim = c(-max(xx$Population), max(xx$Population))) +
coord_flip()
ggsave("usa_population_03.png", width = 6, height = 4)
Median Age By Race
<- c("White alone, not Hispanic or Latino",
myRaces1 "Asian alone, not Hispanic or Latino",
"Black or African American alone, not Hispanic or Latino",
"American Indian and Alaska Native alone, not Hispanic or Latino",
"Hispanic or Latino")
<- c("White", "Asian", "Black", "Indigenous", "Hispanic")
myRaces2 <- c("darkgreen", "darkblue", "black", "darkred", "darkorange")
myColors #
<- p2 %>% filter(Group %in% myRaces1, Nation == "United States") %>%
xx mutate(Group = plyr::mapvalues(Group, myRaces1, myRaces2),
Group = factor(Group, levels = myRaces2))
#
<- ggplot(xx, aes(y = Median.age, x = Year, color = Group)) +
mp geom_line() + geom_point() +
scale_color_manual(name = NULL, values = myColors) +
guides(color = guide_legend(ncol = 1)) +
theme_agData() +
labs(title = "Population Change in The United States",
y = "Median Age", x = NULL, caption = myCaption)
ggsave("usa_population_04.png", width = 6, height = 4)
Bonus - Rural Urban Voting
# Prep data
<- read.csv("rural_urban.csv") %>%
xx gather(Type, Value, Rural, Urban) %>%
mutate(Value = 100 * Value)
# Plot
<- ggplot(xx, aes(x = Year, y = Value, color = Type)) +
mp geom_line(size = 2) +
geom_label(aes(label = Value), size = 2.5, label.padding = unit(0.1, "lines")) +
scale_color_manual(name = NULL, values = c("darkred", "darkblue")) +
scale_x_continuous(breaks = unique(xx$Year), minor_breaks = unique(xx$Year)) +
theme_agData(legend.position = "bottom") +
labs(x = "Year (Election)",
y = "Percent of Votes Going to\nRepublicans Candidate (County-Level)",
caption = "www.dblogr.com/ or derekmichaelwright.github.io/dblogr/ | Data: Brown & Mettler (2023)")
ggsave("usa_population_05.png", width = 7, height = 4)