Week Eight of Data Science for the Public Good

It is hustle time!

Week Eight
R
Demographic Analysis
Demographic Profile
Author

Kailyn Hogan

Published

July 7, 2023

Demographic Profile

I began editing the Demographic Profile I made two weeks ago this week. I went back and added comments to my code, added regional and national context to the plots, and started converting some of the data into percentages. Below is the work I completed.

Population Plots

library(tidyverse)
library(tidycensus)
library(ggthemes)
library(scales)


### The first thing we want to visualize is population change.

# 1. Population Change                                                          

# Define the variables before getting the data.
# The Census API code for total population for 2000 and 2010 is P001001.
pop00 <- c("pop" = "P001001")
pop10 <- c("pop" = "P001001")
# The API code for total population changed for 2020.
# The Census API code for total population is P1_001N for 2020. 
pop20 <- c("pop" = "P1_001N")

###
## National Context: nationPop
# Getting 2000 total population data for the USA.
nation00 <- get_decennial(geography = "us",
                          year = 2000,
                          variable = pop00,
                          output = "wide") %>% 
  mutate(year = 2000)
# Getting 2010 total population data for the USA.
nation10 <- get_decennial(geography = "us",
                          year = 2010,
                          variable = pop10,
                          output = "wide") %>% 
  mutate(year = 2010)
# Getting 2020 total population data for the USA.
nation20 <- get_decennial(geography = "us",
                          year = 2020,
                          variable = pop20,
                          output = "wide") %>% 
  mutate(year = 2020)
# Bind the years together using bind_rows to create data frame for national context. 
nationPop <- nation20 %>% 
  bind_rows(nation10,nation00) %>% 
  mutate(geography = "Nation") 


###
## Regional Context: regionPop
# Getting 2000 total population data from the Midwest.
region00 <- get_decennial(geography = "region",
                          year = 2000,
                          variable = pop00,
                          output = "wide") %>% 
  mutate(year = 2000)
# Getting 2010 total population data for the Midwest.
region10 <- get_decennial(geography = "region",
                          year = 2010,
                          variable = pop10,
                          output = "wide") %>% 
  mutate(year = 2010)
# Getting 2020 total population data for the Midwest.
region20 <- get_decennial(geography = "region",
                          year = 2020,
                          variable = pop20,
                          output = "wide") %>% 
  mutate(year = 2020)
# Bind the years together using bind_rows to create data frame for regional context. 
# Filter to keep just the Midwest Region. 
regionPop <- region20 %>% 
  bind_rows(region10,region00) %>% 
  filter(NAME == "Midwest Region") %>% 
  mutate(NAME = str_remove(NAME, " Region")) %>% # Use str_remove() to remove " Region" from Midwest. 
  mutate(geography = "Region")


###
## State Context: statePop
# Getting the 2000 total population data for the state of Iowa from the Decennial Census.
iowa00 <- get_decennial(geography = "state",
                        state = "IA",
                        year = 2000,
                        output = "wide",
                        variable = pop00) %>% 
  mutate(year = 2000)
# Getting the 2010 total population data for the state of Iowa from the Decennial Census.
iowa10 <- get_decennial(geography = "state",
                        state = "IA",
                        variable = pop10,
                        year = 2010,
                        output = "wide") %>% 
  mutate(year = 2010)
# Getting the 2020 total population data for the state of Iowa from the Decennial Census.
iowa20 <- get_decennial(geography = "state",
                        state = "IA",
                        variable = pop20,
                        year = 2020,
                        output = "wide") %>% 
  mutate(year = 2020)

# Bind the years together using bind_rows to create data frame for state context. 
statePop <- iowa20 %>% 
  bind_rows(iowa10,iowa00) %>% 
  mutate(geography = "State")


###
## Places: 
# Getting 2000 total population data for all places in Iowa.
place00 <- get_decennial(geography = "place",
                         state = "IA",
                         year = 2000,
                         output = "wide",
                         variables = pop00) %>% 
  mutate(year = 2000)
place10 <- get_decennial(geography = "place",
                         state = "IA",
                         year = 2010,
                         output = "wide",
                         variables = pop10) %>% 
  mutate(year = 2010)
place20 <- get_decennial(geography = "place",
                         state = "IA",
                         variable = pop20,
                         year = 2020,
                         output = "wide") %>% 
  mutate(year = 2020)
# Bind the years together using bind_rows() to create data frame for all places in Iowa. 
placePop <- place20 %>% 
  bind_rows(place10,place00) %>% 
  mutate(geography = "Place")

# Filter for Independence, Grundy Center, and New Hampton. 
placePop <- placePop[placePop$NAME %in% c("Grundy Center city, Iowa", "Independence city, Iowa", "New Hampton city, Iowa"), ] %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa")) # Use str_remove() to remove "city, Iowa" from behind the place names. 

# Combine nationPop, regionPop, statePop, and placePop using bind_rows().
population <- nationPop %>% 
  bind_rows(regionPop,statePop,placePop)

# Calculate the change in population.
population <- population %>%
  group_by(NAME) %>%
  arrange(year) %>%
  mutate(pop_change = pop - lag(pop)) 




# Start visualizing total population now that master data frame is created. 


# Plot the change in total population for Independence.                           PLOT
# Filter population for Independence.
population %>% 
  filter(NAME == "Independence") %>% 
  ggplot(aes(x = year, y = pop))+
  geom_line()+
  geom_point(size = 2)+
  geom_text(aes(label = scales::comma(pop)), hjust = -.31)+
  scale_y_continuous(label = scales::comma)+
  scale_x_continuous(limits = c(1997, 2023),
                     breaks = c(2000,2010,2020))+
  theme_fivethirtyeight()+
  theme(legend.position = "bottom")+
  labs(title = "Change in Total Population",
       subtitle = "Independence, Iowa",
       y = "Population",
       x = "",
       color = "", 
       caption = "2000-2020 Decennial Census")

# Plot the change in total population for New Hampton.                           PLOT
# Filter population for New Hampton.
population %>% 
  filter(NAME == "New Hampton") %>% 
  ggplot(aes(x = year, y = pop))+
  geom_line()+
  geom_point(size = 2)+
  geom_text(aes(label = scales::comma(pop)),hjust = -.3)+ 
  scale_y_continuous(label = scales::comma)+
  scale_x_continuous(limits = c(1997, 2023),
                     breaks = c(2000,2010,2020))+
  theme_fivethirtyeight()+
  theme(legend.position = "bottom")+
  labs(title = "Change in Total Population",
       subtitle = "New Hampton, Iowa",
       y = "Population",
       x = "",
       color = "", 
       caption = "2000-2020 Decennial Census")

# Plot the change in total population Grundy Center.                              PLOT
# Filter population for Grundy Center.
population %>% 
  filter(NAME == "Grundy Center") %>% 
  ggplot(aes(x = year, y = pop))+
  geom_line()+
  geom_point(size = 2)+
  geom_text(aes(label = scales::comma(pop)),hjust = -.3)+
  scale_y_continuous(label = scales::comma)+
  scale_x_continuous(limits = c(1997, 2023),
                     breaks = c(2000,2010,2020))+
  theme_fivethirtyeight()+
  theme(legend.position = "bottom")+
  labs(title = "Change in Total Population",
       subtitle = "Grundy Center, Iowa",
       y = "Population",
       x = "",
       color = "", 
       caption = "2000-2020 Decennial Census")

# Plot total population for all cities.                                           PLOT    
# Filter for the cities. 
# Assign the plot to an object.
population[population$NAME %in% c("Grundy Center", "Independence", "New Hampton"), ] %>% 
  ggplot(aes(x = year, y = pop, group = NAME))+
  geom_line(aes(color = NAME), linewidth = 1)+
  geom_point(size = 2)+
  geom_text(aes(label = scales::comma(pop_change)),vjust = -1)+
  scale_y_continuous(label = scales::comma,
                     limits = c(2500,6300))+
  scale_x_continuous(limits = c(1998, 2022),
                     breaks = c(2000,2010,2020))+
  theme_fivethirtyeight()+
  theme(legend.position = "bottom")+
  labs(title = "Change in Total Population",
       subtitle = "2000-2020 Decennial Census",
       y = "Population",
       x = "",
       color = "", 
       caption = "Variables: P1_001N and P001001")

## Plot change in total population with line type = NAME.                           PLOT
population[population$NAME %in% c("Grundy Center", "Independence", "New Hampton"), ] %>% 
  ggplot(aes(x = year, y = pop, group = NAME, linetype = NAME))+
  geom_line()+
  geom_point(size = 2)+
  geom_text(aes(label = scales::comma(pop)),vjust = -1)+  
  scale_y_continuous(label = scales::comma,
                     limits = c(2500,6500))+
  scale_x_continuous(limits = c(1998, 2022),
                     breaks = c(2000,2010,2020))+
  theme_fivethirtyeight()+
  theme(legend.position = "bottom")+
  labs(title = "Change in Total Population",
       subtitle = "Comparison between Grundy Center, Independence,\nand New Hampton",
       y = "Population",
       x = "",
       color = "", 
       linetype = "",
       caption = "2000-2020 Decennial Census")

# Plot total population for all geographies.                                      PLOT
# Assign the plot to an object.
population %>% 
  ggplot(aes(x = year, y = pop, group = NAME)) +
  geom_line(aes(linetype = geography, color = NAME), linewidth = 1)+
  geom_point(size = 2)+
  scale_y_log10(label = scales::comma) +
  scale_x_continuous(limits = c(2000, 2020),
                     breaks = c(2000,2010,2020))+
  theme_fivethirtyeight()+
  theme(legend.position = "bottom")+
  labs(title = "Change in Total Population",
       y = "Population (log scale)",
       subtitle = "2000-2020 Decennial Census",
       x = "",
       color = "", 
       linetype = "",
       caption = "Variables: P1_001N and P001001")

# The next thing we want to visualize is the PERCENT CHANGE in population.
# 2. Percent Population Change                                                  

# Create anew column called prc_change.
# Group by "NAME" so that the calculations occur individually for each place. 
# Percent change is calculated by subtracting year2 from year1 and dividing by year2. 
population <- population %>%
  group_by(NAME) %>%
  mutate(prc_change = case_when(
    year == 2000 ~ NA,
    year == 2010 ~ (pop - lag(pop, n = 10, default = last(pop))) / lag(pop, n = 10, default = last(pop)),
    year == 2020 ~ (pop - lag(pop, n = 20, default = last(pop))) / lag(pop, n = 20, default = last(pop))
  ))

# Create a grouping column in the population data
population <- population %>%
  mutate(grouping = ifelse(NAME %in% c("United States", "Midwest", "Iowa"), "Contextual Area", "Places"))


## Plot the percent change in population for all geographies.                    PLOT
# Assign the plot to an object.
population %>% 
  ggplot(aes(x = year, y = prc_change, group = NAME)) +
  geom_line(aes(color = NAME, linetype = grouping), linewidth = 1) +
  geom_text(aes(x= 2020.65, y = .1777, label = "17.77%"), size = 3.5) +
  geom_text(aes(x= 2020.65, y = .09023, label = "9.02%"), vjust = -.2, size = 3.5) +
  geom_text(aes(x= 2020.65, y = .07132, label = "7.13%"), vjust = 1.2, size = 3.5) +
  geom_text(aes(x= 2020.65, y = .0770416, label = "7.70%"), size = 3.5, vjust = -.000002) +
  geom_text(aes(x= 2020.65, y = .0083139, label = "0.83%"), size = 3.5) +
  geom_text(aes(x= 2020.65, y = -.053629, label = "-5.36%"), size = 3.5) +
  geom_point(size = 2) +
  scale_y_continuous(label = scales::percent) +
  scale_x_continuous(breaks = c(2010, 2015,2020), limits = c(2010,2021)) +
  theme_fivethirtyeight() +
  theme(strip.text = element_text(face = "bold"))+
  labs(title = "Percent Change in Total Population",
       subtitle = "2000-2020 Decennial Census",
       color = "",
       caption = "Variables: P1_001N and P001001",
       linetype = "") 

## Plot the percent change in total population for just the places.
# Filter the population data frame for places. 
# Assign the plot to an object.
population[population$NAME %in% c("Grundy Center", "Independence", "New Hampton"), ] %>% 
  ggplot(aes(x = year, y = prc_change, group = NAME)) +
  geom_line(aes(color = NAME), linewidth = 1) +
  geom_point(size = 2) +
  geom_text(aes(label = scales::percent(prc_change)), hjust = .5,vjust = -.75 )+
  scale_y_continuous(label = scales::percent, limits = c(-.06,.085)) +
  scale_x_continuous(breaks = c(2010,2015, 2020), limits = c(2008, 2022)) +
  theme_fivethirtyeight() +
  theme(strip.text = element_text(face = "bold"),
        legend.position = "bottom")+  # Align legend to the right
  labs(title = "Percent Change in Total Population",
       subtitle = "2000-2020 Decennial Census",
       color = "",
       caption = "Variables: P1_001N and P001001",
       linetype = "")

# Finally, we want to visualize the population estimates. 
# In this case, we will be estimated the 2030 population.
# 3. Population Estimates                                                       

# Calculate AAAC for each geography.
# AAAC = 2020 population - 2000 population / 20
# AKA. AAAC = population - population / time
population <- population %>%
  group_by(NAME) %>%
  mutate(AAAC = (pop[year == 2020] - pop[year == 2000]) / 20)

# Create a new data frame for the projected population in 2030.
proj_2030 <- population %>%
  filter(year == 2020) %>%
  mutate(year = 2030, pop = pop + (AAAC*10))

# Combine the original data frame and the projected population data for 2030.
population <- bind_rows(population, proj_2030)

# Remove the AAAC column.
population <- population %>%
  select(-AAAC)

# Add a new column specifying if the population is actual or a projection.
population <- population %>% 
  mutate(type = if_else(year == 2030, "Projection", "Actual"))


# Plot the actual and projected total population from 2000 to 2030               PLOT 
population[population$NAME %in% c("Grundy Center", "Independence", "New Hampton"), ] %>% 
  ggplot(aes(x = year, y = pop, group = NAME, linetype = NAME)) +
  geom_line(linewidth = 1)+
  geom_point(aes(color = type), size = 2)+
  geom_text(aes(label = scales::comma(round(pop))),
            vjust = -1)+
  theme_fivethirtyeight()+
  scale_y_continuous(labels = scales::comma,
                     limits = c(2500,6500))+
  scale_x_continuous(limits = c(1998,2032),
                     labels = c("2000","2010","2020", "2030 Est."))+
  theme(legend.position = "bottom")+
  labs(x = "",
       color = "",
       linetype = "",
       title = "Total Population",
       caption = "2030 Population Estimated with AAAC\nVariables: P1_001N and P001001",
       subtitle = "2000-2020 Decennial Census")

Median Income Plots

# Median Income

# Getting median income data from the 5-Year American Community Survey. 
# The median income variable is "B19013_001."

# Create an object called "years" that lists the years you want to pull data from. 
# We want all of the years the ACS data is available. 
years <- 2009:2021
names(years) <- years

# Use get_acs() to pull median income data at the place level.
placeIncome <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variables = c("median_income" = "B19013_001"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") 

# Remove the " city, Iowa|, Iowa" from the end of the place names.
placeIncome <- placeIncome %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))

# Filter placeIncome for desired cities.
# Add a geography column.
placeIncome <- placeIncome %>%
  filter(NAME %in% c("Grundy Center", "Independence", "New Hampton")) %>% 
  mutate(geography = "Place")


# Use get_acs() to pull median income data at the state level.
stateIncome <- map_dfr(years, ~{
  get_acs(
    geography = "state",
    variables = c("median_income" = "B19013_001"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") 

# Add a geography column. 
stateIncome <- stateIncome %>% 
  mutate(geography = "State")


# Use get_acs() to pull median income data at the regional level.
regionIncome <- map_dfr(years, ~{
  get_acs(
    geography = "region",
    variables = c("median_income" = "B19013_001"),
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") 

# Remove the " Region" from the end of the region names.
regionIncome <- regionIncome %>% 
  mutate(NAME = str_remove(NAME, " Region"))

# Filter regionIncome for Midwest.
# Add a geography column.
regionIncome <- regionIncome %>%
  filter(NAME %in% "Midwest") %>% 
  mutate(geography = "Region")


# Use get_acs() to pull median income data at the national level.
nationIncome <- map_dfr(years, ~{
  get_acs(
    geography = "us",
    variables = c("median_income" = "B19013_001"),
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year")

# Add a geography column.
nationIncome <- nationIncome %>% 
  mutate(geography = "Nation")

# Bind all geographies together with bind_rows().
income <- nationIncome %>% 
  bind_rows(regionIncome, stateIncome, placeIncome)

# Add a new column specifying if the geography is a nation, region, state, or place. 
income <- income %>% 
  mutate(grouping = ifelse(NAME %in% c("United States", "Midwest", "Iowa"), "Contextual Area", NAME))

# Create three separate plots for Grundy Center, Independence and New Hampton.
# Combine plots together using patchwork library. 
#install.packages("patchwork")
library(patchwork)

# Plot the median income for Grundy Center and its contextual geographies.       PLOT
plot1 <- income[income$NAME %in% c("Grundy Center", "United States", "Midwest", "Iowa"), ] %>% 
  ggplot(aes(x = year, y = median_incomeE, group = NAME)) +
  geom_ribbon(aes(ymax = median_incomeE + median_incomeM, ymin = median_incomeE - median_incomeM, fill = geography),
              alpha = 0.3) + 
  geom_line(aes(color = geography), linewidth = 1)+
  theme_fivethirtyeight()+
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k"),
                     limits = c(32250,80000))+
  theme(legend.position = "none", 
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "",
       color = "",
       linetype = "",
       subtitle = "2009-2021 5-Year ACS Estimates\nGrundy Center",
       fill = "",
       title = "Median Income") 

# Plot the median income for Independence and its contextual geographies.        PLOT 
plot2 <- income[income$NAME %in% c("Independence", "United States", "Midwest", "Iowa"), ] %>% 
  ggplot(aes(x = year, y = median_incomeE, group = NAME)) +
  geom_ribbon(aes(ymax = median_incomeE + median_incomeM, ymin = median_incomeE - median_incomeM, fill = geography),
              alpha = 0.3) + 
  geom_line(aes(color = geography), linewidth = 1)+
  theme_fivethirtyeight()+
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k"),
                     limits = c(32250,80000))+
  theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(x = "",
       color = "",
       linetype = "",
       subtitle = "\nIndependence",
       caption = "",
       fill = "")

# Plot the median income for New Hampton and its contextual geographies.         PLOT 
plot3 <- income[income$NAME %in% c("New Hampton", "United States", "Midwest", "Iowa"), ] %>% 
  ggplot(aes(x = year, y = median_incomeE, group = NAME)) +
  geom_ribbon(aes(ymax = median_incomeE + median_incomeM, ymin = median_incomeE - median_incomeM, fill = geography),
              alpha = 0.3) + 
  geom_line(aes(color = geography), linewidth = 1)+
  theme_fivethirtyeight()+
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k"),
                     limits = c(32250,80000))+
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(x = "",
       color = "",
       linetype = "",
       subtitle = "\nNew Hampton",
       fill = "",
       caption = "Shaded area represents margin of error around the ACS estimate\nVariable: B19013_001")

# Combine plots. 
combined_plots <- wrap_plots(plot1, plot2, plot3)

Median Year Built and Median Value by Year Built Plots

# House Age                                                               

# Want to plot Year Built data as percentages to standardize that data over different geography sizes.
# Get Year Built data and Total Structure data from 2021 5-year American Community Survey (ACS) for all places in Iowa.
placeAge <- get_acs(
  geography = "place", 
  state = "IA", 
  variables = c("2020 or later" = "B25034_002", 
                "2010 to 2019" = "B25034_003", 
                "2000 to 2009" = "B25034_004",
                "1990 to 1999" = "B25034_005",
                "1980 to 1989" = "B25034_006",
                "1970 to 1979" = "B25034_007",
                "1960 to 1969" = "B25034_008",
                "1950 to 1959" = "B25034_009",
                "1940 to 1949" = "B25034_010",
                "1939 or earlier" = "B25034_011",
                "total" = "B25034_001"), 
  year = 2021)
# Filter for desired cities. 
placeAge <- placeAge %>% 
  filter(NAME %in% c("Grundy Center city, Iowa", "Independence city, Iowa", "New Hampton city, Iowa"))
# Remove " city, Iowa" using str_remove().
placeAge <- placeAge %>% 
  mutate(NAME = str_remove(NAME, "city, Iowa"))
# Group the house age data frame by NAME and calculate the percent by dividing the estimate value by the total value. 
placeAge <- placeAge %>% 
  group_by(NAME) %>% 
  mutate(percent = estimate/ first(estimate)) %>% 
  filter(variable != "total") # Removes rows containing the total.

# Get Year Built data and Total Structure data from 2021 5-year American Community Survey (ACS) for Iowa.
stateAge <- get_acs(
  geography = "state", 
  state = "IA", 
  variables = c("2020 or later" = "B25034_002", 
                "2010 to 2019" = "B25034_003", 
                "2000 to 2009" = "B25034_004",
                "1990 to 1999" = "B25034_005",
                "1980 to 1989" = "B25034_006",
                "1970 to 1979" = "B25034_007",
                "1960 to 1969" = "B25034_008",
                "1950 to 1959" = "B25034_009",
                "1940 to 1949" = "B25034_010",
                "1939 or earlier" = "B25034_011",
                "total" = "B25034_001"), 
  year = 2021)
# Calculate the percent by dividing the estimate value by the total value. 
stateAge <- stateAge %>% 
  mutate(percent = estimate/ first(estimate)) %>% 
  filter(variable != "total") # Removes rows containing the total.

# Get Year Built data and Total Structure data from 2021 5-year American Community Survey (ACS) for the Midwest.
regionAge <- get_acs(
  geography = "region", 
  variables = c("2020 or later" = "B25034_002", 
                "2010 to 2019" = "B25034_003", 
                "2000 to 2009" = "B25034_004",
                "1990 to 1999" = "B25034_005",
                "1980 to 1989" = "B25034_006",
                "1970 to 1979" = "B25034_007",
                "1960 to 1969" = "B25034_008",
                "1950 to 1959" = "B25034_009",
                "1940 to 1949" = "B25034_010",
                "1939 or earlier" = "B25034_011",
                "total" = "B25034_001"), 
  year = 2021)
# Filter for desired cities. 
regionAge <- regionAge %>% 
  filter(NAME == "Midwest Region")
# Remove " city, Iowa" using str_remove().
regionAge <- regionAge %>% 
  mutate(NAME = str_remove(NAME, " Region"))
# Group the house age data frame by NAME and calculate the percent by dividing the estimate value by the total value. 
regionAge <- regionAge %>% 
  mutate(percent = estimate/ first(estimate)) %>% 
  filter(variable != "total") # Removes rows containing the total.

# Get Year Built data and Total Structure data from 2021 5-year American Community Survey (ACS) for the United States.
nationAge <- get_acs(
  geography = "us", 
  variables = c("2020 or later" = "B25034_002", 
                "2010 to 2019" = "B25034_003", 
                "2000 to 2009" = "B25034_004",
                "1990 to 1999" = "B25034_005",
                "1980 to 1989" = "B25034_006",
                "1970 to 1979" = "B25034_007",
                "1960 to 1969" = "B25034_008",
                "1950 to 1959" = "B25034_009",
                "1940 to 1949" = "B25034_010",
                "1939 or earlier" = "B25034_011",
                "total" = "B25034_001"), 
  year = 2021)
# Calculate the percent by dividing the estimate value by the total value. 
nationAge <- nationAge %>% 
  mutate(percent = estimate/ first(estimate)) %>% 
  filter(variable != "total") # Removes rows containing the total.

# Bind contextual data together using bind_rows().
contextAge <- stateAge %>% 
  bind_rows(regionAge, nationAge)

# Plot the Year Built data.                                                      PLOT
# Group by name. Set geom_col() position to dodge to get the data displayed side by side. 
house_age <- ggplot() +
  geom_line(data = contextAge, aes(x = variable, y = percent, group = NAME, linetype = NAME), linewidth = 1, alpha = .6) +
  geom_col(data = placeAge, aes(x = variable, y = percent, group = NAME, fill = NAME), position = "dodge", alpha = .9) +
  scale_y_continuous(labels = scales::percent) +
  theme_fivethirtyeight() +
  theme(legend.position = "none") +
  labs(title = "Year Structure Built",
       subtitle = "2017-2021 5-Year ACS Estimates",
       x = "",
       y = "ACS estimate",
       fill = "",
       linetype = "")+
  #  caption = "Variables: B25034_002, B25034_003, B25034_004, B25034_005, B25034_006,\nB25034_007, B25034_008, B25034_009,B25034_010, B25034_011, B25034_001")+
  theme(axis.text.x = element_text(angle = 30, hjust = 1))


# Median Home Value by Year Structure Built                                

# Get data from 2021 5-year American Community Survey for all places in Iowa.
placeValueAge <- get_acs(
  geography = "place", 
  state = "IA", 
  variables = c("2020 or later" = "B25107_002", 
                "2010 to 2019" = "B25107_003", 
                "2000 to 2009" = "B25107_004",
                "1990 to 1999" = "B25107_005",
                "1980 to 1989" = "B25107_006",
                "1970 to 1979" = "B25107_007",
                "1960 to 1969" = "B25107_008",
                "1950 to 1959" = "B25107_009",
                "1940 to 1949" = "B25107_010",
                "1939 or earlier" = "B25107_011"), 
  year = 2021)
# Filter for desired cities. 
placeValueAge <- placeValueAge %>% 
  filter(NAME %in% c("Grundy Center city, Iowa", "Independence city, Iowa", "New Hampton city, Iowa")) %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa"))

# Get data from 2021 5-year American Community Survey for Iowa.
stateValueAge <- get_acs(
  geography = "state", 
  state = "IA", 
  variables = c("2020 or later" = "B25107_002", 
                "2010 to 2019" = "B25107_003", 
                "2000 to 2009" = "B25107_004",
                "1990 to 1999" = "B25107_005",
                "1980 to 1989" = "B25107_006",
                "1970 to 1979" = "B25107_007",
                "1960 to 1969" = "B25107_008",
                "1950 to 1959" = "B25107_009",
                "1940 to 1949" = "B25107_010",
                "1939 or earlier" = "B25107_011"), 
  year = 2021)

# Get data from 2021 5-year American Community Survey for the Midwest.
regionValueAge <- get_acs(
  geography = "region", 
  variables = c("2020 or later" = "B25107_002", 
                "2010 to 2019" = "B25107_003", 
                "2000 to 2009" = "B25107_004",
                "1990 to 1999" = "B25107_005",
                "1980 to 1989" = "B25107_006",
                "1970 to 1979" = "B25107_007",
                "1960 to 1969" = "B25107_008",
                "1950 to 1959" = "B25107_009",
                "1940 to 1949" = "B25107_010",
                "1939 or earlier" = "B25107_011"), 
  year = 2021)
# Filter for the Midwest. 
regionValueAge <- regionValueAge %>% 
  filter(NAME == "Midwest Region") %>% 
  mutate(NAME = str_remove(NAME, " Region"))

# Get data from 2021 5-year American Community Survey for the United States.
nationValueAge <- get_acs(
  geography = "us", 
  variables = c("2020 or later" = "B25107_002", 
                "2010 to 2019" = "B25107_003", 
                "2000 to 2009" = "B25107_004",
                "1990 to 1999" = "B25107_005",
                "1980 to 1989" = "B25107_006",
                "1970 to 1979" = "B25107_007",
                "1960 to 1969" = "B25107_008",
                "1950 to 1959" = "B25107_009",
                "1940 to 1949" = "B25107_010",
                "1939 or earlier" = "B25107_011"), 
  year = 2021)

# Join all geographies.
contextValueAge <- stateValueAge %>% 
  bind_rows(regionValueAge, nationValueAge)

# plot median home value by year structure built                                  PLOT
house_value_by_year <- ggplot() +
  geom_line(data = contextValueAge, aes(x = variable, y = estimate, group = NAME, linetype = NAME), linewidth = 1, alpha = .6) +
  geom_col(data = placeValueAge, aes(x = variable, y = estimate, group = NAME, fill = NAME), position = "dodge", alpha = .9) +
  scale_y_continuous(labels = scales::dollar) +
  theme_fivethirtyeight() +
  labs(title = "Median Home Value by Year Structure Built",
       subtitle = "2017-2021 5-Year ACS Estimates",
       x = "",
       y = "ACS estimate",
       fill = "",
       linetype = "",
       caption = "Variables: B25034_002, B25034_003, B25034_004, B25034_005, B25034_006,\nB25034_007, B25034_008, B25034_009,B25034_010, B25034_011, B25034_001") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

#Use patchwork library to parse together year built and median value by year built plots.

library(patchwork)

value_and_year_built <- house_age +house_value_by_year +
  plot_layout(ncol = 1)

Median House Value Plots

# Median House Value                                                         

# Getting housing value data from the 5-year 2020 American Community Survey.
# Specify county for just Chickasaw, Grundy, and Buchanan County.
# The API Code for median home value is "B25077_001."
housing_val <- get_acs(
  geography = "tract", 
  variables = "B25077_001", 
  state = "IA", 
  county = c("Chickasaw","Grundy","Buchanan"
  ),
  year = 2020
)
# Separate the median home value data into three separate geographies: tract, county, and state.
housing_val2 <- separate(
  housing_val, 
  NAME, 
  into = c("tract", "county", "state"), 
  sep = ", "
)  

# Filter median home value data by census tract. 
# The city of Independence falls into two census tracts: 9505 and 9504.
# Grundy Center is in tract 9603.
# New Hampton is in tract 704.
# Add a new column called "city" with the associated city name. 
ind_house_val <- housing_val2 %>% 
  filter(tract == c("Census Tract 9505", "Census Tract 9504")) %>% 
  mutate(city = "Independence")
hampton_house_val <- housing_val2 %>% 
  filter(tract == "Census Tract 704") %>% 
  mutate(city = "New Hampton")
grundy_house_val <- housing_val2 %>% 
  filter(tract == "Census Tract 9603") %>% 
  mutate(city = "Grundy Center")

# Bind individual data frames together with bind_rows().
cities_house_val <- ind_house_val %>% 
  bind_rows(hampton_house_val,grundy_house_val)

# Use the summarize function to find the minimum, maximum, median and mean of the median home value estimates. 
cities_house_val %>%
  group_by(city) %>%
  summarize(min = min(estimate, na.rm = TRUE), 
            mean = mean(estimate, na.rm = TRUE), 
            median = median(estimate, na.rm = TRUE), 
            max = max(estimate, na.rm = TRUE)) 
# A tibble: 3 x 5
  city             min   mean median    max
  <chr>          <dbl>  <dbl>  <dbl>  <dbl>
1 Grundy Center 135900 135900 135900 135900
2 Independence  133200 136400 136400 139600
3 New Hampton   117500 117500 117500 117500
# Cannot visualize cities_house_val using this ggplot method because Grundy Center and New Hampton only have one value.
# Plot housing value by county instead.
# Add in a point that displays the median for each city from the 2020 5-Year ACS.
housing_val3 <- get_acs(
  geography = "place", 
  variables = "B25077_001", 
  state = "IA",
  year = 2020) %>% 
  filter(NAME %in% c("Grundy Center city, Iowa","New Hampton city, Iowa", "Independence city, Iowa"))

# grundy center = $130,600
# new hampton = $112,200
# independence = $129,900

# Create a density plot for median home value.                                   PLOT
ggplot(housing_val2, aes(x = estimate, fill = county)) + 
  geom_density(alpha = 0.3, linewidth = 1) +
  geom_point(aes(y = 0.00002074, x = 112200), size = 2) + # New Hampton
  geom_point(aes(x = 129900, y = .00001405), size = 2) + # Independence
  geom_point(aes(x = 130600, y = .0000227), size = 2) + # Grundy Center
  geom_text(aes(y = 0.00002074, x = 112200, label = "New Hampton\n$112,200"), hjust = -.1, vjust = 1) +
  geom_text(aes(x = 129900, y = .00001405, label = "Independence\n$129,900"), hjust = -.1, vjust = 1) +
  geom_text(aes(x = 130600, y = .0000227, label = "Grundy Center\n$130,600"), hjust = 1.1, vjust = -.15) +
  labs(title = "Median House Values by Census Tract",
       subtitle = "2016-2020 5-Year ACS Estimates",
       y = "",
       x = "Median House Value",
       caption = "Points represent median house value for each city\nVariable: B25077_001",
       fill = "")+
  scale_x_continuous(labels = dollar_format()) +
  scale_y_continuous(labels = scales::comma,
                     limits = c(0,.000026))+
  theme_fivethirtyeight()

# Now, want to plot all available data for median home value. 
# Getting median home value data from the 5-Year American Community Survey. 
# The median home value code is "B25077_001."

# Create an object called "years" that lists the years you want to pull data from. 
# We want all of the years the ACS data is available. 
years <- 2009:2021
names(years) <- years

# Use get_acs() to pull median home value data at the place level.
placeValue <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variables = c("median_value" = "B25077_001"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") 

# Remove the " city, Iowa|, Iowa" from the end of the place names.
placeValue <- placeValue %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))

# Filter placeValue for desired cities.
# Add a geography column.
placeValue <- placeValue %>%
  filter(NAME %in% c("Grundy Center", "Independence", "New Hampton")) %>% 
  mutate(geography = "Place")


# Use get_acs() to pull median home value data at the state level.
stateValue <- map_dfr(years, ~{
  get_acs(
    geography = "state",
    variables = c("median_value" = "B25077_001"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") 

# Add a geography column. 
stateValue <- stateValue %>% 
  mutate(geography = "State")


# Use get_acs() to pull median home value data at the regional level.
regionValue <- map_dfr(years, ~{
  get_acs(
    geography = "region",
    variables = c("median_value" = "B25077_001"),
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") 

# Remove the " Region" from the end of the region names
regionValue<- regionValue %>% 
  mutate(NAME = str_remove(NAME, " Region"))

# Filter regionValue for Midwest.
# Add a geography column.
regionValue <- regionValue %>%
  filter(NAME %in% "Midwest") %>% 
  mutate(geography = "Region")


# Use get_acs() to pull median home value data at the national level.
nationValue <- map_dfr(years, ~{
  get_acs(
    geography = "us",
    variables = c("median_value" = "B25077_001"),
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year")

# Add a geography column.
nationValue <- nationValue %>% 
  mutate(geography = "Nation")

# Bind all geographies together with bind_rows().
value <- nationValue %>% 
  bind_rows(regionValue, stateValue, placeValue)

# Add a new column specifying if the geography is a nation, region, state, or place. 
value <- value %>% 
  mutate(grouping = ifelse(NAME %in% c("United States", "Midwest", "Iowa"), "Contextual Area", NAME))

# Create three separate plots for Grundy Center, Independence and New Hampton.
# Combine plots together using patchwork library. 
#install.packages("patchwork")
library(patchwork)

# Plot the median home value for Grundy Center and its contextual geographies.       PLOT
plot1 <- value[value$NAME %in% c("Grundy Center", "United States", "Midwest", "Iowa"), ] %>% 
  ggplot(aes(x = year, y = median_valueE, group = NAME)) +
  geom_ribbon(aes(ymax = median_valueE + median_valueM, ymin = median_valueE - median_valueM, fill = geography),
              alpha = 0.3) + 
  geom_line(aes(color = geography), linewidth = 1)+
  theme_fivethirtyeight()+
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k"),
                     limits = c(85000,250000))+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(x = "",
       color = "",
       linetype = "",
       subtitle = "2009-2021 5-Year ACS Estimates\nGrundy Center",
       fill = "",
       title = "Median Home Value")

# Plot the median home value for Independence and its contextual geographies.        PLOT 
plot2 <- value[value$NAME %in% c("Independence", "United States", "Midwest", "Iowa"), ] %>% 
  ggplot(aes(x = year, y = median_valueE, group = NAME)) +
  geom_ribbon(aes(ymax = median_valueE + median_valueM, ymin = median_valueE - median_valueM, fill = geography),
              alpha = 0.3) + 
  geom_line(aes(color = geography), linewidth = 1)+
  theme_fivethirtyeight()+
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k"),
                     limits = c(85000,250000))+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(x = "",
       color = "",
       linetype = "",
       subtitle = "\nIndependence",
       fill = "")

# Plot the median value for New Hampton and its contextual geographies.         PLOT 
plot3 <- value[value$NAME %in% c("New Hampton", "United States", "Midwest", "Iowa"), ] %>% 
  ggplot(aes(x = year, y = median_valueE, group = NAME)) +
  geom_ribbon(aes(ymax = median_valueE + median_valueM, ymin = median_valueE - median_valueM, fill = geography),
              alpha = 0.3) + 
  geom_line(aes(color = geography), linewidth = 1)+
  theme_fivethirtyeight()+
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k"),
                     limits = c(85000,250000))+
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(x = "",
       color = "",
       linetype = "",
       subtitle = "\nNew Hampton",
       fill = "",
       caption = "Shaded area represents margin of error around the ACS estimate\nVariable: B25077_001")

# Combine plots. 
combined_plots1 <- wrap_plots(plot1, plot2, plot3)


# Plot just the city median home value data.                                     PLOT
value[value$NAME %in% c("Grundy Center", "New Hampton", "Independence"), ] %>% 
  ggplot(aes(x = year, y = median_valueE, group = NAME, fill = NAME)) + 
  geom_ribbon(aes(ymax = median_valueE + median_valueM, ymin = median_valueE - median_valueM),
              alpha = 0.3) + 
  geom_line(aes(color = NAME), linewidth = 1) + 
  theme_fivethirtyeight() + 
  scale_y_continuous(labels = label_dollar(scale = .001, suffix = "k")) +
  labs(title = "Median Home Value",
       subtitle = "2009-2021 5-Year ACS Estimates",
       x = "Year",
       y = "ACS estimate",
       fill = "",
       color = "",
       caption = "Shaded area represents margin of error around the ACS estimate\nVariable: B25077_001") +
  geom_text(aes(x = "2021", y =137100, label = "$137,000"), vjust = -1)+
  geom_text(aes(x = "2021", y = 133300, label = "$133,300"), vjust = 2) +
  geom_text(aes(x = "2021", y = 122600, label = "$122,600"), vjust = -1)

Housing Occupancy Plots

```{r}, warning = FALSE, message = FALSE} # Home Ownership, Rental, and Vacancy Rates

Getting owner occupied data and total occupied units from 5-Year American Community Survey.

Using years object that was defined earlier.

placeOwn <- map_dfr(years, ~{ get_acs( geography = “place”, variables = c(“Owner Occupied” = “B25003_002”, “total” = “B25001_001”), state = “IA”, year = .x, survey = “acs5”, ) }, .id = “year”)

Create a new column for percent own.

placeOwn <- placeOwn %>% group_by(year, GEOID, NAME) %>% mutate(percent = estimate / first(estimate)) filter(variable != “total”)

Getting renter occupied data and total occupied units from 5-Year American Community Survey.

Using years object defined earlier.

placeRent <- map_dfr(years, ~{ get_acs( geography = “place”, variables = c(“Renter Occupied” = “B25003_003”, “total” = “B25001_001”), state = “IA”, year = .x, survey = “acs5”, ) }, .id = “year”)

Add a new column for percent rent.

placeRent <- placeRent %>% group_by(year, GEOID, NAME) %>% mutate(percent = estimate / first(estimate)) %>% filter(variable != “total”)

Getting vacant units and total units from 5-Year ACS.

Using years object defined earlier.

placeVacant <- map_dfr(years, ~{ get_acs( geography = “place”, state = “IA”, variables = c(“Vacant” = “B25002_003”, “total” = “B25001_001”), year = .x, survey = “acs5”, ) }, .id = “year”)

Create new column for vacant percent.

placeVacant <- placeVacant %>% group_by(year, GEOID, NAME) %>% mutate(percent = estimate / first(estimate)) %>% filter(variable != “total”)

Combine all data into one data frame.

placeRate <- placeOwn %>% bind_rows(placeRent, placeVacant)

To simplify the filtering process from now on, create an object for desired cities.

target_cities <- c(“Independence city, Iowa”,“New Hampton city, Iowa”, “Grundy Center city, Iowa”)

Filter for desired cities.

placeRate <- placeRate %>% filter(NAME %in% target_cities) %>% mutate(NAME = str_remove(NAME,” city, Iowa”))

Add a geography column.

placeRate <- placeRate %>% mutate(geography = “Place”)

STATE TIME

Getting owner occupied data and total occupied units from 5-Year American Community Survey for Iowa.

Using years object that was defined earlier.

stateOwn <- map_dfr(years, ~{ get_acs( geography = “state”, variables = c(“Owner Occupied” = “B25003_002”, “total” = “B25001_001”), state = “IA”, year = .x, survey = “acs5”, ) }, .id = “year”)

Create a column called prc_own for percent home ownership.

Create column for the margin of error of prc_own.

Filter the data frame for the desired cities.

stateOwn <- stateOwn %>% group_by(year) %>% mutate(percent = estimate/first(estimate)) %>% filter(variable != “total”)

Repeat the process for Renter Occupied and Vacant Units.

stateRent <- map_dfr(years, ~{ get_acs( geography = “state”, variables = c(“Renter Occupied” = “B25003_003”, “total” = “B25001_001”), state = “IA”, year = .x, survey = “acs5”, ) }, .id = “year”) # Calculate percent. stateRent <- stateRent %>% group_by(year) %>% mutate(percent = estimate/first(estimate)) %>% filter(variable != “total”)

Vacant Units time.

stateVacant <- map_dfr(years, ~{ get_acs( geography = “state”, variables = c(“Vacant” = “B25002_003”, “total” = “B25001_001”), state = “IA”, year = .x, survey = “acs5”, ) }, .id = “year”) # Calculate percent. stateVacant <- stateVacant %>% group_by(year) %>% mutate(percent = estimate/first(estimate)) %>% filter(variable != “total”)

Combine all rates and add geography.

stateRate <- stateOwn %>% bind_rows(stateRent, stateVacant) %>% mutate(geography = “State”)

place_state %>% filter(variable == “Owner Occupied”) %>% ggplot(aes(x = year, y = percent, group = NAME, color = NAME)) + geom_line(linewidth = 1) + geom_point(size =2, color = “black”) + scale_y_continuous(labels = scales::percent)+ theme_fivethirtyeight() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(title = “Home Ownership Rate”, caption = “Variable: B25003_002”, color = ““, subtitle =”2009-2021 5-Year ACS Estimates”)

REGION TIME

Getting owner occupied data and total occupied units from 5-Year American Community Survey for the Midwest.

Using years object that was defined earlier.

regionOwn <- map_dfr(years, ~{ get_acs( geography = “region”, variables = c(“Owner Occupied” = “B25003_002”, “total” = “B25001_001”), year = .x, survey = “acs5”, ) }, .id = “year”)

Calculate percent home ownership rate.

regionOwn <- regionOwn %>% group_by(year, GEOID, NAME) %>% mutate(percent = estimate/first(estimate)) %>% filter(variable != “total”)

Repeat for Renter Occupied data.

regionRent <- map_dfr(years, ~{ get_acs( geography = “region”, variables = c(“Renter Occupied” = “B25003_003”, “total” = “B25001_001”), year = .x, survey = “acs5” ) }, .id = “year”) # Calculate percent rent. regionRent <- regionRent %>% group_by(year, GEOID, NAME) %>% mutate(percent = estimate / first(estimate)) %>% filter(variable != “total”)

One last time for vacant units.

regionVacant <- map_dfr(years, ~{ get_acs( geography = “region”, variables = c(“Vacant” = “B25002_003”, “total” = “B25001_001”), year = .x, survey = “acs5” ) }, .id = “year”) # Calculate Vacant. regionVacant <- regionVacant %>% group_by(year, GEOID, NAME) %>% mutate(percent = estimate/ first(estimate)) %>% filter(variable != “total”)

Combine owner, renter, and vacant.

regionRate <- regionOwn %>% bind_rows(regionRent, regionVacant)

To simplify the filtering process from now on, create an object for desired region

target_region <- “Midwest Region”

Filter for Midwest.

Drop ” Region.”

Add a geography column.

regionRate <- regionRate %>% filter(NAME %in% target_region) %>% mutate(NAME = str_remove(NAME, ” Region”)) %>% mutate(geography = “Region”)

NATION TIME

Get data from the 5-Year ACS.

nationRate <- map_dfr(years, ~{ get_acs( geography = “us”, variables = c(“Owner Occupied” = “B25003_002”, “Renter Occupied” = “B25003_003”, “Vacant” = “B25002_003”, “total” = “B25001_001”), year = .x, survey = “acs5” ) }, .id = “year”)

Calculate rates.

Add geography.

nationRate <- nationRate %>% group_by(year) %>% mutate(percent = estimate/ first(estimate)) %>% filter(variable != “total”) %>% mutate(geography = “Nation”)

Create plots for home ownership rate at all geographies.

HOME OWNERSHIP RATE

I want state, region, and nation in their own contextRate so that they can be displayed by linetype.

Filter placeRate for variable == “Owner Occupied”

filtered_placeRate <- placeRate %>% filter(variable == “Owner Occupied”)

Bind context.

contextRate <- stateRate %>% bind_rows(regionRate, nationRate) # Filter stateRate for variable == “Owner Occupied” filtered_contextRate <- contextRate %>% filter(variable == “Owner Occupied”)

Create the line plot: Home Ownership Rate

ggplot() + geom_line(data = filtered_contextRate, aes(x = year, y = percent, group = NAME, linetype = NAME), color = “black”, linewidth = 1, alpha = .4) + geom_line(data = filtered_placeRate, aes(x = year, y = percent, group = NAME, color = NAME), linewidth = 1.75) + scale_y_continuous(labels = scales::percent, limits = c(.54,.8), breaks = c(.55,.6,.65,.7,.75,.8))+ theme_fivethirtyeight() + labs(title = “Home Ownership Rate”, subtitle = “2009-2021 5-Year ACS Estimates”, caption = “Variable: B25003_002”, color = ““, linetype =”“)

Create plots for vacancy rate at all geographies.

VACANCY RATE

filter for vacancy

vacancy_placeRate <- placeRate %>% filter(variable == “Vacant”) vacancy_contextRate <- contextRate %>% filter(variable == “Vacant”)

Create the line plot: Vacancy Rate

ggplot() + geom_line(data = vacancy_contextRate, aes(x = year, y = percent, group = NAME, linetype = NAME), color = “black”, linewidth = 1, alpha = .4) + geom_line(data = vacancy_placeRate, aes(x = year, y = percent, group = NAME, color = NAME), linewidth = 1.5) + scale_y_continuous(labels = scales::percent, limits = c(.0, .15), breaks = c(0, .05, .1, .15)) + labs(title = “Vacancy Rate”, subtitle = “2009-2021 5-Year ACS Estimates”, caption = “Variable: B25002_003”, color = ““, linetype =”“, x =”“, y =”“) + theme_fivethirtyeight()


### Reason for Vacancy Plot

```{r}, warning = FALSE, message = FALSE}
# Reason for Vacancy                                                      

placeReason <- get_acs(geography = "place",
                   state = "IA",
                   year = 2021,
                   variables = c("For rent" = "B25004_002",
                                 "Rented, not occupied" = "B25004_003",
                                 "For sale" = "B25004_004",
                                 "Sold, not occupied" = "B25004_005",
                                 "Seasonal" = "B25004_006",
                                 "Migrant worker" = "B25004_007",
                                 "Other" = "B25004_008",
                                 "total" = "B25004_001"))

# Filter for desired cities.
placeReason <- placeReason %>% 
  filter(NAME %in% target_cities) %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa"))

# Turn values into percents of the total.
placeReason <- placeReason %>% 
  group_by(NAME) %>% 
  mutate(percent = estimate / first(estimate)) %>% 
  filter(variable != "total")

# Plot reason for vacancy.                                                       PLOT
placeReason %>% 
  ggplot(aes(x=factor(variable, level=c("For rent","Rented, not occupied","For sale","Sold, not occupied","Seasonal","Migrant worker","Other")), y = percent, group = NAME, fill = NAME)) +
  geom_col(position = "dodge") +
  theme_fivethirtyeight()+
  scale_y_continuous(labels = scales::percent,
                     breaks = c(0,.1,.2,.3,.4)) +
  labs(title = "Reason for Vacancy",
       subtitle = "2017-2021 5-Year ACS Estimates",
       x = "",
       y = "ACS estimate",
       fill = "",
       alpha = "",
       caption = "Variables: B25004_001, B25004_002, B25004_003, B25004_004,\nB25004_005, B25004_006, B25004_007, B25004_008")+
  theme(axis.text.x=element_text(angle = 45, hjust = 1),
        plot.caption = element_text(size = 8))

Demographic Analysis

I finished gathering data for the demographic analysis this week. The code for that is below. I made a couple of minor changes to the data from last week. I decided that I wanted to gather all American Community Survey data from all years it was available instead of just for 2021.

If you copied any of the code from the week before, I would suggest copy the code below. It is far more accurate to the actual demographic analysis. I will be creating a separate blog that outlines the process of conducting a demographic analysis.

Population Data

## starting with POPULATION data for all places in Iowa ##
##########################################################

# Define the variables before getting the data.
# The Census API code for total population for 2000 and 2010 is P001001.
pop00 <- c("pop" = "P001001")
pop10 <- c("pop" = "P001001")
# The API code for total population changed for 2020.
# The Census API code for total population is P1_001N for 2020. 
pop20 <- c("pop" = "P1_001N")

##
# Getting 2000 total population data for the USA.
pop00 <- get_decennial(geography = "place",
                       state = "IA",
                          year = 2000,
                          variable = pop00,
                          output = "wide") %>% 
  mutate(year = 2000)
# Getting 2010 total population data for the USA.
pop10 <- get_decennial(geography = "place",
                          state = "IA",
                          year = 2010,
                          variable = pop10,
                          output = "wide") %>% 
  mutate(year = 2010)
# Getting 2020 total population data for the USA.
pop20 <- get_decennial(geography = "place",
                          state = "IA",
                          year = 2020,
                          variable = pop20,
                          output = "wide") %>% 
  mutate(year = 2020)
# Bind the years together using bind_rows to create data frame for national context. 
pop <- pop20 %>% 
  bind_rows(pop10,pop00) 


# Create anew column called prc_change.
# Group by "NAME" so that the calculations occur individually for each place. 
# Percent change is calculated by subtracting year2 from year1 and dividing by year2. 
pop <- pop %>%
  group_by(NAME) %>%
  mutate(prc_change = ifelse(year == 2020, (pop - lead(pop)) / lead(pop),
                             ifelse(year == 2010, (pop - last(pop)) / last(pop), NA)))

# figure out if the places are growing, shrinking, or stable
# stable = between -2 and 2
# increasing > 2
# decreasing < -2
pop <- pop %>%
  mutate(change_label = ifelse(prc_change > .02, "Growing",
                             ifelse(prc_change < -.02, "Shrinking", "Stable"))) %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))

#rename the variables
pop <- pop %>% 
  rename(Population = pop) %>% 
  rename(Year = year) %>% 
  rename("Percent Change" = prc_change) %>% 
  rename(label = change_label)


pop %>% write.csv(file = "population_data.csv")
## age of residents ##
######################

# adding to pop 

# Create an object called "years" that lists the years you want to pull data from. 
# We want all of the years the ACS data is available. 
years <- 2009:2021
names(years) <- years

# get the median age of all people in all places in Iowa from ACS
median_age <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variables = c("median_age" = "B01002_001"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))
Getting data from the 2005-2009 5-year ACS
Getting data from the 2006-2010 5-year ACS
Getting data from the 2007-2011 5-year ACS
Getting data from the 2008-2012 5-year ACS
Getting data from the 2009-2013 5-year ACS
Getting data from the 2010-2014 5-year ACS
Getting data from the 2011-2015 5-year ACS
Getting data from the 2012-2016 5-year ACS
Getting data from the 2013-2017 5-year ACS
Getting data from the 2014-2018 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2016-2020 5-year ACS
Getting data from the 2017-2021 5-year ACS
# rename variables
median_age <- median_age %>% 
  rename(Year = year) %>% 
  rename("Median Age Estimate" = median_ageE) %>% 
  rename(moe = median_ageM)


# get the number of people under 18 and over 65
# API Codes seperated by gender, so have to pull twice
# starting with Male
male <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variable = c("under5" = "B01001_003",
                 "a5to9" = "B01001_004",
                 "a10to14" = "B01001_005",
                 "a15to17" = "B01001_006",
                 "a65to66" = "B01001_020",
                 "a67to69" = "B01001_021",
                 "a70to74" = "B01001_022",
                 "a75to79" = "B01001_023",
                 "a80to84" = "B01001_024",
                 "over85" = "B01001_025",
                 "total" = "B01001_002"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))
Getting data from the 2005-2009 5-year ACS
Getting data from the 2006-2010 5-year ACS
Getting data from the 2007-2011 5-year ACS
Getting data from the 2008-2012 5-year ACS
Getting data from the 2009-2013 5-year ACS
Getting data from the 2010-2014 5-year ACS
Getting data from the 2011-2015 5-year ACS
Getting data from the 2012-2016 5-year ACS
Getting data from the 2013-2017 5-year ACS
Getting data from the 2014-2018 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2016-2020 5-year ACS
Getting data from the 2017-2021 5-year ACS
# now for Female
female <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variable = c("under5" = "B01001_027",
                 "a5to9" = "B01001_028",
                 "a10to14" = "B01001_029",
                 "a15to17" = "B01001_030",
                 "a65to66" = "B01001_044",
                 "a67to69" = "B01001_045",
                 "a70to74" = "B01001_046",
                 "a75to79" = "B01001_047",
                 "a80to84" = "B01001_048",
                 "over85" = "B01001_049",
                 "total" = "B01001_026"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))
Getting data from the 2005-2009 5-year ACS
Getting data from the 2006-2010 5-year ACS
Getting data from the 2007-2011 5-year ACS
Getting data from the 2008-2012 5-year ACS
Getting data from the 2009-2013 5-year ACS
Getting data from the 2010-2014 5-year ACS
Getting data from the 2011-2015 5-year ACS
Getting data from the 2012-2016 5-year ACS
Getting data from the 2013-2017 5-year ACS
Getting data from the 2014-2018 5-year ACS
Getting data from the 2015-2019 5-year ACS
Getting data from the 2016-2020 5-year ACS
Getting data from the 2017-2021 5-year ACS
# create new columns for under18 and over 65 for both male and female
# create a new column for gender
male <- male %>% 
  mutate(under18E = round(under5E + a5to9E + a10to14E + a15to17E)) %>%
  mutate(under18M = round(sqrt(under5M^2 + a5to9M^2 + a10to14M^2 + a15to17M^2))) %>%
  mutate(over65E = round(a65to66E + a67to69E + a70to74E + a75to79E + a80to84E + over85E)) %>%
  mutate(over65M =  round(sqrt(a65to66M^2 + a67to69M^2 + a70to74M^2 + a75to79M^2 + a80to84M^2 + over85M^2))) %>% 
  mutate(gender = "Male")

female <- female %>% 
  mutate(under18E = round(under5E + a5to9E + a10to14E + a15to17E)) %>%
  mutate(under18M = round(sqrt(under5M^2 + a5to9M^2 + a10to14M^2 + a15to17M^2))) %>%
  mutate(over65E = round(a65to66E + a67to69E + a70to74E + a75to79E + a80to84E + over85E)) %>%
  mutate(over65M =  round(sqrt(a65to66M^2 + a67to69M^2 + a70to74M^2 + a75to79M^2 + a80to84M^2 + over85M^2))) %>% 
  mutate(gender = "Female")


# combine the data frames
age <- female %>% 
  bind_rows(male)

# aggregate by city NAME, year, GEOID and gender
# create new columns for percent under 18 and percent over 65
aggregated_age <- age %>% 
  group_by(year,GEOID, NAME, gender) %>% 
             summarize(prc_under18E = sum(under18E)/sum(totalE),
                       prc_under18M = sqrt(sum(under18M^2) / sum(totalE)^2 + (sum(under18E)^2 * sum(totalM^2)) / sum(totalE)^4),
                       prc_over65E = sum(over65E)/sum(totalE),
                       prc_over65M = sqrt(sum(over65M^2) / sum(totalE)^2 + (sum(over65E)^2 * sum(totalM^2)) / sum(totalE)^4))
`summarise()` has grouped output by 'year', 'GEOID', 'NAME'. You can override
using the `.groups` argument.
# add a column that states whether a place is "aged", "stable" or "young"
# using a 2% difference to gauge a stable population
aggregated_age <- aggregated_age %>%
  mutate(age_label = ifelse(prc_under18E - prc_over65E > .02, "Young",
                               ifelse(prc_under18E - prc_over65E < -.02, "Aging", "Stable")))

# left_join aggregated_age to age
age <- age %>% 
  left_join(aggregated_age, by = c("year","GEOID","NAME","gender"))

# rename variables
age <- age %>% 
  rename("Under 5" = under5E) %>% 
  rename("5 to 9" = a5to9E) %>% 
  rename("10 to 14" = a10to14E) %>% 
  rename("15 to 17" = a15to17E) %>% 
  rename("65 to 66" = a65to66E) %>% 
  rename("67 to 69" = a67to69E) %>% 
  rename("70 to 74" = a70to74E) %>% 
  rename("75 to 79" = a75to79E) %>% 
  rename("80 to 84" = a80to84E) %>% 
  rename("Over 85" = over85E) %>% 
  rename("Total Population Under 18" = under18E) %>% 
  rename("Total Population Over 65" = over65E) %>% 
  rename("Percent Population Under 18" = prc_under18E) %>% 
  rename("Percent Population Over 65" = prc_over65E) %>% 
  rename(Label = age_label) %>% 
  rename("Under 5 moe" = under5M) %>% 
  rename("5 to 9 moe" = a5to9M) %>% 
  rename("10 to 14 moe" = a10to14M) %>% 
  rename("15 to 17 moe" = a15to17M) %>% 
  rename("65 to 66 moe" = a65to66M) %>% 
  rename("67 to 69 moe" = a67to69M) %>% 
  rename("70 to 74 moe" = a70to74M) %>% 
  rename("75 to 79 moe" = a75to79M) %>% 
  rename("80 to 84 moe" = a80to84M) %>% 
  rename("Over 85 moe" = over85M) %>% 
  rename("Total Population Under 18 moe" = under18M) %>% 
  rename("Total Population Over 65 moe" = over65M) %>% 
  rename("Percent Population Under 18 moe" = prc_under18M) %>% 
  rename("Percent Population Over 65 moe" = prc_over65M)

# create a .csv file for age data and median age data
age %>% write.csv(file = "age_data.csv")
median_age %>% write.csv(file = "median_age_data.csv")

Housing Data

## next is housing information ##
#################################

# getting from 2021 5-Year ACS

# total housing units = B25001_001
# owner occupied units = B25003_002
# total occupied units = B25002_002
# total vacant units = B25002_003
# median house value = B25077_001 
# median house age = B25035_001

housing <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variable = c("total_units" = "B25001_001",
                 "occupied_units" = "B25002_002",
                 "vacant_units" = "B25002_003",
                 "owner_occupied" = "B25003_002",
                 "renter_occupied" = "B25003_003",
                 "median_house_value" = "B25077_001",
                 "median_year_built" = "B25035_001"),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))


## ALL RATES ARE PERCENTAGES
# calculate home ownership, vacany, and rental rates
housing <- housing %>% 
  mutate("Home Ownership Rate Estimate" = (owner_occupiedE / occupied_unitsE)) %>%  # divide owner occupied by occupied units
  mutate("Home Ownership Rate moe" = (sqrt((owner_occupiedM^2) / (occupied_unitsE^2) + ((owner_occupiedE * occupied_unitsM)^2) / (occupied_unitsE^4)))) %>%  # calculate new moe
  mutate("Rental Rate Estimate" = (renter_occupiedE / occupied_unitsE)) %>%  # divide renter occupied by occupied units
  mutate("Rental Rate moe" = (sqrt((renter_occupiedM^2) / (occupied_unitsE^2) + ((renter_occupiedE * occupied_unitsM)^2) / (occupied_unitsE^4)))) %>% # calculate new moe
  mutate("Vacancy Rate Estimate" = (vacant_unitsE / total_unitsE)) %>%  # divide vacant units by total units
  mutate("Vacancy Rate moe" = (sqrt((vacant_unitsM^2) / (total_unitsE^2) + ((vacant_unitsE * total_unitsM)^2) / (total_unitsE^4)))) %>% 
  mutate("Occupancy Rate Estimate" = (occupied_unitsE / total_unitsE)) %>% 
  mutate("Occupancy Rate moe" = (sqrt(occupied_unitsM^2) / (total_unitsE^2) + ((occupied_unitsE * total_unitsM^2) / total_unitsE^4)))# calculate new moe

# rename variables
housing <- housing %>% 
  rename("Total Housing Units" = total_unitsE) %>% 
  rename("Occupied Units" = occupied_unitsE) %>% 
  rename("Owner Occupied Units" = owner_occupiedE) %>% 
  rename("Renter Occupied Units" = renter_occupiedE) %>% 
  rename("Median House Value" = median_house_valueE) %>% 
  rename("Median Year Built" = median_year_builtE) %>% 
  rename("Total Housing Units moe" = total_unitsM) %>% 
  rename("Occupied Units moe" = occupied_unitsM) %>% 
  rename("Owner Occupied Units moe" = owner_occupiedM) %>% 
  rename("Renter Occupied Units moe" = renter_occupiedM) %>% 
  rename("Median House Value moe" = median_house_valueM) %>% 
  rename("Median Year Built moe" = median_year_builtM)

# create a .csv file with housing data
housing %>% write.csv(file = "housing_data.csv")

Taxable Properties Data

## next is Taxable Property Values from Liesl ##
################################################

#link to data: https://data.iowa.gov/Local-Government-Finance/Taxable-Property-Values-in-Iowa-by-Tax-District-an/ig9g-pba5

taxable.csv <- read.csv("C:/Users/Kailyn Hogan/OneDrive - Iowa State University/Documents/GitHub/Housing/demographics/demographic analysis/Datasets/Taxable_Property_Values_in_Iowa_by_Tax_District_and_Year.csv")
# City name is stored in City.Name as all caps
# for census data, only the first letter is capitalized and city, Iowa is attached
# need to lowercase taxable_prop_values and remove city, Iowa from housing and pop

taxable.csv <- taxable.csv %>% 
  mutate(City.Name = str_to_sentence(City.Name)) %>% 
  rename(NAME = City.Name) #str_to_sentence() uses regular sentence formatting where the first letter is capitalized
                              # could have also used str_to_titletext()

# aggregated the column based on City.Name
# function = sum to get the sum of all values
# na.rm = TRUE, means the NAs get ignored
aggregated_tax <- taxable.csv %>% 
  group_by(Assessment.Year,NAME) %>% 
  summarize(Residential = sum(Residential),
            Commercial = sum(Commercial),
            Industrial = sum(Industrial),
            Ag.Land = sum(Ag.Land),
            Ag.Building = sum(Ag.Building))
  
# rename variables 
aggregated_tax <- aggregated_tax %>% 
  rename(Year = Assessment.Year)

##########################################################################################################

aggregated_tax
# holds summed values for each type of property for a city
# needs further transforming for analysis
# could change aggregation to find the median value for each property type
# could be valuable to gather data for all years available.

##########################################################################################################

Workforce Area Characteristics and Residential Area Characteristics Data

## next is RAC and WAC data from Liesl ##
#########################################

# link to data structure set: https://lehd.ces.census.gov/data/lodes/LODES8/LODESTechDoc8.0.pdf
# interested in total number of jobs and ag related industries
# C000 = total
# CSN01 = ag related jobs

# WAC = how many people WORK in the city
# RAC = how many people live in the city AND have jobs

# downloaded the data as .csv
# need to read in the .csv file with read.csv()
ia_rac.csv <- read.csv("C:/Users/Kailyn Hogan/OneDrive - Iowa State University/Documents/GitHub/Housing/demographics/demographic analysis/Datasets/ia_rac_S000_JT00_2020.csv")

# rename w_geocode to geocode so it can easily be combined with geography2 dataframe
ia_rac.csv <- ia_rac.csv %>%
  rename(geocode = h_geocode) 

# this .csv file contains the geography for the wac and rac data
# need to join to wac and rac files so we can aggregate by city
geography <- read.csv("C:/Users/Kailyn Hogan/OneDrive - Iowa State University/Documents/GitHub/Housing/demographics/demographic analysis/Datasets/ia_xwalk.csv")
# tabblk2020 = h_geocode 
#ctyname = county
#cbsaname = metropolitan area name
#stplc = FIPS state + FIPS Place
#stplcname = place name

# reduce geography data frame to just h_geocode and NAME
geography2 <- geography %>% 
  select("tabblk2020","stplc","stplcname") %>% 
  rename(geocode = tabblk2020) %>% 
  rename(GEOID = stplc) %>% 
  mutate(NAME = str_remove(stplcname, " city, IA|, IA")) %>% 
  select(-stplcname)
# remove all empty rows
geography2 <- geography2 %>% 
  filter(NAME != "")%>% 
  mutate(year = 2023)

# use geography2 to assign city NAME to ia_rac.csv
rac_data <- ia_rac.csv %>% 
  left_join(geography2, by = c("geocode"))

# aggregate by city name
# have to remove h_geocode and GEOID so they don't aggregate
rac_data <- aggregate(. ~ NAME, data = rac_data, FUN = sum, na.rm = TRUE) %>% 
  select(-c("geocode","GEOID","year","createdate")) %>% 
  mutate(year = 2023)


# downloaded the data as .csv
# need to read in the .csv file with read.csv()
ia_wac.csv <- read.csv("C:/Users/Kailyn Hogan/OneDrive - Iowa State University/Documents/GitHub/Housing/demographics/demographic analysis/Datasets/ia_wac_S000_JT00_2020.csv")

# rename w_geocode to geocode so it can easily be combined with geography2 dataframe
ia_wac.csv <- ia_wac.csv %>%
  rename(geocode = w_geocode)
# join to geography2 data frame
wac_data <- geography2 %>% 
  left_join(ia_wac.csv, by = "geocode")
# remove all empty rows
wac_data <- wac_data %>% 
  filter(NAME != "")
# aggregate by city name
wac_data <- aggregate(. ~ NAME, data = wac_data[, !(names(wac_data) %in% c("geocode", "GEOID","createdate"))], FUN = sum, na.rm = TRUE) %>% 
  mutate(year = 2023)


# rename variables
rac_data <- rac_data %>% 
  rename("Total Number of Jobs" = C000) %>% 
  rename("Number of Jobs in NAICS sector 11 (Agriculture, Forestry, Fishing and Hunting)" = CNS01) %>% 
  rename("Number of Jobs in NAICS sector 21 (Mining, Quarrying, and Oil and Gas Extraction)" = CNS02) %>% 
  rename("Number of Jobs in NAICS sector 22 (Utilities)" = CNS03) %>% 
  rename("Number of Jobs in NAICS sector 23 (Construction)" = CNS04) %>% 
  rename("Number of Jobs in NAICS sector 32-33 (Manufacturing)" = CNS05) %>% 
  rename("Number of Jobs in NAICS sector 42 (Wholesale Trade)" = CNS06) %>% 
  rename("Number of Jobs in NAICS sector 44-45 (Retail Trade)" = CNS07) %>% 
  rename("Number of Jobs in NAICS sector 48-49 (Transportation and Warehousing)" = CNS08) %>% 
  rename("Number of Jobs in NAICS sector 51 (Information)" = CNS09) %>% 
  rename("Number of Jobs in NAICS sector 52 (Finance)" = CNS10) %>% 
  rename("Number of Jobs in NAICS sector 53 (Reale Estate adn Rental and Leasing)" = CNS11) %>% 
  rename("Number of Jobs in NAICS sector 54 (Professional, Scientific, and Technical Services)" = CNS12) %>% 
  rename("Number of Jobs in NAICS sector 55 (Management of Comapanies and Enterprises)" = CNS13) %>% 
  rename("Number of Jobs in NAICS sector 56 (Administrative and Support and Waste Management and Remediation Services)" = CNS14) %>% 
  rename("Number of Jobs in NAICS sector 61 (Educational Services)" = CNS15) %>% 
  rename("Number of Jobs in NAICS sector 62 (Health Care and Social Assistance)" = CNS16) %>% 
  rename("Number of Jobs in NAICS sector 71 (Arts, Entertainment, and Recreation)" = CNS17) %>% 
  rename("Number of Jobs in NAICS sector 72 (Accomodation and Food Service)" = CNS18) %>% 
  rename("Number of Jobs in NAICS sector 81 (Other Serivces, except Public Administratoin)" = CNS19) %>% 
  rename("Number of Jobs in NAICS sector 92 (Public Administration)" = CNS20)
  
wac_data <- wac_data %>% 
  rename("Total Number of Jobs" = C000) %>% 
  rename("Number of Jobs in NAICS sector 11 (Agriculture, Forestry, Fishing and Hunting)" = CNS01) %>% 
  rename("Number of Jobs in NAICS sector 21 (Mining, Quarrying, and Oil and Gas Extraction)" = CNS02) %>% 
  rename("Number of Jobs in NAICS sector 22 (Utilities)" = CNS03) %>% 
  rename("Number of Jobs in NAICS sector 23 (Construction)" = CNS04) %>% 
  rename("Number of Jobs in NAICS sector 32-33 (Manufacturing)" = CNS05) %>% 
  rename("Number of Jobs in NAICS sector 42 (Wholesale Trade)" = CNS06) %>% 
  rename("Number of Jobs in NAICS sector 44-45 (Retail Trade)" = CNS07) %>% 
  rename("Number of Jobs in NAICS sector 48-49 (Transportation and Warehousing)" = CNS08) %>% 
  rename("Number of Jobs in NAICS sector 51 (Information)" = CNS09) %>% 
  rename("Number of Jobs in NAICS sector 52 (Finance)" = CNS10) %>% 
  rename("Number of Jobs in NAICS sector 53 (Reale Estate adn Rental and Leasing)" = CNS11) %>% 
  rename("Number of Jobs in NAICS sector 54 (Professional, Scientific, and Technical Services)" = CNS12) %>% 
  rename("Number of Jobs in NAICS sector 55 (Management of Comapanies and Enterprises)" = CNS13) %>% 
  rename("Number of Jobs in NAICS sector 56 (Administrative and Support and Waste Management and Remediation Services)" = CNS14) %>% 
  rename("Number of Jobs in NAICS sector 61 (Educational Services)" = CNS15) %>% 
  rename("Number of Jobs in NAICS sector 62 (Health Care and Social Assistance)" = CNS16) %>% 
  rename("Number of Jobs in NAICS sector 71 (Arts, Entertainment, and Recreation)" = CNS17) %>% 
  rename("Number of Jobs in NAICS sector 72 (Accomodation and Food Service)" = CNS18) %>% 
  rename("Number of Jobs in NAICS sector 81 (Other Serivces, except Public Administratoin)" = CNS19) %>% 
  rename("Number of Jobs in NAICS sector 92 (Public Administration)" = CNS20)

  
##########################################################################################################

# wac and rac data need futher transforming for analysis
# should change data into percents to standardize the data and account for different population sizes

# example of transformation below
# used earlier when data was held in data frame differently

# rac c000 = total employed residents
# rac cns01 = total residents employed in ag
# wac c000 = total jobs in city
# wac cns01 = total ag jobs in city
#wac_rac <- wac_rac %>% 
#  rename(total_jobs = C000.x) %>% 
#  rename(workforce_size = C000.y) %>% 
#  rename(workforce_ag = CNS01.y) %>% 
#  rename(ag_jobs = CNS01.x)

## prc_workforce locally employed = wac c000.x / rac c000
## prc_workforce in ag = rac cns01.y / rac c000.y
## prc_jobs in ag = wac cns01.x / wac c000.x
#wac_rac <- wac_rac %>% 
#  mutate(prc_local = total_jobs/workforce_size) %>% 
#  mutate(prc_wrkf_ag = workforce_ag / workforce_size) %>% 
#  mutate(prc_ag_jobs = ag_jobs / total_jobs)

## prc_population in workforce = rac c000.y / pop
# left_joined to iowa data frame
# dividing by pop20 because wac and rac data is from 2020
#iowa <- iowa %>% 
#  mutate(prc_pop_in_wrkf = workforce_size / pop20) %>% 
#  mutate(prc_pop_employed = total_jobs / pop20)

############################################################################################################

# create a new .csv file for both wac and rac data
rac_data %>% write.csv(file = "rac_data.csv")
wac_data %>% write.csv(file = "wac_data.csv")

Employment Data

## Unemployment ##
##################       

# unemployment data is separated by Age and Sex in API Codes
# pull first for male employment
employment_male <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variable = c("Total Labor Force" = "B23001_001",
                 "Total Labor Force, Male" = "B23001_002",
                 "16 to 19" = "B23001_003",
                 "16 to 19, In Armed Forces" = "B23001_005",
                 "16 to 19, Employed" = "B23001_007",
                 "16 to 19, Unemployed" = "B23001_008",
                 "20 to 21" = "B23001_010",
                 "20 to 21, In Armed Forces" = "B23001_012",
                 "20 to 21, Employed" = "B23001_014",
                 "20 to 21, Unemployed" = "B23001_015",
                 "22 to 24" = "B23001_017",
                 "22 to 24, In Armed Forces" = "B23001_019",
                 "22 to 24, Employed" = "B23001_021",
                 "22 to 24, Unemployed" = "B23001_022",
                 "25 to 29" = "B23001_024",
                 "25 to 29, In Armed Forces" = "B23001_026",
                 "25 to 29, Employed" = "B23001_028",
                 "25 to 29, Unemployed" = "B23001_029",
                 "30 to 34" = "B23001_031",
                 "30 to 34, In Armed Forces" = "B23001_033",
                 "30 to 34, Employed" = "B23001_035",
                 "30 to 34, Unemployed" = "B23001_036",
                 "35 to 44" = "B23001_038",
                 "35 to 44, In Armed Forces" = "B23001_040",
                 "35 to 44, Employed" = "B23001_042",
                 "35 to 44, Unemployed" = "B23001_043",
                 "45 to 54" = "B23001_045",
                 "45 to 54, In Armed Forces" = "B23001_047",
                 "45 to 54, Employed" = "B23001_049",
                 "45 to 54, Unemployed" = "B23001_050",
                 "55 to 59" = "B23001_052",
                 "55 to 59, In Armed Forces" = "B23001_054",
                 "55 to 59, Employed" = "B23001_056",
                 "55 to 59, Unemployed" = "B23001_057",
                 "60 to 61" = "B23001_059",
                 "60 to 61, In Armed Forces" = "B23001_061",
                 "60 to 61, Employed" = "B23001_063",
                 "60 to 61, Unemployed" = "B23001_064",
                 "62 to 64" = "B23001_066",
                 "62 to 64, In Armed Forces" = "B23001_068",
                 "62 to 64, Employed" = "B23001_070",
                 "62 to 64, Unemployed" = "B23001_071",
                 "65 to 69" = "B23001_073",
                 "65 to 69, Employed" = "B23001_075",
                 "65 to 69, Unemployed" = "B23001_076",
                 "70 to 74" = "B23001_078",
                 "70 to 74, Employed" = "B23001_080",
                 "70 to 74, Unemployed" = "B23001_081",
                 "75 and Over" = "B23001_083",
                 "75 and Over, Employed" = "B23001_085",
                 "75 and Over, Unemployed" = "B23001_086"
    ),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(gender = "Male")

employment_male <- employment_male %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa| , Iowa"))

# now for female employment
employment_female <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variable = c("Total Labor Force" = "B23001_001",
                 "Total Labor Force, Female" = "B23001_088",
                 "16 to 19" = "B23001_089",
                 "16 to 19, In Armed Forces" = "B23001_091",
                 "16 to 19, Employed" = "B23001_093",
                 "16 to 19, Unemployed" = "B23001_094",
                 "20 to 21" = "B23001_096",
                 "20 to 21, In Armed Forces" = "B23001_098",
                 "20 to 21, Employed" = "B23001_100",
                 "20 to 21, Unemployed" = "B23001_101",
                 "22 to 24" = "B23001_103",
                 "22 to 24, In Armed Forces" = "B23001_105",
                 "22 to 24, Employed" = "B23001_107",
                 "22 to 24, Unemployed" = "B23001_108",
                 "25 to 29" = "B23001_110",
                 "25 to 29, In Armed Forces" = "B23001_112",
                 "25 to 29, Employed" = "B23001_114",
                 "25 to 29, Unemployed" = "B23001_115",
                 "30 to 34" = "B23001_117",
                 "30 to 34, In Armed Forces" = "B23001_119",
                 "30 to 34, Employed" = "B23001_121",
                 "30 to 34, Unemployed" = "B23001_122",
                 "35 to 44" = "B23001_124",
                 "35 to 44, In Armed Forces" = "B23001_126",
                 "35 to 44, Employed" = "B23001_128",
                 "35 to 44, Unemployed" = "B23001_129",
                 "45 to 54" = "B23001_131",
                 "45 to 54, In Armed Forces" = "B23001_133",
                 "45 to 54, Employed" = "B23001_135",
                 "45 to 54, Unemployed" = "B23001_136",
                 "55 to 59" = "B23001_138",
                 "55 to 59, In Armed Forces" = "B23001_140",
                 "55 to 59, Employed" = "B23001_142",
                 "55 to 59, Unemployed" = "B23001_143",
                 "60 to 61" = "B23001_145",
                 "60 to 61, In Armed Forces" = "B23001_147",
                 "60 to 61, Employed" = "B23001_149",
                 "60 to 61, Unemployed" = "B23001_150",
                 "62 to 64" = "B23001_152",
                 "62 to 64, In Armed Forces" = "B23001_154",
                 "62 to 64, Employed" = "B23001_156",
                 "62 to 64, Unemployed" = "B23001_157",
                 "65 to 69" = "B23001_159",
                 "65 to 69, Employed" = "B23001_161",
                 "65 to 69, Unemployed" = "B23001_162",
                 "70 to 74" = "B23001_164",
                 "70 to 74, Employed" = "B23001_166",
                 "70 to 74, Unemployed" = "B23001_167",
                 "75 and Over" = "B23001_169",
                 "75 and Over, Employed" = "B23001_171",
                 "75 and Over, Unemployed" = "B23001_172"
    ),
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(gender = "Female")

employment_female <- employment_female %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa| , Iowa"))

##########################################################################################################

employment_female
employment_male
# need to combine these data frames and do further transformations
# should change to percents to standardize and account for different population sizes

##########################################################################################################

Commuting Data

## Commuting ##
###############

# pct population traveling outside of city for work
travel <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variable = c("total" = "B08008_001",
                 "travel" = "B08008_004"), 
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(prc_travel = travelE / totalE,
         travel_moe = (moe_ratio(travelE, totalE, travelM, totalM))) %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))


# rename variables 
travel <- travel %>% 
  rename(Year = year) %>% 
  rename("Total Workforce" = totalE) %>% 
  rename("Workforce Commuting" = travelE) %>% 
  rename("Percent Workforce Commuting" = prc_travel) %>% 
  rename("moe" = travel_moe) %>% 
  rename("Total Workforce moe" = totalM) %>% 
  rename("Workforce Commuting moe" = travelM)

# create a .csv file for travel data
travel %>% write.csv(file = "commuting_data.csv")

Income Data

## Income ##
############

# median household income = B19013_001

# pct population traveling outside of city for work
income <- map_dfr(years, ~{
  get_acs(
    geography = "place",
    variable = c("median_income"="B19013_001"), 
    state = "IA",
    year = .x,
    survey = "acs5",
    output = "wide"
  )
}, .id = "year") %>% 
  mutate(NAME = str_remove(NAME, " city, Iowa|, Iowa"))


# figure out how income is changing
# calculate percent income change
income <- income %>% 
  group_by(NAME) %>% 
  mutate(prc_change = (median_incomeE - lag(median_incomeE)) / lag(median_incomeE)) %>%
  mutate(income_change_label = ifelse(prc_change > 0, "Positive", "Negative"))

# rename variables 
income <- income %>% 
  rename(Year = year) %>% 
  rename("Median Income" = median_incomeE) %>% 
  rename("Percent Median Income Change" = prc_change) %>% 
  rename(Label = income_change_label) %>% 
  rename("Median Income moe" = median_incomeM) 

# create a .csv file for income data
income %>% write.csv(file = "income_data.csv")