Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add year FEs to employment share regression #2

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions 2_analysis/employment_shares/45_deg_plot_data.do
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use "`out'/riskshare_reg_data.dta", clear

loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[temp_poly1]*temp_poly1 + _b[temp_poly2]*temp_poly2 + _b[temp_poly3]*temp_poly3 + _b[temp_poly4]*temp_poly4"

predictnl yhat_lrtk = `temp_cmd', se(se_lrtk) ci(lowerci_lrtk upperci_lrtk)
predictnl yhat_lrtk = predict(), se(se_lrtk) ci(lowerci_lrtk upperci_lrtk)

preserve

Expand All @@ -43,7 +43,7 @@ use "`out'/riskshare_reg_data.dta", clear

loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"

predictnl yhat_main = `temp_cmd', se(se_main) ci(lowerci_main upperci_main)
predictnl yhat_main = predict(), se(se_main) ci(lowerci_main upperci_main)

* merge the two together to generate data for 45 deg plots
preserve
Expand Down
94 changes: 94 additions & 0 deletions 2_analysis/employment_shares/45_deg_plot_data_varying_FEs.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
The following code generates data to make a 45 deg plot of predicted values of yhat
LR(T^K) vs yhat LRT^K specifications

Author: Jonah Gilbert, [email protected]
Date: 7/12/2023

*/


clear all
set more off
macro drop _all
set scheme s1color

loc lab "/mnt/CIL_labor"
loc out "/home/`c(username)'/repos/labor-code-release-2020/output/employment_shares"
loc ster "`out'/ster"

use "`out'/riskshare_reg_data.dta", clear

* continent fe model predictions, save for merge later
estimates use "`ster'/log_inc_poly4_continent_fes"

loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"

predictnl yhat_continent_fes = predict(), se(se_continent_fes) ci(lowerci_continent_fes upperci_continent_fes)

preserve

keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_continent_fes se_continent_fes lowerci_continent_fes upperci_continent_fes

tempfile continent_fes_file
save `continent_fes_file', replace
* export delimited "`out'/highriskshare_check/yhat_temp_lrtk.csv", replace

restore


* continent, year model predictions, save for merge later
estimates use "`ster'/log_inc_poly4_continent_year_fes"

loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_i.ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"

predictnl yhat_continent_year_fes = predict(), se(se_continent_year_fes) ci(lowerci_continent_year_fes upperci_continent_year_fes)

preserve

keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_continent_year_fes se_continent_year_fes lowerci_continent_year_fes upperci_continent_year_fes

tempfile continent_year_fes_file
save `continent_year_fes_file', replace
* export delimited "`out'/highriskshare_check/yhat_temp_lrtk.csv", replace

restore

* year model predictions, save for merge later
estimates use "`ster'/log_inc_poly4_year_fes"

loc temp_cmd "_b[_cons] +_b[year]*i.year + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"

predictnl yhat_year_fes = predict(), se(se_year_fes) ci(lowerci_year_fes upperci_year_fes)

preserve

keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_year_fes se_year_fes lowerci_year_fes upperci_year_fes

tempfile year_fes_file
save `year_fes_file', replace
* export delimited "`out'/highriskshare_check/yhat_temp_lrtk.csv", replace

restore

* main model predictions, change the _b coefficients and temp poly varnames

estimates use "`ster'/log_inc_poly4"

loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"

predictnl yhat_main = predict(), se(se_main) ci(lowerci_main upperci_main)

* merge the two together to generate data for 45 deg plots
preserve

keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_main se_main lowerci_main upperci_main

merge 1:1 geolev1 year using `continent_fes_file', nogen
merge 1:1 geolev1 year using `continent_year_fes_file', nogen
merge 1:1 geolev1 year using `year_fes_file'
drop _m

export delimited "`out'/yhat_values/45_deg_plot_continent_fes.csv", replace

restore
8 changes: 4 additions & 4 deletions 2_analysis/employment_shares/45_deg_plots.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
rm(list=ls())
library(tidyverse)
library(glue)
library(cilpath.r)
#library(cilpath.r)
library(lfe)
library(sf)
library(rgdal)
Expand All @@ -11,9 +11,9 @@ library(gridExtra)
library(numbers)
library(cowplot)

cilpath.r:::cilpath()
#cilpath.r:::cilpath()

out = glue("/home/nsharma/repos/labor-code-release-2020/output/employment_shares") # change username here
out = glue("/home/jonahmgilbert/repos/labor-code-release-2020/output/employment_shares") # change username here

plot = read_csv(glue("{out}/yhat_values/45_deg_plot.csv")) %>%
rename(temp = tavg_1_pop_ma_30yr)
Expand All @@ -35,7 +35,7 @@ plot2 <- ggplot() +
xlab("predicted riskshare values- LR(T^K)") + ylab("predicted riskshare values- LRT^K") +
ylim(0,1) + xlim(0,1)

# combine line graph and histogram togther
# combine line graph and histogram together
theme_set(theme_minimal())

p <- plot_grid(
Expand Down
62 changes: 62 additions & 0 deletions 2_analysis/employment_shares/45_deg_plots_continent_fes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
rm(list=ls())
library(tidyverse)
library(glue)
#library(cilpath.r)
library(lfe)
library(sf)
library(rgdal)
library(testthat)
library(grid)
library(gridExtra)
library(numbers)
library(cowplot)

#cilpath.r:::cilpath()

out = glue("/home/jonahmgilbert/repos/labor-code-release-2020/output/employment_shares") # change username here

plot = read_csv(glue("{out}/yhat_values/45_deg_plot_continent_fes.csv")) %>%
rename(temp = tavg_1_pop_ma_30yr)

plot = plot %>% filter(complete.cases(plot)) # writing filter separately as it won't work in loading data above

# temperature prediction with global min of -27˚C and global max of 43˚C and no continent fixed effect
plot1 <- ggplot() +
geom_point(data = plot, aes(x= yhat_main, y= yhat_continent_fes,
colour = log_inc)) +
geom_line(data = plot, aes(x = yhat_continent_fes, y = yhat_continent_fes), colour = "red") + labs(colour = "Log GDP PC") +
xlab("predicted riskshare values- main model") + ylab("continent FEs") +
ylim(0,1) + xlim(0,1) + theme(axis.title.x = element_blank())

plot2 <- ggplot() +
geom_point(data = plot, aes(x= yhat_main, y= yhat_continent_year_fes,
colour = log_inc)) +
geom_line(data = plot, aes(x = yhat_continent_fes, y = yhat_continent_fes), colour = "red") + labs(colour = "Log GDP PC") +
xlab("predicted riskshare values- main model") + ylab("continent and year FEs") +
ylim(0,1) + xlim(0,1) + theme(axis.title.x = element_blank())

plot3 <- ggplot() +
geom_point(data = plot, aes(x= yhat_main, y= yhat_year_fes,
colour = log_inc)) +
geom_line(data = plot, aes(x = yhat_continent_fes, y = yhat_continent_fes), colour = "red") + labs(colour = "Log GDP PC") +
xlab("predicted riskshare values- main model") + ylab("year FEs") +
ylim(0,1) + xlim(0,1)

# combine line graph and histogram together
theme_set(theme_minimal())

p <- plot_grid(
plot_grid(
plot1 + theme(legend.position = "none")
, plot2 + theme(legend.position = "none")
, plot3 + theme(legend.position = "none")
, ncol = 1
, rel_heights = c(1,1)
, align = "hv")
, plot_grid(
get_legend(plot3)
, ncol =1)
, rel_widths = c(4,1)
)

ggsave(glue("{out}/plot/predictions/45_deg_shaded_varying_fes.pdf"), plot = p, width = 8, height = 16.5)
112 changes: 93 additions & 19 deletions 2_analysis/employment_shares/riskshare_overlaid_scatter_yhat.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,21 @@
rm(list=ls())
library(tidyverse)
library(glue)
library(cilpath.r)
#library(cilpath.r)
library(lfe)
library(sf)
library(rgdal)
library(testthat)
library(grid)
library(gridExtra)
library(numbers)
library(stargazer)

cilpath.r:::cilpath()
source("~/repos/post-projection-tools/mapping/imgcat.R") #this redefines the way ggplot plots.
#cilpath.r:::cilpath()
#source("~/repos/post-projection-tools/mapping/imgcat.R") #this redefines the way ggplot plots.

lab = glue("/mnt/CIL_labor")
out = glue("/home/nsharma/repos/labor-code-release-2020/output/employment_shares") # change username here
out = glue("/home/jonahmgilbert/repos/labor-code-release-2020/output/employment_shares") # change username here

# load data and clean it up a little
dat = read_csv(glue("{lab}/1_preparation/employment_shares/data/emp_inc_clim_merged.csv")) %>%
Expand Down Expand Up @@ -55,23 +56,45 @@ dat_ss <- dat_ss[complete.cases(dat_ss),]

# load data for graph
yhat_inc = read_csv(glue("{out}/yhat_values/log_inc_poly4_IncPred.csv"))
yhat_temp = read_csv(glue("{out}/yhat_values/log_inc_poly4_TempPred.csv"))
yhat_temp = read_csv(glue("{out}/yhat_values/log_inc_poly4_TempPred.csv"))
yhat_temp_poly4 = read_csv(glue("{out}/yhat_values/log_inc_poly4_TempPredMinMax.csv")) #change input filename here for 50C max temperature
yhat_inc$Model = "no Continent FE"
yhat_temp$Model = "no Continent FE"
yhat_temp_poly4$Model = "no Continent FE"

yhat_inc_continent_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_fes_IncPred.csv"))
yhat_temp_continent_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_fes_TempPred.csv"))
yhat_inc_continent_fe$Model = "Continent FE"
yhat_temp_continent_fe$Model = "Continent FE"

yhat_inc_continent_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_year_fes_IncPred.csv"))
yhat_temp_continent_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_year_fes_TempPredMinMax.csv"))
yhat_inc_continent_year_fe$Model = "Continent, Year FE"
yhat_temp_continent_year_fe$Model ="Continent, Year FE"

yhat_inc_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_year_fes_IncPred.csv"))
yhat_temp_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_year_fes_TempPredMinMax.csv"))
yhat_inc_year_fe$Model = "Year FE"
yhat_temp_year_fe$Model = "Year FE"

# graphs

# temperature prediction with in-sample data limits
temp_pred <- ggplot() +
geom_point(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y= ind_highrisk_share)) +
geom_point(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y= ind_highrisk_share), color = "lightblue") +
geom_ribbon(data=yhat_temp,aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_temp, aes(x = temp, y = yhat, linetype= "no Continent FE")) + labs(colour = "Log GDP PC") +
geom_ribbon(data=yhat_temp_continent_fe,aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_temp_continent_fe, aes(x = temp, y = yhat, linetype= "Continent FE")) +
scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2)) + labs(colour = "LRT") +
geom_line(data = yhat_temp_continent_fe, aes(x = temp, y = yhat, linetype= "Continent FE")) +
geom_ribbon(data=yhat_temp_continent_year_fe%>% filter(temp <= 29 & temp >=7),aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_temp_continent_year_fe%>% filter(temp <= 29 & temp >=7), aes(x = temp, y = yhat, linetype= "Continent, Year FE")) +
geom_ribbon(data=yhat_temp_year_fe%>% filter(temp <= 29 & temp >=7),aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_temp_year_fe %>% filter(temp <= 29 & temp >=7), aes(x = temp, y = yhat, linetype= "Year FE")) +
scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2, "Continent, Year FE" = 3, "Year FE" = 4)) + labs(colour = "LRT") +
xlab("LRT - in sample") + ylab("predicted riskshare values- LR(T^K)")

temp_pred
Expand All @@ -93,19 +116,57 @@ ggsave(glue("{out}/plot/predictions/temp_pred_scatter_min_max.pdf"), plot = temp
# income predictions with in-sample min and max
inc_pred <- ggplot() +
geom_point(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y= ind_highrisk_share)) +
geom_density(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y = after_stat(count)/2000 - 0.25)) +
geom_ribbon(data=yhat_inc,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_inc, aes(x= inc_log, y= yhat, linetype= "no Continent FE")) + labs(colour = "LRT") +
geom_ribbon(data=yhat_inc_continent_fe,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_inc_continent_fe, aes(x= inc_log, y= yhat, linetype= "Continent FE")) +
scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2)) + labs(colour = "LRT") +
xlab("Log GDP PC") + ylab("predicted riskshare values")
geom_ribbon(data=yhat_inc_continent_year_fe,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_inc_continent_year_fe, aes(x= inc_log, y= yhat, linetype= "Continent + Year FE")) +
geom_ribbon(data=yhat_inc_year_fe,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_inc_year_fe, aes(x= inc_log, y= yhat, linetype= "Year FE")) +
scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2, "Continent + Year FE" = 3, "Year FE" = 4)) + labs(colour = "LRT") +
xlab("Log GDP PC") + ylab("predicted riskshare values")

inc_pred

ggsave(glue("{out}/plot/predictions/inc_pred_continent_fe_scatter.pdf"), plot = inc_pred, width = 10, height = 7)


# income predictions with in-sample min and max
inc_pred_main <- ggplot() +
geom_point(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y= ind_highrisk_share + 0.25), color = 'lightblue') +
geom_histogram(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y = ..count../sum(..count..)), fill = 'lightblue', bins = 20) +
geom_ribbon(data=yhat_inc,aes(x = inc_log, ymin = lowerci_hi + 0.25, ymax = upperci_hi + 0.25),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_inc, aes(x= inc_log, y= yhat + 0.25)) + labs(colour = "LRT") +
xlab("Log GDP PC") + ylab("predicted riskshare values") +
scale_y_continuous(labels = function(x) format(x-0.25), limits = c(0,1.3))

inc_pred_main

ggsave(glue("{out}/plot/predictions/inc_pred_main.pdf"), plot = inc_pred_main, width = 10, height = 7)


# temperature prediction with in-sample data limits
temp_pred_main <- ggplot() +
geom_point(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y= ind_highrisk_share + 0.25), color = "lightblue") +
geom_histogram(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y = ..count../sum(..count..)), fill = 'lightblue', bins = 20) +
geom_ribbon(data=yhat_temp_poly4 %>% filter(between(temp,0,30)),aes(x = temp, ymin = lowerci_hi + 0.25, ymax = upperci_hi + 0.25),
inherit.aes = FALSE, alpha = 0.6, fill = "grey") +
geom_line(data = yhat_temp_poly4 %>% filter(between(temp,0,30)), aes(x = temp, y = yhat + 0.25)) + labs(colour = "Log GDP PC")+ labs(colour = "LRT") +
xlab("LRT - in sample") + ylab("predicted riskshare values- LR(T^K)")+
scale_y_continuous(labels = function(x) format(x-0.25), limits = c(0,1.3))

temp_pred_main


ggsave(glue("{out}/plot/predictions/temp_pred_main_scatter.pdf"), plot = temp_pred_main, width = 10, height = 7)

# regressions for table

fit1 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr +
Expand All @@ -115,17 +176,30 @@ summary(fit1)

fit2 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr +
tavg_3_pop_MA_30yr + tavg_4_pop_MA_30yr +
log_gdppc_adm1_pwt_ds_15ma + factor(continent), data=dat)
log_gdppc_adm1_pwt_ds_15ma + relevel(factor(continent), ref = "Asia"), data=dat)
summary(fit2)

stargazer(fit1, fit2, title = "Risk-share Regression Results", align=TRUE,
dep.var.caption = c("High risk share"), dep.var.labels = c(""),
column.labels = c("without continent FE", "with continent FE"),
covariate.labels = c("Temperature", "Temperature^2", "Temperature^3",
"Temperature^4", "Log Income", "Americas",
"Asia", "Europe", "Constant"),
add.lines = list(c("Continent fixed effects", "No", "Yes")),
omit.stat=c("ser","f"), no.space=TRUE)
fit3 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr +
tavg_3_pop_MA_30yr + tavg_4_pop_MA_30yr +
log_gdppc_adm1_pwt_ds_15ma + relevel(factor(continent), ref = "Asia") + factor(year), data=dat)
summary(fit3)

fit4 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr +
tavg_3_pop_MA_30yr + tavg_4_pop_MA_30yr +
log_gdppc_adm1_pwt_ds_15ma + factor(year), data=dat)
summary(fit4)

stargazer(fit1, fit2, fit3, fit4, title = "Risk-share Regression Results", align=TRUE,
dep.var.caption = c("Share of high-risk workers"), dep.var.labels = c(""),
covariate.labels = c("Long-run Avg. Daily Maximum Temperature",
"Long-run Avg. Daily Maximum Temperature^2",
"Long-run Avg. Daily Maximum Temperature^3",
"Long-run Avg. Daily Maximum Temperature^4", "Log Income", "Africa",
"Americas", "Europe", "Constant"),
add.lines = list(c("Continent fixed effects", "No", "Yes", "Yes", "No"),c("Year fixed effects", "No", "No", "Yes", "Yes")),
omit.stat=c("ser","f", "rsq"),
omit = "year",
no.space=TRUE)



Expand Down
Loading