ClimateImpactLab · JMGilbert · Jul 10, 2023 · Jul 14, 2023
diff --git a/2_analysis/employment_shares/45_deg_plot_data.do b/2_analysis/employment_shares/45_deg_plot_data.do
@@ -24,7 +24,7 @@ use "`out'/riskshare_reg_data.dta", clear
 
 		loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[temp_poly1]*temp_poly1 + _b[temp_poly2]*temp_poly2 + _b[temp_poly3]*temp_poly3 + _b[temp_poly4]*temp_poly4"
 
-		predictnl yhat_lrtk = `temp_cmd', se(se_lrtk) ci(lowerci_lrtk upperci_lrtk)
+		predictnl yhat_lrtk = predict(), se(se_lrtk) ci(lowerci_lrtk upperci_lrtk)
 
 		preserve
 
@@ -43,7 +43,7 @@ use "`out'/riskshare_reg_data.dta", clear
 
 		loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"
 
-		predictnl yhat_main = `temp_cmd', se(se_main) ci(lowerci_main upperci_main)
+		predictnl yhat_main = predict(), se(se_main) ci(lowerci_main upperci_main)
 
 * merge the two together to generate data for 45 deg plots
 		preserve

diff --git a/2_analysis/employment_shares/45_deg_plot_data_varying_FEs.do b/2_analysis/employment_shares/45_deg_plot_data_varying_FEs.do
@@ -0,0 +1,94 @@
+/*
+The following code generates data to make a 45 deg plot of predicted values of yhat
+LR(T^K) vs yhat LRT^K specifications
+
+Author: Jonah Gilbert, [email protected]
+Date: 7/12/2023
+
+*/
+
+
+clear all
+set more off
+macro drop _all
+set scheme s1color
+
+loc lab "/mnt/CIL_labor"
+loc out "/home/`c(username)'/repos/labor-code-release-2020/output/employment_shares"
+loc ster "`out'/ster"
+
+use "`out'/riskshare_reg_data.dta", clear
+
+* continent fe model predictions, save for merge later
+		estimates use "`ster'/log_inc_poly4_continent_fes"
+
+		loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"
+
+		predictnl yhat_continent_fes = predict(), se(se_continent_fes) ci(lowerci_continent_fes upperci_continent_fes)
+
+		preserve
+
+		keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_continent_fes se_continent_fes lowerci_continent_fes upperci_continent_fes
+
+		tempfile continent_fes_file
+		save `continent_fes_file', replace
+		* export delimited "`out'/highriskshare_check/yhat_temp_lrtk.csv", replace 
+
+		restore
+
+
+* continent, year model predictions, save for merge later
+		estimates use "`ster'/log_inc_poly4_continent_year_fes"
+
+		loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_i.ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"
+
+		predictnl yhat_continent_year_fes = predict(), se(se_continent_year_fes) ci(lowerci_continent_year_fes upperci_continent_year_fes)
+
+		preserve
+
+		keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_continent_year_fes se_continent_year_fes lowerci_continent_year_fes upperci_continent_year_fes
+
+		tempfile continent_year_fes_file
+		save `continent_year_fes_file', replace
+		* export delimited "`out'/highriskshare_check/yhat_temp_lrtk.csv", replace 
+
+		restore
+
+* year model predictions, save for merge later
+		estimates use "`ster'/log_inc_poly4_year_fes"
+
+		loc temp_cmd "_b[_cons] +_b[year]*i.year + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"
+
+		predictnl yhat_year_fes = predict(), se(se_year_fes) ci(lowerci_year_fes upperci_year_fes)
+
+		preserve
+
+		keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_year_fes se_year_fes lowerci_year_fes upperci_year_fes
+
+		tempfile year_fes_file
+		save `year_fes_file', replace
+		* export delimited "`out'/highriskshare_check/yhat_temp_lrtk.csv", replace 
+
+		restore
+
+* main model predictions, change the _b coefficients and temp poly varnames
+
+		estimates use "`ster'/log_inc_poly4"
+
+		loc temp_cmd "_b[_cons] + _b[log_inc]*log_inc + _b[tavg_1_pop_ma_30yr]*tavg_1_pop_ma_30yr + _b[tavg_2_pop_ma_30yr]*tavg_2_pop_ma_30yr + _b[tavg_3_pop_ma_30yr]*tavg_3_pop_ma_30yr + _b[tavg_4_pop_ma_30yr]*tavg_4_pop_ma_30yr"
+
+		predictnl yhat_main = predict(), se(se_main) ci(lowerci_main upperci_main)
+
+* merge the two together to generate data for 45 deg plots
+		preserve
+
+		keep geolev1 year log_inc tavg_1_pop_ma_30yr yhat_main se_main lowerci_main upperci_main
+
+		merge 1:1 geolev1 year using `continent_fes_file', nogen	
+		merge 1:1 geolev1 year using `continent_year_fes_file', nogen	
+		merge 1:1 geolev1 year using `year_fes_file'
+		drop _m
+
+		export delimited "`out'/yhat_values/45_deg_plot_continent_fes.csv", replace 
+
+		restore
diff --git a/2_analysis/employment_shares/45_deg_plots.R b/2_analysis/employment_shares/45_deg_plots.R
@@ -1,7 +1,7 @@
 rm(list=ls())
 library(tidyverse)
 library(glue)
-library(cilpath.r)
+#library(cilpath.r)
 library(lfe)
 library(sf)
 library(rgdal)
@@ -11,9 +11,9 @@ library(gridExtra)
 library(numbers)
 library(cowplot)
 
-cilpath.r:::cilpath()
+#cilpath.r:::cilpath()
 
-out = glue("/home/nsharma/repos/labor-code-release-2020/output/employment_shares") # change username here
+out = glue("/home/jonahmgilbert/repos/labor-code-release-2020/output/employment_shares") # change username here
 
 plot = read_csv(glue("{out}/yhat_values/45_deg_plot.csv")) %>% 
       rename(temp = tavg_1_pop_ma_30yr) 
@@ -35,7 +35,7 @@ plot2 <- ggplot() +
   xlab("predicted riskshare values- LR(T^K)") + ylab("predicted riskshare values- LRT^K") + 
   ylim(0,1) + xlim(0,1)
 
-# combine line graph and histogram togther
+# combine line graph and histogram together
 theme_set(theme_minimal())
 
 p <- plot_grid(

diff --git a/2_analysis/employment_shares/45_deg_plots_continent_fes.R b/2_analysis/employment_shares/45_deg_plots_continent_fes.R
@@ -0,0 +1,62 @@
+rm(list=ls())
+library(tidyverse)
+library(glue)
+#library(cilpath.r)
+library(lfe)
+library(sf)
+library(rgdal)
+library(testthat)
+library(grid)
+library(gridExtra)
+library(numbers)
+library(cowplot)
+
+#cilpath.r:::cilpath()
+
+out = glue("/home/jonahmgilbert/repos/labor-code-release-2020/output/employment_shares") # change username here
+
+plot = read_csv(glue("{out}/yhat_values/45_deg_plot_continent_fes.csv")) %>% 
+      rename(temp = tavg_1_pop_ma_30yr) 
+
+plot = plot %>% filter(complete.cases(plot)) # writing filter separately as it won't work in loading data above
+
+# temperature prediction with global min of -27˚C and global max of 43˚C and no continent fixed effect
+plot1 <- ggplot() + 
+  geom_point(data = plot, aes(x= yhat_main, y= yhat_continent_fes, 
+                                colour = log_inc)) + 
+  geom_line(data = plot, aes(x = yhat_continent_fes, y = yhat_continent_fes), colour = "red") + labs(colour = "Log GDP PC") + 
+  xlab("predicted riskshare values- main model") + ylab("continent FEs") + 
+  ylim(0,1) + xlim(0,1) + theme(axis.title.x = element_blank())
+
+plot2 <- ggplot() + 
+  geom_point(data = plot, aes(x= yhat_main, y= yhat_continent_year_fes, 
+                              colour = log_inc)) + 
+  geom_line(data = plot, aes(x = yhat_continent_fes, y = yhat_continent_fes), colour = "red") + labs(colour = "Log GDP PC") + 
+  xlab("predicted riskshare values- main model") + ylab("continent and year FEs") + 
+  ylim(0,1) + xlim(0,1) + theme(axis.title.x = element_blank())
+
+plot3 <- ggplot() + 
+  geom_point(data = plot, aes(x= yhat_main, y= yhat_year_fes, 
+                              colour = log_inc)) + 
+  geom_line(data = plot, aes(x = yhat_continent_fes, y = yhat_continent_fes), colour = "red") + labs(colour = "Log GDP PC") + 
+  xlab("predicted riskshare values- main model") + ylab("year FEs") + 
+  ylim(0,1) + xlim(0,1)
+
+# combine line graph and histogram together
+theme_set(theme_minimal())
+
+p <- plot_grid(
+  plot_grid(
+    plot1 + theme(legend.position = "none")
+    , plot2 + theme(legend.position = "none")
+    , plot3 + theme(legend.position = "none")
+    , ncol = 1
+    , rel_heights = c(1,1)
+    , align = "hv")
+  , plot_grid(
+    get_legend(plot3)
+    , ncol =1)
+  , rel_widths = c(4,1)
+  )
+
+ggsave(glue("{out}/plot/predictions/45_deg_shaded_varying_fes.pdf"), plot = p, width = 8, height = 16.5)
diff --git a/2_analysis/employment_shares/riskshare_overlaid_scatter_yhat.R b/2_analysis/employment_shares/riskshare_overlaid_scatter_yhat.R
@@ -7,20 +7,21 @@
 rm(list=ls())
 library(tidyverse)
 library(glue)
-library(cilpath.r)
+#library(cilpath.r)
 library(lfe)
 library(sf)
 library(rgdal)
 library(testthat)
 library(grid)
 library(gridExtra)
 library(numbers)
+library(stargazer)
 
-cilpath.r:::cilpath()
-source("~/repos/post-projection-tools/mapping/imgcat.R") #this redefines the way ggplot plots. 
+#cilpath.r:::cilpath()
+#source("~/repos/post-projection-tools/mapping/imgcat.R") #this redefines the way ggplot plots. 
 
 lab = glue("/mnt/CIL_labor")
-out = glue("/home/nsharma/repos/labor-code-release-2020/output/employment_shares") # change username here
+out = glue("/home/jonahmgilbert/repos/labor-code-release-2020/output/employment_shares") # change username here
 
 # load data and clean it up a little
 dat = read_csv(glue("{lab}/1_preparation/employment_shares/data/emp_inc_clim_merged.csv")) %>%
@@ -55,23 +56,45 @@ dat_ss <- dat_ss[complete.cases(dat_ss),]
 
 # load data for graph
 yhat_inc = read_csv(glue("{out}/yhat_values/log_inc_poly4_IncPred.csv"))
-yhat_temp = read_csv(glue("{out}/yhat_values/log_inc_poly4_TempPred.csv")) 
+yhat_temp = read_csv(glue("{out}/yhat_values/log_inc_poly4_TempPred.csv"))
 yhat_temp_poly4 = read_csv(glue("{out}/yhat_values/log_inc_poly4_TempPredMinMax.csv")) #change input filename here for 50C max temperature
+yhat_inc$Model = "no Continent FE"
+yhat_temp$Model = "no Continent FE"
+yhat_temp_poly4$Model = "no Continent FE"
+
 yhat_inc_continent_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_fes_IncPred.csv"))
 yhat_temp_continent_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_fes_TempPred.csv"))
+yhat_inc_continent_fe$Model = "Continent FE"
+yhat_temp_continent_fe$Model = "Continent FE"
+
+yhat_inc_continent_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_year_fes_IncPred.csv"))
+yhat_temp_continent_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_continent_year_fes_TempPredMinMax.csv"))
+yhat_inc_continent_year_fe$Model = "Continent, Year FE"
+yhat_temp_continent_year_fe$Model ="Continent, Year FE"
+
+yhat_inc_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_year_fes_IncPred.csv"))
+yhat_temp_year_fe = read_csv(glue("{out}/yhat_values/log_inc_poly4_year_fes_TempPredMinMax.csv"))
+yhat_inc_year_fe$Model = "Year FE"
+yhat_temp_year_fe$Model = "Year FE"
 
 # graphs
 
 # temperature prediction with in-sample data limits
 temp_pred <- ggplot() + 
-  geom_point(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y= ind_highrisk_share)) + 
+  geom_point(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y= ind_highrisk_share), color = "lightblue") + 
   geom_ribbon(data=yhat_temp,aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi), 
               inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
   geom_line(data = yhat_temp, aes(x = temp, y = yhat, linetype= "no Continent FE")) + labs(colour = "Log GDP PC") + 
   geom_ribbon(data=yhat_temp_continent_fe,aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi), 
               inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
-  geom_line(data = yhat_temp_continent_fe, aes(x = temp, y = yhat, linetype= "Continent FE")) + 
-  scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2)) + labs(colour = "LRT") + 
+  geom_line(data = yhat_temp_continent_fe, aes(x = temp, y = yhat, linetype= "Continent FE")) +
+  geom_ribbon(data=yhat_temp_continent_year_fe%>% filter(temp <= 29 & temp >=7),aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi), 
+              inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
+  geom_line(data = yhat_temp_continent_year_fe%>% filter(temp <= 29 & temp >=7), aes(x = temp, y = yhat, linetype= "Continent, Year FE")) + 
+  geom_ribbon(data=yhat_temp_year_fe%>% filter(temp <= 29 & temp >=7),aes(x = temp, ymin = lowerci_hi, ymax = upperci_hi), 
+              inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
+  geom_line(data = yhat_temp_year_fe %>% filter(temp <= 29 & temp >=7), aes(x = temp, y = yhat, linetype= "Year FE")) + 
+  scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2, "Continent, Year FE" = 3, "Year FE" = 4)) + labs(colour = "LRT") + 
   xlab("LRT - in sample") + ylab("predicted riskshare values- LR(T^K)")
 
 temp_pred
@@ -93,19 +116,57 @@ ggsave(glue("{out}/plot/predictions/temp_pred_scatter_min_max.pdf"), plot = temp
 # income predictions with in-sample min and max
 inc_pred <- ggplot()  + 
   geom_point(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y= ind_highrisk_share)) + 
+  geom_density(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y = after_stat(count)/2000 - 0.25)) +
   geom_ribbon(data=yhat_inc,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi), 
               inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
   geom_line(data = yhat_inc, aes(x= inc_log, y= yhat, linetype= "no Continent FE")) + labs(colour = "LRT") + 
   geom_ribbon(data=yhat_inc_continent_fe,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi), 
               inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
   geom_line(data = yhat_inc_continent_fe, aes(x= inc_log, y= yhat, linetype= "Continent FE")) + 
-  scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2)) + labs(colour = "LRT") + 
-  xlab("Log GDP PC") + ylab("predicted riskshare values")
+  geom_ribbon(data=yhat_inc_continent_year_fe,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi), 
+              inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
+  geom_line(data = yhat_inc_continent_year_fe, aes(x= inc_log, y= yhat, linetype= "Continent + Year FE")) + 
+  geom_ribbon(data=yhat_inc_year_fe,aes(x = inc_log, ymin = lowerci_hi, ymax = upperci_hi), 
+              inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
+  geom_line(data = yhat_inc_year_fe, aes(x= inc_log, y= yhat, linetype= "Year FE")) + 
+  scale_linetype_manual("Model", values = c("no Continent FE" = 1, "Continent FE" = 2, "Continent + Year FE" = 3, "Year FE" = 4)) + labs(colour = "LRT") + 
+  xlab("Log GDP PC") + ylab("predicted riskshare values") 
 
 inc_pred
 
 ggsave(glue("{out}/plot/predictions/inc_pred_continent_fe_scatter.pdf"), plot = inc_pred, width = 10, height = 7)
 
+
+# income predictions with in-sample min and max
+inc_pred_main <- ggplot()  + 
+  geom_point(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y= ind_highrisk_share + 0.25), color = 'lightblue') + 
+  geom_histogram(data = dat_ss, aes(x= log_gdppc_adm1_pwt_ds_15ma, y = ..count../sum(..count..)), fill = 'lightblue', bins = 20) +
+  geom_ribbon(data=yhat_inc,aes(x = inc_log, ymin = lowerci_hi + 0.25, ymax = upperci_hi + 0.25), 
+              inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
+  geom_line(data = yhat_inc, aes(x= inc_log, y= yhat + 0.25)) + labs(colour = "LRT") + 
+  xlab("Log GDP PC") + ylab("predicted riskshare values") + 
+  scale_y_continuous(labels = function(x) format(x-0.25), limits = c(0,1.3))
+
+inc_pred_main
+
+ggsave(glue("{out}/plot/predictions/inc_pred_main.pdf"), plot = inc_pred_main, width = 10, height = 7)
+
+
+# temperature prediction with in-sample data limits
+temp_pred_main <- ggplot() + 
+  geom_point(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y= ind_highrisk_share + 0.25), color = "lightblue") + 
+  geom_histogram(data = dat_ss, aes(x= tavg_1_pop_MA_30yr, y = ..count../sum(..count..)), fill = 'lightblue', bins = 20) +
+  geom_ribbon(data=yhat_temp_poly4 %>% filter(between(temp,0,30)),aes(x = temp, ymin = lowerci_hi + 0.25, ymax = upperci_hi + 0.25), 
+              inherit.aes = FALSE, alpha = 0.6, fill = "grey") + 
+  geom_line(data = yhat_temp_poly4 %>% filter(between(temp,0,30)), aes(x = temp, y = yhat + 0.25)) + labs(colour = "Log GDP PC")+ labs(colour = "LRT") + 
+  xlab("LRT - in sample") + ylab("predicted riskshare values- LR(T^K)")+ 
+  scale_y_continuous(labels = function(x) format(x-0.25), limits = c(0,1.3))
+
+temp_pred_main
+
+
+ggsave(glue("{out}/plot/predictions/temp_pred_main_scatter.pdf"), plot = temp_pred_main, width = 10, height = 7)
+
 # regressions for table
 
 fit1 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr + 
@@ -115,17 +176,30 @@ summary(fit1)
 
 fit2 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr + 
             tavg_3_pop_MA_30yr + tavg_4_pop_MA_30yr + 
-            log_gdppc_adm1_pwt_ds_15ma + factor(continent), data=dat)
+            log_gdppc_adm1_pwt_ds_15ma + relevel(factor(continent), ref = "Asia"), data=dat)
 summary(fit2)
 
-stargazer(fit1, fit2, title = "Risk-share Regression Results", align=TRUE, 
-          dep.var.caption = c("High risk share"), dep.var.labels = c(""), 
-          column.labels = c("without continent FE", "with continent FE"), 
-          covariate.labels = c("Temperature", "Temperature^2", "Temperature^3",
-                               "Temperature^4", "Log Income", "Americas", 
-                               "Asia", "Europe", "Constant"), 
-          add.lines = list(c("Continent fixed effects", "No", "Yes")),
-          omit.stat=c("ser","f"), no.space=TRUE)
+fit3 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr + 
+            tavg_3_pop_MA_30yr + tavg_4_pop_MA_30yr + 
+            log_gdppc_adm1_pwt_ds_15ma + relevel(factor(continent), ref = "Asia") + factor(year), data=dat)
+summary(fit3)
+
+fit4 = lm(ind_highrisk_share ~ tavg_1_pop_MA_30yr + tavg_2_pop_MA_30yr + 
+            tavg_3_pop_MA_30yr + tavg_4_pop_MA_30yr + 
+            log_gdppc_adm1_pwt_ds_15ma + factor(year), data=dat)
+summary(fit4)
+
+stargazer(fit1, fit2, fit3, fit4, title = "Risk-share Regression Results", align=TRUE, 
+          dep.var.caption = c("Share of high-risk workers"), dep.var.labels = c(""), 
+          covariate.labels = c("Long-run Avg. Daily Maximum Temperature",
+                               "Long-run Avg. Daily Maximum Temperature^2",
+                               "Long-run Avg. Daily Maximum Temperature^3",
+                               "Long-run Avg. Daily Maximum Temperature^4", "Log Income", "Africa", 
+                               "Americas", "Europe", "Constant"), 
+          add.lines = list(c("Continent fixed effects", "No", "Yes", "Yes", "No"),c("Year fixed effects", "No", "No", "Yes", "Yes")),
+          omit.stat=c("ser","f", "rsq"),
+          omit = "year",
+          no.space=TRUE)