Skip to content

Commit 2400dfa

Browse files
committed
add analysis notebooks
rmds and fully rendered versions
1 parent bd646e3 commit 2400dfa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+11483
-0
lines changed

analysis-notebooks/01-demography.Rmd

+219
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
---
2+
title: "Survey demography"
3+
author: "Thomas Klebel"
4+
date: "`r format(Sys.time(), '%d %B, %Y')`"
5+
output:
6+
html_document:
7+
keep_md: true
8+
---
9+
10+
```{r setup, include=FALSE}
11+
library(tidyverse)
12+
library(ggchicklet)
13+
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, dpi = 300)
14+
15+
# Registering fonts with the Windows graphics device only works on Windows;
# skip it elsewhere so the notebook still knits on Linux/macOS.
if (.Platform$OS.type == "windows") {
  extrafont::loadfonts(device = "win")
}
16+
17+
theme_set(hrbrthemes::theme_ipsum_rc(base_family = "Hind"))
18+
19+
df <- targets::tar_read(clean_data)
20+
wb_countries <- targets::tar_read(wb_countries)
21+
22+
custom_blue <- "#3792BD"
23+
```
24+
25+
26+
# Gender (X84)
27+
28+
```{r}
29+
df %>% make_table(X84, label = "Gender")
30+
```
31+
# Academic role (X85)
32+
```{r}
33+
make_table(df, X85, label = "Academic role")
34+
```
35+
36+
Merge junior roles
37+
38+
```{r}
39+
df %>%
40+
mutate(X85 = case_when(str_detect(X85, "Post-doc") ~ "Prae/Post-doc",
41+
str_detect(X85, "Doctoral") ~ "Prae/Post-doc",
42+
TRUE ~ X85)) %>%
43+
make_table(X85, label = "Academic role")
44+
```
45+
46+
# Year of first academic publication (X87)
47+
```{r academic-age}
48+
df %>%
49+
# fix mis-typed input
50+
mutate(X87 = case_when(X87 == 19999 ~ 1999,
51+
X87 == 84 ~ 1984,
52+
TRUE ~ X87)) %>%
53+
ggplot(aes(X87)) +
54+
geom_histogram(binwidth = 2, fill = custom_blue) +
55+
labs(x = "Year of first publication", y = NULL)
56+
```
57+
58+
# Type of institution (X88 + X89)
59+
Q: "How would you characterise your institution?"
60+
```{r}
61+
df %>%
62+
make_table(X88)
63+
```
64+
65+
Q: "How would you characterise your institution?"
66+
67+
```{r}
68+
df %>%
69+
count(X89) %>%
70+
drop_na() %>%
71+
knitr::kable()
72+
```
73+
# Disciplines (X90 + X91)
74+
```{r}
75+
df %>%
76+
make_table(X90)
77+
```
78+
79+
```{r}
80+
df %>%
81+
count(X91) %>%
82+
drop_na() %>%
83+
knitr::kable()
84+
```
85+
86+
87+
Disciplines were manually grouped using the research areas from the Web of Science:
88+
https://images.webofknowledge.com/images/help/WOS/hp_research_areas_easca.html
89+
90+
91+
```{r}
92+
df %>%
93+
drop_na(disciplines_recoded_wos) %>% # there is one missing case
94+
make_table(disciplines_recoded_wos)
95+
```
96+
97+
```{r disciplines}
98+
plot_bar(df, disciplines_recoded_wos, nudge_y = .01) +
99+
labs(caption = "n = 197") # NOTE(review): n is hard-coded; confirm it matches the rows plot_bar() actually draws
100+
```
101+
102+
# Type of contract
103+
```{r}
104+
# X15 = Are you on a limited-term contract?
105+
df %>% make_table(X15)
106+
```
107+
```{r}
108+
df %>%
109+
filter(X15 == "Other") %>%
110+
select(X16)
111+
# one of the "others" is technically on a permanent contract
112+
```
113+
114+
```{r}
115+
# Count respondents answering "No" to X15 (limited-term contract?).
# NOTE(review): the + 1 presumably adds the "Other" respondent who is
# technically on a permanent contract -- confirm against the X16 answers.
total_unlimited <- sum(df$X15 == "No", na.rm = TRUE) + 1
116+
share <- total_unlimited/nrow(df)
117+
118+
glue::glue("Number and share of researchers on unlimited contract:
119+
{total_unlimited} ({scales::percent(share, .1)})")
120+
```
121+
122+
123+
# Country
124+
```{r}
125+
# checking for others
126+
stopifnot(identical(nrow(filter(df, X12 == "Other")), 0L))
127+
128+
# n for country
129+
nrow(df)
130+
131+
# inspect country
132+
df %>% make_table(X12, label = "Country")
133+
```
134+
135+
```{r}
136+
# number of countries
137+
df %>%
138+
summarise(n_countries = n_distinct(X12))
139+
```
140+
141+
142+
```{r}
143+
# lumping together
144+
country <- df %>%
145+
mutate(country_lumped = fct_lump_min(X12, min = 4)) %>%
146+
select(X12, country_lumped)
147+
```
148+
149+
```{r country, fig.width=8, fig.height=5}
150+
country %>%
151+
count(country_lumped) %>%
152+
mutate(prop = n / sum(n),
153+
labels = scales::percent(prop, .1)) %>%
154+
mutate(country_ordered = fct_reorder(country_lumped, n, .fun = max,
155+
.desc = TRUE) %>%
156+
fct_relevel("Other", after = Inf)) %>%
157+
ggplot(aes(country_ordered, prop)) +
158+
geom_text(aes(label = labels), nudge_y = .01, size = 3.8, family = "Hind") +
159+
geom_col(width = .7, fill = custom_blue) +
160+
# geom_chicklet(width = .8, radius = unit(7, "pt")) +
161+
scale_x_discrete(guide = guide_axis(angle = 45)) +
162+
scale_y_continuous(labels = scales::percent) +
163+
labs(x = NULL, y = NULL) +
164+
hrbrthemes::theme_ipsum_rc(base_family = "Hind", grid = "Y")
165+
```
166+
167+
Alternative with dotplot
168+
169+
```{r country-dotplot, fig.height=5, fig.width=7}
170+
plot_bar(country, country_lumped, nudge_y = .005, last_val = "Other")
171+
```
172+
173+
174+
Further classify countries per World Bank categories. Categories from:
175+
https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups
176+
177+
```{r}
178+
# computations were moved higher up the pipeline
179+
```
180+
181+
```{r country-grouped-percentage, fig.width=6, fig.height=5}
182+
df %>%
183+
count(Region) %>%
184+
mutate(prop = n / sum(n),
185+
labels = scales::percent(prop, .1)) %>%
186+
mutate(country_ordered = fct_reorder(Region, n, .fun = max,
187+
.desc = TRUE)) %>%
188+
ggplot(aes(country_ordered, prop)) +
189+
geom_text(aes(label = labels), nudge_y = .03, size = 3.8, family = "Hind") +
190+
geom_col(width = .7, fill = custom_blue) +
191+
# geom_chicklet(width = .8, radius = unit(7, "pt")) +
192+
scale_x_discrete(guide = guide_axis(angle = 45)) +
193+
scale_y_continuous(labels = scales::percent) +
194+
labs(x = NULL, y = NULL) +
195+
hrbrthemes::theme_ipsum_rc(base_family = "Hind", grid = "Y")
196+
```
197+
198+
alternative with n
199+
```{r country-grouped-n, fig.width=6, fig.height=5}
200+
df %>%
201+
count(Region) %>%
202+
mutate(prop = n / sum(n),
203+
labels = n) %>%
204+
mutate(country_ordered = fct_reorder(Region, n, .fun = max,
205+
.desc = TRUE)) %>%
206+
ggplot(aes(country_ordered, prop)) +
207+
geom_text(aes(label = labels), nudge_y = .03, size = 3.8, family = "Hind") +
208+
geom_col(width = .7, fill = custom_blue) +
209+
# geom_chicklet(width = .8, radius = unit(7, "pt")) +
210+
scale_x_discrete(guide = guide_axis(angle = 45)) +
211+
scale_y_continuous(labels = scales::percent) +
212+
labs(x = NULL, y = NULL) +
213+
hrbrthemes::theme_ipsum_rc(base_family = "Hind", grid = "Yy")
214+
```
215+
216+
```{r country-grouped-lollipop}
217+
plot_bar(df, Region)
218+
```
219+

analysis-notebooks/01-demography.html

+1,303
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)