Skip to content

Commit ebc13e3

Browse files
committed
naics mapping changed...
1 parent 03e20b5 commit ebc13e3

File tree

3 files changed

+100
-154
lines changed

3 files changed

+100
-154
lines changed

01_process_data.R

+81-126
Original file line numberDiff line numberDiff line change
@@ -26,102 +26,53 @@ required_packages <- c("tidyverse",
2626
"fracdiff",
2727
"urca",
2828
"feasts",
29-
"wrapR",
30-
"conflicted"
29+
"conflicted",
30+
"here"
3131
)
3232
names(required_packages) <- required_packages
3333
lapply(required_packages, load_package)
3434
conflicts_prefer(dplyr::filter)
35-
#devtools::install_github("bcgov/wrapR")
3635

3736
# constants---------------
3837
#ma_months <- 3 #how many months to use for smoothing the data
3938
accuracy_large <- 100 #levels rounded to nearest hundred
4039
accuracy_small <- .1 #percentages rounded to nearest tenth
4140

42-
# Start by creating a mapping file from naics to various levels of aggregation----------------
43-
# input file mapping.xlsx uses leading spaces to indicate hierarchy....
44-
human_mapping <- read_excel(here::here("data", "mapping.xlsx"), trim_ws = FALSE)
45-
46-
raw_mapping <- human_mapping%>%
47-
janitor::clean_names() %>%
48-
mutate(
49-
spaces = str_count(industry, "\\G "),
50-
agg = case_when(
51-
spaces == 0 ~ "high",
52-
spaces %in% 2:4 ~ "medium",
53-
spaces %in% 5:6 ~ "low"
54-
),
55-
industry = trimws(industry)
56-
)
57-
#relationship between each industry and the three levels of aggregation (used for excel layout)------------
58-
agg <- raw_mapping %>%
59-
select(-naics) %>%
60-
mutate(
61-
high = if_else(agg == "high", industry, NA_character_),
62-
medium = if_else(agg %in% c("high", "medium"), industry, NA_character_),
63-
low = if_else(agg == "low", industry, NA_character_)
64-
) %>%
65-
fill(high, .direction = "down") %>%
66-
fill(medium, .direction = "down") %>%
67-
select(industry, high, medium, low)
41+
mapping <- read_excel(here("data","industry_mapping_2025.xlsx"))|>
42+
select(naics_5, contains("industry_profile"))
6843

69-
#write_csv(agg, here::here("temp","layout.csv"))
44+
agg <- mapping|>
45+
select(-naics_5)|>
46+
pivot_longer(cols=everything(), names_to = "agg_level", values_to = "industry")|>
47+
mutate(agg_level=word(agg_level, -1, sep = "_"))|>
48+
na.omit()|>#could be a mistake
49+
distinct()#could be a mistake
7050

51+
formatting <- agg|>
52+
pivot_wider(names_from = agg_level, values_from = agg_level)|>
53+
mutate(industry=factor(industry, levels = industry, ordered = TRUE))
7154

72-
# get the naics for the lowest level of aggregation---------------
73-
low <- raw_mapping %>%
74-
filter(agg == "low") %>%
75-
select(low = industry,
76-
naics) %>%
77-
group_by(low) %>%
78-
nest()%>%
79-
mutate(
80-
data = map(data, separate_naics),
81-
data = map(data, fill_wrapper)
82-
) %>%
83-
unnest(data)%>%
84-
unnest(naics)
85-
86-
# get the naics for the medium level of aggregation---------------
87-
medium <- raw_mapping %>%
88-
filter(agg == "medium") %>%
89-
select(medium = industry, naics) %>%
90-
group_by(medium) %>%
91-
nest() %>%
92-
mutate(
93-
data = map(data, separate_naics),
94-
data = map(data, fill_wrapper)
95-
) %>%
96-
unnest(data) %>%
97-
unnest(naics)
98-
99-
# get the naics for the high level of aggregation---------------
100-
high <- raw_mapping %>%
101-
filter(agg == "high") %>%
102-
select(high = industry,
103-
naics) %>%
104-
group_by(high) %>%
105-
nest() %>%
106-
mutate(
107-
data = map(data, separate_naics),
108-
data = map(data, fill_wrapper)
109-
) %>%
110-
unnest(data) %>%
111-
unnest(naics)
112-
# join by naics to get the mapping file from naics to the 3 levels of aggregation.
113-
mapping <- high%>%
114-
full_join(medium, by="naics")%>%
115-
full_join(low, by= "naics")%>%
116-
select(naics, everything())
55+
high_parents <- mapping|>
56+
select(parent=industry_profile_high, industry=industry_profile_high)|>
57+
distinct()
58+
59+
medium_parents <- mapping|>
60+
select(parent=industry_profile_high, industry=industry_profile_medium)|>
61+
distinct()
11762

118-
#write_csv(mapping, here::here("temp","mapping.csv"))
63+
low_parents <- mapping|>
64+
select(parent=industry_profile_high, industry=industry_profile_low)|>
65+
distinct()
66+
67+
parents <- bind_rows(high_parents, medium_parents, low_parents)|>
68+
distinct()
11969

12070
# read in the data and join with mapping file to get aggregation info -------------------
12171
ftpt <- read_naics("ftptemp4digNAICS", ftpt)%>%
122-
inner_join(mapping, by = "naics")
72+
inner_join(mapping, by = "naics_5")
73+
12374
status <- read_naics("lfsstat4digNAICS", lf_stat)%>%
124-
inner_join(mapping, by = "naics")
75+
inner_join(mapping, by = "naics_5")
12576
all_data <- bind_rows(ftpt, status)
12677

12778
#data is zero padded to the end of the current year... figure out the last month from data.
@@ -142,28 +93,37 @@ current <- format(max(truncated$date), "%b-%y")
14293
previous_month <- format(max(truncated$date) - months(1), "%b-%y")
14394
previous_year <- format(max(truncated$date) - years(1), "%b-%y")
14495
# aggregate the data to the three levels-------------
145-
high_agg <- agg_level(truncated, high)
146-
medium_agg <- agg_level(truncated, medium)
147-
low_agg <- agg_level(truncated, low)
96+
high_agg <- agg_level(truncated, industry_profile_high)
97+
medium_agg <- agg_level(truncated, industry_profile_medium)
98+
low_agg <- agg_level(truncated, industry_profile_low)
99+
100+
# bind the 3 levels of aggregation together
101+
all_data <- bind_rows(high_agg, medium_agg, low_agg)%>%
102+
na.omit() %>%
103+
mutate(data = map(data, add_vars)) #add in labour force and unemployment rate
148104

149-
# bind the 3 levels of aggregation together then...
150-
smoothed_data <- bind_rows(high_agg, medium_agg, low_agg)%>%
105+
high_and_medium <- bind_rows(high_agg, medium_agg)%>%
151106
na.omit() %>%
152-
mutate(#data = map(data, stl_smooth), #thought this might be better than simple moving average...
153-
#data = map(data, trail_ma, months = ma_months), # simple moving average smooth of data
154-
data = map(data, add_vars)) #add in labour force and unemployment rate
107+
mutate(data = map(data, add_vars)) #add in labour force and unemployment rate
155108

156-
smoothed_with_mapping <- full_join(smoothed_data, agg, by=c("agg_level"="industry"))%>%
109+
all_with_mapping <- full_join(all_data, agg)%>%
157110
mutate(data=map(data, na.omit))%>%
158111
unnest(data)%>%
159-
group_by(agg_level, high, medium, low, name)%>%
112+
group_by(agg_level, name)%>%
160113
nest()%>%
161114
mutate(name=str_to_title(str_replace_all(name, "_", " ")),
115+
data=map(data, distinct),
162116
data=map(data, pivot_wider, names_from="date", values_from="value"))
163117

164-
write_rds(smoothed_with_mapping, here::here("temp","smoothed_with_mapping.rds"))
118+
write_rds(all_with_mapping, here::here("temp","all_with_mapping.rds"))
165119

166-
keep_list <- c("agg_level",
120+
for_ts_plots <- high_and_medium|>
121+
left_join(parents)|>
122+
unnest(data)
123+
124+
write_rds(for_ts_plots, here::here("temp","for_ts_plots.rds"))
125+
126+
keep_list <- c("industry",
167127
"trend_strength",
168128
"seasonal_strength_year",
169129
"spikiness",
@@ -174,30 +134,31 @@ write_rds(smoothed_with_mapping, here::here("temp","smoothed_with_mapping.rds"))
174134
"coef_hurst"
175135
)
176136

177-
for_pca <- smoothed_with_mapping %>%
137+
for_pca <- all_with_mapping %>%
178138
unnest(data)%>%
179-
pivot_longer(cols=-c(agg_level, name,high,medium,low), names_to = "date", values_to = "value")%>%
180-
filter(agg_level==high)%>%
139+
pivot_longer(cols=-c(agg_level, name, industry), names_to = "date", values_to = "value")%>%
140+
filter(agg_level=="high")%>%
141+
ungroup()|>
142+
select(-agg_level)|>
181143
ungroup()%>%
182-
select(-high, -medium, -low)%>%
183144
mutate(date=tsibble::yearmonth(date))%>%
184145
group_by(name)%>%
185146
nest()%>%
186-
mutate(data=map(data, tsibble::tsibble, key=agg_level, index=date),
147+
mutate(data=map(data, tsibble::tsibble, key=industry, index=date),
187148
features=map(data, function(tsbbl) tsbbl %>% features(value, feature_set(pkgs = "feasts"))),
188149
features=map(features, select, all_of(keep_list)),
189-
features=map(features, column_to_rownames, var="agg_level"),
150+
features=map(features, column_to_rownames, var="industry"),
190151
features=map(features, fix_column_names),
191152
pcs=map(features, prcomp, scale=TRUE)
192153
)
193154

194155
write_rds(for_pca, here::here("temp","for_pca.rds"))
195156

196-
no_format <- smoothed_data %>%
197-
mutate(current = map(data, get_smoothed, 0), # get current value of smoothed data
198-
last_month = map(data, get_smoothed, 1),
199-
last_year = map(data, get_smoothed, 12),
200-
current_ytd_ave = map(data, ytd_ave, 0), # year to date average of smoothed data
157+
no_format <- all_data %>%
158+
mutate(current = map(data, get_values, 0), # get current value
159+
last_month = map(data, get_values, 1),
160+
last_year = map(data, get_values, 12),
161+
current_ytd_ave = map(data, ytd_ave, 0), # year to date average
201162
previous_ytd_ave = map(data, ytd_ave, 1)
202163
)%>%
203164
select(-data)%>%
@@ -207,7 +168,7 @@ no_format <- smoothed_data %>%
207168
data = map2(data, current_ytd_ave, full_join, by = "name"),
208169
data = map2(data, previous_ytd_ave, full_join, by = "name")
209170
) %>%
210-
select(agg_level, data) %>%
171+
select(industry, data) %>%
211172
unnest(data)%>%
212173
dplyr::rename(
213174
current = value.x, # fix the names messed up by joins above
@@ -225,12 +186,13 @@ no_format <- smoothed_data %>%
225186
percent_change_ytd = level_change_ytd / previous_ytd_average)
226187

227188

228-
full_join(no_format, agg, by=c("agg_level"="industry"))%>%
189+
for_plots <- left_join(no_format, agg)%>%
229190
ungroup()%>%
230-
group_by(agg_level, high, medium, low, name)%>%
191+
group_by(industry, name)%>%
231192
nest()%>%
232-
mutate(name=str_to_title(str_replace_all(name, "_", " ")))%>%
233-
write_rds(here::here("temp","for_plots.rds"))
193+
mutate(name=str_to_title(str_replace_all(name, "_", " ")))
194+
195+
write_rds(for_plots, here::here("temp","for_plots.rds"))
234196

235197
# formatting the output for excel
236198
with_formatting <- no_format%>%
@@ -261,32 +223,25 @@ with_formatting <- no_format%>%
261223
previous_ytd_average = if_else(name == "unemployment_rate",
262224
percent(previous_ytd_average, accuracy = accuracy_small),
263225
comma(previous_ytd_average, accuracy = accuracy_large))
264-
) %>%
265-
left_join(agg, by = c("agg_level" = "industry"))%>% #agg is the mapping from industry to the 3 levels of aggregation
266-
mutate(medium = ifelse(agg_level == high, paste0("1", medium), medium)) %>%# allows high level industries to be at top of sorted medium industries.
267-
group_by(high) %>%
268-
nest() %>%
269-
mutate(
270-
data = map(data, arrange, name, medium), # arranges data by medium level of aggregation (except high level at top because of pasted 1)
271-
data = map(data, unfill_var, name), # replaces fixed values with blanks. (excel formatting)
272-
data = map(data, indent_industry), # indents industry to indicate hierarchy.
273-
data = map(data, select, -medium, -low), # gets rid of aggregation levels
274-
data = map(data, clean_up) # assigns the desired column names and puts in the correct order
275-
) %>%
276-
filter(!is.na(high))
226+
)|>
227+
inner_join(formatting)|>
228+
mutate(industry=factor(industry, levels=levels(formatting$industry), ordered=TRUE))|>
229+
arrange(industry)|>
230+
left_join(parents)|>
231+
group_by(parent)|>
232+
nest()|>
233+
mutate(data=map(data, apply_formatting),
234+
data=map(data, clean_up))
277235

278236
write_rds(with_formatting, here::here("temp","for_tables.rds"))
279237

280238
# write to excel-----------------
281239
wb <- loadWorkbook(here::here("data", "template.xlsx")) # get the desired sheet header
282-
createSheet(wb, name = "Mapping for humans")
283-
setColumnWidth(wb, sheet = "Mapping for humans", column = 1:2, width = c(24000,7000))
284-
writeWorksheet(wb, human_mapping, sheet="Mapping for humans")
285-
createSheet(wb, name = "Mapping for machines")
286-
setColumnWidth(wb, sheet = "Mapping for machines", column = 2:4, width = c(16000, 24000, 16000))
287-
writeWorksheet(wb, mapping, sheet="Mapping for machines")
240+
createSheet(wb, name = "Mapping")
241+
setColumnWidth(wb, sheet = "Mapping", column = 2:4, width = c(16000, 24000, 16000))
242+
writeWorksheet(wb, mapping, sheet="Mapping")
288243
with_formatting%>%
289-
mutate(walk2(data, high, write_sheet)) # replicates the template sheet and writes data to each sheet
244+
mutate(walk2(data, parent, write_sheet)) # replicates the template sheet and writes data to each sheet
290245
removeSheet(wb, "layout") # get rid of the template
291246
saveWorkbook(wb, here::here("out", "current", paste0("LFS_industry_profiles",lubridate::today(),".xlsx")))
292247
tictoc::toc()

02_dashboard.Rmd

+16-22
Original file line numberDiff line numberDiff line change
@@ -19,29 +19,19 @@ dash_required <- c("flexdashboard",
1919
"plotly",
2020
"heatmaply",
2121
"patchwork",
22-
"tidyverse",
22+
"fpp3",
2323
"conflicted")
2424
source(here::here("R","functions.R"))
2525
invisible(lapply(dash_required, load_package))
2626
conflicts_prefer(dplyr::filter)
2727
conflicts_prefer(dplyr::mutate)
2828
conflicts_prefer(plotly::layout)
2929
30-
smoothed_with_mapping <- read_rds(here::here("temp","smoothed_with_mapping.rds"))
31-
industries <- unique(smoothed_with_mapping$high)
30+
all_with_mapping <- read_rds(here::here("temp","all_with_mapping.rds"))
31+
for_ts_plots <- read_rds(here::here("temp","for_ts_plots.rds"))
3232
33-
for_ts_plots <- smoothed_with_mapping%>%
34-
ungroup()%>%
35-
select(agg_level, name, high, low, data)%>%
36-
unnest(data)%>%
37-
pivot_longer(cols=-c(agg_level, name, high, low), names_to = "date", values_to="value")%>%
38-
mutate(date=lubridate::ymd(date))%>%
39-
ungroup()%>%
40-
filter(is.na(low))
41-
shared_ts <- SharedData$new(for_ts_plots)
42-
43-
for_heatmaps <- smoothed_with_mapping%>%
44-
filter(agg_level==high)%>%
33+
for_heatmaps <- all_with_mapping%>%
34+
filter(agg_level=="high")%>%
4535
ungroup()%>%
4636
select(agg_level, name, data)
4737
@@ -51,8 +41,12 @@ for_tables <- read_rds(here::here("temp","for_tables.rds"))%>%
5141
mutate(Characteristic = if_else(Characteristic=="", NA_character_, Characteristic))%>%
5242
fill(Characteristic, .direction = "down")%>%
5343
mutate(Industry= str_replace_all(Industry, " ", "&nbsp"))
44+
5445
Shared_table <- SharedData$new(for_tables)
5546
47+
industries <- unique(for_tables$parent)|>
48+
unique()
49+
5650
for_plots <- read_rds(here::here("temp","for_plots.rds"))%>%
5751
dplyr::rename(Characteristic=name)%>%
5852
tidyr::unnest(data)%>%
@@ -82,7 +76,7 @@ page_titles <- c(c("Title Page",#replace with your page titles
8276

8377
## Row {data-height="600"}
8478

85-
```{r, out.width = "150%"}
79+
```{r, out.width = "100%"}
8680
knitr::include_graphics("psfs.png")#the cover image
8781
```
8882

@@ -114,10 +108,10 @@ knitr::include_graphics("psfs.png")#the cover image
114108

115109
```{r}
116110
filter_select(
117-
id = "high",
111+
id = "parent",
118112
label = "Choose an industry",
119113
sharedData = Shared_table,
120-
group = ~`high`,
114+
group = ~ `parent`,
121115
multiple = FALSE
122116
)
123117
@@ -133,7 +127,7 @@ filter_select(
133127

134128
```{js, echo=FALSE}
135129
<!-- function filter_default(){ -->
136-
<!-- document.getElementById("high").getElementsByClassName("selectized")[0].selectize.setValue("Construction", false) -->
130+
<!-- document.getElementById("parent").getElementsByClassName("selectized")[0].selectize.setValue("Construction", false) -->
137131
<!-- document.getElementById("char").getElementsByClassName("selectized")[0].selectize.setValue("Employed", false) -->
138132
<!-- } -->
139133
<!-- $(document).ready(filter_default); -->
@@ -185,9 +179,9 @@ Shared_table %>%
185179
```{r}
186180
filter_select(
187181
id = "high_plots",
188-
label = "Choose an industry",
182+
label = "Choose an aggregation level",
189183
sharedData = Shared_em,
190-
group = ~`high`,
184+
group = ~`agg_level`,
191185
multiple = FALSE
192186
)
193187
```
@@ -236,7 +230,7 @@ level_change_plot(Shared_ur)
236230

237231
## Inputs {.sidebar}
238232

239-
- Industry selection is on previous page (to ensure correspondence.)
233+
- Aggregation level choice is on previous page (to ensure correspondence.)
240234

241235
### `r page_titles[4]`
242236

0 commit comments

Comments
 (0)