@@ -26,102 +26,53 @@ required_packages <- c("tidyverse",
26
26
" fracdiff" ,
27
27
" urca" ,
28
28
" feasts" ,
29
- " wrapR " ,
30
- " conflicted "
29
+ " conflicted " ,
30
+ " here "
31
31
)
32
32
names(required_packages ) <- required_packages
33
33
lapply(required_packages , load_package )
34
34
conflicts_prefer(dplyr :: filter )
35
- # devtools::install_github("bcgov/wrapR")
36
35
37
36
# constants---------------
38
37
# ma_months <- 3 #how many months to use for smoothing the data
39
38
accuracy_large <- 100 # levels rounded to nearest hundred
40
39
accuracy_small <- .1 # percentages rounded to nearest tenth
41
40
42
- # Start by creating a mapping file from naics to various levels of aggregation----------------
43
- # input file mapping.xlsx uses leading spaces to indicate hierarchy....
44
- human_mapping <- read_excel(here :: here(" data" , " mapping.xlsx" ), trim_ws = FALSE )
45
-
46
- raw_mapping <- human_mapping %> %
47
- janitor :: clean_names() %> %
48
- mutate(
49
- spaces = str_count(industry , " \\ G " ),
50
- agg = case_when(
51
- spaces == 0 ~ " high" ,
52
- spaces %in% 2 : 4 ~ " medium" ,
53
- spaces %in% 5 : 6 ~ " low"
54
- ),
55
- industry = trimws(industry )
56
- )
57
- # relationship between each industry and the three levels of aggregation (used for excel layout)------------
58
- agg <- raw_mapping %> %
59
- select(- naics ) %> %
60
- mutate(
61
- high = if_else(agg == " high" , industry , NA_character_ ),
62
- medium = if_else(agg %in% c(" high" , " medium" ), industry , NA_character_ ),
63
- low = if_else(agg == " low" , industry , NA_character_ )
64
- ) %> %
65
- fill(high , .direction = " down" ) %> %
66
- fill(medium , .direction = " down" ) %> %
67
- select(industry , high , medium , low )
41
+ mapping <- read_excel(here(" data" ," industry_mapping_2025.xlsx" ))| >
42
+ select(naics_5 , contains(" industry_profile" ))
68
43
69
- # write_csv(agg, here::here("temp","layout.csv"))
44
+ agg <- mapping | >
45
+ select(- naics_5 )| >
46
+ pivot_longer(cols = everything(), names_to = " agg_level" , values_to = " industry" )| >
47
+ mutate(agg_level = word(agg_level , - 1 , sep = " _" ))| >
48
+ na.omit()| > # could be a mistake
49
+ distinct()# could be a mistake
70
50
51
+ formatting <- agg | >
52
+ pivot_wider(names_from = agg_level , values_from = agg_level )| >
53
+ mutate(industry = factor (industry , levels = industry , ordered = TRUE ))
71
54
72
- # get the naics for the lowest level of aggregation---------------
73
- low <- raw_mapping %> %
74
- filter(agg == " low" ) %> %
75
- select(low = industry ,
76
- naics ) %> %
77
- group_by(low ) %> %
78
- nest()%> %
79
- mutate(
80
- data = map(data , separate_naics ),
81
- data = map(data , fill_wrapper )
82
- ) %> %
83
- unnest(data )%> %
84
- unnest(naics )
85
-
86
- # get the naics for the medium level of aggregation---------------
87
- medium <- raw_mapping %> %
88
- filter(agg == " medium" ) %> %
89
- select(medium = industry , naics ) %> %
90
- group_by(medium ) %> %
91
- nest() %> %
92
- mutate(
93
- data = map(data , separate_naics ),
94
- data = map(data , fill_wrapper )
95
- ) %> %
96
- unnest(data ) %> %
97
- unnest(naics )
98
-
99
- # get the naics for the high level of aggregation---------------
100
- high <- raw_mapping %> %
101
- filter(agg == " high" ) %> %
102
- select(high = industry ,
103
- naics ) %> %
104
- group_by(high ) %> %
105
- nest() %> %
106
- mutate(
107
- data = map(data , separate_naics ),
108
- data = map(data , fill_wrapper )
109
- ) %> %
110
- unnest(data ) %> %
111
- unnest(naics )
112
- # join by naics to get the mapping file from naics to the 3 levels of aggregation.
113
- mapping <- high %> %
114
- full_join(medium , by = " naics" )%> %
115
- full_join(low , by = " naics" )%> %
116
- select(naics , everything())
55
+ high_parents <- mapping | >
56
+ select(parent = industry_profile_high , industry = industry_profile_high )| >
57
+ distinct()
58
+
59
+ medium_parents <- mapping | >
60
+ select(parent = industry_profile_high , industry = industry_profile_medium )| >
61
+ distinct()
117
62
118
- # write_csv(mapping, here::here("temp","mapping.csv"))
63
+ low_parents <- mapping | >
64
+ select(parent = industry_profile_high , industry = industry_profile_low )| >
65
+ distinct()
66
+
67
+ parents <- bind_rows(high_parents , medium_parents , low_parents )| >
68
+ distinct()
119
69
120
70
# read in the data and join with mapping file to get aggregation info -------------------
121
71
ftpt <- read_naics(" ftptemp4digNAICS" , ftpt )%> %
122
- inner_join(mapping , by = " naics" )
72
+ inner_join(mapping , by = " naics_5" )
73
+
123
74
status <- read_naics(" lfsstat4digNAICS" , lf_stat )%> %
124
- inner_join(mapping , by = " naics " )
75
+ inner_join(mapping , by = " naics_5 " )
125
76
all_data <- bind_rows(ftpt , status )
126
77
127
78
# data is zero padded to the end of the current year... figure out the last month from data.
@@ -142,28 +93,37 @@ current <- format(max(truncated$date), "%b-%y")
142
93
previous_month <- format(max(truncated $ date ) - months(1 ), " %b-%y" )
143
94
previous_year <- format(max(truncated $ date ) - years(1 ), " %b-%y" )
144
95
# aggregate the data to the three levels-------------
145
- high_agg <- agg_level(truncated , high )
146
- medium_agg <- agg_level(truncated , medium )
147
- low_agg <- agg_level(truncated , low )
96
+ high_agg <- agg_level(truncated , industry_profile_high )
97
+ medium_agg <- agg_level(truncated , industry_profile_medium )
98
+ low_agg <- agg_level(truncated , industry_profile_low )
99
+
100
+ # bind the 3 levels of aggregation together
101
+ all_data <- bind_rows(high_agg , medium_agg , low_agg )%> %
102
+ na.omit() %> %
103
+ mutate(data = map(data , add_vars )) # add in labour force and unemployment rate
148
104
149
- # bind the 3 levels of aggregation together then...
150
- smoothed_data <- bind_rows(high_agg , medium_agg , low_agg )%> %
105
+ high_and_medium <- bind_rows(high_agg , medium_agg )%> %
151
106
na.omit() %> %
152
- mutate(# data = map(data, stl_smooth), #thought this might be better than simple moving average...
153
- # data = map(data, trail_ma, months = ma_months), # simple moving average smooth of data
154
- data = map(data , add_vars )) # add in labour force and unemployment rate
107
+ mutate(data = map(data , add_vars )) # add in labour force and unemployment rate
155
108
156
- smoothed_with_mapping <- full_join(smoothed_data , agg , by = c( " agg_level " = " industry " ) )%> %
109
+ all_with_mapping <- full_join(all_data , agg )%> %
157
110
mutate(data = map(data , na.omit ))%> %
158
111
unnest(data )%> %
159
- group_by(agg_level , high , medium , low , name )%> %
112
+ group_by(agg_level , name )%> %
160
113
nest()%> %
161
114
mutate(name = str_to_title(str_replace_all(name , " _" , " " )),
115
+ data = map(data , distinct ),
162
116
data = map(data , pivot_wider , names_from = " date" , values_from = " value" ))
163
117
164
- write_rds(smoothed_with_mapping , here :: here(" temp" ," smoothed_with_mapping .rds" ))
118
+ write_rds(all_with_mapping , here :: here(" temp" ," all_with_mapping .rds" ))
165
119
166
- keep_list <- c(" agg_level" ,
120
+ for_ts_plots <- high_and_medium | >
121
+ left_join(parents )| >
122
+ unnest(data )
123
+
124
+ write_rds(for_ts_plots , here :: here(" temp" ," for_ts_plots.rds" ))
125
+
126
+ keep_list <- c(" industry" ,
167
127
" trend_strength" ,
168
128
" seasonal_strength_year" ,
169
129
" spikiness" ,
@@ -174,30 +134,31 @@ write_rds(smoothed_with_mapping, here::here("temp","smoothed_with_mapping.rds"))
174
134
" coef_hurst"
175
135
)
176
136
177
- for_pca <- smoothed_with_mapping %> %
137
+ for_pca <- all_with_mapping %> %
178
138
unnest(data )%> %
179
- pivot_longer(cols = - c(agg_level , name ,high ,medium ,low ), names_to = " date" , values_to = " value" )%> %
180
- filter(agg_level == high )%> %
139
+ pivot_longer(cols = - c(agg_level , name , industry ), names_to = " date" , values_to = " value" )%> %
140
+ filter(agg_level == " high" )%> %
141
+ ungroup()| >
142
+ select(- agg_level )| >
181
143
ungroup()%> %
182
- select(- high , - medium , - low )%> %
183
144
mutate(date = tsibble :: yearmonth(date ))%> %
184
145
group_by(name )%> %
185
146
nest()%> %
186
- mutate(data = map(data , tsibble :: tsibble , key = agg_level , index = date ),
147
+ mutate(data = map(data , tsibble :: tsibble , key = industry , index = date ),
187
148
features = map(data , function (tsbbl ) tsbbl %> % features(value , feature_set(pkgs = " feasts" ))),
188
149
features = map(features , select , all_of(keep_list )),
189
- features = map(features , column_to_rownames , var = " agg_level " ),
150
+ features = map(features , column_to_rownames , var = " industry " ),
190
151
features = map(features , fix_column_names ),
191
152
pcs = map(features , prcomp , scale = TRUE )
192
153
)
193
154
194
155
write_rds(for_pca , here :: here(" temp" ," for_pca.rds" ))
195
156
196
- no_format <- smoothed_data %> %
197
- mutate(current = map(data , get_smoothed , 0 ), # get current value of smoothed data
198
- last_month = map(data , get_smoothed , 1 ),
199
- last_year = map(data , get_smoothed , 12 ),
200
- current_ytd_ave = map(data , ytd_ave , 0 ), # year to date average of smoothed data
157
+ no_format <- all_data %> %
158
+ mutate(current = map(data , get_values , 0 ), # get current value
159
+ last_month = map(data , get_values , 1 ),
160
+ last_year = map(data , get_values , 12 ),
161
+ current_ytd_ave = map(data , ytd_ave , 0 ), # year to date average
201
162
previous_ytd_ave = map(data , ytd_ave , 1 )
202
163
)%> %
203
164
select(- data )%> %
@@ -207,7 +168,7 @@ no_format <- smoothed_data %>%
207
168
data = map2(data , current_ytd_ave , full_join , by = " name" ),
208
169
data = map2(data , previous_ytd_ave , full_join , by = " name" )
209
170
) %> %
210
- select(agg_level , data ) %> %
171
+ select(industry , data ) %> %
211
172
unnest(data )%> %
212
173
dplyr :: rename(
213
174
current = value.x , # fix the names messed up by joins above
@@ -225,12 +186,13 @@ no_format <- smoothed_data %>%
225
186
percent_change_ytd = level_change_ytd / previous_ytd_average )
226
187
227
188
228
- full_join (no_format , agg , by = c( " agg_level " = " industry " ) )%> %
189
+ for_plots <- left_join (no_format , agg )%> %
229
190
ungroup()%> %
230
- group_by(agg_level , high , medium , low , name )%> %
191
+ group_by(industry , name )%> %
231
192
nest()%> %
232
- mutate(name = str_to_title(str_replace_all(name , " _" , " " )))%> %
233
- write_rds(here :: here(" temp" ," for_plots.rds" ))
193
+ mutate(name = str_to_title(str_replace_all(name , " _" , " " )))
194
+
195
+ write_rds(for_plots , here :: here(" temp" ," for_plots.rds" ))
234
196
235
197
# formatting the output for excel
236
198
with_formatting <- no_format %> %
@@ -261,32 +223,25 @@ with_formatting <- no_format%>%
261
223
previous_ytd_average = if_else(name == " unemployment_rate" ,
262
224
percent(previous_ytd_average , accuracy = accuracy_small ),
263
225
comma(previous_ytd_average , accuracy = accuracy_large ))
264
- ) %> %
265
- left_join(agg , by = c(" agg_level" = " industry" ))%> % # agg is the mapping from industry to the 3 levels of aggregation
266
- mutate(medium = ifelse(agg_level == high , paste0(" 1" , medium ), medium )) %> %# allows high level industries to be at top of sorted medium industries.
267
- group_by(high ) %> %
268
- nest() %> %
269
- mutate(
270
- data = map(data , arrange , name , medium ), # arranges data by medium level of aggregation (except high level at top because of pasted 1)
271
- data = map(data , unfill_var , name ), # replaces fixed values with blanks. (excel formatting)
272
- data = map(data , indent_industry ), # indents industry to indicate hierarchy.
273
- data = map(data , select , - medium , - low ), # gets rid of aggregation levels
274
- data = map(data , clean_up ) # assigns the desired column names and puts in the correct order
275
- ) %> %
276
- filter(! is.na(high ))
226
+ )| >
227
+ inner_join(formatting )| >
228
+ mutate(industry = factor (industry , levels = levels(formatting $ industry ), ordered = TRUE ))| >
229
+ arrange(industry )| >
230
+ left_join(parents )| >
231
+ group_by(parent )| >
232
+ nest()| >
233
+ mutate(data = map(data , apply_formatting ),
234
+ data = map(data , clean_up ))
277
235
278
236
write_rds(with_formatting , here :: here(" temp" ," for_tables.rds" ))
279
237
280
238
# write to excel-----------------
281
239
wb <- loadWorkbook(here :: here(" data" , " template.xlsx" )) # get the desired sheet header
282
- createSheet(wb , name = " Mapping for humans" )
283
- setColumnWidth(wb , sheet = " Mapping for humans" , column = 1 : 2 , width = c(24000 ,7000 ))
284
- writeWorksheet(wb , human_mapping , sheet = " Mapping for humans" )
285
- createSheet(wb , name = " Mapping for machines" )
286
- setColumnWidth(wb , sheet = " Mapping for machines" , column = 2 : 4 , width = c(16000 , 24000 , 16000 ))
287
- writeWorksheet(wb , mapping , sheet = " Mapping for machines" )
240
+ createSheet(wb , name = " Mapping" )
241
+ setColumnWidth(wb , sheet = " Mapping" , column = 2 : 4 , width = c(16000 , 24000 , 16000 ))
242
+ writeWorksheet(wb , mapping , sheet = " Mapping" )
288
243
with_formatting %> %
289
- mutate(walk2(data , high , write_sheet )) # replicates the template sheet and writes data to each sheet
244
+ mutate(walk2(data , parent , write_sheet )) # replicates the template sheet and writes data to each sheet
290
245
removeSheet(wb , " layout" ) # get rid of the template
291
246
saveWorkbook(wb , here :: here(" out" , " current" , paste0(" LFS_industry_profiles" ,lubridate :: today()," .xlsx" )))
292
247
tictoc :: toc()
0 commit comments