Skip to content

Commit 9caa744

Browse files
authored
Bug 1922986 - Create script that exports BMO data as JSON suitable for import into a BigQuery instance in GCP
1 parent 35b9324 commit 9caa744

File tree

11 files changed

+1391
-2
lines changed

11 files changed

+1391
-2
lines changed

.circleci/config.yml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -273,7 +273,7 @@ jobs:
273273
name: run bmo specific tests
274274
command: |
275275
[[ -f build_info/only_version_changed.txt ]] && exit 0
276-
docker-compose -f docker-compose.test.yml run --build bmo.test test_bmo -q -f t/bmo/*.t
276+
docker-compose -f docker-compose.test.yml run --build bmo.test test_bmo -q -f t/bmo/*.t extensions/*/t/bmo/*.t
277277
- *store_log
278278

279279
workflows:

.github/workflows/ci.yml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ jobs:
3535
- name: Build Docker test images
3636
run: docker-compose -f docker-compose.test.yml build
3737
- name: Run bmo specific tests
38-
run: docker-compose -f docker-compose.test.yml run -e CI=1 bmo.test test_bmo -q -f t/bmo/*.t
38+
run: docker-compose -f docker-compose.test.yml run -e CI=1 bmo.test test_bmo -q -f t/bmo/*.t extensions/*/t/bmo/*.t
3939

4040
test_selenium_1:
4141
runs-on: ubuntu-latest

conf/checksetup_answers.txt

+6
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,12 @@ $answer{'sitemapindex_google_host'} = 'gcs';
6060
$answer{'sitemapindex_google_bucket'} = 'sitemapindex';
6161
$answer{'sitemapindex_google_service_account'} = 'test';
6262

63+
$answer{'bmo_etl_enabled'} = 1;
64+
$answer{'bmo_etl_base_url'} = 'http://bq:9050';
65+
$answer{'bmo_etl_service_account'} = 'test';
66+
$answer{'bmo_etl_project_id'} = 'test';
67+
$answer{'bmo_etl_dataset_id'} = 'bugzilla';
68+
6369
$answer{'duo_uri'} = 'http://localhost:8001';
6470
$answer{'duo_client_id'} = '6rZ3KnrL04uyGjLd8foO';
6571
$answer{'duo_client_secret'} = '3vg6cm0Gj0DpC6ZJACXdZ1NrVRi1AhkwjfXnlFaJ';

docker-compose.test.yml

+11
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ services:
3838
- memcached
3939
- s3
4040
- gcs
41+
- bq
4142

4243
externalapi.test:
4344
build: *build_bmo
@@ -70,3 +71,13 @@ services:
7071
- ./docker/gcs/attachments:/data/attachments
7172
- ./docker/gcs/sitemapindex:/data/sitemapindex
7273
- ./docker/gcs/mining:/data/mining
74+
75+
bq:
76+
build:
77+
context: ./docker/bigquery
78+
dockerfile: Dockerfile
79+
ports:
80+
- 9050:9050
81+
working_dir: /work
82+
command: |
83+
--project=test --data-from-yaml=/data.yaml

docker-compose.yml

+15
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ services:
4343
- s3
4444
- gcs
4545
- externalapi.test
46+
- bq
4647
ports:
4748
- 8000:8000
4849

@@ -143,10 +144,24 @@ services:
143144
ports:
144145
- 8001:8001
145146

147+
bq:
148+
platform: linux/x86_64
149+
build:
150+
context: ./docker/bigquery
151+
dockerfile: Dockerfile
152+
ports:
153+
- 9050:9050
154+
volumes:
155+
- bmo-bq-data:/work
156+
working_dir: /work
157+
command: |
158+
--project=test --data-from-yaml=/data.yaml --log-level=debug
159+
146160
volumes:
147161
bmo-mysql-db:
148162
bmo-data-dir:
149163
bmo-s3-data:
150164
bmo-gcs-attachments:
151165
bmo-gcs-sitemapindex:
152166
bmo-gcs-mining:
167+
bmo-bq-data:

docker/bigquery/Dockerfile

+3
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
FROM ghcr.io/goccy/bigquery-emulator:0.6.5
2+
3+
COPY data.yaml /data.yaml

docker/bigquery/data.yaml

+185
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,185 @@
1+
projects:
2+
- id: test
3+
datasets:
4+
- id: bugzilla
5+
tables:
6+
- id: bugs
7+
columns:
8+
- name: id
9+
type: INTEGER
10+
- name: assignee_id
11+
type: INTEGER
12+
- name: url
13+
type: STRING
14+
- name: severity
15+
type: STRING
16+
- name: status
17+
type: STRING
18+
- name: type
19+
type: STRING
20+
- name: crash_signature
21+
type: STRING
22+
- name: component
23+
type: STRING
24+
- name: creation_ts
25+
type: TIMESTAMP
26+
- name: updated_ts
27+
type: TIMESTAMP
28+
- name: op_sys
29+
type: STRING
30+
- name: priority
31+
type: STRING
32+
- name: product
33+
type: STRING
34+
- name: platform
35+
type: STRING
36+
- name: reporter_id
37+
type: INTEGER
38+
- name: resolution
39+
type: STRING
40+
- name: summary
41+
type: STRING
42+
- name: whiteboard
43+
type: STRING
44+
- name: milestone
45+
type: STRING
46+
- name: version
47+
type: STRING
48+
- name: team_name
49+
type: STRING
50+
- name: group
51+
type: STRING
52+
- name: classification
53+
type: STRING
54+
- name: is_public
55+
type: BOOLEAN
56+
- name: comment_count
57+
type: INTEGER
58+
- name: cc_count
59+
type: INTEGER
60+
- name: vote_count
61+
type: INTEGER
62+
- name: snapshot_date
63+
type: DATE
64+
- id: attachments
65+
columns:
66+
- name: id
67+
type: INT64
68+
- name: bug_id
69+
type: INT64
70+
- name: creation_ts
71+
type: TIMESTAMP
72+
- name: description
73+
type: STRING
74+
- name: filename
75+
type: STRING
76+
- name: is_obsolete
77+
type: BOOL
78+
- name: content_type
79+
type: STRING
80+
- name: updated_ts
81+
type: TIMESTAMP
82+
- name: submitter_id
83+
type: INT64
84+
- name: snapshot_date
85+
type: DATE
86+
- id: flags
87+
columns:
88+
- name: attachment_id
89+
type: INT64
90+
- name: bug_id
91+
type: INT64
92+
- name: creation_ts
93+
type: TIMESTAMP
94+
- name: updated_ts
95+
type: TIMESTAMP
96+
- name: requestee_id
97+
type: INT64
98+
- name: setter_id
99+
type: INT64
100+
- name: name
101+
type: STRING
102+
- name: value
103+
type: STRING
104+
- name: snapshot_date
105+
type: DATE
106+
- id: tracking_flags
107+
columns:
108+
- name: bug_id
109+
type: INT64
110+
- name: name
111+
type: STRING
112+
- name: value
113+
type: STRING
114+
- name: snapshot_date
115+
type: DATE
116+
- id: keywords
117+
columns:
118+
- name: bug_id
119+
type: INT64
120+
- name: keyword
121+
type: STRING
122+
- name: snapshot_date
123+
type: DATE
124+
- id: see_also
125+
columns:
126+
- name: bug_id
127+
type: INT64
128+
- name: url
129+
type: STRING
130+
- name: snapshot_date
131+
type: DATE
132+
- id: bug_mentors
133+
columns:
134+
- name: bug_id
135+
type: INT64
136+
- name: user_id
137+
type: INT64
138+
- name: snapshot_date
139+
type: DATE
140+
- id: bug_dependencies
141+
columns:
142+
- name: bug_id
143+
type: INT64
144+
- name: depends_on_id
145+
type: INT64
146+
- name: snapshot_date
147+
type: DATE
148+
- id: bug_regressions
149+
columns:
150+
- name: bug_id
151+
type: INT64
152+
- name: regresses_id
153+
type: INT64
154+
- name: snapshot_date
155+
type: DATE
156+
- id: bug_duplicates
157+
columns:
158+
- name: bug_id
159+
type: INT64
160+
- name: duplicate_of_id
161+
type: INT64
162+
- name: snapshot_date
163+
type: DATE
164+
- id: users
165+
columns:
166+
- name: id
167+
type: INT64
168+
- name: last_seen
169+
type: TIMESTAMP
170+
- name: email
171+
type: STRING
172+
- name: nick
173+
type: STRING
174+
- name: name
175+
type: STRING
176+
- name: is_staff
177+
type: BOOL
178+
- name: is_trusted
179+
type: BOOL
180+
- name: ldap_email
181+
type: STRING
182+
- name: is_new
183+
type: BOOL
184+
- name: snapshot_date
185+
type: DATE

extensions/BMO/Extension.pm

+42
Original file line number | Diff line number | Diff line change
@@ -1384,6 +1384,21 @@ sub db_schema_abstract_schema {
13841384
],
13851385
INDEXES => [job_last_run_name_idx => {FIELDS => ['name'], TYPE => 'UNIQUE',},],
13861386
};
1387+
$args->{schema}->{bmo_etl_cache} = {
1388+
FIELDS => [
1389+
id => {TYPE => 'INT3', NOTNULL => 1,},
1390+
snapshot_date => {TYPE => 'DATETIME', NOTNULL => 1,},
1391+
table_name => {TYPE => 'VARCHAR(100)', NOTNULL => 1,},
1392+
data => {TYPE => 'LONGBLOB', NOTNULL => 1,},
1393+
],
1394+
INDEXES =>
1395+
[bmo_etl_cache_idx => {FIELDS => ['id', 'snapshot_date', 'table_name']}],
1396+
};
1397+
$args->{schema}->{bmo_etl_locked} = {
1398+
FIELDS => [
1399+
value => {TYPE => 'VARCHAR(20)', NOTNULL => 1,},
1400+
],
1401+
};
13871402
}
13881403

13891404
sub install_update_db {
@@ -2588,6 +2603,33 @@ sub config_modify_panels {
25882603
name => 'enable_triaged_keyword',
25892604
type => 'b',
25902605
};
2606+
push @{$args->{panels}->{reports}->{params}},
2607+
{
2608+
name => 'bmo_etl_enabled',
2609+
type => 'b',
2610+
default => 0,
2611+
};
2612+
push @{$args->{panels}->{reports}->{params}},
2613+
{
2614+
name => 'bmo_etl_base_url',
2615+
type => 't',
2616+
};
2617+
push @{$args->{panels}->{reports}->{params}},
2618+
{
2619+
name => 'bmo_etl_service_account',
2620+
type => 't',
2621+
};
2622+
push @{$args->{panels}->{reports}->{params}},
2623+
{
2624+
name => 'bmo_etl_project_id',
2625+
type => 't',
2626+
};
2627+
push @{$args->{panels}->{reports}->{params}},
2628+
{
2629+
name => 'bmo_etl_dataset_id',
2630+
type => 't',
2631+
};
2632+
25912633
}
25922634

25932635
sub comment_after_add_tag {

0 commit comments

Comments
 (0)